Browse Source

基本完成FastAPI添加直播间;扫描直播间

mrh 2 years ago
parent
commit
54a9414084
16 changed files with 962 additions and 119 deletions
  1. 2 1
      .gitignore
  2. 76 0
      bililive_api.py
  3. 34 0
      client_detection.py
  4. 36 0
      conf/detect_list copy 2.json
  5. 39 0
      conf/detect_list copy.json
  6. 41 0
      conf/detect_list.json
  7. 11 0
      conf/settings.py
  8. 204 0
      detect_list.py
  9. 0 89
      detect_live.py
  10. 222 0
      detection.py
  11. 46 0
      main.py
  12. 50 0
      readme.md
  13. 2 1
      requirements.txt
  14. 137 0
      task.py
  15. 56 25
      test/test.py
  16. 6 3
      test/xpath_test.py

+ 2 - 1
.gitignore

@@ -1,2 +1,3 @@
 output
-__pycache__
+__pycache__
+test

+ 76 - 0
bililive_api.py

@@ -0,0 +1,76 @@
+import requests
+import json
+from loguru import logger
+
+class BiliLiveAPI:
+    def __init__(self, base_url="https://bililive.sv-v.magong.site"):
+        self.base_url = base_url
+
+    def get_info(self):
+        response = requests.get(self.base_url + "/api/info")
+        return response.json()
+
+    def get_lives(self):
+        response = requests.get(self.base_url + "/api/lives")
+        return response.json()
+
+    def get_live_by_id(self, id):
+        response = requests.get(self.base_url + f"/api/lives/{id}")
+        return response.json()
+    
+    '''
+    url
+        - https://live.douyin.com/403624505949
+    return
+        - 正常添加 [{'id': 'ee9b0b1908cfece9fa6c38f7f94cd098', 'live_url': 'https://live.douyin.com/403624505949', 'platform_cn_name': '抖音', 'host_name': '芗里香亲(1号)', 'room_name': '进来看看,你空手出去算我输', 'status': False, 'listening': True, 'recording': False, 'initializing': False, 'audio_only': False}]
+        - 重复添加 None
+        - 添加了一个不存在的房间号 [{'id': '25e7daffbd502c376ba98567950e86c7', 'live_url': 'https://live.douyin.com/33223521594974', 'platform_cn_name': '抖音', 'host_name': '', 'room_name': '', 'status': False, 'listening': True, 'recording': False, 'initializing': False, 'audio_only': False}]
+          - 🎈🎈  https://live.douyin.com/{任意数字} ,仍会添加成功,并返回成功的数据,只不过它无法识别到主播信息
+    '''
+    # 添加重复的 url 会返回 None
+    # 正常返回: 
+    def add_live(self, url, listen=True):
+        data = json.dumps([{"url": url, "listen": listen}])
+        logger.debug(f"POST {self.base_url}" + "/api/lives" + f"\n{data}")
+        response = requests.post(self.base_url + "/api/lives", data=data)
+        logger.debug(f"response.status_code: {response.status_code}")
+        return response.json()
+
+    def delete_live_by_id(self, id):
+        response = requests.delete(self.base_url + f"/api/lives/{id}")
+        return response.json()
+
+    def start_listen_live_by_id(self, id):
+        response = requests.get(self.base_url + f"/api/lives/{id}/start")
+        return response.json()
+
+    def stop_listen_and_record_live_by_id(self, id):
+        response = requests.get(self.base_url + f"/api/lives/{id}/stop")
+        return response.json()
+    
+    def get_config(self):
+        response = requests.get(self.base_url + "/api/config")
+        return response.json()
+
+    def save_lives_to_config(self):
+        response = requests.put(self.base_url + "/api/config")
+        return response.json()
+
+    def get_raw_config(self):
+        response = requests.get(self.base_url + "/api/raw-config")
+        return response.json()
+
+    def save_raw_config(self, config):
+        data = json.dumps({"config": config})
+        response = requests.put(self.base_url + "/api/raw-config", data=data, logger=logger)
+        return response.json()
+  
+def main():
+    api = BiliLiveAPI("https://bililive.sv-v.magong.site")
+    # print(api.get_info())
+    # print(api.get_lives())
+    print(api.add_live('https://live.douyin.com/212140271'))
+
+
+if __name__ == "__main__":
+    main()

+ 34 - 0
client_detection.py

@@ -0,0 +1,34 @@
+import requests
+import json
+
+class ClientDetection:
+    def __init__(self, base_url):
+        self.base_url = base_url
+
+    def list_detect_list(self):
+        response = requests.get(f'{self.base_url}/')
+        return response.json()
+
+    def add_live(self, data):
+        headers = {'Content-Type': 'application/json'}
+        response = requests.post(f'{self.base_url}/', data=json.dumps(data), headers=headers)
+        return response.json()
+
+def main():
+    # 使用示例
+    client = ClientDetection('http://127.0.0.1:9081')
+    print(client.list_detect_list())
+
+    data = [
+        {
+            "str": "https://live.bilibili.com/14917277",
+        },
+        {
+            "str": "https://live.bilibili.com/14917277",
+        }
+    ]
+    print(client.add_live(data))
+  
+
+if __name__ == "__main__":
+    main()

+ 36 - 0
conf/detect_list copy 2.json

@@ -0,0 +1,36 @@
+[
+        {
+            "str": ""
+        },
+        {
+            "str": ""
+        },
+        {
+            "str": "9- #在抖音,记录美好生活#【帆书(原樊登读书)天天服务中心】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRUT9Gkk/ 4@4.com 07/22"
+        },
+        {
+            "name": "",
+            "str": "1- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK8bN/ 9@1.com 05/29",
+            "homepage": "",
+            "user_id": ""
+        },
+        {
+            "name": "旭旭宝宝",
+            "str": "个9- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK4Aj/ 1@3.com 08/06",
+            "homepage": "",
+            "live_url": "",
+            "user_id": ""
+        },
+        {
+            "name": "参哥认知圈",
+            "str": "3- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEwW1B/ 5@3.com 07/02",
+            "homepage": "",
+            "user_id": ""
+        },
+        {
+            "name": "纯然商业(IP变现)",
+            "str": "https://www.douyin.com/user/MS4wLjABAAAAypYrepTCjVDXDFHWlfbX-cjNGVu8J_qX4JW6WvoNGyhKRuIxyiARhU69uzUUUEiP",
+            "homepage": "",
+            "user_id": ""
+        }
+]

+ 39 - 0
conf/detect_list copy.json

@@ -0,0 +1,39 @@
+[
+    {
+        "str": " 2- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEQwEc/ 0@9.com 09/09"
+    },
+    {
+        "str": "3- #在抖音,记录美好生活#【斌的世界🪐】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRUTyoY3/ 9@0.com 04/03"
+    },
+    {
+        "str": "1- #在抖音,记录美好生活#【参校长 不讲废话】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRUEJewo/ 5@1.com 07/13"
+    },
+    {
+        "str": "长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRUoeWw8/"
+    },
+    {
+        "name": "",
+        "str": "1- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK8bN/ 9@1.com 05/29",
+        "homepage": "",
+        "user_id": ""
+    },
+    {
+        "name": "旭旭宝宝",
+        "str": "个9- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK4Aj/ 1@3.com 08/06",
+        "homepage": "",
+        "live_url": "",
+        "user_id": ""
+    },
+    {
+        "name": "参哥认知圈",
+        "str": "3- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEwW1B/ 5@3.com 07/02",
+        "homepage": "",
+        "user_id": ""
+    },
+    {
+        "name": "纯然商业(IP变现)",
+        "str": "https://www.douyin.com/user/MS4wLjABAAAAypYrepTCjVDXDFHWlfbX-cjNGVu8J_qX4JW6WvoNGyhKRuIxyiARhU69uzUUUEiP",
+        "homepage": "",
+        "user_id": ""
+    }
+]

+ 41 - 0
conf/detect_list.json

@@ -0,0 +1,41 @@
+[
+    {
+        "str": " 2- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEQwEc/ 0@9.com 09/09",
+        "homepage": "https://www.douyin.com/user/MS4wLjABAAAAD0nvMmQRM5qw_UH3A6NcQpr612083QbBResdJhl9FSXWfrmn5S-30sR4P9_fBni_",
+        "name": "阿宁 情感倾听(休息♨️)",
+        "live_url": ""
+    },
+    {
+        "str": "长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRUoeWw8/",
+        "homepage": "https://www.douyin.com/user/MS4wLjABAAAA_BNHk7z-Cb6siiuXuGb068BrWZP0y303EoMyXlFPhdVGozhlm836OvGEERpe9DAt",
+        "name": "鹤老师",
+        "live_url": ""
+    },
+    {
+        "name": "旭旭宝宝",
+        "str": "个9- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK4Aj/ 1@3.com 08/06",
+        "homepage": "https://www.douyin.com/user/MS4wLjABAAAAXcusadpsns9kBnsCcbvD8-Xuv2pFqH4X2rs-P2fnw7U",
+        "live_url": "",
+        "user_id": ""
+    },
+    {
+        "name": "参哥认知圈",
+        "str": "3- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEwW1B/ 5@3.com 07/02",
+        "homepage": "https://www.douyin.com/user/MS4wLjABAAAApznugMz3XON_YtODE197z0yvEbp7XzWfyePCcs7VGXA5J6e-Cy5ji1FF9VmGjRVz",
+        "user_id": "",
+        "live_url": ""
+    },
+    {
+        "name": "纯然商业(IP变现)",
+        "str": "https://www.douyin.com/user/MS4wLjABAAAAypYrepTCjVDXDFHWlfbX-cjNGVu8J_qX4JW6WvoNGyhKRuIxyiARhU69uzUUUEiP",
+        "homepage": "https://www.douyin.com/user/MS4wLjABAAAAypYrepTCjVDXDFHWlfbX-cjNGVu8J_qX4JW6WvoNGyhKRuIxyiARhU69uzUUUEiP",
+        "user_id": "",
+        "live_url": ""
+    },
+    {
+        "str": "0- #在抖音,记录美好生活#【十八八穿搭】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRy8Ur8Q/ 4@7.com 06/16"
+    },
+    {
+        "str": "! https://v.douyin.com/iRy8Ur8Q/ 4@7.com 06/16"
+    }
+]

+ 11 - 0
conf/settings.py

@@ -1,12 +1,23 @@
 import sys
 import os
 from DrissionPage import ChromiumOptions
+from loguru import logger
+HOST='127.0.0.1'
+PORT='9081'
+
+Detection_Interval = 15
 CONF_DIR =  os.path.dirname(__file__)
 WORK_DIR = os.path.dirname(CONF_DIR)
 OUTPUT = os.path.join(WORK_DIR, 'output')
 if not os.path.exists(OUTPUT):
     os.mkdir(OUTPUT)
 
+logger.remove()
+# logger.add(sys.stderr, format='<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>')
+logger.add(sys.stderr, format='<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{file}</cyan>:<cyan>{line}</cyan> :<cyan>{function}</cyan> - {message}')
+logger.add(os.path.join(OUTPUT, "all.log"), format='<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{file}</cyan>:<cyan>{line}</cyan> :<cyan>{function}</cyan> - {message}')
+
+
 # BROWSER_PATH 值理论无需手动设置,因为该模块会自动在默认路径查找 Chrome ,因此下列判断可以删去
 if sys.platform == 'win32':
     #BROWSER_PATH = r'C:\Program Files\Google\Chrome\Application\chrome.exe' 

+ 204 - 0
detect_list.py

@@ -0,0 +1,204 @@
+import datetime
+import json
+import os
+import sys
+import time
+import re
+from DrissionPage import ChromiumPage
+from requests_html import HTML
+import requests_html
+from conf.settings import chome_options,OUTPUT,logger,CONF_DIR
+from typing import List
+
+class DetectList:
+    def __init__(self, 
+                 detect_list_path=os.path.join(CONF_DIR, 'detect_list.json'), 
+                 input_path_txt = os.path.join(CONF_DIR, 'input_string.txt'), 
+                 ) -> None:
+        self.list_path = detect_list_path
+        if not os.path.exists(self.list_path):
+            self.gen_list()
+        self.list = self.load_list()
+        self.duplicate()
+        self.input_path = input_path_txt
+        
+    def load_list(self) -> List:
+        with open(self.list_path, 'r') as f:
+            res = json.load(f)
+        self.list = res
+        logger.debug(f"{self.list}")
+        return self.list
+    
+    def gen_list(self):
+        '''
+        - str: 
+          - 分享主页: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
+          - 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
+        - homepage:
+          - PC版用户主页: https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg
+        - live_url:
+          - 直播间地址 https://live.douyin.com/363178125769?camera_id=0
+          
+        🎈 name 和 user_id 非必填,它稍后由程序自动生成
+        🎈 str,homepage,live_url 三选一,最终程序会获取到 live_url ,这是最终的直播间地址
+        '''
+        dat = {
+            "list": [
+                {
+                    "name":"",
+                    "str":"",
+                    "homepage":"",
+                    "live_url":"",
+                    "user_id":""
+                }
+            ]
+        }
+        self.list = dat["list"]
+        self.dump_list()
+
+    def dump_list(self):
+        with open(self.list_path, 'w') as f:
+            json.dump(self.list, f, indent=4, ensure_ascii=False)
+    
+    """
+        Check if the input dictionary contains at least one of the keys: "str", "homepage", "live_url".
+        If not, raise a ValueError.
+    """
+    def validate_dict(self, dict):
+        keys = ['str', 'homepage', 'live_url']
+        if not any(key in dict for key in keys):
+            logger.info('The input dictionary must contain at least one of the following keys: "str", "homepage", "live_url"')
+            return
+        return True
+    '''
+    input_data
+        - [{'str': 1}, {'str': 2}, {'str': 3}]
+        - {'str': 1}
+    '''
+    def append_list(self, input_data):
+        logger.debug(f'{input_data}')
+        if isinstance(input_data, dict):
+            # self.validate_dict(input_data)
+            self.list.append(input_data)
+        elif isinstance(input_data, list):
+            # for item in input_data:
+            #     if self.validate_dict(item):
+            self.list.extend(input_data)
+        else:
+            logger.info('Input data must be a dictionary or a list of dictionaries.')
+        self.dump_list()
+        
+    def update_list(self, index, dict):
+        logger.debug(f'{index} {dict}')
+        # if not self.validate_dict(dict):
+        #     return
+        self.list[index].update(dict)
+        self.dump_list()
+
+    def delete_list(self, index):
+        if isinstance(index, list):
+            index.sort(reverse=True)  # 从大到小排序
+            for i in index:
+                logger.debug(f"del {self.list[i]}")
+                del self.list[i]
+        elif isinstance(index, int):
+            logger.debug(f"del {self.list[index]}")
+            del self.list[index]        
+        self.dump_list()
+      
+    '''
+    return
+        - [
+            {"index": " 1","str ": "value"},
+            {"index": " 2","str ": "value"},
+            ...
+          ]
+        - []
+    '''
+    def get_all_str(self):
+        result = []
+        for i, item in enumerate(self.list):
+            result.append({
+                "index": str(i + 1),
+                "str": item.get("str", "value")
+            })
+        # logger.debug(result)
+        return result
+
+    def get_best_key(self, item):
+        live_url = item.get('live_url')
+        if live_url:
+            return 'live_url', live_url
+            
+        homepage = item.get('homepage') 
+        if homepage:
+            return 'homepage', homepage
+        
+        str_val = item.get('str')
+        if str_val:
+            return 'str', str_val
+        return '' ,''
+
+    def duplicate(self):
+        urls = []
+        duplicate_indexes = []
+        
+        for i, item in enumerate(self.list):
+            homepage = item.get('homepage')
+            if homepage and homepage in urls:
+                duplicate_indexes.append(i)
+                
+            live_url = item.get('live_url')
+            if live_url and live_url in urls:
+                duplicate_indexes.append(i)
+                
+            urls.append(homepage)
+            urls.append(live_url)
+        
+        if duplicate_indexes:
+            self.delete_list(duplicate_indexes)
+   
+def main():
+    d = DetectList()
+    # logger.info(d.get_duplicate_indexes())
+    # d.validate_dict( [{'str': 1}, {'str': 2}, {'str': 3}])
+    # d.validate_dict({'str': 1})
+    dat = [
+            {
+                "name":"旭旭宝宝",
+                "str":"个9- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK4Aj/ 1@3.com 08/06",
+                "homepage":"",
+                "live_url":"",
+                "user_id":""
+            },
+            {
+                "name":"参哥认知圈",
+                "str":"3- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEwW1B/ 5@3.com 07/02",
+                "homepage":"",
+                "user_id":""
+            },
+            {
+                "name":"纯然商业(IP变现)",
+                "str":"",
+                "homepage":"https://www.douyin.com/user/MS4wLjABAAAAypYrepTCjVDXDFHWlfbX-cjNGVu8J_qX4JW6WvoNGyhKRuIxyiARhU69uzUUUEiP",
+                "user_id":""
+            },
+            {
+                "name":"",
+                "str":" 2- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEQwEc/ 0@9.com 09/09",
+                "homepage":"",
+                "user_id":""
+            },
+            {
+                "name":"",
+                "str":"1- 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrEK8bN/ 9@1.com 05/29",
+                "homepage":"",
+                "user_id":""
+            },
+        ]
+        
+    # d.list = dat["list"]
+    # d.dump_list()
+
+if __name__ == "__main__":
+    main()

+ 0 - 89
detect_live.py

@@ -1,89 +0,0 @@
-import os
-import sys
-import time
-import re
-from DrissionPage import ChromiumPage
-from requests_html import HTML
-import requests_html
-from conf.settings import chome_options,OUTPUT
-
-chromium = ChromiumPage(chome_options)
-
-def user_homepage_live_link(html):
-    print(f"user_homepage_live_link:{html[:10]}...")
-    with open(os.path.join(OUTPUT,'dy.html'), 'w') as f:
-        f.write(html)
-    html = HTML(html=html)
-    user = html.find('div[data-e2e="user-detail"]', first=True)
-    if not user:
-        print('can not find user-detail')
-        return
-    a = user.find('a', first=True)
-    # 如果找到 a 标签,说明正在直播
-    if a:
-        return analyze_url(a.attrs['href'])
-    else:
-        return None
-# user_url:  https://v.douyin.com/iRjUsNmH/
-# return: 
-# - 用户主页 https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg
-# - 直播间地址: https://live.douyin.com/363178125769?camera_id=0
-def get_share_url(share_url):
-    print(f'get_share_url:{share_url}')
-    chromium.get(share_url)
-    chromium.wait.load_complete()
-    # 等待重定向
-    time.sleep(1)
-    return analyze_url(chromium.url)
-    
-    
-# 用户主页: https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb
-# 分享地址: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
-# 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
-# 直播间地址: https://live.douyin.com/192990287232
-def analyze_url(url):
-    res = re.search(r'(https?://([\w]*)\.douyin.com[^ ]*)', url)
-    print(f'analyze_url:{url}  re:{res.groups()}')
-    
-    if not res:
-        return
-    if not len(res.groups()) == 2:
-        return
-    live_link = ''
-    # 分享链接,短链接 https://v.douyin.com/iRMFSx59/
-    if res.groups()[1] == 'v':
-        live_link = get_share_url(res.groups()[0])
-    # 用户主页: https://www.douyin.com/user/MS4wLjA...
-    elif res.groups()[1] == 'www':
-        live_link = user_homepage_live_link(chromium.html)
-    # res.groups()[0] = https://live.douyin.com/363178125769?camera_id=0 或 https://live.douyin.com/192990287232
-    elif res.groups()[1] == 'live':
-        search_res = re.search(r'(https?://live.douyin.com/[\d]+)', res.groups()[0])
-        if search_res:
-            live_link = search_res.group(0)
-        else:
-            print("live link error:", res.groups()[0])
-    return live_link
-    
-
-def main():
-    # links = start_test_spider('https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb')
-    # start_test_spider('https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg')
-    # 用户主页: https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb
-    # 分享地址: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
-    # 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
-
-    # analyze_url("https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb")
-    # analyze_url("长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/")
-    # analyze_url("3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29")
-    # analyze_url("阿道夫https://live.douyin.com/192990287232")
-    # live_link = analyze_url("长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRrQwD9T/")
-    # analyze_url("3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v..com/iRMFCkqF/ 1@5.com 12/29")
-    
-    # 给一个不存在的地址,异常测试
-    live_link = analyze_url("长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iR1T/")
-    print(f"analyze_url result live link:{live_link}")
-    
-
-if __name__ == "__main__":
-    main()

+ 222 - 0
detection.py

@@ -0,0 +1,222 @@
+import datetime
+import json
+import os
+import sys
+import time
+import re
+import signal
+from DrissionPage import ChromiumPage
+from requests_html import HTML
+import requests_html
+from conf.settings import chome_options,OUTPUT,logger
+
+
+class Detection():
+    def __init__(self) -> None:
+        self.page = ChromiumPage(chome_options)
+        # signal.signal(signal.SIGINT, self.handle_interrupt)
+    # def __del__(self):
+    #     self.page.close_tabs() 
+    # def handle_interrupt(self, signum, frame):
+    #     logger.info("Received interrupt signal. Cleaning up...")
+    #     self.page.close_tabs()
+    #     sys.exit(0)
+
+    '''
+    string
+        - 任何抖音链接、分享链接
+        - 分享主页: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
+        - 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
+        - 用户主页  https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg
+        - 直播间地址: https://live.douyin.com/363178125769?camera_id=0
+    return 
+        - {"homepage": "https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg"}
+        - {"live_url": "https://live.douyin.com/363178125769"}
+        - {}
+    '''
+    def analyze_link(self, string):
+        res = re.search(r'(https?://([\w]*)\.douyin.com[^ ]*)', string)
+        logger.debug(f'analyze_url:{string}  re:{res if not res  else res.groups()}')
+        
+        if not res:
+            logger.info(f"格式错误 {string} ,支持以下链接形式:\n""""
+        - 分享主页: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
+        - 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
+        - 用户主页  https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg
+        - 直播间地址: https://live.douyin.com/363178125769?camera_id=0
+                        """)
+            return
+        url, subdomain =  res.groups()
+        ret = {}
+        # 分享链接,短链接 https://v.douyin.com/iRMFSx59/
+        if subdomain == 'v':
+            ret = self.analyze_short_url(url)
+        # 用户主页: https://www.douyin.com/user/MS4wLjA...
+        elif subdomain == 'www':
+            ret = self.analyze_homepage_url(url)
+        # res.groups()[0] = https://live.douyin.com/363178125769?camera_id=0 或 https://live.douyin.com/192990287232
+        elif subdomain == 'live':
+            ret = self.analyze_live_url(url, check=True)
+        logger.debug(f"ret:{ret}")
+        return ret
+
+    '''
+    share_url:  
+        - 分享地址: 长按复制此条消息,打开抖音搜索,查看TA的更多作品。 https://v.douyin.com/iRMFSx59/
+        - 直播分享: 3- #在抖音,记录美好生活#【麦穗儿🦋129】正在直播,来和我一起支持Ta吧。复制下方链接,打开【抖音】,直接观看直播! https://v.douyin.com/iRMFCkqF/ 1@5.com 12/29
+    return:
+        - {"homepage": "https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg"}
+        - {"live_url": "https://live.douyin.com/363178125769"}
+        - None
+    '''
+    def analyze_short_url(self, share_url):
+        logger.debug(f'share_url: {share_url}')
+        self.page.get(share_url)
+        self.page.wait.load_complete()
+        # 等待重定向
+        time.sleep(1)
+        if 'user' in self.page.url:
+            return {"homepage": self.page.url, "name":self.get_user_name('span[class="Nu66P_ba"]')}
+        elif 'live' in self.page.url:
+            return {"live_url": self.page.url, "name":self.get_user_name('div[data-e2e="live-room-nickname"]')} 
+    
+    def get_user_name(self,selector):
+        html = HTML(html=self.page.html)
+        name_ele = html.find(selector, first=True)
+        if name_ele:
+            return name_ele.text
+        return ''
+    '''
+    url:  
+        - 直播间地址: https://live.douyin.com/363178125769?camera_id=0
+        - a 标签地址: https://live.douyin.com/21289753259?enter_from_merge=web_others_homepage&ent...
+    return:
+        - if check: {"live_url": "https://live.douyin.com/363178125769"} (正在直播)
+        - None  
+          - 直播间 url 格式正确,但是没有这个直播间,网页提示:该内容无法查看
+          - url 正则表达式地址错误
+    '''
+    def analyze_live_url(self, url, check=False):
+        search_res = re.search(r'(https?://live.douyin.com/[\d]+)', url)
+        if search_res:
+            live_url = search_res.group(0)
+            live_page_user_name = {}
+            if check:
+                exist = self.check_live_exist(live_url)
+                if not exist:
+                    logger.warning(f"live room not exist:{live_url}")
+                    return
+                # 只有在check的时候会访问 live URL ,此时可以通过 data-e2e="live-room-nickname" 找到页面元素
+                live_page_user_name = {"name":self.get_user_name('div[data-e2e="live-room-nickname"]')}
+            ret = {"live_url": live_url}
+            ret.update(live_page_user_name)
+            return ret
+        else:
+            logger.warning(f"live link error:{live_url}")
+
+    '''
+    url
+        - 用户主页  https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg
+    return 
+        - 正在直播 {"live_url": "https://live.douyin.com/363178125769", "name":"xxx", "homepage":"https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg"} 
+        - 没有直播 {"live_url": "", "name":"xxx", "homepage":"https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg"} 
+    '''
+    def analyze_homepage_url(self, url):
+        logger.debug(f'{url}')
+        self.page.get(url)
+        self.page.wait.load_complete()
+        ret = {"name":self.get_user_name('span[class="Nu66P_ba"]'), "homepage":url}
+        live_url = self.get_user_homepage_live_link(self.page.html)
+        if live_url:
+            ret.update(live_url)
+        else:
+            ret.update({"live_url": ""})
+        logger.debug(f'ret {ret}')
+        return ret
+    '''
+    html:  
+        - 用户主页 html(https://www.douyin.com/user/MS4wLjABAAAAml99q0O4A4rk9SCLeVJXaWGi5e2pKxvQ1Oe0CjfTSHg)
+    return:
+        - {'live_url': 'https://live.douyin.com/324749687667', 'name': 'xxx'} (正在直播)
+        - None (没有直播)
+    '''
+    def get_user_homepage_live_link(self, html):
+        logger.debug(f"{html[:10]}...")
+        html = HTML(html=html)
+        user = html.find('div[data-e2e="user-detail"]', first=True)
+        if not user:
+            path = self.save_html(html)
+            logger.warning(f"Can not find <div[data-e2e=\"user-detail\"]> in user home page.Html save to:{path}")
+            return
+        a = user.find('a', first=True)
+        # 如果找到 a 标签,说明正在直播
+        if a:
+            logger.debug(a.attrs['href'])
+            # 因为 analyze_live_url 没有使用 check 参数,不会主动去访问 live URL, 此时还在 homepage 页面,需要查找 homepage 中的用户名元素: span[class="Nu66P_ba"]
+            return self.analyze_live_url(a.attrs['href'])
+        else:
+            logger.info("The user is not live streaming")
+            return {}
+    
+    
+    '''是否存在这个直播间地址
+    url:  
+        - https://live.douyin.com/363178125769
+    return:
+        - 1 | 0
+    '''
+    def check_live_exist(self, url):
+        self.page.get(url)
+        element = self.page.ele("xpath://div[@class='LV2pOyWA __leftContainer']")
+        if element:
+            return 1
+
+    
+    '''
+    url:  
+        - https://live.douyin.com/363178125769
+    return:
+        - 0 直播已结束
+        - 1 正常
+        - 2 链接不存在,页面提示“该内容暂时无法查看” (链接出错)
+        - 3 直播链接仅限手机观看(不影响录制)
+    '''
+    def check_live_url(self, url):
+        self.page.get(url)
+        self.page.wait.load_complete()
+        html = HTML(html=self.page.html)
+        basic_ele = html.find('div.basicPlayer', first=True)
+        if not basic_ele:
+            return 2
+        if '结束' in basic_ele.text:
+            return 0
+        
+
+    def save_html(self, content):
+        directory = OUTPUT
+        if not os.path.exists(directory):
+            os.mkdir(directory)
+        # 获取目录中的文件
+        files = os.listdir(directory)
+
+        # 如果文件数量超过最大值,则删除最旧的文件
+        if len(files) >= 30:
+            files.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))
+            os.remove(os.path.join(directory, files[0]))
+
+        # 保存新的文件
+        filename = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f")[:-3] + '.html'
+        file_path = os.path.join(OUTPUT, filename)
+        with open((file_path), 'w') as f:
+            f.write(content)
+        return file_path
+    
+def main():
+    d = Detection()
+    # logger.info(d.analyze_link("https://live.douyin.com/324749687667"))
+    # logger.info(d.analyze_link("https://www.douyin.com/user/MS4wLjABAAAAtMD22LJGHKwwo1V1WR3Rh5KOUs1C4Jk0Fl5EWzPzuOhbCbC2yUHK9vuPu7nZ_rm4"))
+    # logger.info(d.analyze_live_url("https://live.douyin.com/3651787257", check=True))
+    # logger.info(d.analyze_live_url("https://v.douyin.com/3651787257", check=True))
+
+if __name__ == "__main__":
+    main()

+ 46 - 0
main.py

@@ -0,0 +1,46 @@
+import uvicorn
+import logging
+from fastapi import FastAPI, BackgroundTasks, Request
+from contextlib import asynccontextmanager
+from pydantic import BaseModel
+from typing import List
+import threading
+import time
+from conf.settings import logger,HOST,PORT
+from task import Thread_que,run_task,detect_list
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # 在应用启动前运行的函数
+    thread = threading.Thread(target=run_task)
+    thread.start()
+    yield
+    # 在应用关闭后运行的函数
+    Thread_que.put_nowait("stop")
+    thread.join()
+    logger.info("Shutting down...")
+
+logger.info("FastAPI docs: ")
+app = FastAPI(lifespan=lifespan)
+
+# Model with a single string field named ``str``. The route handlers visible
+# in this file read the raw request body instead and do not reference it.
+class Item(BaseModel):
+    str: str
+
+@app.get("/")
+async def list_detect_list():
+    return detect_list.list
+
+@app.post("/")
+async def add_live(request: Request):
+    json_body = await request.json()
+    Thread_que.put_nowait({"add":json_body})
+
+    # 你的代码
+    return {"status": "ok"}
+
+def main():
+    uvicorn.run(app, host=HOST, port=PORT, log_level="info")
+
+if __name__ == "__main__":
+    main()

+ 50 - 0
readme.md

@@ -31,3 +31,53 @@ unzip Linux_x64_1047731_chrome-linux.zip
 
 ```
 
+## FastAPI
+```shell
+pip install "uvicorn[standard]"
+pip install fastapi
+uvicorn main:app --port 9081
+```
+
+'''
+GET / 
+list detect list
+- Request:
+    ```text
+    method: GET
+    path: http://127.0.0.1:9081/
+    ```
+- Response:
+    ```json
+    [{"str": 1}, {"str": 2}, {"str": 3}]
+    ```
+'''
+
+'''
+Add live
+- Request:  
+    ```text
+    method: POST
+    path: http://127.0.0.1:9081/
+    body: 
+        [
+            {
+                "str": "https://live.bilibili.com/14917277",
+            },
+            {
+                "str": "https://live.bilibili.com/14917277",
+            }
+        ]
+    ```
+- Response:
+    ```json
+        [
+            {
+                "res": "https://live.bilibili.com/14917277",
+            },
+            {
+                "res": "https://live.bilibili.com/14917277",
+            }
+        ]
+    ```
+'''
+

+ 2 - 1
requirements.txt

@@ -1,3 +1,4 @@
 #python_version>='3.10'
 DrissionPage==3.2.34
-requests-html==0.10.0
+requests-html==0.10.0
+loguru==0.7.2

+ 137 - 0
task.py

@@ -0,0 +1,137 @@
+import os
+import queue
+import asyncio
+import sys
+import threading
+import time
+from bililive_api import BiliLiveAPI
+from detect_list import DetectList
+from conf.settings import WORK_DIR,CONF_DIR,OUTPUT,logger,Detection_Interval
+from detection import Detection
+
+# Queue of messages for the worker loop; main_task() polls and consumes it.
+Thread_que = queue.Queue()
+# NOTE(review): not referenced anywhere in the visible part of this module.
+scan_live_busy = False
+# Module-level instances shared by the functions below.
+detection = Detection()
+detect_list = DetectList()
+biliapi = BiliLiveAPI()
+
+
+def scan_lives():
+    """Analyze every entry of ``detect_list`` once and act on the result.
+
+    For each entry:
+
+    * if ``detect_list.get_best_key`` yields no key, the entry is removed;
+    * if ``detection.analyze_link`` returns a falsy result, the entry is
+      kept unchanged;
+    * if the result has a truthy ``"homepage"``, the entry is replaced by
+      the result so the parsed form is reused on the next scan;
+    * otherwise, if the result has a truthy ``"live_url"``, that URL is
+      submitted through ``biliapi.add_live`` and the entry is removed.
+    """
+    logger.debug(f"Detection_Interval :{Detection_Interval} , run scan_lives... ")
+    # Entries must not be deleted while the list is being walked (the
+    # indices would shift), so the indices are collected here and removed
+    # in one call after the loop.
+    del_index = []
+    for index, item in enumerate(detect_list.list):
+        logger.debug(f'analyze item:{item}')
+        key, url = detect_list.get_best_key(item)
+        if not key:
+            del_index.append(index)
+            continue
+
+        res = detection.analyze_link(url)
+        if not res:
+            continue
+        if res.get("homepage"):
+            logger.debug(f'update_list:{index}  {res}')
+            # Replace the string the user pasted with the parsed result so
+            # it can be used directly next time.
+            detect_list.update_list(index, res)
+        elif res.get("live_url"):
+            live_url = res.get("live_url")
+            post_res = biliapi.add_live(live_url)
+            # The API result used to be discarded; log it so the outcome of
+            # the add request is visible.
+            logger.debug(f'add_live {live_url} -> {post_res}')
+            del_index.append(index)
+
+    detect_list.delete_list(del_index)
+    logger.debug(f"scan_lives finish,detect_list = {detect_list.list}")
+
+
+async def auto_detect_interval(que:asyncio.Queue, finish_event:asyncio.Event):
+    """Request a scan repeatedly by putting ``'detect'`` on ``que``.
+
+    After each request the coroutine waits until ``finish_event`` is set,
+    clears it, and then sleeps ``Detection_Interval`` seconds before asking
+    for the next scan. It never returns on its own.
+
+    Args:
+        que: Queue on which the ``'detect'`` message is put.
+        finish_event: Event that signals the requested scan has finished.
+    """
+    while True:
+        # A scan can take a long time, so wait for it to finish first and
+        # only then start counting the interval again.
+        await que.put('detect')
+        await finish_event.wait()
+        finish_event.clear()
+        await asyncio.sleep(Detection_Interval)
+
+async def auto_detect_task(auto_detect_queue:asyncio.Queue):
+    """Consume messages from ``auto_detect_queue`` until ``'stop'`` arrives.
+
+    Handled messages:
+
+    * ``'stop'``: leave the loop.
+    * ``'detect'``: run ``scan_lives`` in a separate thread, wait for it
+      without blocking the event loop, then set the finish event so the
+      interval timer starts again.
+    * a dict: the list under its ``'add'`` key, when non-empty, is appended
+      to ``detect_list``.
+
+    Any other message is logged and ignored.
+
+    Args:
+        auto_detect_queue: Queue the messages are read from; the periodic
+            ``'detect'`` requests are put on this same queue.
+    """
+    loop = asyncio.get_running_loop()
+    finish_event = asyncio.Event()
+    # Keep a reference to the timer task so it is not garbage-collected
+    # while running, and so it can be cancelled when this coroutine exits.
+    interval_task = asyncio.create_task(
+        auto_detect_interval(auto_detect_queue, finish_event)
+    )
+    try:
+        while True:
+            msg = await auto_detect_queue.get()
+            logger.debug(f'get msg {msg}')
+            if msg == 'stop':
+                break
+            if msg == 'detect':
+                thread_scan_lives = threading.Thread(target=scan_lives)
+                thread_scan_lives.start()
+                # join() blocks, so it is run in the default executor.
+                await loop.run_in_executor(None, thread_scan_lives.join)
+                finish_event.set()
+            elif isinstance(msg, dict):
+                add_lives = msg.get('add', [])
+                if add_lives:
+                    detect_list.append_list(add_lives)
+            else:
+                # Non-dict messages (e.g. arbitrary console input) used to
+                # raise AttributeError on ``msg.get`` and kill this task.
+                logger.warning(f'ignore unknown msg {msg!r}')
+    finally:
+        interval_task.cancel()
+            
+    
+def add_live(lives):
+    """Append ``lives`` to ``detect_list`` and run one scan immediately.
+
+    NOTE(review): ``scan_lives`` uses the shared module-level ``detection``
+    instance. The earlier draft kept below says a separate browser is needed
+    so it does not overlap with the one used by ``auto_detect_task`` —
+    confirm before calling this concurrently with the periodic scan.
+    """
+    detect_list.append_list(lives)
+    scan_lives()
+    # Earlier draft, kept for reference:
+    # while True:
+    #     msg = await input_msg.get()
+    #     logger.info(msg)
+    #     # Open a new browser, otherwise it overlaps with the browser used in auto_detect_task
+    #     res = Detection().analyze_link(msg)
+    #     if res:
+    #         detect_list.append_list(res)
+        
+            
+async def main_task():
+    """Bridge the thread-side ``Thread_que`` into the asyncio worker.
+
+    Starts ``auto_detect_task`` in the background, then polls ``Thread_que``
+    and forwards every message to the worker's asyncio queue. A ``"stop"``
+    message closes the browser tabs, cancels the background task and ends
+    the coroutine.
+    """
+    logger.debug('start task')
+    auto_detect_queue = asyncio.Queue()
+    task_auto_detect = asyncio.create_task(auto_detect_task(auto_detect_queue))
+    while True:
+        try:
+            msg = Thread_que.get_nowait()
+        except queue.Empty:
+            # Thread_que is a blocking queue.Queue, so it is polled with a
+            # short sleep instead of blocking the event loop on get().
+            await asyncio.sleep(0.1)
+            continue
+        logger.debug(f"get msg from Thread_que:{msg}")
+        if msg == "stop":
+            logger.debug('wait task_auto_detect')
+            detection.page.close_tabs()
+            # The task is not awaited; cancel it explicitly rather than
+            # leaving that to the event loop's teardown.
+            task_auto_detect.cancel()
+            break
+        auto_detect_queue.put_nowait(msg)
+    logger.debug('break')
+    
+def run_task():
+    """Run ``main_task`` to completion in a new asyncio event loop.
+
+    Used as a ``threading.Thread`` target, so the loop lives in that thread.
+    """
+    asyncio.run(main_task())
+
+# Simulate the server by feeding console input into the worker queue.
+def imitate_server():
+    """Start the worker thread and forward each console line to ``Thread_que``.
+
+    The loop ends on Ctrl-C or when stdin is closed; the browser tabs are
+    then closed, a ``"stop"`` message is queued and the worker is joined.
+
+    NOTE(review): typing ``stop`` ends the worker (``main_task`` breaks on
+    it) but this input loop keeps reading — confirm that is intended.
+    """
+    thread = threading.Thread(target=run_task)
+    thread.start()
+    while True:
+        try:
+            indat = input('input:')
+        except (KeyboardInterrupt, EOFError) as exc:
+            # EOFError is raised when stdin is closed or piped. Without
+            # handling it the exception escaped and the non-daemon worker
+            # thread was never told to stop.
+            logger.info(type(exc).__name__)
+            detection.page.close_tabs()
+            break
+        Thread_que.put_nowait(indat)
+    Thread_que.put_nowait("stop")
+    logger.info('put stop msg')
+    thread.join()
+    logger.info('exit')
+    
+if __name__ == "__main__":
+    imitate_server()

+ 56 - 25
test/test.py

@@ -1,29 +1,60 @@
+import asyncio
+import os
+import queue
+import sys
+import threading
 import time
-from DrissionPage import ChromiumPage
-from conf.settings import chome_options
 
-# 创建页面对象,并启动或接管浏览器
-page = ChromiumPage(chome_options)
-# initial_url = 'https://v.douyin.com/iRryVgd6/'
-initial_url = 'https://v.douyin.com/iRMFSx59/'
-page.get(initial_url)
-page.wait.load_complete()
-# 页面有重定向,等待最终加载
-time.sleep(1)
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from conf.settings import WORK_DIR,CONF_DIR,OUTPUT,logger,Detection_Interval
 
-# if page.url != initial_url:
-#     print("页面已重定向到:", page.url)
-#     page._wait_loaded
-# else:
-#     print("页面没有重定向。当前URL:", page.url)
+Thread_que = queue.Queue()
+# from detection import Detection
+# def analyze():
+#     detection = Detection()
+#     detection.analyze_link('123')
+
+async def main_task():
+    """Start ``auto_detect_task`` in the background and log "m" every second.
+
+    The loop has no exit condition and ``stop_event`` is never set here, so
+    this coroutine runs until it is cancelled.
+    """
+    stop_event = asyncio.Event()
+    task_auto_detect = asyncio.create_task(auto_detect_task(stop_event))
+    while True:
+        logger.info("m")
+        await asyncio.sleep(1)
+
+def run_task():
+    """Run ``main_task`` in a new asyncio event loop (used as a thread target)."""
+    asyncio.run(main_task())
+    
+async def auto_detect_task(stop_event:asyncio.Event=None):
+    """Poll ``stop_event`` once per second and return once it is set.
+
+    Args:
+        stop_event: Event that ends the loop when set. When omitted, a
+            private event that is never set is used, so the loop runs until
+            the task is cancelled.
+    """
+    if stop_event is None:
+        # The ``None`` default used to fail with AttributeError on
+        # ``stop_event.is_set()``.
+        stop_event = asyncio.Event()
+    while True:
+        await asyncio.sleep(1)
+        logger.debug(f"thread_scan_lives finish:{stop_event.is_set()}")
+        if stop_event.is_set():
+            logger.debug("stop event")
+            break
+
+def imitate_server():
+    """Start the worker thread and read console input until Ctrl-C.
+
+    The text that is read is not used. After Ctrl-C the worker thread is
+    joined.
+
+    NOTE(review): ``main_task`` loops forever and nothing sets its stop
+    event (the "stop" message below is commented out), so ``thread.join()``
+    cannot return as written.
+    """
+    thread = threading.Thread(target=run_task)
+    thread.start()
+    while True:
+        try:
+            indat = input('input:')
+            
+        except KeyboardInterrupt:
+            logger.info("KeyboardInterrupt")
+            break
+    # Thread_que.put_nowait("stop")
+    logger.info('put stop msg')
+    thread.join()
+    logger.info('exit')
+    # sys.exit(0)
     
-print(page.url)
-# 定位到账号文本框,获取文本框元素
-# ele = page.ele('#user_login')
-# 输入对文本框输入账号
-# ele.input('您的账号')
-# 定位到密码文本框并输入密码
-# page.ele('#user_password').input('您的密码')
-# 点击登录按钮
-# page.ele('@value=登 录').click()
-# page.close_tabs()
+if __name__ == "__main__":
+    # imitate_server()
+    # Scratch experiment: merge an empty dict into ``ret``.
+    live_page_user_name = {}
+    ret = {"live_url": "live_url"}
+    ret.update(live_page_user_name)
+    # dict.update() mutates in place and returns None, so this prints "None".
+    print(ret.update(live_page_user_name))
+    # Prints the dict itself, unchanged by the empty updates.
+    print(ret)
+    

+ 6 - 3
test/xpath_test.py

@@ -1,6 +1,9 @@
 from requests_html import HTML
-doc = open('dy.html').read()
+doc = open('t.html').read()
 html = HTML(html=doc)
-links = html.find('div[data-e2e="user-detail"] a', first=True).attrs['href']
+# links = html.find('div[class="basicPlayer oxOnVjsP tLRjpY1B living_player xgplayer xgplayer_autohide xgplayer-pc xgplayer-nostart"]', first=True)
+links = html.find('div.basicPlayer', first=True)
+print(links)
+print(links.text)
 
-print(links)
+# basic_ele = self.page.html.find