Переглянути джерело

page 设置为专属用户目录;解析直播在线人数测试;

mrh 1 рік тому
батько
коміт
ad8e9ee6ed
8 змінених файлів з 712 додано та 39 видалено
  1. 1 1
      conf/config.py
  2. 3 7
      conf/dp_configs.ini
  3. 0 4
      dp/audience.py
  4. 18 6
      dp/cookies.py
  5. 6 12
      dp/page.py
  6. 110 0
      dp/req_api.py
  7. 0 9
      main.py
  8. 574 0
      testm/get_live_data.py

+ 1 - 1
conf/config.py

@@ -41,7 +41,7 @@ def find_edge_path_in_registry():
     return path  
 
 logger.debug(f"find_edge browser path: {find_edge_path_in_registry()}")
-USER_DATA = os.path.join(OUTPUT, "UserData")
+USER_DATA = os.path.join(OUTPUT, "userData_ai-yunying")
 # BROWSER_PATH 值理论无需手动设置,因为该模块会自动在默认路径查找 Chrome ,因此下列判断可以删去
 if 'win' in sys.platform:
     if not os.path.exists(INI_PATH):

+ 3 - 7
conf/dp_configs.ini

@@ -3,9 +3,9 @@ download_path =
 tmp_path = 
 
 [chromium_options]
-address = 127.0.0.1:9226
+address = 127.0.0.1:9225
 browser_path = C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe
-arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking', '--hide-crash-restore-bubble', '--disable-features=PrivacySandboxSettings4', '--user-data-dir=I:\\code\\ai-yunying\\live-online-people\\output\\UserData9226']
+arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking', '--hide-crash-restore-bubble', '--disable-features=PrivacySandboxSettings4', '--user-data-dir=I:\\code\\ai-yunying\\live-online-people\\output\\userData_ai-yunying']
 extensions = []
 prefs = {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}
 flags = {}
@@ -16,11 +16,7 @@ system_user_path = False
 existing_only = False
 
 [session_options]
-headers = {
-    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': 'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
-    'Sec-Ch-Ua-Platform:':'macOS',
-    "Sec-Ch-Ua": ""
-    }
+headers = {}
 
 [timeouts]
 base = 10

+ 0 - 4
dp/audience.py

@@ -1,4 +0,0 @@
-from .page import page
-# 查看在线观众
-
-# 1. 获取在线观众列表

+ 18 - 6
dp/cookies.py

@@ -16,6 +16,7 @@ from faker import Faker
 import DrissionPage
 from fake_useragent import UserAgent
 from dataset import Table
+from collections import OrderedDict
 table:Table = db['cookie']
 
 def get_browser_fake_info():
@@ -129,12 +130,15 @@ def try_gen_douyin_cookies():
         except Exception as e:
             logger.exception(e)
 
-def get_from_table():
+def get_from_table(id=None):
     count = table.count()
     # random choise
     if count > 0:
-        index = random.randint(0, count-1)
-        row = table.find_one(id=index)
+        if id:
+            row = table.find_one(id=id)
+        else:
+            index = random.randint(0, count-1)
+            row = table.find_one(id=index)
         # logger.info(f"{row['id']}") 
         # logger.info(f"row {row}")
         return row
@@ -142,12 +146,20 @@ def get_from_table():
         logger.info("no cookie")
         return None
 
+def delete_old_and_create_new(row:OrderedDict):
+    res = try_gen_douyin_cookies()
+    if res:
+        res['id'] = row['id']
+        id = table.upsert(res, ['id'])
+        logger.info(f"upsert {id}")
+        return table.find_one(id=row['id'])
+
 def main():
-    
+    # delete_old_and_create_new()
     # ua = create_ua_header()
     # gen_douyin_cookies()
-    # try_gen_douyin_cookies()
-    get_from_table()
+    try_gen_douyin_cookies()
+    # get_from_table()
     # table.create_column('key')
     # table.delete(id=6)
 

+ 6 - 12
dp/page.py

@@ -13,9 +13,9 @@ from DrissionPage import ChromiumOptions
 from DrissionPage.common import Settings
 
 Settings.raise_when_ele_not_found=True
-chrome_options.set_user_agent("Mozilla/5.0 (Windows NT 5.2) AppleWebKit/536.1 (KHTML, like Gecko) Chrome/52.0.893.0 Safari/536.1")
 
 page = ChromiumPage(chrome_options)
+
 logger.debug(f"address {chrome_options.address}")
 logger.debug(f"user_data_path {chrome_options.user_data_path}")
 logger.debug(f"start '{page._chromium_options._browser_path}'")
@@ -24,14 +24,8 @@ logger.debug(f"process_id {page.process_id}")
 # 因此设置为 none 是最高效率、最迅速的,甚至不用 page.stop_loading() 因为停止过程中也要花费时间,而是直接请求空页面 about:blank 断开所有连接
 page.set.load_mode.none()
 # page.set.NoneElement_value('没找到')
-page.get("edge://version/")
-# page.new_tab("http://www.baidu.com")
-
-def gen_cookies():
-    ret = {
-        "cookies": "",
-        "user-agent":"",
-        "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
-        "create_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
-    }
-    page
+# page.get("edge://version/")
+page.get("http://www.douyin.com")
+cookies = page.cookies()
+# logger.debug(f"{cookies}")
+# page.new_tab("edge://version/")

+ 110 - 0
dp/req_api.py

@@ -0,0 +1,110 @@
+import requests
+from requests import Request,Response
+import DrissionPage
+import cookies
+from conf.config import logger
+import jsonpath
+
+params = {
+    "url": "https://www.douyin.com/note/12345678910",
+    "source": True
+}
+def get_user_info():
+    row = cookies.get_from_table()
+
+    params = {
+    "url": "https://www.douyin.com/user/MS4wLjABAAAAQ6pjxWtdMnxF3LjL6oAGNuU-irhAXJNhhCACx2fTo_IHERUBDVrJJiWCfZQmLi0S",
+    "source": "true",
+    "cookie": row['cookies'],
+    "token": None
+    }
+
+    # response = requests.post("http://10.0.0.12:5000/user/", json=params)
+    # http://192.168.2.11:5000
+    response = requests.post("http://192.168.2.11:5000/user/", json=params)
+    if 200 == response.status_code:
+        logger.info(f"{response.json()}")
+    else:
+        logger.error(f"{response.status_code}: {response.raw}")
+
+
+'''
+直播推流数据接口
+获取直播推流数据
+
+请求接口:/live/
+
+请求参数
+
+{
+  "url": "直播链接,支持多直播,字符串,必需参数",
+  "source": "是否返回原始数据,布尔值,可选参数,默认值: false",
+  "cookie": "抖音 cookie,字符串,可选参数",
+  "token": "自定义参数"
+}
+响应参数
+
+{
+  "data": [
+    "直播数据-1,JSON 格式",
+    "直播数据-2,JSON 格式",
+    "..."
+  ],
+  "message": "success"
+}'''
+def rq_live_url():
+    row = cookies.get_from_table()
+    params = {
+        # "url": "https://live.douyin.com/137439305439",
+        "url": "https://live.douyin.com/660292215268",
+        "source": True,
+        "cookie": row['cookies'],
+        "token": None
+    }
+    response = requests.post("http://localhost:5000/live/", json=params)
+    logger.info(f"{response.status_code}")
+    if 200 == response.status_code:
+        data = response.json()
+        logger.info(f"{data}")
+        return data
+
+def get_anchor(data:dict):
+    status_code = jsonpath.jsonpath(data, '$...status_code')
+    if status_code and status_code[0] != 0:
+        logger.info(f"响应错误:{data}")
+        return None
+    user = jsonpath.jsonpath(data, '$...user')[0]
+    logger.info(f"user {user}")
+    nickname = user.get("nickname")
+    room_view_stats = jsonpath.jsonpath(data, '$...room_view_stats')[0]
+    if not room_view_stats:
+        
+        logger.info(f"{nickname} 没有开启直播")
+        return
+    logger.info(f"room_view_stats {room_view_stats}")
+    display_value = room_view_stats.get("display_value")
+    anchor = room_view_stats.get("display_long_anchor")
+    ret = f"主播: {nickname}  |  {anchor} ({display_value})"
+    logger.info(ret)
+    return ret
+
+
+def get_live_data(url):
+    display_value, anchor = get_anchor(data.get("data")[0])
+    name = data.get("user").get("nickname")
+    ret = f"主播: {name}  |  {anchor} ({display_value})"
+
+
+from faker import Faker
+
+def main():
+    # res = cookies.get_from_table()
+    # logger.info(f"{res}")
+    rq_live_url()
+    # get_user_info()
+    # fake = Faker()
+    # name = fake.user_agent()
+    # print(name)
+
+if __name__ == "__main__":
+    main()

+ 0 - 9
main.py

@@ -26,15 +26,6 @@ def get_url_enterdata(tab:ChromiumPage, url):
     tab.listen.stop()
     return data
 
-def get_anchor(data:dict):
-    room_view_stats = data.get("room_view_stats")
-    if not room_view_stats:
-        logger.error(f"room_view_stats not fount")
-        return None, None
-    display_value = room_view_stats.get("display_value")
-    # "display_long_anchor"
-    anchor = room_view_stats.get("display_long_anchor")
-    return display_value, anchor
 
 
 @route('/')

Різницю між файлами не показано, бо вона завелика
+ 574 - 0
testm/get_live_data.py


Деякі файли не було показано, через те що забагато файлів було змінено