|
|
@@ -0,0 +1,42 @@
|
|
|
+import time
|
|
|
+import re
|
|
|
+import math
|
|
|
+from DrissionPage import ChromiumPage
|
|
|
+from DrissionPage.easy_set import set_paths
|
|
|
+from DrissionPage import ChromiumOptions
|
|
|
+from DrissionPage.easy_set import set_headless, set_paths
|
|
|
+from requests_html import HTML
|
|
|
+
|
|
|
+# Module-level browser configuration; these calls run when the module is imported.
+# NOTE(review): presumably turns on headless mode in DrissionPage's settings - confirm.
+set_headless(True)
|
|
|
+
|
|
|
+# Chromium launch options with two extra command-line switches.
+# NOTE(review): in the visible code `co` is never passed to ChromiumPage() - confirm
+# whether start_test_spider is meant to use it.
+co = ChromiumOptions()
|
|
|
+co.set_argument('--incognito')
|
|
|
+co.set_argument('--no-sandbox')
|
|
|
+# Browser executable location (Linux install path).
+set_paths(browser_path=r'/opt/google/chrome/google-chrome')
|
|
|
+# Windows alternative for the browser path, kept for reference:
+#set_paths(browser_path=r'C:/Users/AAA/AppData/Local/Google/Chrome/Application/chrome.exe')
|
|
|
+
|
|
|
+
|
|
|
+def analyze_live_adress(html):
+    """Extract the link target of the first anchor in the user-detail section.
+
+    Args:
+        html: HTML markup of the rendered page.
+
+    Returns:
+        The ``href`` attribute of the first ``<a>`` inside
+        ``div[data-e2e="user-detail"]``, or ``None`` when no such anchor
+        exists or the anchor has no ``href`` attribute.
+    """
+    document = HTML(html=html)
+    # find(..., first=True) gives None when nothing matches, so check before
+    # touching .attrs instead of failing with an AttributeError.
+    anchor = document.find('div[data-e2e="user-detail"] a', first=True)
+    if anchor is None:
+        return None
+    # .get() avoids a KeyError for an anchor without an href.
+    return anchor.attrs.get('href')
|
|
|
+
|
|
|
+def start_test_spider(input_url):
+    """Load ``input_url`` in a browser page and return the link found on it.
+
+    Args:
+        input_url: URL of the page to open.
+
+    Returns:
+        Whatever ``analyze_live_adress`` extracts from the loaded page's HTML.
+    """
+    # The page object is created with default arguments; the module-level
+    # ``co`` options object is not passed in.
+    # NOTE(review): confirm whether that is intended.
+    page = ChromiumPage()
+    try:
+        # Navigate to the requested page.
+        page.get(input_url)
+        print("page.html ok")
+        return analyze_live_adress(page.html)
+    finally:
+        # Close the tab even when loading or parsing raises, so a failed run
+        # does not leave it open.
+        page.close_tabs()
|
|
|
+
|
|
|
+def main():
+    """Run the spider once against a hard-coded Douyin user page."""
+    profile_url = 'https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb'
+    # The address returned by the spider is not used here.
+    start_test_spider(profile_url)
|
|
|
+
|
|
|
+
|
|
|
+# Run the spider only when this file is executed as a script.
+if __name__ == "__main__":
|
|
|
+ main()
|