Browse Source

first commit

mrh 2 years ago
commit
0beaa7f842
5 changed files with 72 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 42 0
      detect_live.py
  3. 20 0
      readme.md
  4. 3 0
      requirements.txt
  5. 6 0
      xpath_test.py

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+output

+ 42 - 0
detect_live.py

@@ -0,0 +1,42 @@
+import time
+import re
+import math
+from DrissionPage import ChromiumPage
+from DrissionPage.easy_set import set_paths
+from DrissionPage import ChromiumOptions
+from DrissionPage.easy_set import set_headless, set_paths
+from requests_html import HTML
+
+set_headless(True)  # configure DrissionPage to launch Chrome headless
+
+co = ChromiumOptions()  # NOTE(review): `co` is never referenced again in this file - confirm these flags actually reach the browser
+co.set_argument('--incognito')
+co.set_argument('--no-sandbox')
+set_paths(browser_path=r'/opt/google/chrome/google-chrome')  # Chrome binary location on Linux
+# Windows alternative: set_paths(browser_path=r'C:/Users/AAA/AppData/Local/Google/Chrome/Application/chrome.exe')
+
+
+def analyze_live_adress(html):
+    """Extract the href of the first link inside the user-detail block.
+
+    Args:
+        html: Raw HTML text of the page to inspect.
+
+    Returns:
+        The ``href`` attribute of the first ``<a>`` found under
+        ``div[data-e2e="user-detail"]``, or ``None`` when the page contains
+        no such link or the link has no ``href`` attribute.
+    """
+    # To debug the selector offline, save the page HTML to dy.html and run
+    # xpath_test.py against it.
+    doc = HTML(html=html)
+    anchor = doc.find('div[data-e2e="user-detail"] a', first=True)
+    # find(..., first=True) yields None when nothing matches; guard so a page
+    # without the link produces None instead of an AttributeError.
+    if anchor is None:
+        return None
+    return anchor.attrs.get('href')
+
+def start_test_spider(input_url):
+    """Load ``input_url`` in Chrome and return the link extracted from it.
+
+    Args:
+        input_url: URL of the page to open.
+
+    Returns:
+        Whatever ``analyze_live_adress`` extracts from the loaded page's HTML.
+    """
+    # Create the page object in "d" mode (the default mode).
+    page = ChromiumPage()
+    try:
+        # Navigate to the requested page.
+        page.get(input_url)
+        print("page.html ok")
+        return analyze_live_adress(page.html)
+    finally:
+        # Always close the tab, even when loading or parsing raised, so a
+        # failed run does not leave a browser behind.
+        page.close_tabs()
+
+def main():
+    """Run the spider once against a fixed Douyin user page URL."""
+    profile_url = 'https://www.douyin.com/user/MS4wLjABAAAAntqRkNukyySSAR-L2F21LOVViboaWRtDkyPFnCz-UnXIwofkb7zr4GFa3YETH1hb'
+    start_test_spider(profile_url)
+    
+
+if __name__ == "__main__":
+    main()

+ 20 - 0
readme.md

@@ -0,0 +1,20 @@
+# 介绍
+DrissionPage 安装与导入文档：http://g1879.gitee.io/drissionpagedocs/get_start/installation_and_import/#_2
+
+```shell
+conda create -p env --file requirements.txt
+
+pip install DrissionPage
+```
+
+
+## 安装 Linux Chrome
+https://blog.csdn.net/sinat_39327967/article/details/132181129?spm=1001.2014.3001.5501
+
+```shell
+sudo apt-get install -f
+wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+sudo dpkg -i google-chrome-stable_current_amd64.deb
+ls /opt/google/chrome/google-chrome
+```
+

+ 3 - 0
requirements.txt

@@ -0,0 +1,3 @@
+python_version>='3.10'
+DrissionPage==3.2.34
+requests-html==0.10.0

+ 6 - 0
xpath_test.py

@@ -0,0 +1,6 @@
+from requests_html import HTML
+# Offline selector check: parse a saved page (dy.html) and print the href of
+# the first link inside the user-detail block.
+# Use a context manager so the file handle is closed deterministically.
+with open('dy.html') as f:
+    doc = f.read()
+html = HTML(html=doc)
+links = html.find('div[data-e2e="user-detail"] a', first=True).attrs['href']
+
+print(links)