# drission_page.py (3.3 KB)
import os
import time
from pathlib import Path
from urllib.parse import quote_plus

from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
from fake_useragent import UserAgent

from config.settings import OUTPUT_DIR, WORK_DIR, CONFIG_DIR
from mylib.logu import logger
from mylib.random_ua import get_random_user_agent
  9. BROWSER_PATH=r"C:\Program Files\Google\Chrome\Application\chrome.exe"
  10. def genarate_chrome_ini(address="localhost:9321"):
  11. port = address.split(':')[1]
  12. chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
  13. chrome_options.set_address(address)
  14. chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
  15. # chrome_options.no_imgs(True).mute(True)
  16. # chrome_options.incognito(True)
  17. path = chrome_options.save(CONFIG_DIR / f'{port}.ini')
  18. return path
  19. def load_chrome_from_ini(path=CONFIG_DIR / '9321.ini', headless=False, proxy=None, browser_path=None, no_imgs=True, auto_port=True):
  20. chrome_options = ChromiumOptions(ini_path=path)
  21. if browser_path:
  22. chrome_options.set_browser_path(browser_path)
  23. if proxy:
  24. chrome_options.set_proxy(proxy)
  25. # 如果存在代理环境变量
  26. elif 'HTTP_PROXY' in os.environ:
  27. chrome_options.set_proxy(os.environ['HTTP_PROXY'])
  28. chrome_options.auto_port(auto_port)
  29. chrome_options.no_imgs(no_imgs)
  30. logger.info(f"proxy {proxy}")
  31. page = ChromiumPage(chrome_options)
  32. return page
  33. def fake_ua():
  34. # 创建一个 UserAgent 对象
  35. ua = UserAgent()
  36. # 生成支持的浏览器的 User-Agent 字符串
  37. chrome_ua = ua.chrome # Chrome 浏览器
  38. firefox_ua = ua.firefox # Firefox 浏览器
  39. safari_ua = ua.safari # Safari 浏览器
  40. edge_ua = ua.edge # Chromium Edge 浏览器
  41. # 打印生成的 User-Agent 字符串
  42. print("Chrome User-Agent:", chrome_ua)
  43. print("Firefox User-Agent:", firefox_ua)
  44. print("Safari User-Agent:", safari_ua)
  45. print("Edge User-Agent:", edge_ua)
  46. return chrome_ua
  47. def load_random_ua_chrome(headless=False):
  48. chrome_options = ChromiumOptions()
  49. chrome_options.auto_port(True)
  50. chrome_options.no_imgs(False)
  51. chrome_options.set_user_agent(fake_ua())
  52. chrome_options.arguments.append("--lang=en")
  53. chrome_options.headless(headless)
  54. page = ChromiumPage(chrome_options)
  55. # page.set.auto_handle_alert(True)
  56. return page
  57. def test_random_ua_chrome():
  58. page = load_random_ua_chrome()
  59. tab = page.latest_tab
  60. keyword = "Acalypha rivularis essential oil"
  61. url = f"https://www.google.com/search?q={keyword}"
  62. # url = f"https://www.google.com/"
  63. # url = "https://bot.sannysoft.com/"
  64. tab.get(url)
  65. print(tab.url)
  66. if page.browser._chromium_options.is_headless:
  67. tab.get_screenshot('./1.png')
  68. # page.quit()
  69. def test_normal_chrome():
  70. # genarate_chrome_ini()
  71. page = load_chrome_from_ini(proxy='http://localhost:1881')
  72. tab = page.latest_tab
  73. keyword = "Acalypha rivularis essential oil"
  74. url = f"https://www.google.com/search?q={keyword}"
  75. url = "https://bot.sannysoft.com/"
  76. # recaptcha 验证码检测
  77. # url = "https://patrickhlauke.github.io/recaptcha/"
  78. tab.get(url)
  79. tab.scroll.to_bottom()
  80. # tab.get_screenshot('./1.png')
  81. # page.quit()
  82. def main():
  83. test_random_ua_chrome()
  84. # test_normal_chrome()
  85. if __name__ == "__main__":
  86. main()