# drission_page.py
  1. import os
  2. import time
  3. from typing import Optional
  4. from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
  5. from pathlib import Path
  6. from config.settings import OUTPUT_DIR, WORK_DIR, BROWSER_CONFIG_DIR
  7. from utils.logu import logger
  8. from pydantic import BaseModel
  9. BROWSER_PATH=r"C:\Program Files\Google\Chrome\Application\chrome.exe"
  10. def genarate_chrome_ini(address="localhost:9321"):
  11. port = address.split(':')[1]
  12. chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
  13. chrome_options.set_address(address)
  14. chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
  15. # chrome_options.no_imgs(True).mute(True)
  16. # chrome_options.incognito(True)
  17. path = chrome_options.save(BROWSER_CONFIG_DIR / f'{port}.ini')
  18. return path
  19. class ChromeOptions(BaseModel):
  20. ini_path: Optional[str] = BROWSER_CONFIG_DIR / '9321.ini'
  21. browser_path: Optional[str] = BROWSER_PATH
  22. user_data_dir: Optional[str] = str(OUTPUT_DIR / 'user_data_dir')
  23. address: Optional[str] = "localhost:9321"
  24. headless: Optional[bool] = False
  25. proxy: Optional[str] = "localhost:8851"
  26. no_imgs: Optional[bool] = False
  27. auto_port: Optional[bool] = False
  28. save: Optional[bool] = False
  29. def load_chrome_from_ini(options:ChromeOptions):
  30. chrome_options = ChromiumOptions(ini_path=options.ini_path)
  31. if options.browser_path:
  32. chrome_options.set_browser_path(options.browser_path)
  33. if options.proxy:
  34. chrome_options.set_proxy(options.proxy)
  35. if options.user_data_dir:
  36. chrome_options.set_user_data_path(options.user_data_dir)
  37. # 如果存在代理环境变量
  38. elif 'HTTP_PROXY' in os.environ:
  39. chrome_options.set_proxy(os.environ['HTTP_PROXY'])
  40. chrome_options.auto_port(options.auto_port)
  41. chrome_options.no_imgs(options.no_imgs)
  42. chrome_options.headless(options.headless)
  43. chrome_options.set_address(options.address)
  44. if options.save:
  45. chrome_options.save(options.ini_path)
  46. logger.info(f"proxy {options.proxy}")
  47. page = ChromiumPage(chrome_options)
  48. return page
  49. def fake_ua():
  50. # 创建一个 UserAgent 对象
  51. ua = UserAgent()
  52. # 生成支持的浏览器的 User-Agent 字符串
  53. chrome_ua = ua.chrome # Chrome 浏览器
  54. firefox_ua = ua.firefox # Firefox 浏览器
  55. safari_ua = ua.safari # Safari 浏览器
  56. edge_ua = ua.edge # Chromium Edge 浏览器
  57. # 打印生成的 User-Agent 字符串
  58. print("Chrome User-Agent:", chrome_ua)
  59. print("Firefox User-Agent:", firefox_ua)
  60. print("Safari User-Agent:", safari_ua)
  61. print("Edge User-Agent:", edge_ua)
  62. return chrome_ua
  63. def load_random_ua_chrome(headless=False):
  64. chrome_options = ChromiumOptions()
  65. chrome_options.auto_port(True)
  66. chrome_options.no_imgs(False)
  67. chrome_options.set_user_agent(fake_ua())
  68. chrome_options.arguments.append("--lang=en")
  69. chrome_options.headless(headless)
  70. page = ChromiumPage(chrome_options)
  71. # page.set.auto_handle_alert(True)
  72. return page
  73. def test_random_ua_chrome():
  74. page = load_random_ua_chrome()
  75. tab = page.latest_tab
  76. keyword = "Acalypha rivularis essential oil"
  77. url = f"https://www.google.com/search?q={keyword}"
  78. # url = f"https://www.google.com/"
  79. # url = "https://bot.sannysoft.com/"
  80. tab.get(url)
  81. print(tab.url)
  82. if page.browser._chromium_options.is_headless:
  83. tab.get_screenshot('./1.png')
  84. # page.quit()
  85. def test_normal_chrome():
  86. # genarate_chrome_ini()
  87. page = load_chrome_from_ini(proxy='http://localhost:1881')
  88. tab = page.latest_tab
  89. keyword = "Acalypha rivularis essential oil"
  90. url = f"https://www.google.com/search?q={keyword}"
  91. url = "https://bot.sannysoft.com/"
  92. # recaptcha 验证码检测
  93. # url = "https://patrickhlauke.github.io/recaptcha/"
  94. tab.get(url)
  95. tab.scroll.to_bottom()
  96. # tab.get_screenshot('./1.png')
  97. # page.quit()
  98. def main():
  99. test_random_ua_chrome()
  100. # test_normal_chrome()
# Run the manual check only when this file is executed as a script.
if __name__ == "__main__":
    main()