drission_page.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. import os
  2. import time
  3. from typing import Optional
  4. from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
  5. from pathlib import Path
  6. from config.settings import OUTPUT_DIR, WORK_DIR, BROWSER_CONFIG_DIR
  7. from utils.logu import logger
  8. from pydantic import BaseModel
  # Chrome executable handed to ChromiumOptions.set_browser_path when an ini
  # file is generated (a Windows install location).
  BROWSER_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
  def genarate_chrome_ini(address: str = "localhost:9321"):
      """Write a ChromiumOptions ini file for the browser at *address*.

      The options use the module-level ``BROWSER_PATH`` executable, the given
      address, and a per-port user-data directory
      ``OUTPUT_DIR / 'user_data_dir_<port>'``. They are saved to
      ``BROWSER_CONFIG_DIR / '<port>.ini'``.

      Args:
          address: Browser address in ``host:port`` form.

      Returns:
          Whatever ``ChromiumOptions.save`` returns (presumably the path of
          the written ini file -- confirm against the DrissionPage docs).
      """
      # Take the text after the LAST colon so "http://host:9321" or
      # "[::1]:9321" still yield "9321". split(':')[1] picked the wrong piece
      # for those and raised IndexError when the address had no colon at all.
      port = address.rpartition(':')[2]
      chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
      chrome_options.set_address(address)
      chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
      # Optional tweaks, currently disabled:
      # chrome_options.no_imgs(True).mute(True)
      # chrome_options.incognito(True)
      return chrome_options.save(BROWSER_CONFIG_DIR / f'{port}.ini')
  class ChromeOptions(BaseModel):
      """Per-call browser settings read by ``load_chrome_from_ini``.

      Each field left at its falsy default is skipped by the loader, so the
      value from the ini file stays in effect.
      """

      # Ini file the options are loaded from (and saved back to when ``save``
      # is true). Wrapped in str() so the default matches the declared type:
      # ``BROWSER_CONFIG_DIR / '9321.ini'`` is presumably a pathlib.Path, and
      # pydantic does not validate default values unless told to.
      ini_path: Optional[str] = str(BROWSER_CONFIG_DIR / '9321.ini')
      # Forwarded to ChromiumOptions.set_browser_path.
      browser_path: Optional[str] = None
      # Forwarded to ChromiumOptions.set_user_data_path.
      user_data_dir: Optional[str] = None
      # Forwarded to ChromiumOptions.set_address.
      address: Optional[str] = None
      # Forwarded to ChromiumOptions.headless.
      headless: Optional[bool] = False
      # Forwarded to ChromiumOptions.set_proxy.
      proxy: Optional[str] = None
      # Forwarded to ChromiumOptions.no_imgs.
      no_imgs: Optional[bool] = False
      # Forwarded to ChromiumOptions.auto_port.
      auto_port: Optional[bool] = False
      # When true, the resulting options are written back to ``ini_path``.
      save: Optional[bool] = False
  def load_chrome_from_ini(options: ChromeOptions) -> ChromiumPage:
      """Create a ChromiumPage from an ini file plus per-call overrides.

      The ini file named by ``options.ini_path`` is loaded first; every truthy
      field of *options* then overrides the corresponding setting.

      Proxy resolution: ``options.proxy`` wins; if it is empty, the
      ``HTTP_PROXY`` environment variable is used when set and non-empty.
      The proxy that was actually chosen is the one written to the log.

      Args:
          options: Settings to apply on top of the ini file.

      Returns:
          The ChromiumPage built from the resulting options.
      """
      chrome_options = ChromiumOptions(ini_path=options.ini_path)
      if options.browser_path:
          chrome_options.set_browser_path(options.browser_path)

      # The environment fallback used to be an ``elif`` on the user_data_dir
      # check, so it overrode an explicit proxy and was skipped whenever a
      # user-data dir was given. Resolve the proxy in one place instead.
      proxy = options.proxy or os.environ.get('HTTP_PROXY')
      if proxy:
          chrome_options.set_proxy(proxy)

      if options.user_data_dir:
          chrome_options.set_user_data_path(options.user_data_dir)
      if options.auto_port:
          chrome_options.auto_port(options.auto_port)
      if options.no_imgs:
          chrome_options.no_imgs(options.no_imgs)
      # Was guarded by ``options.address`` (copy-paste slip), which ignored
      # the headless flag unless an address was also supplied.
      if options.headless:
          chrome_options.headless(options.headless)
      if options.address:
          chrome_options.set_address(options.address)
      if options.save:
          chrome_options.save(options.ini_path)

      logger.info(f"proxy {proxy}")
      return ChromiumPage(chrome_options)
  def fake_ua():
      """Print one User-Agent string per browser family; return the Chrome one.

      NOTE(review): ``UserAgent`` is not imported anywhere in the visible
      file, so this call raises NameError as written. Presumably it is
      ``fake_useragent.UserAgent`` -- confirm and add the import.
      """
      generator = UserAgent()
      # Read all four attributes up front, then report them in the same order.
      labelled = (
          ("Chrome User-Agent:", generator.chrome),
          ("Firefox User-Agent:", generator.firefox),
          ("Safari User-Agent:", generator.safari),
          ("Edge User-Agent:", generator.edge),
      )
      for label, agent in labelled:
          print(label, agent)
      # The first entry is the Chrome string.
      return labelled[0][1]
  def load_random_ua_chrome(headless=False):
      """Open a ChromiumPage whose User-Agent comes from ``fake_ua()``.

      The options are built from a default ``ChromiumOptions()`` with
      ``auto_port(True)``, ``no_imgs(False)``, the generated User-Agent, the
      ``--lang=en`` argument, and the requested headless flag.

      Args:
          headless: Value passed to ``ChromiumOptions.headless``.

      Returns:
          The ChromiumPage created from those options.
      """
      opts = ChromiumOptions()
      opts.auto_port(True)
      opts.no_imgs(False)
      user_agent = fake_ua()
      opts.set_user_agent(user_agent)
      opts.arguments.append("--lang=en")
      opts.headless(headless)
      # page.set.auto_handle_alert(True)
      return ChromiumPage(opts)
  def test_random_ua_chrome():
      """Open a Google search in a random-UA browser and print the final URL.

      When the browser options report headless mode, a screenshot is saved
      to ``./1.png``. The browser is left running (``page.quit()`` is
      disabled).
      """
      # Function-scope import keeps this change self-contained.
      from urllib.parse import quote_plus

      page = load_random_ua_chrome()
      tab = page.latest_tab
      keyword = "Acalypha rivularis essential oil"
      # Percent-encode the keyword: raw spaces (or '&', '#', '+') in the
      # query string would otherwise produce a malformed or truncated URL.
      url = f"https://www.google.com/search?q={quote_plus(keyword)}"
      # Alternative targets:
      # url = f"https://www.google.com/"
      # url = "https://bot.sannysoft.com/"
      tab.get(url)
      print(tab.url)
      # NOTE(review): reads a private attribute of the browser object.
      if page.browser._chromium_options.is_headless:
          tab.get_screenshot('./1.png')
      # page.quit()
  def test_normal_chrome():
      """Open a page in the ini-configured browser via a local proxy.

      Loads the browser with ``load_chrome_from_ini``, navigates to
      ``https://bot.sannysoft.com/`` and scrolls to the bottom. The browser
      is left running (``page.quit()`` is disabled).
      """
      # genarate_chrome_ini()
      # load_chrome_from_ini takes a single ChromeOptions argument; passing
      # ``proxy=...`` directly raised TypeError.
      page = load_chrome_from_ini(ChromeOptions(proxy='http://localhost:1881'))
      tab = page.latest_tab
      # Alternative target (was assigned and then immediately overwritten):
      # keyword = "Acalypha rivularis essential oil"
      # url = f"https://www.google.com/search?q={keyword}"
      url = "https://bot.sannysoft.com/"
      # reCAPTCHA detection page
      # url = "https://patrickhlauke.github.io/recaptcha/"
      tab.get(url)
      tab.scroll.to_bottom()
      # tab.get_screenshot('./1.png')
      # page.quit()
  def main() -> None:
      """Script entry point: run the random-User-Agent browser check."""
      test_random_ua_chrome()
      # test_normal_chrome()


  # Run only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()