| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- import os
- import time
- from typing import Optional
- from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
- from pathlib import Path
- from config.settings import OUTPUT_DIR, WORK_DIR, BROWSER_CONFIG_DIR
- from utils.logu import logger
- from pydantic import BaseModel
# Chrome executable (Windows path) passed to ChromiumOptions.set_browser_path
# when an ini profile is generated.
BROWSER_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
def genarate_chrome_ini(address="localhost:9321"):
    """Write a DrissionPage ini profile for the browser at *address*.

    The second ``:``-separated piece of *address* is treated as the port. It
    names both the user-data directory (``OUTPUT_DIR/user_data_dir_<port>``)
    and the ini file (``BROWSER_CONFIG_DIR/<port>.ini``).

    Args:
        address: ``host:port`` string the browser's debugging address is set to.

    Returns:
        Whatever ``ChromiumOptions.save`` returns for the written ini file.
    """
    port = address.split(':')[1]
    profile_dir = OUTPUT_DIR / f'user_data_dir_{port}'
    ini_file = BROWSER_CONFIG_DIR / f'{port}.ini'

    opts = ChromiumOptions().set_browser_path(BROWSER_PATH)
    opts.set_address(address)
    opts.set_user_data_path(str(profile_dir))
    # Optional tweaks the author left disabled:
    #   opts.no_imgs(True).mute(True)
    #   opts.incognito(True)
    return opts.save(ini_file)
class ChromeOptions(BaseModel):
    """Per-call browser settings read by ``load_chrome_from_ini``.

    Every field is optional; the ini file named by ``ini_path`` supplies the
    baseline configuration and the other fields act as overrides.
    """

    # Ini file to load. Stored as a string so the default matches the
    # annotation (the model does not coerce a non-str default by itself).
    ini_path: Optional[str] = str(BROWSER_CONFIG_DIR / '9321.ini')
    # Browser executable path override.
    browser_path: Optional[str] = None
    # User-data (profile) directory override.
    user_data_dir: Optional[str] = None
    # Browser debugging address override.
    address: Optional[str] = None
    # Whether to run the browser headless.
    headless: Optional[bool] = False
    # Proxy passed to ChromiumOptions.set_proxy.
    proxy: Optional[str] = None
    # Whether to disable image loading.
    no_imgs: Optional[bool] = False
    # Whether to enable ChromiumOptions.auto_port.
    auto_port: Optional[bool] = False
    # Whether to write the resulting options back to ``ini_path``.
    save: Optional[bool] = False
def load_chrome_from_ini(options: ChromeOptions):
    """Create a ``ChromiumPage`` from an ini file plus the overrides in *options*.

    Args:
        options: Settings to apply on top of the ini file at
            ``options.ini_path``. Only truthy fields are applied.

    Returns:
        The ``ChromiumPage`` built from the resulting ``ChromiumOptions``.

    Notes:
        * An explicit ``options.proxy`` takes precedence; the ``HTTP_PROXY``
          environment variable is used only when no proxy was passed.
        * Headless mode is controlled by ``options.headless`` (it used to be
          gated on ``options.address`` by mistake).
        * The proxy that is logged is the one actually applied.
    """
    chrome_options = ChromiumOptions(ini_path=options.ini_path)
    if options.browser_path:
        chrome_options.set_browser_path(options.browser_path)
    if options.user_data_dir:
        chrome_options.set_user_data_path(options.user_data_dir)

    # Fall back to the proxy environment variable only if none was given.
    proxy = options.proxy or os.environ.get('HTTP_PROXY')
    if proxy:
        chrome_options.set_proxy(proxy)

    if options.auto_port:
        chrome_options.auto_port(options.auto_port)
    if options.no_imgs:
        chrome_options.no_imgs(options.no_imgs)
    if options.headless:
        chrome_options.headless(options.headless)
    if options.address:
        chrome_options.set_address(options.address)
    if options.save:
        chrome_options.save(options.ini_path)

    logger.info(f"proxy {proxy}")
    return ChromiumPage(chrome_options)
def fake_ua():
    """Generate sample User-Agent strings and return the Chrome one.

    Chrome, Firefox, Safari and Edge User-Agent strings are generated and
    printed to stdout; only the Chrome string is returned.

    Returns:
        The generated Chrome User-Agent string.
    """
    # The file's top-level imports never bring ``UserAgent`` into scope, so
    # calling this function raised NameError. Import it here.
    # NOTE(review): assumed to be the ``fake_useragent`` package, based on the
    # .chrome/.firefox/.safari/.edge attributes used below -- confirm it is
    # installed in this environment.
    from fake_useragent import UserAgent

    # Create the User-Agent generator.
    ua = UserAgent()
    # Generate a User-Agent string for each supported browser.
    chrome_ua = ua.chrome    # Chrome
    firefox_ua = ua.firefox  # Firefox
    safari_ua = ua.safari    # Safari
    edge_ua = ua.edge        # Chromium Edge
    # Print the generated User-Agent strings.
    print("Chrome User-Agent:", chrome_ua)
    print("Firefox User-Agent:", firefox_ua)
    print("Safari User-Agent:", safari_ua)
    print("Edge User-Agent:", edge_ua)
    return chrome_ua
def load_random_ua_chrome(headless=False):
    """Open a browser whose User-Agent comes from ``fake_ua``.

    ``auto_port`` is turned on, image loading is left enabled, ``--lang=en``
    is appended to the launch arguments, and *headless* is forwarded to
    ``ChromiumOptions.headless``.

    Args:
        headless: Value passed to ``ChromiumOptions.headless``.

    Returns:
        The ``ChromiumPage`` created from these options.
    """
    opts = ChromiumOptions()
    opts.auto_port(True)
    opts.no_imgs(False)
    user_agent = fake_ua()
    opts.set_user_agent(user_agent)
    opts.arguments.append("--lang=en")
    opts.headless(headless)
    # page.set.auto_handle_alert(True) was left disabled by the author.
    return ChromiumPage(opts)
def test_random_ua_chrome():
    """Manual check: run a Google search in a random-User-Agent browser.

    Prints the tab's URL after navigation. If the browser options report
    headless mode, a screenshot is saved to ``./1.png``. The browser is not
    closed afterwards (``page.quit()`` is intentionally not called).
    """
    page = load_random_ua_chrome()
    tab = page.latest_tab
    keyword = "Acalypha rivularis essential oil"
    # Alternative targets left disabled by the author:
    #   "https://www.google.com/"
    #   "https://bot.sannysoft.com/"
    tab.get(f"https://www.google.com/search?q={keyword}")
    print(tab.url)
    running_headless = page.browser._chromium_options.is_headless
    if running_headless:
        tab.get_screenshot('./1.png')
def test_normal_chrome():
    """Manual check: open the bot-detection page through a local proxy.

    Loads the browser via ``load_chrome_from_ini`` with the proxy
    ``http://localhost:1881``, navigates to the test page and scrolls to the
    bottom. The browser is not closed afterwards.
    """
    # genarate_chrome_ini() can be run beforehand to create the ini file.
    # load_chrome_from_ini takes a single ChromeOptions object; passing
    # ``proxy=...`` directly raised TypeError.
    page = load_chrome_from_ini(ChromeOptions(proxy='http://localhost:1881'))
    tab = page.latest_tab
    # Alternative targets left disabled by the author:
    #   "https://www.google.com/search?q=Acalypha rivularis essential oil"
    #   "https://patrickhlauke.github.io/recaptcha/"  (reCAPTCHA detection)
    url = "https://bot.sannysoft.com/"
    tab.get(url)
    tab.scroll.to_bottom()
    # tab.get_screenshot('./1.png') and page.quit() were left disabled.
def main():
    """Script entry point: run the random User-Agent browser check."""
    # test_normal_chrome() is the alternative manual check, currently disabled.
    test_random_ua_chrome()


if __name__ == "__main__":
    main()
|