"""Helpers for launching Chrome through DrissionPage with saved or random settings."""
import os
import time
from pathlib import Path
from typing import Optional
from urllib.parse import quote_plus

from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
from pydantic import BaseModel

from config.settings import OUTPUT_DIR, WORK_DIR, BROWSER_CONFIG_DIR
from utils.logu import logger

BROWSER_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"


def genarate_chrome_ini(address="localhost:9321"):
    """Write a DrissionPage ``.ini`` file for a browser at ``address``.

    The port (the text after the last ``:`` in ``address``) names both the
    user-data directory under ``OUTPUT_DIR`` and the ini file under
    ``BROWSER_CONFIG_DIR``.

    Args:
        address: Debugging address in ``host:port`` form.

    Returns:
        Whatever ``ChromiumOptions.save`` returns for the written ini file.

    Raises:
        ValueError: If ``address`` does not contain a ``:port`` suffix.
    """
    # rpartition keeps working when the host part itself contains ':'
    # (for example 'http://localhost:9321').
    _, sep, port = address.rpartition(':')
    if not sep or not port:
        raise ValueError(f"address must look like 'host:port', got {address!r}")

    chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
    chrome_options.set_address(address)
    chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
    return chrome_options.save(BROWSER_CONFIG_DIR / f'{port}.ini')


# Correctly spelled alias; the original name is kept for existing callers.
generate_chrome_ini = genarate_chrome_ini


class ChromeOptions(BaseModel):
    """Settings consumed by ``load_chrome_from_ini``."""

    # Path of the ini file to load (and to overwrite when ``save`` is true).
    # Stored as ``str`` so the default matches the declared type.
    ini_path: Optional[str] = str(BROWSER_CONFIG_DIR / '9321.ini')
    # Browser executable; skipped when empty/None.
    browser_path: Optional[str] = BROWSER_PATH
    # Browser profile directory; skipped when empty/None.
    user_data_dir: Optional[str] = str(OUTPUT_DIR / 'user_data_dir')
    # Debugging address passed to ``ChromiumOptions.set_address``.
    address: Optional[str] = "localhost:9321"
    headless: Optional[bool] = False
    # Explicit proxy; when empty/None the HTTP_PROXY environment variable is used.
    proxy: Optional[str] = "localhost:8851"
    no_imgs: Optional[bool] = False
    auto_port: Optional[bool] = False
    # When true, the resolved options are written back to ``ini_path``.
    save: Optional[bool] = False


def load_chrome_from_ini(options: ChromeOptions):
    """Build a ``ChromiumPage`` from an ini file plus the overrides in ``options``.

    Args:
        options: Settings to apply on top of the ini file at ``options.ini_path``.

    Returns:
        The ``ChromiumPage`` created from the resolved options.
    """
    chrome_options = ChromiumOptions(ini_path=options.ini_path)
    if options.browser_path:
        chrome_options.set_browser_path(options.browser_path)
    if options.user_data_dir:
        chrome_options.set_user_data_path(options.user_data_dir)

    # An explicit proxy wins; otherwise fall back to the HTTP_PROXY environment
    # variable. (The fallback used to hang off the user_data_dir check, so it
    # could override an explicit proxy or be skipped entirely.)
    proxy = options.proxy or os.environ.get('HTTP_PROXY')
    if proxy:
        chrome_options.set_proxy(proxy)

    chrome_options.auto_port(options.auto_port)
    chrome_options.no_imgs(options.no_imgs)
    chrome_options.headless(options.headless)
    chrome_options.set_address(options.address)
    if options.save:
        chrome_options.save(options.ini_path)

    logger.info(f"proxy {proxy}")
    return ChromiumPage(chrome_options)


def fake_ua():
    """Print one generated User-Agent per browser family and return the Chrome one."""
    # NOTE(review): `UserAgent` is not imported anywhere in this file, so this
    # call raises NameError as written. It is presumably
    # `fake_useragent.UserAgent` -- confirm and add the import.
    ua = UserAgent()

    chrome_ua = ua.chrome    # Chrome
    firefox_ua = ua.firefox  # Firefox
    safari_ua = ua.safari    # Safari
    edge_ua = ua.edge        # Chromium Edge

    print("Chrome User-Agent:", chrome_ua)
    print("Firefox User-Agent:", firefox_ua)
    print("Safari User-Agent:", safari_ua)
    print("Edge User-Agent:", edge_ua)
    return chrome_ua


def load_random_ua_chrome(headless=False):
    """Start a ``ChromiumPage`` on an automatic port with the User-Agent from ``fake_ua``.

    Args:
        headless: Passed to ``ChromiumOptions.headless``.

    Returns:
        The created ``ChromiumPage``.
    """
    chrome_options = ChromiumOptions()
    chrome_options.auto_port(True)
    chrome_options.no_imgs(False)
    chrome_options.set_user_agent(fake_ua())
    chrome_options.arguments.append("--lang=en")
    chrome_options.headless(headless)
    return ChromiumPage(chrome_options)


def test_random_ua_chrome():
    """Manual check: open a Google search in a random-UA browser and print the URL."""
    page = load_random_ua_chrome()
    tab = page.latest_tab
    keyword = "Acalypha rivularis essential oil"
    # The keyword contains spaces, so encode it before putting it in the query string.
    url = f"https://www.google.com/search?q={quote_plus(keyword)}"
    # Other pages used for manual checks:
    #   https://www.google.com/
    #   https://bot.sannysoft.com/
    tab.get(url)
    print(tab.url)
    # Only take a screenshot when the browser was started headless.
    if page.browser._chromium_options.is_headless:
        tab.get_screenshot('./1.png')


def test_normal_chrome():
    """Manual check: open the bot-detection page in an ini-configured browser."""
    # load_chrome_from_ini takes a ChromeOptions instance, not keyword arguments.
    page = load_chrome_from_ini(ChromeOptions(proxy='http://localhost:1881'))
    tab = page.latest_tab
    url = "https://bot.sannysoft.com/"
    # reCAPTCHA detection test page:
    #   https://patrickhlauke.github.io/recaptcha/
    tab.get(url)
    tab.scroll.to_bottom()


def main():
    """Run the random User-Agent manual check."""
    test_random_ua_chrome()


if __name__ == "__main__":
    main()