| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- import os
- import time
- from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
- from pathlib import Path
- from config.settings import OUTPUT_DIR, WORK_DIR, CONFIG_DIR
- from mylib.random_ua import get_random_user_agent
- from mylib.logu import logger
- from fake_useragent import UserAgent
# Default Chrome executable used when generating an ini file (Windows install location).
BROWSER_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
def genarate_chrome_ini(address="localhost:9321"):
    """Write a DrissionPage ini file for a Chrome instance at ``address``.

    The port is the text after the last ``:`` in ``address``. It is used to
    name both the user-data directory (``OUTPUT_DIR / "user_data_dir_<port>"``)
    and the ini file (``CONFIG_DIR / "<port>.ini"``).

    Args:
        address: Debugging address in ``host:port`` form, for example
            ``"localhost:9321"``.

    Returns:
        The value returned by ``ChromiumOptions.save`` for the written file.

    Raises:
        ValueError: If ``address`` does not end in a numeric ``:port``.
    """
    # Split on the LAST colon: taking the piece after the first colon yields
    # the host ("//localhost") whenever the address carries a scheme.
    _, separator, port = address.rpartition(':')
    if not separator or not port.isdigit():
        raise ValueError(f"address must look like 'host:port', got {address!r}")

    chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
    chrome_options.set_address(address)
    chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
    path = chrome_options.save(CONFIG_DIR / f'{port}.ini')
    return path
def load_chrome_from_ini(path=CONFIG_DIR / '9321.ini', headless=False, proxy=None, browser_path=None, no_imgs=True, auto_port=True):
    """Build a ``ChromiumPage`` from an ini file, with optional overrides.

    Args:
        path: Ini file handed to ``ChromiumOptions(ini_path=...)``.
        headless: When truthy, headless mode is switched on. When falsy the
            setting loaded from the ini file is left as it is.
        proxy: Proxy to use. When falsy, the ``HTTP_PROXY`` environment
            variable is used instead if it is set.
        browser_path: Browser executable that overrides the one in the ini
            file when truthy.
        no_imgs: Forwarded to ``ChromiumOptions.no_imgs``.
        auto_port: Forwarded to ``ChromiumOptions.auto_port``.

    Returns:
        The ``ChromiumPage`` created from the assembled options.
    """
    chrome_options = ChromiumOptions(ini_path=path)
    if browser_path:
        chrome_options.set_browser_path(browser_path)

    # An explicit proxy wins; otherwise fall back to the HTTP_PROXY
    # environment variable when it is present.
    effective_proxy = proxy or os.environ.get('HTTP_PROXY')
    if effective_proxy is not None:
        chrome_options.set_proxy(effective_proxy)

    # Only force headless on request, so a value stored in the ini file is
    # not cleared by the default headless=False.
    if headless:
        chrome_options.headless(True)

    chrome_options.auto_port(auto_port)
    chrome_options.no_imgs(no_imgs)
    # Log the proxy that is really in effect, including the env fallback.
    logger.info(f"proxy {effective_proxy}")
    page = ChromiumPage(chrome_options)
    return page
def fake_ua():
    """Generate sample User-Agent strings, print them, and return the Chrome one.

    One ``UserAgent`` object produces a Chrome, Firefox, Safari and Edge
    string, in that order. All four are printed to stdout; only the Chrome
    string is returned.
    """
    generator = UserAgent()
    # Tuple items are evaluated left to right, so the generation order is
    # Chrome, Firefox, Safari, Edge.
    samples = (
        ("Chrome User-Agent:", generator.chrome),
        ("Firefox User-Agent:", generator.firefox),
        ("Safari User-Agent:", generator.safari),
        ("Edge User-Agent:", generator.edge),
    )
    for label, user_agent in samples:
        print(label, user_agent)
    return samples[0][1]
def load_random_ua_chrome(headless=False):
    """Create a ``ChromiumPage`` that uses a random Chrome User-Agent.

    The options enable ``auto_port``, call ``no_imgs(False)``, set the
    User-Agent returned by ``fake_ua()``, append ``--lang=en`` and apply the
    requested headless setting.

    Args:
        headless: Forwarded to ``ChromiumOptions.headless``.

    Returns:
        The ``ChromiumPage`` built from those options.
    """
    options = ChromiumOptions().auto_port(True).no_imgs(False)
    random_agent = fake_ua()
    options.set_user_agent(random_agent)
    options.arguments.append("--lang=en")
    options.headless(headless)
    return ChromiumPage(options)
def test_random_ua_chrome():
    """Manual check: open a Google search in a random-User-Agent browser.

    Prints the tab's URL after navigation and, if the browser options report
    headless mode, saves a screenshot to ``./1.png``. The browser is left
    running afterwards.
    """
    browser_page = load_random_ua_chrome()
    current_tab = browser_page.latest_tab
    search_terms = "Acalypha rivularis essential oil"
    # Other targets used during manual checks: https://www.google.com/ and
    # https://bot.sannysoft.com/
    current_tab.get(f"https://www.google.com/search?q={search_terms}")
    print(current_tab.url)
    # NOTE(review): this reads a private attribute of the browser object;
    # confirm it still exists when upgrading DrissionPage.
    headless_mode = browser_page.browser._chromium_options.is_headless
    if headless_mode:
        current_tab.get_screenshot('./1.png')
def test_normal_chrome():
    """Manual check: load the ini-configured browser via a local proxy.

    Opens https://bot.sannysoft.com/ through the proxy at
    ``http://localhost:1881`` and scrolls to the bottom of the page. The
    browser is left running afterwards.
    """
    # Run genarate_chrome_ini() first if the default ini file does not exist.
    page = load_chrome_from_ini(proxy='http://localhost:1881')
    tab = page.latest_tab
    # The Google search URL that used to be built here was overwritten before
    # use, so that dead assignment and its keyword were removed.
    # Alternative target for reCAPTCHA detection checks:
    # https://patrickhlauke.github.io/recaptcha/
    url = "https://bot.sannysoft.com/"
    tab.get(url)
    tab.scroll.to_bottom()
def main():
    """Script entry point: run the random-User-Agent browser check."""
    # Swap in test_normal_chrome() here to exercise the ini/proxy path.
    test_random_ua_chrome()
-
# Run the manual check only when this file is executed as a script.
if __name__ == "__main__":
    main()
|