| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
import time
from pathlib import Path
from urllib.parse import quote_plus

from DrissionPage import Chromium, ChromiumOptions, ChromiumPage
from fake_useragent import UserAgent

from config.settings import OUTPUT_DIR, WORK_DIR, CONFIG_DIR
from mylib.random_ua import get_random_user_agent
# Absolute path of the Chrome executable handed to ChromiumOptions.set_browser_path.
BROWSER_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
def genarate_chrome_ini(address="localhost:9321"):
    """Build browser options for *address* and save them to ``<port>.ini``.

    The options point at ``BROWSER_PATH``, use a per-port user-data directory
    under ``OUTPUT_DIR``, and have ``no_imgs(True)`` and ``mute(True)`` applied.

    Args:
        address: Debugging address whose last ``:``-separated segment is the
            port, e.g. ``"localhost:9321"`` or ``"http://127.0.0.1:9321"``.

    Returns:
        The value returned by ``ChromiumOptions.save`` for
        ``CONFIG_DIR / '<port>.ini'``.

    Raises:
        ValueError: If *address* does not end in ``:<digits>``.
    """
    # Take the text after the LAST colon: splitting on the first colon picks
    # the wrong segment for addresses with a scheme ("http://host:port") or
    # IPv6 hosts, and fails with a bare IndexError when no port is present.
    _, separator, port = address.rpartition(':')
    if not separator or not port.isdigit():
        raise ValueError(f"address must end with ':<port>', got {address!r}")

    chrome_options = ChromiumOptions().set_browser_path(BROWSER_PATH)
    chrome_options.set_address(address)
    chrome_options.set_user_data_path(str(OUTPUT_DIR / f'user_data_dir_{port}'))
    chrome_options.no_imgs(True).mute(True)
    # chrome_options.incognito(True)
    return chrome_options.save(CONFIG_DIR / f'{port}.ini')
def load_chrome_from_ini(path=CONFIG_DIR / '9321.ini', headless=False):
    """Create a ChromiumPage from the options stored in an ini file.

    Args:
        path: Location of the ini file passed to ``ChromiumOptions``.
        headless: When true, headless mode is switched on in addition to
            whatever the ini file configures. The default ``False`` leaves
            the ini settings untouched.

    Returns:
        The ``ChromiumPage`` built from those options.
    """
    chrome_options = ChromiumOptions(ini_path=path)
    # The parameter used to be accepted and silently ignored. Only act on an
    # explicit True so callers relying on the ini file see no change.
    if headless:
        chrome_options.headless(True)
    return ChromiumPage(chrome_options)
def fake_ua():
    """Print one generated User-Agent per browser family and return the Chrome one.

    A single ``UserAgent`` object supplies the Chrome, Firefox, Safari and
    Edge strings; all four are printed, only the Chrome string is returned.
    """
    generator = UserAgent()

    # Read the four attributes up front, in the same order they are printed.
    chrome_ua = generator.chrome
    report = (
        ("Chrome User-Agent:", chrome_ua),
        ("Firefox User-Agent:", generator.firefox),
        ("Safari User-Agent:", generator.safari),
        ("Edge User-Agent:", generator.edge),
    )

    for label, value in report:
        print(label, value)
    return chrome_ua
def load_random_ua_chrome(headless=False):
    """Create a ChromiumPage whose User-Agent comes from ``fake_ua()``.

    The options are built with ``auto_port(True)``, ``no_imgs(False)``, the
    generated User-Agent, an extra ``--lang=en`` argument, and the requested
    headless setting.

    Args:
        headless: Value forwarded to ``ChromiumOptions.headless``.

    Returns:
        The ``ChromiumPage`` created from those options.
    """
    options = ChromiumOptions()
    options.auto_port(True)
    options.no_imgs(False)

    user_agent = fake_ua()
    options.set_user_agent(user_agent)

    options.arguments.append("--lang=en")
    options.headless(headless)

    # page.set.auto_handle_alert(True)
    return ChromiumPage(options)
def test_random_ua_chrome():
    """Open a Google search in a random-UA browser and print the resulting URL.

    If the browser options report headless mode, a screenshot is saved to
    ``./1.png``. The browser is not closed afterwards.
    """
    page = load_random_ua_chrome()
    tab = page.latest_tab

    keyword = "Acalypha rivularis essential oil"
    # Encode the keyword so spaces and reserved characters (&, #, +, ...)
    # cannot corrupt the query string.
    url = f"https://www.google.com/search?q={quote_plus(keyword)}"
    # Other targets previously tried here:
    #   https://www.google.com/
    #   https://bot.sannysoft.com/
    tab.get(url)
    print(tab.url)

    if page.browser._chromium_options.is_headless:
        tab.get_screenshot('./1.png')
    # page.quit()
def test_normal_chrome():
    """Open a Google search in the browser configured by the default ini file.

    The browser is not closed afterwards.
    """
    # genarate_chrome_ini()
    page = load_chrome_from_ini()
    tab = page.latest_tab

    keyword = "Acalypha rivularis essential oil"
    # Encode the keyword so spaces and reserved characters (&, #, +, ...)
    # cannot corrupt the query string.
    url = f"https://www.google.com/search?q={quote_plus(keyword)}"
    # Other targets previously tried here:
    #   https://bot.sannysoft.com/
    #   https://patrickhlauke.github.io/recaptcha/  (reCAPTCHA detection)
    tab.get(url)
    # tab.get_screenshot('./1.png')
    # page.quit()
def main():
    """Script entry point: run the random-User-Agent browser demo."""
    # Switch to test_normal_chrome() to exercise the ini-configured browser.
    test_random_ua_chrome()
-
# Run the demo only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()
|