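"""Ad-hoc debug script for Camoufox browser automation.

Collects several independent experiments: launching Camoufox through the
async and sync APIs behind a local proxy, connecting to a running Camoufox
server over its WebSocket endpoint, starting that server, and driving the
project's GoogleSearchHandler against a saved Google results page.
"""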
import asyncio
import re
import signal
import time
from pathlib import Path

from playwright.sync_api import sync_playwright

from camoufox import Camoufox
from camoufox.async_api import AsyncCamoufox
from camoufox.server import launch_server

from worker.search_engine.camoufox_broswer import BrowserConfig
from worker.search_engine.google_search import GoogleSearchHandler
from mylib.logu import get_logger

logger = get_logger('test')
page = None


async def aio_main():
    global page
    async with AsyncCamoufox(
        headless=False,
        geoip=True,
        proxy={
            'server': 'http://localhost:1881',
        },
    ) as browser:
        page = await browser.new_page()
        await page.goto("https://www.browserscan.net")

        while not page.is_closed():
            # print("page.is_closed() = ", page.is_closed())
            await asyncio.sleep(1)  # poll once per second until the page is closed by hand

        print("Browser has been closed, exiting...")
        # Clean up the browser instance.
        await browser.close()
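
# A minimal sketch of how the otherwise-unused `signal` import could be wired
# up (an assumption -- the original never registers any handlers): close the
# shared page on SIGINT/SIGTERM so aio_main()'s polling loop exits cleanly.
# Call it from inside aio_main(), e.g.
# _install_shutdown_handlers(asyncio.get_running_loop()).
def _install_shutdown_handlers(loop: asyncio.AbstractEventLoop) -> None:
    def _request_shutdown() -> None:
        if page is not None and not page.is_closed():
            # page.close() is a coroutine in the async API; schedule it on the loop.
            loop.create_task(page.close())

    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, _request_shutdown)
        except NotImplementedError:
            # add_signal_handler is unavailable on Windows event loops.
            pass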
def launch_browser():
    # Camoufox is Firefox-based; its sync API yields a ready-to-use Browser,
    # so there is no separate chromium launch step.
    with Camoufox(
        headless=False,
        geoip=True,
        proxy={
            'server': 'http://localhost:1881',
            # 'username': 'username',
            # 'password': 'password'
        },
    ) as browser:
        # Create a new browser context and page.
        context = browser.new_context()
        page = context.new_page()
        page.goto("https://www.browserscan.net")
def connect_ws():
    with sync_playwright() as p:
        # Pull the ws:// endpoint out of the text file written alongside the server.
        ws_file = Path(r'K:\code\upwork\zhang_crawl_bio\output\ws.txt')
        content = ws_file.read_text()
        ws_url_pattern = r'ws://[^\s]+'
        match = re.search(ws_url_pattern, content)
        ws_url = match.group(0)
        # Camoufox is Firefox-based, so attach through the firefox browser type.
        browser = p.firefox.connect(ws_url)
        page = browser.new_page()
        print(page.url)
        return
def server_browser():
    # Expose Camoufox as a remote browser server; clients attach via its
    # WebSocket endpoint (see connect_ws above).
    launch_server(
        headless=False,
        geoip=True,
        proxy={
            'server': 'http://localhost:1881',
            'username': 'username',
            'password': 'password'
        }
    )
    return
async def search_keyword(keyword, max_result_items=200, skip_existing=True, config: BrowserConfig = BrowserConfig()):
    ret = {'error': 0, 'msg': '', 'data': None}
    config_dict = config.model_dump()
    logger.info(f"BrowserConfig {config_dict}")
    # config_dict['config'] = {'navigator.cookieEnabled': False}
    async with AsyncCamoufox(**config_dict) as browser:
        try:
            handler = GoogleSearchHandler(await browser.new_page())
            # Local debug snapshot; goto() needs a file:// URL rather than a bare Windows path.
            snapshot = Path(r'G:\code\upwork\zhang_crawl_bio\output\debug\查询不到搜索框-两个textarea.html')
            await handler.page.goto(snapshot.as_uri(), wait_until=None, timeout=10000)
            textarea = handler.page.locator('xpath=//body/textarea')
            logger.info(f"textarea {textarea}")
            await textarea.fill(keyword, timeout=10000)  # use textarea.fill() rather than page.fill()
            await textarea.press('Enter')  # simulate pressing Enter to submit the query
            await handler.page.wait_for_load_state(state='load', timeout=10000)
            while True:
                # Keep the browser open for manual inspection.
                await asyncio.sleep(1)
            # kw = await handler.process_keyword(keyword, max_result_items=max_result_items, skip_existing=skip_existing)
            # if not kw:
            #     ret['error'] = 1
            #     html_path = await handler.save_current_page(keyword, filename=f"warning_{time.strftime('%Y%m%d_%H%M%S')}")
            #     logger.warning(f"Keyword task not completed: {keyword} html_path: {html_path}")
            #     ret['msg'] = f"Keyword task not completed: {keyword}"
            #     ret['data'] = html_path
            #     return ret
            # ret['data'] = kw.model_dump()
            # return ret
        except Exception as e:
            # html_path = await handler.save_current_page(keyword, filename=f"error_{time.strftime('%Y%m%d_%H%M%S')}")
            logger.exception(f"Failed: {str(e)} html_path:")
            ret['error'] = 1
            ret['msg'] = f"Failed: {str(e)}"
            return ret
def main():
    asyncio.run(search_keyword('123'))


if __name__ == "__main__":
    main()