camoufox_broswer.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. from camoufox import Camoufox
  2. from camoufox.server import launch_server
  3. from camoufox.async_api import AsyncCamoufox
  4. from playwright.async_api import Browser, Page
  5. import asyncio
  6. import signal
  7. import os
  8. import datetime
  9. from typing import Optional, Dict, Type, Protocol
  10. import logging
  11. from pydantic import BaseModel
  12. from config.settings import OUTPUT_DIR, WORK_DIR
  13. # ------------------- Base Interfaces -------------------
  14. class IBrowserCore(Protocol):
  15. """浏览器核心操作抽象基类"""
  16. async def initialize(self): ...
  17. async def close(self): ...
  18. async def take_screenshot(self, filename: str) -> str: ...
  19. async def get_page_info(self) -> dict: ...
  20. class ISearchHandler(Protocol):
  21. """搜索操作抽象接口"""
  22. async def search(self, query: str) -> dict: ...
  23. async def next_page(self) -> dict: ...
  24. async def validate_search_result(self) -> bool: ...
  25. # ------------------- Core Implementation -------------------
  26. class BrowserConfig(BaseModel):
  27. """浏览器基础配置模型"""
  28. headless: bool = False
  29. geoip: bool = True
  30. proxy: Optional[Dict] = {'server': 'http://localhost:1881'}
  31. init_url: str = "about:blank"
  32. screenshot_dir: str = OUTPUT_DIR / "screenshots"
  33. class BrowserCore(IBrowserCore):
  34. """浏览器核心功能实现(仅管理浏览器生命周期和基础操作)"""
  35. _instance = None
  36. _lock = asyncio.Lock()
  37. def __init__(self, config: BrowserConfig):
  38. self.config = config
  39. self.browser: Browser = None
  40. self.page: Page = None
  41. self.status: str = 'stopped'
  42. self.last_activity: datetime.datetime = None
  43. @classmethod
  44. async def get_instance(cls, config: BrowserConfig = BrowserConfig()) -> "BrowserCore":
  45. """获取单例实例"""
  46. if not cls._instance:
  47. async with cls._lock:
  48. if not cls._instance:
  49. cls._instance = cls(config)
  50. await cls._instance.initialize()
  51. return cls._instance
  52. async def initialize(self):
  53. """初始化浏览器实例"""
  54. try:
  55. self.browser = await AsyncCamoufox(
  56. headless=self.config.headless,
  57. geoip=self.config.geoip,
  58. proxy=self.config.proxy
  59. ).__aenter__()
  60. self.page = await self.browser.new_page()
  61. await self.page.goto(self.config.init_url)
  62. self.status = 'running'
  63. self.last_activity = datetime.datetime.now()
  64. logging.info(f"Browser session initialized | URL: {self.page.url}")
  65. except Exception as e:
  66. self.status = 'error'
  67. logging.error(f"Browser initialization failed: {str(e)}")
  68. raise
  69. async def close(self):
  70. """关闭浏览器实例"""
  71. if self.browser:
  72. await self.browser.__aexit__(None, None, None)
  73. self.status = 'stopped'
  74. logging.info("Browser session closed")
  75. async def goto(self, url: str):
  76. """导航到指定URL"""
  77. await self.page.goto(url)
  78. self.last_activity = datetime.datetime.now()
  79. async def take_screenshot(self, filename: str) -> str:
  80. """截图操作"""
  81. os.makedirs(self.config.screenshot_dir, exist_ok=True)
  82. timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
  83. path = os.path.join(self.config.screenshot_dir, f"{timestamp}_{filename}")
  84. await self.page.screenshot(path=path, full_page=True)
  85. return path
  86. async def get_page_info(self) -> dict:
  87. pass
  88. # ------------------- Search Engine Implementation -------------------
  89. class GoogleSearchHandler(ISearchHandler):
  90. """搜索引擎专用处理器(可继承扩展其他引擎)"""
  91. def __init__(self, browser_core: BrowserCore):
  92. self.core = browser_core
  93. self.page = self.core.page
  94. async def goto_home_page(self):
  95. url = "https://www.google.com"
  96. if self.page.url != url:
  97. await self.page.goto(url)
  98. async def search(self, query: str) -> dict:
  99. """执行搜索操作"""
  100. try:
  101. await self.goto_home_page()
  102. # 使用 aria-label 定位 textarea 并填入数据
  103. await self.page.fill('textarea[aria-label="Search"]', query)
  104. # 模拟按下回车键
  105. await self.page.press('textarea[aria-label="Search"]', 'Enter')
  106. return await self.page.content()
  107. except Exception as e:
  108. logging.error(f"Search failed: {str(e)}")
  109. return {"status": "error", "message": str(e)}
  110. async def next_page(self) -> dict:
  111. pass
  112. # ------------------- API Service -------------------
  113. async def aio_main(config: BrowserConfig = BrowserConfig()):
  114. """API服务主循环"""
  115. core = await BrowserCore.get_instance(config)
  116. search_handler = GoogleSearchHandler(core)
  117. try:
  118. await search_handler.search('python')
  119. logging.info(f"API服务已启动 | 初始页面: {search_handler.page.url}")
  120. while core.status == 'running':
  121. await asyncio.sleep(5)
  122. except KeyboardInterrupt:
  123. logging.info("接收到终止信号,关闭浏览器...")
  124. except Exception as e:
  125. logging.error(f"API服务异常: {str(e)}")
  126. finally:
  127. await core.close()
  128. logging.info("API服务已停止")
  129. def main():
  130. asyncio.run(aio_main())
  131. if __name__ == "__main__":
  132. config = BrowserConfig(
  133. headless=True,
  134. init_url="https://www.google.com"
  135. )
  136. main()