|
|
@@ -1,74 +1,53 @@
|
|
|
from camoufox import Camoufox
|
|
|
from camoufox.server import launch_server
|
|
|
from camoufox.async_api import AsyncCamoufox
|
|
|
-from playwright.async_api import Browser,Page
|
|
|
+from playwright.async_api import Browser, Page
|
|
|
import asyncio
|
|
|
import signal
|
|
|
import os
|
|
|
import datetime
|
|
|
-from typing import Optional, Dict
|
|
|
+from typing import Optional, Dict, Type, Protocol
|
|
|
import logging
|
|
|
from pydantic import BaseModel
|
|
|
-from typing import Optional, Dict
|
|
|
from config.settings import OUTPUT_DIR, WORK_DIR
|
|
|
|
|
|
+# ------------------- Base Interfaces -------------------
|
|
|
+class IBrowserCore(Protocol):
|
|
|
+ """浏览器核心操作抽象基类"""
|
|
|
+ async def initialize(self): ...
|
|
|
+ async def close(self): ...
|
|
|
+ async def take_screenshot(self, filename: str) -> str: ...
|
|
|
+ async def get_page_info(self) -> dict: ...
|
|
|
+
|
|
|
+class ISearchHandler(Protocol):
|
|
|
+ """搜索操作抽象接口"""
|
|
|
+ async def search(self, query: str) -> dict: ...
|
|
|
+ async def next_page(self) -> dict: ...
|
|
|
+ async def validate_search_result(self) -> bool: ...
|
|
|
+
|
|
|
+# ------------------- Core Implementation -------------------
|
|
|
class BrowserConfig(BaseModel):
|
|
|
- """浏览器配置参数模型"""
|
|
|
+ """浏览器基础配置模型"""
|
|
|
headless: bool = False
|
|
|
geoip: bool = True
|
|
|
proxy: Optional[Dict] = {'server': 'http://localhost:1881'}
|
|
|
- init_url: str = "https://www.browserscan.net"
|
|
|
- screenshot_dir: str = OUTPUT_DIR / "screenshots"
|
|
|
+ init_url: str = "about:blank"
|
|
|
+ screenshot_dir: str = OUTPUT_DIR / "screenshots"
|
|
|
|
|
|
-class PageOperations:
|
|
|
- """封装页面交互操作"""
|
|
|
- def __init__(self, page: Page, config: BrowserConfig):
|
|
|
- self.page = page
|
|
|
- self.config = config
|
|
|
-
|
|
|
- async def search_element(self, selector: str, timeout: float = 30.0):
|
|
|
- """等待并返回指定元素"""
|
|
|
- return await self.page.wait_for_selector(selector, timeout=timeout)
|
|
|
-
|
|
|
- async def click_element(self, selector: str):
|
|
|
- """点击指定选择器的元素"""
|
|
|
- element = await self.search_element(selector)
|
|
|
- await element.click()
|
|
|
-
|
|
|
- async def take_screenshot(self, filename: str):
|
|
|
- """带时间戳的截图保存"""
|
|
|
- os.makedirs(self.config.screenshot_dir, exist_ok=True)
|
|
|
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
- path = os.path.join(self.config.screenshot_dir, f"{timestamp}_{filename}")
|
|
|
- await self.page.screenshot(path=path, full_page=True)
|
|
|
- return path
|
|
|
-
|
|
|
- async def fill_input(self, selector: str, text: str):
|
|
|
- """在指定输入框填充文本"""
|
|
|
- element = await self.search_element(selector)
|
|
|
- await element.fill(text)
|
|
|
-
|
|
|
- async def press_enter(self):
|
|
|
- """执行键盘回车操作"""
|
|
|
- await self.page.keyboard.press("Enter")
|
|
|
-
|
|
|
-class BrowserManager:
|
|
|
- """全局浏览器会话管理器(单例模式)"""
|
|
|
+class BrowserCore(IBrowserCore):
|
|
|
+ """浏览器核心功能实现(仅管理浏览器生命周期和基础操作)"""
|
|
|
_instance = None
|
|
|
_lock = asyncio.Lock()
|
|
|
|
|
|
- def __init__(self, config: BrowserConfig = BrowserConfig()):
|
|
|
- if not hasattr(self, '_initialized'):
|
|
|
- self.config = config
|
|
|
- self.browser: Browser = None
|
|
|
- self.page: Page = None
|
|
|
- self.page_ops: PageOperations = None
|
|
|
- self.status: str = 'stopped' # running/stopped/error
|
|
|
- self.last_activity: datetime.datetime = None
|
|
|
- self._initialized = True
|
|
|
+ def __init__(self, config: BrowserConfig):
|
|
|
+ self.config = config
|
|
|
+ self.browser: Browser = None
|
|
|
+ self.page: Page = None
|
|
|
+ self.status: str = 'stopped'
|
|
|
+ self.last_activity: datetime.datetime = None
|
|
|
|
|
|
@classmethod
|
|
|
- async def get_instance(cls, config: BrowserConfig = BrowserConfig()) -> "BrowserManager":
|
|
|
+ async def get_instance(cls, config: BrowserConfig = BrowserConfig()) -> "BrowserCore":
|
|
|
"""获取单例实例"""
|
|
|
if not cls._instance:
|
|
|
async with cls._lock:
|
|
|
@@ -87,7 +66,6 @@ class BrowserManager:
|
|
|
).__aenter__()
|
|
|
self.page = await self.browser.new_page()
|
|
|
await self.page.goto(self.config.init_url)
|
|
|
- self.page_ops = PageOperations(self.page, self.config)
|
|
|
self.status = 'running'
|
|
|
self.last_activity = datetime.datetime.now()
|
|
|
logging.info(f"Browser session initialized | URL: {self.page.url}")
|
|
|
@@ -103,75 +81,59 @@ class BrowserManager:
|
|
|
self.status = 'stopped'
|
|
|
logging.info("Browser session closed")
|
|
|
|
|
|
- async def ensure_active_session(self):
|
|
|
- """确保会话有效性"""
|
|
|
- if self.status != 'running' or self.page.is_closed():
|
|
|
- await self.close()
|
|
|
- await self.initialize()
|
|
|
+ async def goto(self, url: str):
|
|
|
+ """导航到指定URL"""
|
|
|
+ await self.page.goto(url)
|
|
|
+ self.last_activity = datetime.datetime.now()
|
|
|
+
|
|
|
+ async def take_screenshot(self, filename: str) -> str:
|
|
|
+ """截图操作"""
|
|
|
+ os.makedirs(self.config.screenshot_dir, exist_ok=True)
|
|
|
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
+ path = os.path.join(self.config.screenshot_dir, f"{timestamp}_{filename}")
|
|
|
+ await self.page.screenshot(path=path, full_page=True)
|
|
|
+ return path
|
|
|
|
|
|
- # API操作方法
|
|
|
- async def search(self, query: str, engine: str = "google"):
|
|
|
+ async def get_page_info(self) -> dict:
|
|
|
+ pass
|
|
|
+
|
|
|
+# ------------------- Search Engine Implementation -------------------
|
|
|
+class GoogleSearchHandler(ISearchHandler):
|
|
|
+ """搜索引擎专用处理器(可继承扩展其他引擎)"""
|
|
|
+ def __init__(self, browser_core: BrowserCore):
|
|
|
+ self.core = browser_core
|
|
|
+ self.page = self.core.page
|
|
|
+ async def goto_home_page(self):
|
|
|
+ url = "https://www.google.com"
|
|
|
+ if self.page.url != url:
|
|
|
+ await self.page.goto(url)
|
|
|
+ async def search(self, query: str) -> dict:
|
|
|
"""执行搜索操作"""
|
|
|
try:
|
|
|
- await self.ensure_active_session()
|
|
|
-
|
|
|
- # 导航到搜索引擎
|
|
|
- search_url = f"https://www.{engine}.com/search?q={query}"
|
|
|
- await self.page.goto(search_url)
|
|
|
-
|
|
|
- # 等待搜索结果加载
|
|
|
- await self.page.wait_for_selector('#search', timeout=5000)
|
|
|
- self.last_activity = datetime.datetime.now()
|
|
|
-
|
|
|
- # 记录截图
|
|
|
- screenshot_path = await self.page_ops.take_screenshot(f"search_{query}.png")
|
|
|
- return {"status": "success", "screenshot": screenshot_path}
|
|
|
-
|
|
|
+ await self.goto_home_page()
|
|
|
+ # 使用 aria-label 定位 textarea 并填入数据
|
|
|
+ await self.page.fill('textarea[aria-label="Search"]', query)
|
|
|
+ # 模拟按下回车键
|
|
|
+ await self.page.press('textarea[aria-label="Search"]', 'Enter')
|
|
|
+ return await self.page.content()
|
|
|
except Exception as e:
|
|
|
- self.status = 'error'
|
|
|
logging.error(f"Search failed: {str(e)}")
|
|
|
return {"status": "error", "message": str(e)}
|
|
|
|
|
|
- async def get_page_info(self):
|
|
|
- """获取当前页面信息"""
|
|
|
- try:
|
|
|
- await self.ensure_active_session()
|
|
|
- return {
|
|
|
- "url": self.page.url,
|
|
|
- "title": await self.page.title(),
|
|
|
- "content": await self.page.content(),
|
|
|
- "timestamp": datetime.datetime.now().isoformat()
|
|
|
- }
|
|
|
- except Exception as e:
|
|
|
- logging.error(f"Get page info failed: {str(e)}")
|
|
|
- return {"status": "error", "message": str(e)}
|
|
|
+ async def next_page(self) -> dict:
|
|
|
+ pass
|
|
|
|
|
|
- async def next_page(self):
|
|
|
- """跳转到下一页"""
|
|
|
- try:
|
|
|
- await self.ensure_active_session()
|
|
|
- next_btn = await self.page_ops.search_element('a:has-text("Next")')
|
|
|
- await next_btn.click()
|
|
|
- await self.page.wait_for_load_state('networkidle')
|
|
|
- self.last_activity = datetime.datetime.now()
|
|
|
- return {"status": "success", "new_url": self.page.url}
|
|
|
- except Exception as e:
|
|
|
- logging.error(f"Next page failed: {str(e)}")
|
|
|
- return {"status": "error", "message": str(e)}
|
|
|
|
|
|
-async def aio_main(config: BrowserConfig = BrowserConfig(init_url="https://www.google.com")):
|
|
|
+# ------------------- API Service -------------------
|
|
|
+async def aio_main(config: BrowserConfig = BrowserConfig()):
|
|
|
"""API服务主循环"""
|
|
|
- manager = await BrowserManager.get_instance(config)
|
|
|
+ core = await BrowserCore.get_instance(config)
|
|
|
+ search_handler = GoogleSearchHandler(core)
|
|
|
|
|
|
try:
|
|
|
- logging.info(f"API服务已启动 | 初始页面: {manager.page.url}")
|
|
|
- while manager.status == 'running':
|
|
|
- # 保持心跳并检查会话状态
|
|
|
- if (datetime.datetime.now() - manager.last_activity).total_seconds() > 300:
|
|
|
- logging.info("检测到会话超时,重新初始化浏览器...")
|
|
|
- await manager.close()
|
|
|
- await manager.initialize()
|
|
|
-
|
|
|
+ await search_handler.search('python')
|
|
|
+ logging.info(f"API服务已启动 | 初始页面: {search_handler.page.url}")
|
|
|
+ while core.status == 'running':
|
|
|
await asyncio.sleep(5)
|
|
|
|
|
|
except KeyboardInterrupt:
|
|
|
@@ -179,17 +141,15 @@ async def aio_main(config: BrowserConfig = BrowserConfig(init_url="https://www.g
|
|
|
except Exception as e:
|
|
|
logging.error(f"API服务异常: {str(e)}")
|
|
|
finally:
|
|
|
- await manager.close()
|
|
|
+ await core.close()
|
|
|
logging.info("API服务已停止")
|
|
|
|
|
|
def main():
|
|
|
- # 初始化并保持浏览器会话
|
|
|
asyncio.run(aio_main())
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- # 启动时初始化配置
|
|
|
config = BrowserConfig(
|
|
|
headless=True,
|
|
|
init_url="https://www.google.com"
|
|
|
)
|
|
|
- main()
|
|
|
+ main()
|