@@ -1,147 +0,0 @@
-import os
-import httpx
-import asyncio
-from fastapi import FastAPI
-from contextlib import asynccontextmanager
-from datetime import datetime
-from typing import Optional
-from pydantic import BaseModel
-from dotenv import load_dotenv
-from worker.search_engine.camoufox_broswer import BrowserCore, BrowserConfig
-from worker.search_engine.google_search import GoogleSearchHandler
-from database.search_result_db import SearchResultManager
-from mylib.logu import get_logger
-from worker.api_schemas import WorkerRegistration, WorkerHeartbeat
-
-logger = get_logger("worker_server")
-load_dotenv()
-logger.info(f"HTTP_PROXY: {os.environ.get('HTTP_PROXY')}")
-logger.info(f"HTTPS_PROXY: {os.environ.get('HTTPS_PROXY')}")
-
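-# Worker settings; master URL, bind address, port, and capacity are read from the environment.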
-class WorkerConfig(BaseModel):
-    master_url: str = os.getenv("MASTER_URL", "http://localhost:9300")
-    heartbeat_interval: int = 30
-    max_retries: int = 5
-    browser_headless: bool = True
-    host: str = os.getenv("WORKER_HOST", "0.0.0.0")
-    port: int = int(os.getenv("WORKER_PORT", 8001))
-    capacity: int = int(os.getenv("WORKER_CAPACITY", 1))
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Lifespan management: start the shared browser, register with the master, and launch the heartbeat loop."""
-    try:
-        global browser_core, search_handler, heartbeat_task
-        browser_config = BrowserConfig(
-            headless=config.browser_headless,
-            proxy=None
-        )
-        browser_core = await BrowserCore.get_instance(browser_config)
-        search_handler = GoogleSearchHandler(browser_core.page)
-
-        await register_worker()
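-        # Keep a reference to the heartbeat task so it is not garbage-collected mid-loop.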
-        heartbeat_task = asyncio.create_task(heartbeat_loop())
-
-        yield
-
-        if browser_core:
-            await browser_core.close()
-        logger.info("Worker shutdown completed")
-
-    finally:
-        logger.info("Worker service shutdown")
-
-app = FastAPI(lifespan=lifespan)
-config = WorkerConfig()
-worker_id: Optional[str] = None
-current_load: int = 0
-browser_core: Optional[BrowserCore] = None
-search_handler: Optional[GoogleSearchHandler] = None
-heartbeat_task: Optional[asyncio.Task] = None
-
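-# Register this worker with the master; the returned worker_id is used in heartbeat calls.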
-async def register_worker():
-    global worker_id
-    async with httpx.AsyncClient() as client:
-        try:
-            registration_data = WorkerRegistration(
-                host=config.host,
-                port=config.port,
-                capacity=config.capacity,
-                capabilities=["web_search"]
-            ).model_dump()
-
-            logger.info(f"Registering to {config.master_url}")
-            resp = await client.post(
-                f"{config.master_url}/api/workers/register",
-                json=registration_data
-            )
-            resp.raise_for_status()  # fail fast on HTTP errors instead of a KeyError below
-            data = resp.json()
-            logger.info(f"Registration response: {data}")
-            worker_id = data["worker_id"]
-            logger.success(f"Registered worker ID: {worker_id}")
-        except Exception as e:
-            logger.error(f"Registration failed: {str(e)}")
-            raise
-
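-# Report current load to the master; errors are logged and retried on the next tick.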
-async def send_heartbeat():
-    async with httpx.AsyncClient() as client:
-        try:
-            heartbeat_data = WorkerHeartbeat(
-                current_load=current_load,
-                cpu_usage=None,  # left as None for now; monitoring data can be added later
-                memory_usage=None,
-                last_ping=datetime.now()
-            ).model_dump(mode="json")  # mode="json" serializes the datetime field
-
-            await client.post(
-                f"{config.master_url}/api/workers/{worker_id}/heartbeat",
-                json=heartbeat_data
-            )
-        except Exception as e:
-            logger.error(f"Heartbeat failed: {str(e)}")
-
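-# Background loop that drives send_heartbeat() at the configured interval.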
-async def heartbeat_loop():
-    while True:
-        await send_heartbeat()
-        await asyncio.sleep(config.heartbeat_interval)
-
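-# Health probe: reports browser availability and current load.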
-@app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "browser_connected": browser_core is not None,
-        "current_load": current_load
-    }
-
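-# Execute a search task for a keyword and persist the page results.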
-@app.post("/tasks")
-async def handle_task(task: dict):
-    global current_load
-    current_load += 1
-    try:
-        logger.info(f"Processing task: {task['keyword']}")
-
-        result = await search_handler.process_keyword(task['keyword'])
-
-        manager = SearchResultManager()
-        await manager.save_page_results(
-            keyword=task['keyword'],
-            page_number=result.current_page,
-            results_count=len(result.items),
-            has_next_page=result.has_next_page
-        )
-
-        return {"status": "success", "processed_pages": result.current_page}
-    finally:
-        current_load -= 1
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host=config.host, port=config.port)