@@ -1,8 +1,9 @@
 # tasks/save_tasks.py
+import os
 from config.celery import app # Keep celery app
 from src.browser.crawl_asin import Crawler
 from src.browser.crawl_amz_search_key import CrawlerAmzSearchKey,CrawlerSearchKeyInput
-from config.settings import CFG
+from config.settings import CFG, read_config, get_config_path
 from utils.drission_page import ChromeOptions
 from utils.file import check_exists, save_to_file,s3_uri_to_http_url
 from utils.logu import get_logger
@@ -10,6 +11,10 @@ from utils.drission_page import load_chrome_from_ini,ChromeOptions
 
 logger = get_logger('worker')
 import asyncio
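+# Re-read the config at worker import time, replacing the module-level CFG;
+# get_config_path() presumably resolves the CONFIG_PATH env var logged below.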
+logger.info(f"CONFIG_PATH {os.environ.get('CONFIG_PATH')}")
+CFG = read_config(get_config_path())
 logger.info(f"Worker initialized, current config version: {CFG.version}")
 logger.info(f"Browser config: {CFG.chrome_config_ini}")
 chrome_options = ChromeOptions(ini_path=CFG.chrome_config_ini)
@@ -61,4 +64,41 @@ def get_amz_search_key_suggestion(self, input_data: CrawlerSearchKeyInput):
 
     except Exception as e:
         logger.exception(f"Task failed: {e}")
-        self.retry(exc=e, countdown=60)
+        self.retry(exc=e, countdown=60)
+
+from pydantic import BaseModel
+from typing import Optional
+
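+# Minimal payload model for the smoke-test task below; both fields are optional.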
+class TestInput(BaseModel):
+    mhtml_path: Optional[str] = None
+    overwrite: Optional[bool] = False
+@app.task(
+    bind=True,
+    name='tasks.crawl_asin_save_task.test_task',
+)
+def test_task(self, input_data: dict):
+    logger.info(f"input type {type(input_data)}")
+    inputdata = TestInput(**input_data)
+    logger.info(f"Task started: test_task {inputdata.model_dump()}")
+
+    logger.info(f"{self.request.hostname}")
+    return {'status': 'success', 'worker_name': self.request.hostname, 'args': input_data}
+
+from celery import signals
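+# Celery dispatches worker_shutdown when the worker is about to shut down.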
+@signals.worker_shutdown.connect
+def on_worker_shutdown(sender=None, **kwargs):
+ """Worker关闭时执行清理操作"""
|
|
|
+ logger.info("Worker正在关闭,执行清理操作...")
|
|
|
+
|
|
|
+ # 示例:关闭数据库连接、释放资源等
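+    # NOTE: this assumes Crawler attaches to the browser already running from
+    # chrome_config_ini, so quit() closes that instance rather than launching
+    # a fresh browser just to close it; verify against Crawler's behavior.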
+    chrome_options = ChromeOptions(ini_path=CFG.chrome_config_ini)
+    crawler = Crawler(chrome_options=chrome_options)
+    crawler.page.quit()
+    logger.info("Worker shutdown complete")