فهرست منبع

完成AI分析关键词模块化可扩展性

mrh 8 ماه پیش
والد
کامیت
b9e74a95f7
2 فایل تغییر یافته به همراه 172 افزوده شده و 124 حذف شده
  1. 1 1
      .clinerules
  2. 171 123
      src/ai/agent_product.py

+ 1 - 1
.clinerules

@@ -1,3 +1,3 @@
 重要:
-- 编码遵循高内聚、低耦合、可扩展,符合最佳程序设计,符合最佳实践。
+- 编码遵循模块化、高内聚、低耦合、可扩展,符合最佳程序设计,符合最佳实践。
 - 不要硬编码,必须要通用可复用、可扩展。

+ 171 - 123
src/ai/agent_product.py

@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 import json
 from typing import Optional, Union
 from llama_index.core import PromptTemplate
@@ -20,29 +21,36 @@ from src.models.field_config import FieldConfig
 load_dotenv()
 logger = get_logger('ai')
 
-# 默认包含的字段配置
-DEFAULT_FIELD_CONFIG = FieldConfig(
-    include_fields={
-        "ProductImageInfo": {
-            "main_text"  # 产品图片主要文字
-        },
-        "TrafficKeywordResult": {
-            "traffic_keyword",  # 流量关键词名称
-            "monthly_searches"  # 关键词月搜索量
-        },
-        "ProductBaseInfo": {
-            "name", "content", "material", "color", "size",
-            "packaging_size", "weight", "main_usage", "selling_point"
-        },
-        "CompetitorCrawlData": {
-            "asin",
class ConfigManager:
    """Singleton holder for the process-wide :class:`FieldConfig`.

    NOTE(review): functions in this module use ``ConfigManager.get_field_config()``
    as a *default argument value*; Python evaluates defaults once at function
    definition time, so a later :meth:`update_config` does NOT change those
    defaults — confirm this is intended.
    """

    _instance = None
    # Built-in default field selection used when building competitor prompts.
    _default_config = FieldConfig(
        include_fields={
            "ProductImageInfo": {"main_text"},
            "TrafficKeywordResult": {"traffic_keyword", "monthly_searches"},
            "ProductBaseInfo": {
                "name", "content", "material", "color", "size",
                "packaging_size", "weight", "main_usage", "selling_point"
            },
            "CompetitorCrawlData": {"asin"}
        }
    )
    # Currently active configuration (starts as the default).
    _config = _default_config

    def __new__(cls):
        # Classic singleton: every instantiation yields the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def get_field_config(cls) -> FieldConfig:
        """Return the currently active field configuration."""
        return cls._config

    @classmethod
    def update_config(cls, new_config: FieldConfig) -> None:
        """Replace the active field configuration."""
        cls._config = new_config

    @classmethod
    def reset_config(cls) -> None:
        """Restore the built-in default configuration.

        Fix: previously ``update_config`` discarded the default with no way
        to recover it. New, backward-compatible addition.
        """
        cls._config = cls._default_config
 
 def get_competitor_prompt_data(
     product: Product,
-    field_config: FieldConfig = DEFAULT_FIELD_CONFIG
+    field_config: FieldConfig = ConfigManager.get_field_config()
 ) -> list:
     """
     获取竞品提示数据
@@ -80,7 +88,7 @@ def get_competitor_prompt_data(
 
 def get_field_descriptions(
     model_class: BaseModel,
-    field_config: FieldConfig = DEFAULT_FIELD_CONFIG,
+    field_config: FieldConfig = ConfigManager.get_field_config(),
     model_name: Optional[str] = None
 ) -> dict:
     """
@@ -96,34 +104,51 @@ def get_field_descriptions(
     """
     return field_config.get_model_fields(model_class, model_name)
 
-def format_output(fields_desc: dict, format_type: str = "json", notes: Optional[dict] = None):
-    """根据字段描述生成输出格式(支持嵌套字典结构)
-    
-    Args:
-        fields_desc: 字段描述字典(支持嵌套)
-        format_type: 输出格式类型(json/human)
-        notes: 格式特定的额外说明信息字典,如 {"json": "JSON格式说明", "human": "人类可读说明"}
-        
-    Returns:
-        格式化后的输出模板
-    """
-    def process_dict(d, format_type):
-        if format_type == "human":
class Formatter(ABC):
    """Abstract base for prompt-output formatters.

    Subclasses render a (possibly nested) field-description dict into the
    textual output template embedded in the LLM prompt.
    """

    def __init__(self, notes: Optional[dict] = None):
        # Per-format extra remarks, e.g. {"json": "...", "human": "..."}.
        self.notes = notes or {}

    @abstractmethod
    def format(self, fields_desc: dict) -> str:
        """Render *fields_desc* to a string."""

class JSONFormatter(Formatter):
    """Renders the field descriptions as a fenced JSON code block."""

    def format(self, fields_desc: dict) -> str:
        body = f"```json\n{json.dumps(fields_desc, indent=2, ensure_ascii=False)}\n```"
        # Single append point (the original duplicated the fenced-block
        # f-string across two branches).
        note = self.notes.get('json')
        return f"{body}\n{note}" if note else body

class HumanFormatter(Formatter):
    """Renders the field descriptions as an indented, human-readable outline."""

    def format(self, fields_desc: dict) -> str:
        def render(section: dict, indent: int = 0) -> str:
            pad = " " * indent
            lines = []
            for key, value in section.items():
                if isinstance(value, dict):
                    # Nested section: header line, children indented by 2.
                    lines.append(f"{pad}{key}:")
                    lines.append(render(value, indent + 2))
                else:
                    # Leaf: "<description>: {<field_name>}" placeholder line.
                    lines.append(f"{pad}{value}: {{{key}}}")
            return "\n".join(lines)

        result = render(fields_desc)
        note = self.notes.get('human')
        return f"{result}\n{note}" if note else result

class FormatterFactory:
    """Creates the :class:`Formatter` matching a format-type string."""

    # Registry dispatch instead of an if/elif chain, so new formats can be
    # registered without editing this method.
    _formatters = {
        "json": JSONFormatter,
        "human": HumanFormatter,
    }

    @staticmethod
    def create_formatter(format_type: str, notes: Optional[dict] = None) -> Formatter:
        """Return a formatter for *format_type*.

        Raises:
            ValueError: if *format_type* is not a registered format.
        """
        try:
            return FormatterFactory._formatters[format_type](notes)
        except KeyError:
            raise ValueError(f"Unsupported format type: {format_type}") from None
 
 async def test_product_mongo(main_key_num=3, format_type: str = "json"):
     db_mongo = BaseMongoManager()
@@ -184,94 +209,117 @@ async def test_product_mongo(main_key_num=3, format_type: str = "json"):
     logger.info(formatted_output)
     return formatted_output
 
-async def analyze_with_llm(
-    prompt: str,
-    model: str = "openai/deepseek-chat",
-    max_retries: int = 3,
-    retry_delay: float = 1.0,
-    format_type: str = "json"
-) -> Union[dict, str]:
-    """使用LLM分析数据并返回结果
-    
-    Args:
-        prompt: 完整的提示词
-        model: 模型名称
-        max_retries: 最大重试次数
-        retry_delay: 重试延迟(秒)
-        format_type: 输出格式类型(json/human)
-        
-    Returns:
-        dict: 当format_type为json时的解析结果
-        str: 当format_type为human时的原始文本
-        
-    Raises:
-        ValueError: 当无法获取有效响应时
-    """
-    llm_kwargs = {}
-    if format_type == "json":
-        llm_kwargs["additional_kwargs"] = {"response_format": {"type": "json_object"}}
-    
-    llm = LiteLLM(model=model, **llm_kwargs)
-    
-    for attempt in range(max_retries):
-        try:
-            logger.info(f"尝试第 {attempt + 1} 次LLM调用...")
-            completion = await llm.acomplete(prompt)
-            response_text = completion.text
-            
-            if format_type == "json":
-                # 尝试从Markdown代码块中提取JSON
-                if "```json" in response_text:
-                    json_str = response_text.split("```json")[1].split("```")[0].strip()
-                else:
-                    json_str = response_text
-                    
-                result = json.loads(json_str)
-                
-                if not isinstance(result, dict):
-                    raise ValueError("响应不是有效的JSON对象")
-                    
-                logger.debug(f"LLM响应验证通过: {json.dumps(result, indent=2, ensure_ascii=False)}")
-                return result
-            else:
-                # 直接返回原始文本
-                return response_text
-                
-        except (json.JSONDecodeError, ValueError) as e:
-            if format_type == "json":
-                logger.warning(f"JSON解析失败(尝试 {attempt + 1}/{max_retries}): {str(e)}")
-                if attempt < max_retries - 1:
-                    await asyncio.sleep(retry_delay)
-                else:
-                    raise ValueError(f"无法获取有效的JSON响应: {str(e)}")
-            else:
class LLMService(ABC):
    """Abstract interface for LLM analysis backends.

    Fix: the original declared ``@abstractmethod`` without inheriting
    :class:`abc.ABC`, so the abstract contract was never enforced and the
    "abstract" class could be instantiated with a no-op ``analyze``.
    """

    @abstractmethod
    async def analyze(self, prompt: str) -> Union[dict, str]:
        """Run the LLM on *prompt*; return a parsed dict (json) or raw text."""
+
class LiteLLMService(LLMService):
    """:class:`LLMService` backed by LiteLLM, with retry + JSON extraction.

    Constructor args mirror the old ``analyze_with_llm()`` free function:
        model: LiteLLM model identifier.
        max_retries: attempts allowed for JSON parse failures.
        retry_delay: seconds to sleep between attempts.
        format_type: "json" (parse and validate) or "human" (raw text).
    """

    def __init__(self, model: str = "openai/deepseek-chat", max_retries: int = 3,
                 retry_delay: float = 1.0, format_type: str = "json"):
        self.model = model
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.format_type = format_type

    async def analyze(self, prompt: str) -> Union[dict, str]:
        """Call the model, retrying on invalid JSON.

        Raises:
            ValueError: when no valid JSON response is obtained within
                ``max_retries`` attempts (json mode).
        """
        llm_kwargs = {}
        if self.format_type == "json":
            # Ask the provider for a JSON object directly when supported.
            llm_kwargs["additional_kwargs"] = {"response_format": {"type": "json_object"}}

        llm = LiteLLM(model=self.model, **llm_kwargs)

        for attempt in range(self.max_retries):
            try:
                logger.info(f"尝试第 {attempt + 1} 次LLM调用...")
                completion = await llm.acomplete(prompt)
                return self._process_response(completion.text)
            except (json.JSONDecodeError, ValueError) as e:
                if self.format_type != "json":
                    # Bug fix: non-JSON mode previously swallowed the error
                    # (no re-raise), fell out of the loop and returned None
                    # implicitly. The pre-refactor function re-raised here.
                    raise
                logger.warning(f"JSON解析失败(尝试 {attempt + 1}/{self.max_retries}): {str(e)}")
                if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay)
                else:
                    raise ValueError(f"无法获取有效的JSON响应: {str(e)}")
            except Exception as e:
                logger.error(f"LLM调用失败: {str(e)}")
                raise
        # Defensive: only reachable when max_retries <= 0 — never return None.
        raise ValueError("无法获取有效的JSON响应: 重试次数为0")

    def _process_response(self, response_text: str) -> Union[dict, str]:
        """Validate/parse the raw completion according to ``format_type``."""
        if self.format_type != "json":
            return response_text
        # Strip a Markdown ```json fence if the model wrapped its answer.
        if "```json" in response_text:
            json_str = response_text.split("```json")[1].split("```")[0].strip()
        else:
            json_str = response_text

        result = json.loads(json_str)
        if not isinstance(result, dict):
            raise ValueError("响应不是有效的JSON对象")

        logger.debug(f"LLM响应验证通过: {json.dumps(result, indent=2, ensure_ascii=False)}")
        return result
+
class AnalysisService:
    """Domain service orchestrating competitor main-keyword analysis."""

    def __init__(self, llm_service: LLMService, db_manager: BaseMongoManager):
        # Injected collaborators: any LLMService implementation + Mongo manager.
        self.llm_service = llm_service
        self.db_manager = db_manager

    async def execute_analysis(self, product_name: str, format_type: str = "json") -> Union[dict, str]:
        """Look up the product by name, build the prompt, and run the LLM.

        Raises:
            ValueError: if no product named *product_name* exists.

        NOTE(review): *format_type* only shapes the prompt's output template;
        response parsing is governed by the llm_service's own format_type set
        at construction — confirm callers keep the two in sync.
        """
        await self.db_manager.initialize()
        product = await Product.find_one(Product.basic_info.name == product_name)
        if not product:
            raise ValueError(f"未找到产品: {product_name}")

        prompt = await self._prepare_prompt(product, format_type)
        return await self.llm_service.analyze(prompt)

    async def _prepare_prompt(self, product: Product, format_type: str) -> str:
        """Assemble the Japan-market keyword-selection prompt for *product*."""
        competitor_data = get_competitor_prompt_data(product)
        # Output schema the LLM is asked to fill (keys become placeholders,
        # values are the human-readable descriptions shown in the template).
        output_fields = {
            "results": {
                "asin": "商品(竞品)编号",
                "main_key": "主要关键词",
                "monthly_searches": "月搜索量",
                "reason": "分析理由"
            },
            "supplement": "补充说明"
        }

        formatter = FormatterFactory.create_formatter(format_type)
        output_format = formatter.format(output_fields)

        # Prompt text is user-facing model input — kept byte-identical.
        return f'''各个字段说明:
{get_field_descriptions(CompetitorCrawlData)}
{get_field_descriptions(ProductImageInfo)}
{get_field_descriptions(TrafficKeywordResult)}

竞品数据:
{competitor_data}
----
我是日本站的亚马逊运营,正在为产品 {product.basic_info.name} 选主要关键词。
请根据以上 {len(competitor_data)} 个竞品数据,按以下规则分析:
1. 优先选择搜索量1万以上的相同关键词
2. 不足时选择搜索量前十且相关性强的关键词
3. 结合日本市场特点分析

输出格式:
{output_format}'''
 
 async def main():
     logger.info(f"base url {os.environ.get('OPENAI_API_BASE')}")
-    format_type = 'human'  # 可以从配置或参数获取
-    format_type = 'json'  
-    analyze_competitor_main_keyword_prompt = await test_product_mongo(format_type=format_type)
+    db_manager = BaseMongoManager()
+    llm_service = LiteLLMService(format_type='json')
+    analysis_service = AnalysisService(llm_service, db_manager)
+
     try:
-        result = await analyze_with_llm(
-            analyze_competitor_main_keyword_prompt,
-            format_type=format_type
-        )
-        
-        if format_type == "json":
-            logger.info(f"分析结果: {json.dumps(result, indent=2, ensure_ascii=False)}")
-        else:
-            logger.info(f"分析结果:\n{result}")
-        
-        # 这里可以添加结果处理逻辑
-        # 例如保存到数据库或进一步处理
-        
+        result = await analysis_service.execute_analysis("电线保护套")
+        logger.info(f"分析结果: {json.dumps(result, indent=2, ensure_ascii=False)}")
     except ValueError as e:
         logger.error(f"分析失败: {str(e)}")
-        # 可以添加失败处理逻辑
 
 if __name__ == "__main__":
     asyncio.run(main())