Sfoglia il codice sorgente

新增长尾词分析:AI 基于竞品数据给出分析结果。新增获取模型字段描述词的通用方法。备份。

mrh 8 mesi fa
parent
commit
7750b43f7b

+ 98 - 0
docs/gpt/agent_product.md

@@ -6,8 +6,106 @@
 例如,在某个竞品的产品信息中(product_info.main_text)“MEL Chemistry大径 肉厚 ペットコード ペット 犬 猫 キャット ドッグ 噛みつき 防止 感電 保護 家電 チャージ コード 配線 プロテクター カバー 螺旋 スパイラル チューブ ラップ 被覆 破れ 防止 破損防止 補強 収納 収束 結束 まとめる TPU 約93cm (ブラック 黒)B0B658JC22”
 - 属性词是: 大径 肉厚 噛みつき 防止 破損防止 螺旋
 
+# 模型字段
+@/tests\mytest\models\t_filed.py 
+@/src\models\product_model.py 
+
+output_fields = get_field_descriptions(AICompetitorAnalyzeMainKeywordsResult)
+
+
+示例输出格式:
+
+{
+  "results": [
+    {
+      "asin": "商品编号描述",
+      "main_key": "主要关键词描述",
+      "monthly_searches": "月搜索量描述"
+    }
+  ],
+  "supplement": "补充说明描述",
+  "tail_keys": [
+    {
+      "tail_key": "长尾关键词描述",
+      "monthly_searches": "月搜索量描述"
+    }
+  ]
+}
+
+不要修改 product_model.py 。帮我实现获取该模型的字段名和描述词。
+
+要通用可扩展,如果我有许多模型,需要能有一个通用的函数,可以处理任意模型。
+编程必须最小模块化,单一职责,不要硬编码。
+
+
+## 筛选字段
+class AICompetitorAnalyzeMainKeywords(BaseModel):
+
+    asin:str = Field(default=None, description="商品(竞品)编号")
+    main_key:str = Field(default=None, description="主要关键词")
+    monthly_searches: Optional[int] = Field(default=None, description="月搜索量")
+    reason:Optional[str] = Field(default=None, description="选择该主关键词原因")
+    crawl_result: Optional[SearchAmazoneKeyResult] = Field(
+        default=None,
+        description="爬取AI分析出来的主关键词" 
+    )
+    created_at:Optional[datetime] = Field(default_factory=datetime.now)
+
+class AICompetitorAnalyzeMainKeywordsResult(BaseModel):
+    class TailKey(BaseModel):
+        tail_key:str = Field(default=None, description="长尾关键词")
+        monthly_searches:int = Field(default=None, description="月搜索量")
+    results:Optional[List[AICompetitorAnalyzeMainKeywords]] = []
+    supplement:Optional[str] = Field(
+        default=None,
+        description="补充说明"
+    )
+    tail_keys: Optional[List[TailKey]] = Field(
+        default=[],
+    )
+---
 
 
+OUTPUT:
+{'results': [{'asin': '商品(竞品)编号', 'main_key': '主要关键词', 'monthly_searches': '月搜索量', 'reason': '选择该主关键词原因', 'crawl_result': {'search_key': '', 'suggestions': [''], 'mhtml_path': '', 'screenshot': '', 'error': '', 'msg': '', 'created_at': ''}, 'created_at': ''}], 'supplement': '补充说明', 'tail_keys': [{'tail_key': '长尾关键词', 'monthly_searches': '月搜索量'}]}
+
+编程规范:
+要通用可扩展,如果我有许多模型,需要能有一个通用的函数,可以处理任意模型。
+编程必须最小模块化,单一职责,不要硬编码。
+
+现在我想实现类似 model_dump_json 的 include / exclude 字段过滤功能,不传参时默认包含全部字段。为了保持单一职责,是否应该创建另一个函数来实现?
+
+## 简化 get_field_descriptions 的实现
+@/src\ai\agent_product.py
+
+@/src\models\field_config.py
+
+@/src\models\product_model.py
+
+get_field_descriptions 有没有更加优雅的实现方式?我不想依赖 FieldConfig 和 ConfigManager。
+只想通过传参获取描述:
+output_fields = get_field_descriptions(AICompetitorAnalyzeMainKeywordsResult)
+
+
+示例输出格式:
+
+{
+  "results": [
+    {
+      "asin": "商品编号描述",
+      "main_key": "主要关键词描述",
+      "monthly_searches": "月搜索量描述"
+    }
+  ],
+  "supplement": "补充说明描述",
+  "tail_keys": [
+    {
+      "tail_key": "长尾关键词描述",
+      "monthly_searches": "月搜索量描述"
+    }
+  ]
+}
+
 # agent_product 存入 mongodb
 @/src\ai\agent_product.py 
 @/src\models\product_model.py 

+ 14 - 49
src/ai/agent_product.py

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 import json
-from typing import Optional, Union
+from typing import Optional, Set, Union
 from llama_index.core import PromptTemplate
 import asyncio
 import aiofiles
@@ -10,14 +10,14 @@ from dotenv import load_dotenv
 from pydantic import BaseModel
 from src.models.product_model import (
     Product, CompetitorCrawlData, AICompetitorAnalyzeMainKeywords,
-    TrafficKeywordResult, ProductImageInfo,
+    TrafficKeywordResult, ProductImageInfo,AICompetitorAnalyzeMainKeywordsResult,
     SearchAmazoneKeyResult, ProductBaseInfo, Variant
 )
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.litellm import LiteLLM
 from src.manager.core.db_mongo import BaseMongoManager
 from utils.logu import get_logger
-from src.models.field_config import FieldConfig
+from src.models.field_config import FieldConfig,get_field_descriptions
 load_dotenv()
 logger = get_logger('ai')
 
@@ -86,24 +86,6 @@ def get_competitor_prompt_data(
             
     return list_data
 
-def get_field_descriptions(
-    model_class: BaseModel,
-    field_config: FieldConfig = ConfigManager.get_field_config(),
-    model_name: Optional[str] = None
-) -> dict:
-    """
-    获取模型字段描述信息
-    
-    Args:
-        model_class: Pydantic模型类
-        field_config: 字段配置
-        model_name: 模型名称(用于查找配置)
-        
-    Returns:
-        字段名到描述的字典
-    """
-    return field_config.get_model_fields(model_class, model_name)
-
 class Formatter(ABC):
     """格式化器抽象基类"""
     def __init__(self, notes: Optional[dict] = None):
@@ -207,47 +189,29 @@ class LiteLLMService(LLMService):
 class AnalysisService:
     """分析领域服务"""
     def __init__(self, llm_service: LLMService, db_manager: BaseMongoManager):
-        self.llm_service = llm_service
+        self.llm_service:LiteLLMService = llm_service
         self.db_manager = db_manager
 
     async def execute_analysis(self, product:Product, format_type: str = "json", dry_run=False) -> tuple[dict, str]:
         prompt = await self._prepare_prompt(product, format_type)
         logger.info(f"prompt: {prompt}")
-        
-        if dry_run:
-            mock_result = {
-                "results": [{
-                    "asin": "MOCK_ASIN",
-                    "main_key": "MOCK_KEYWORD",
-                    "monthly_searches": "1,000",
-                    "reason": "模拟分析结果"
-                }],
-                "supplement": "模拟补充信息"
-            }
-            return mock_result, prompt
-            
         analysis_result = await self.llm_service.analyze(prompt)
         return analysis_result, prompt
 
-    async def _prepare_prompt(self, product: Product, format_type: str = "json", main_key_num: int = 3) -> str:
+    async def _prepare_prompt(self, product: Product, format_type: str = "json", main_key_num: int = 3, tail_key_num:int = 12) -> str:
         competitor_data = get_competitor_prompt_data(product)
-        output_fields = {
-            "results": {
-                "asin": "商品(竞品)编号",
-                "main_key": "主要关键词",
-                "monthly_searches": "月搜索量",
-                "reason": "分析理由"
-            },
-            "supplement": "补充说明"
-        }
-
+        # 从数据模型获取输出字段描述
+        output_fields = get_field_descriptions(
+            AICompetitorAnalyzeMainKeywordsResult,
+            exclude=['results.crawl_result', 'results.created_at']
+            )
         formatter = FormatterFactory.create_formatter(format_type)
         output_format = formatter.format(output_fields)
 
         return f'''各个字段说明:
-{get_field_descriptions(CompetitorCrawlData)}
-{get_field_descriptions(ProductImageInfo)}
-{get_field_descriptions(TrafficKeywordResult)}
+{get_field_descriptions(CompetitorCrawlData, include=['asin'])}
+{get_field_descriptions(ProductImageInfo, include=['main_text'])}
+{get_field_descriptions(TrafficKeywordResult, include=['traffic_keyword', 'monthly_searches'])}
 
 竞品数据:
 {competitor_data}
@@ -255,6 +219,7 @@ class AnalysisService:
 我是日本站的亚马逊运营,我在给产品名称为 {product.basic_info.name} 选主要关键词,请根据以上 {len(competitor_data)} 个竞品数据,按以下规则分析:
 - 选出搜索量在1万以上的相同关键词作为主要关键词{main_key_num}个。
 - 如果竞品的搜索量都不足1万,则从排名前十的关键词中筛选 {main_key_num} 个搜索量最大且相关性最强的词。
+- 根据我的产品基本信息,从竞品的主要信息和同类竞品的相似关键词中,筛选出最符合我产品的长尾关键词 tail_keys {tail_key_num} 个以上
 - 结合日本市场特点分析
 输出格式:
 {output_format}'''

+ 26 - 17
src/manager/manager_task.py

@@ -13,6 +13,7 @@ from src.models.product_model import (
     Product,CompetitorCrawlData,AICompetitorAnalyzeMainKeywords, 
     SearchAmazoneKeyResult, ProductBaseInfo, Variant,AICompetitorAnalyzeMainKeywordsResult
     )
+from src.models.field_config import FieldConfig
 from utils.logu import get_logger
 from upath import UPath
 logger = get_logger('main')
@@ -194,26 +195,34 @@ class ManagerTask:
             
             
             # 执行实际分析
-            analyze_result, prompt = await analysis_service.execute_analysis(product)
+            analyze_result, prompt = await analysis_service.execute_analysis(product, dry_run=dry_run)
             logger.info(f"提示词:{prompt}")
-            logger.info(f"分析结果: {analyze_result}")
-            # 转换结果格式
-            results = [
-                AICompetitorAnalyzeMainKeywords(
-                    asin=item["asin"],
-                    main_key=item["main_key"],
-                    monthly_searches=int(item["monthly_searches"].replace(",", "")),
-                    reason=item["reason"]
-                ) for item in analyze_result["results"]
-            ]
+            logger.info(f"分析结果: {json.dumps(analyze_result, indent=2, ensure_ascii=False)}")
+            
+            # 转换monthly_searches字段为整数
+            def convert_monthly_searches(value):
+                if value is None:
+                    return None
+                if isinstance(value, str):
+                    if not value.strip():
+                        return None
+                    return int(value.replace(',', ''))
+                return value
+            
+            if 'results' in analyze_result:
+                for result in analyze_result['results']:
+                    if 'monthly_searches' in result:
+                        result['monthly_searches'] = convert_monthly_searches(result['monthly_searches'])
+            
+            if 'tail_keys' in analyze_result:
+                for tail_key in analyze_result['tail_keys']:
+                    if 'monthly_searches' in tail_key:
+                        tail_key['monthly_searches'] = convert_monthly_searches(tail_key['monthly_searches'])
             
             # 更新产品分析结果
-            product.competitor_analyze = AICompetitorAnalyzeMainKeywordsResult(
-                results=results,
-                supplement=analyze_result.get("supplement")
-            )
+            product.competitor_analyze = AICompetitorAnalyzeMainKeywordsResult(**analyze_result)
             if dry_run:
-                logger.info(f"{product.competitor_analyze.model_dump_json(indent=2, ensure_ascii=False)}")
+                # logger.info(f"{product.competitor_analyze.model_dump_json(indent=2)}")
                 return 
             # 保存到数据库
             await product.save()
@@ -255,7 +264,7 @@ async def main():
     await manager.db_mongo.initialize()
     product = await Product.find_one(Product.basic_info.name == "电线保护套")
     # await manager.extract_competitor_analysis(product)
-    await manager.async_analyze_and_save(product, dry_run=True, over_write=True)
+    await manager.async_analyze_and_save(product, dry_run=False, over_write=True)
     # await manager.submit_search_mainkeyword(product)
     return
     for asin in product.competitor_crawl_data.keys():

+ 107 - 12
src/models/field_config.py

@@ -1,5 +1,71 @@
-from typing import Dict, List, Optional, Set
 from pydantic import BaseModel
+from typing import Dict, Type, List, Optional,Set, get_origin, get_args, Union
+
+def get_field_descriptions(
+    model: Type[BaseModel],
+    include: Optional[List[str]] = None,
+    exclude: Optional[List[str]] = None,
+    current_path: str = ""
+) -> dict:
+    """
+    递归获取Pydantic模型字段描述,支持字段路径过滤
+    :param model: Pydantic模型类
+    :param include: 要包含的字段路径列表(支持点号分隔的路径)
+    :param exclude: 要排除的字段路径列表(支持点号分隔的路径)
+    :param current_path: 当前处理的字段路径(内部使用)
+    :return: 过滤后的字段描述字典
+    """
+    example = {}
+    for field_name, field_info in model.model_fields.items():
+        # 构建当前字段的完整路径
+        field_path = f"{current_path}.{field_name}" if current_path else field_name
+        
+        # 排除逻辑:如果匹配任何排除路径,则跳过
+        if exclude and any(field_path.startswith(excl) for excl in exclude):
+            continue
+        
+        # 包含逻辑:如果包含列表存在且不匹配任何路径,则跳过
+        if include and not any(field_path.startswith(inc) for inc in include):
+            continue
+        
+        description = field_info.description or ""
+        annotation = field_info.annotation
+        
+        # 处理Optional类型
+        origin = get_origin(annotation)
+        args = get_args(annotation)
+        if origin is Union and type(None) in args:
+            non_none_args = [arg for arg in args if arg is not type(None)]
+            if len(non_none_args) == 1:
+                annotation = non_none_args[0]
+                origin = get_origin(annotation)
+                args = get_args(annotation)
+        
+        # 处理列表类型
+        if origin is list:
+            item_type = args[0]
+            if issubclass(item_type, BaseModel):
+                # 递归处理嵌套模型,传递当前路径
+                example[field_name] = [get_field_descriptions(
+                    item_type,
+                    include=include,
+                    exclude=exclude,
+                    current_path=field_path
+                )]
+            else:
+                example[field_name] = [description]
+        # 处理嵌套模型
+        elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
+            example[field_name] = get_field_descriptions(
+                annotation,
+                include=include,
+                exclude=exclude,
+                current_path=field_path
+            )
+        # 基础类型
+        else:
+            example[field_name] = description
+    return example
 
 class FieldConfig:
     """字段配置管理器(仅包含模式)"""
@@ -22,32 +88,61 @@ class FieldConfig:
     def get_model_fields(
         self,
         model_class: BaseModel,
-        model_name: Optional[str] = None
+        model_name: Optional[str] = None,
+        include_fields: Optional[Set[str]] = None,
+        exclude_fields: Optional[Set[str]] = None,
+        nesting: Optional[str] = None
     ) -> Dict[str, str]:
         """
-        获取模型的字段配置
+        获取模型的字段配置(支持动态字段过滤)
         
         Args:
             model_class: Pydantic模型类
             model_name: 模型名称(用于查找配置)
+            include_fields: 要包含的字段集合(覆盖配置)
+            exclude_fields: 要排除的字段集合
+            nesting: 嵌套字段前缀(用于递归处理)
         
         Returns:
             字段名到描述的字典
         """
         model_name = model_name or model_class.__name__
-        include = self.include_fields.get(model_name, set())
+        # 当未指定包含/排除字段时,默认包含所有字段
+        if include_fields is None and exclude_fields is None:
+            include = set(model_class.model_fields.keys())
+            exclude = set()
+        else:
+            include = include_fields or self.include_fields.get(model_name, set())
+            exclude = exclude_fields or set()
         
         field_descriptions = {}
         for field_name, field_info in model_class.model_fields.items():
-            # 只返回包含列表中指定的字段
-            if field_name in include:
-                # 优先使用配置中的描述,其次使用模型中的描述
-                description = (
-                    self.field_descriptions.get(model_name, {}).get(field_name)
-                    or field_info.description
+            # 应用字段过滤规则
+            if field_name not in include or field_name in exclude:
+                continue
+                
+            # 处理嵌套模型
+            full_name = f"{nesting}.{field_name}" if nesting else field_name
+            if hasattr(field_info.annotation, 'model_fields'):
+                # 递归获取嵌套字段
+                # 递归时保持include/exclude为None以继承默认行为
+                nested_fields = self.get_model_fields(
+                    field_info.annotation,
+                    model_name=field_info.annotation.__name__,
+                    include_fields=None,  # 强制使用默认包含
+                    exclude_fields=None,   # 强制使用默认排除
+                    nesting=full_name
                 )
-                if description:
-                    field_descriptions[field_name] = description
+                field_descriptions.update(nested_fields)
+                continue
+            
+            # 获取字段描述
+            description = (
+                self.field_descriptions.get(model_name, {}).get(field_name)
+                or field_info.description
+            )
+            if description:
+                field_descriptions[full_name] = description
                 
         return field_descriptions
 

+ 17 - 9
src/models/product_model.py

@@ -115,24 +115,32 @@ class SearchAmazoneKeyResult(BaseModel):
     msg:Optional[str] = None
     created_at:Optional[datetime] = Field(default_factory=datetime.now)
 
+
 class AICompetitorAnalyzeMainKeywords(BaseModel):
-    asin:str
-    main_key:str
-    monthly_searches: Optional[int] = 0
-    reason:Optional[str] = None
+
+    asin:str = Field(default=None, description="商品(竞品)编号")
+    main_key:str = Field(default=None, description="主要关键词")
+    monthly_searches: Optional[int] = Field(default=None, description="月搜索量")
+    reason:Optional[str] = Field(default=None, description="选择该主关键词原因")
     crawl_result: Optional[SearchAmazoneKeyResult] = Field(
         default=None,
         description="爬取AI分析出来的主关键词" 
     )
-    tail_keys: Optional[List[str]] = Field(
-        default=[],
-        description="根据主关键词去amazone搜索,得出长尾关键词" 
-    )
     created_at:Optional[datetime] = Field(default_factory=datetime.now)
 
 class AICompetitorAnalyzeMainKeywordsResult(BaseModel):
+    class TailKey(BaseModel):
+        tail_key:str = Field(default=None, description="长尾关键词")
+        monthly_searches:Optional[int] = Field(default=None, description="月搜索量")
     results:Optional[List[AICompetitorAnalyzeMainKeywords]] = []
-    supplement:Optional[str] = None
+    supplement:Optional[str] = Field(
+        default=None,
+        description="补充说明,非必填。如果你有额外的信息或建议,可以在这里添加。"
+    )
+    tail_keys: Optional[List[TailKey]] = Field(
+        default=[],
+    )
+
 
 class CompetitorAnalyze(BaseModel):
     ai_analyze_main_keywords: Optional[List[AICompetitorAnalyzeMainKeywords]] = Field(

+ 112 - 0
tests/mytest/models/t_filed_qwenpy

@@ -0,0 +1,112 @@
+import json
+from typing import Dict, List, Optional, Union, Type, Any
+from pydantic import BaseModel, Field
+from src.models.product_model import AICompetitorAnalyzeMainKeywordsResult, AICompetitorAnalyzeMainKeywords
+from src.models.product_model import AICompetitorAnalyzeMainKeywordsResult as ResultModel
+from typing import Type, List, Optional, get_origin, get_args, Union
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
+from utils.logu import get_logger
+from src.models.field_config import FieldConfig
+logger = get_logger('test')
+
+def get_field_descriptions(
+    model: Type[BaseModel],
+    include: Optional[List[str]] = None,
+    exclude: Optional[List[str]] = None,
+    current_path: str = ""
+) -> dict:
+    """
+    递归获取Pydantic模型字段描述,支持字段路径过滤
+    :param model: Pydantic模型类
+    :param include: 要包含的字段路径列表(支持点号分隔的路径)
+    :param exclude: 要排除的字段路径列表(支持点号分隔的路径)
+    :param current_path: 当前处理的字段路径(内部使用)
+    :return: 过滤后的字段描述字典
+    """
+    example = {}
+    for field_name, field_info in model.model_fields.items():
+        # 构建当前字段的完整路径
+        field_path = f"{current_path}.{field_name}" if current_path else field_name
+        
+        # 排除逻辑:如果匹配任何排除路径,则跳过
+        if exclude and any(field_path.startswith(excl) for excl in exclude):
+            continue
+        
+        # 包含逻辑:如果包含列表存在且不匹配任何路径,则跳过
+        if include and not any(field_path.startswith(inc) for inc in include):
+            continue
+        
+        description = field_info.description or ""
+        annotation = field_info.annotation
+        
+        # 处理Optional类型
+        origin = get_origin(annotation)
+        args = get_args(annotation)
+        if origin is Union and type(None) in args:
+            non_none_args = [arg for arg in args if arg is not type(None)]
+            if len(non_none_args) == 1:
+                annotation = non_none_args[0]
+                origin = get_origin(annotation)
+                args = get_args(annotation)
+        
+        # 处理列表类型
+        if origin is list:
+            item_type = args[0]
+            if issubclass(item_type, BaseModel):
+                # 递归处理嵌套模型,传递当前路径
+                example[field_name] = [get_field_descriptions(
+                    item_type,
+                    include=include,
+                    exclude=exclude,
+                    current_path=field_path
+                )]
+            else:
+                example[field_name] = [description]
+        # 处理嵌套模型
+        elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
+            example[field_name] = get_field_descriptions(
+                annotation,
+                include=include,
+                exclude=exclude,
+                current_path=field_path
+            )
+        # 基础类型
+        else:
+            example[field_name] = description
+    return example
+
+def model_description_dump(
+    model: Type[BaseModel],
+    include: Optional[List[str]] = None,
+    exclude: Optional[List[str]] = None
+) -> dict:
+    """
+    生成模型描述字典,支持字段路径过滤
+    :param model: Pydantic模型类
+    :param include: 要包含的字段路径列表(如 ["results.asin", "tail_keys"])
+    :param exclude: 要排除的字段路径列表
+    :return: 过滤后的字段描述字典
+    """
+    return get_field_descriptions(model, include=include, exclude=exclude)
+def main():
+    # 默认输出所有字段
+    example_full = model_description_dump(
+        AICompetitorAnalyzeMainKeywordsResult,
+        exclude=['results.crawl_result', 'results.created_at']
+        )
+    logger.info(f"example_full {json.dumps(example_full, indent=2, ensure_ascii=False)}")
+    # 包含特定字段
+    example_included = model_description_dump(
+        AICompetitorAnalyzeMainKeywordsResult,
+        include=["results", "tail_keys"]
+    )
+    logger.info(f"example_included {example_included}")
+    # 排除特定字段
+    example_excluded = model_description_dump(
+        AICompetitorAnalyzeMainKeywordsResult,
+        exclude=["crawl_result", "created_at"]
+    )
+    logger.info(f"example_excluded {example_excluded}")
+if __name__ == "__main__":
+    main()