mrh 7 月之前
父節點
當前提交
f618fbfed2

+ 3 - 1
.gitignore

@@ -11,4 +11,6 @@ wheels/
 output/
 .env
 .aider*
-.history
+.history
+src/cli
+docs/network_doc

+ 1 - 0
README.md

@@ -22,6 +22,7 @@ echo $timestr
 # 恢复到 test 数据库
 G:\program\MongoDB\mongodb-database-tools-windows-x86_64-100.11.0\bin\mongorestore.exe --uri="mongodb://sv-v2:27017/test" "I:\eng\backup\mongo\20250327_01_04_08\amazone\Product.bson"
 
+G:\program\MongoDB\mongodb-database-tools-windows-x86_64-100.11.0\bin\mongorestore.exe --uri="mongodb://mrh:123@sv-v2:27017" "I:\eng\backup\mongo\20250331_14_14_17"
 G:\program\MongoDB\mongodb-database-tools-windows-x86_64-100.11.0\bin\mongorestore.exe --uri="mongodb://sv-v2:27017/backup_amazone" --collection=Product G:\code\amazone\copywriting_production\output\temp
 mongoimport --uri="mongodb://sv-v2:27017" --collection=users --file=G:\code\amazone\copywriting_production\output\mongodb_backup\users.json
 ```

+ 172 - 0
ai/demo/agent_ai_analysis.py

@@ -0,0 +1,172 @@
+import litellm
+from litellm import completion
+from pydantic import BaseModel
+from typing import Any, Dict, Optional
+from src.models.product_model import (
+    Product,
+    AIAnalyzeCompare,
+    AICompetitorAnalyzeMainKeywordsResult,
+    MarketingInfo
+)
+
+class AnalysisExecutor(BaseModel):
+    """分析执行器基类"""
+    product: Product
+    model_provider: str
+    
+    async def prepare_input_data(self) -> Dict:
+        """准备分析输入数据"""
+        raise NotImplementedError
+        
+    async def execute_analysis(self, input_data: Dict) -> Dict:
+        """执行AI分析并返回结构化结果"""
+        raise NotImplementedError
+
+class CompetitorAnalysisExecutor(AnalysisExecutor):
+    """竞品分析执行器"""
+    
+    async def prepare_input_data(self) -> Dict:
+        """组合竞品分析输入数据"""
+        return {
+            "base_info": self.product.basic_info.dict(),
+            "competitors": [
+                c.dict() for c in self.product.competitor_crawl_data.values()
+            ],
+            "extra": self.product.extra_result
+        }
+    
+    async def execute_analysis(self, input_data: Dict) -> Dict:
+        """执行竞品分析"""
+        prompt_template = f"""
+        你是一位专业的亚马逊市场分析师,请根据以下数据生成竞品分析报告:
+        产品基本信息:{input_data['base_info']}
+        竞品数据:{input_data['competitors']}
+        额外数据:{input_data['extra']}
+        
+        请按以下结构输出JSON:
+        {{
+            "main_keywords": [{{"keyword": "...", "reason": "..."}}],
+            "tail_keywords": [{{"keyword": "...", "monthly_searches": ...}}],
+            "comparison_analysis": "..." 
+        }}
+        """
+        
+        response = await completion(
+            model=self.model_provider,
+            messages=[{"role": "user", "content": prompt_template}],
+            response_format={"type": "json_object"}
+        )
+        
+        raw_result = response.choices[0].message.content
+        return AICompetitorAnalyzeMainKeywordsResult.parse_raw(raw_result)
+
+class MarketingAnalysisExecutor(AnalysisExecutor):
+    """营销分析执行器"""
+    
+    async def prepare_input_data(self) -> Dict:
+        """组合营销分析输入数据"""
+        return {
+            "product_info": self.product.basic_info.dict(),
+            "current_marketing": self.product.marketing.dict(),
+            "sales_data": self.product.variants
+        }
+    
+    async def execute_analysis(self, input_data: Dict) -> Dict:
+        """执行营销分析"""
+        prompt_template = f"""
+        根据产品信息生成营销方案:
+        产品信息:{input_data['product_info']}
+        当前营销数据:{input_data['current_marketing']}
+        销售数据:{input_data['sales_data']}
+        
+        输出要求:
+        {{
+            "title": "产品标题",
+            "selling_points": ["卖点1", "卖点2"],
+            "improvement_suggestions": "..." 
+        }}
+        """
+        
+        response = await completion(
+            model=self.model_provider,
+            messages=[{"role": "user", "content": prompt_template}],
+            response_format={"type": "json_object"}
+        )
+        
+        raw_result = response.choices[0].message.content
+        return MarketingInfo.parse_raw(raw_result)
+
+class AnalysisService:
+    """AI分析核心服务"""
+    
+    def __init__(self, product: Product):
+        self.product = product
+        self.snapshot_service = SnapshotService()
+
+    def _get_executor(self, analysis_type: AnalysisType, provider: ModelProvider):
+        """获取执行器实例"""
+        executor_map = {
+            AnalysisType.COMPETITOR: CompetitorAnalysisExecutor,
+            AnalysisType.MARKETING: MarketingAnalysisExecutor
+        }
+        return executor_map[analysis_type](
+            product=self.product,
+            model_provider=provider.value
+        )
+
+    async def execute_analysis(
+        self,
+        analysis_type: AnalysisType,
+        model_provider: ModelProvider,
+        compare_with_previous: bool = True
+    ) -> AIAnalyzeCompare:
+        """执行完整分析流程"""
+        # 1. 准备并执行分析
+        executor = self._get_executor(analysis_type, model_provider)
+        input_data = await executor.prepare_input_data()
+        raw_result = await executor.execute_analysis(input_data)
+
+        # 2. 创建分析快照
+        snapshot = await self.snapshot_service.create_snapshot(
+            product_id=str(self.product.id),
+            analysis_type=analysis_type,
+            model_provider=model_provider,
+            input_data=input_data,
+            output_data=raw_result.dict()
+        )
+
+        # 3. 构建并存储对比结果
+        compare_result = AIAnalyzeCompare(
+            model=model_provider.value,
+            analysis_type=analysis_type,
+            input_data_hash=snapshot.input_hash,
+            output_data_hash=snapshot.output_hash,
+            analysis_result=raw_result
+        )
+
+        # 4. 更新产品记录
+        await self._update_product_record(model_provider, analysis_type, compare_result)
+
+        # 5. 结果对比(如果需要)
+        if compare_with_previous:
+            previous = await self._get_previous_analysis(model_provider, analysis_type)
+            if previous:
+                compare_result.differences = self._compare_results(raw_result, previous)
+
+        return compare_result
+
+    async def _update_product_record(self, provider: ModelProvider, analysis_type: AnalysisType, result: AIAnalyzeCompare):
+        """更新产品分析记录"""
+        update_field = f"ai_analysis_compare.{provider.value}_{analysis_type.value}"
+        await self.product.update({update_field: result.dict()})
+
+    async def _get_previous_analysis(self, provider: ModelProvider, analysis_type: AnalysisType):
+        """获取最近一次同类型分析结果"""
+        field_name = f"{provider.value}_{analysis_type.value}"
+        return self.product.ai_analysis_compare.get(field_name)
+
+    def _compare_results(self, current: BaseModel, previous: BaseModel) -> Dict:
+        """对比两次分析结果差异"""
+        current_dict = current.dict()
+        previous_dict = previous.dict()
+        return DeepDiff(previous_dict, current_dict, ignore_order=True).to_dict()

+ 408 - 0
ai/demo/agent_product.py

@@ -0,0 +1,408 @@
+from abc import ABC, abstractmethod
+import json
+from typing import Optional, Set, Union
+from llama_index.core import PromptTemplate
+import asyncio
+import aiofiles
+import os
+import sys
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from src.models.product_model import (
+    AIAnalyzeCompare, Product, CompetitorCrawlData, AICompetitorAnalyzeMainKeywords,
+    TrafficKeywordResult, ProductImageInfo,AICompetitorAnalyzeMainKeywordsResult,
+    SearchAmazoneKeyResult, ProductBaseInfo, Variant,MarketingInfo,
+)
+from src.models.config_model import (UserConfig, AIPromptConfig, )
+from llama_index.llms.openai import OpenAI
+from llama_index.llms.litellm import LiteLLM
+from src.manager.core.db_mongo import BaseMongoManager
+from utils.logu import get_logger
+from src.models.field_config import FieldConfig,get_field_descriptions
+load_dotenv()
+logger = get_logger('ai')
+
+class ConfigManager:
+    _instance = None
+    _config = FieldConfig(
+        include_fields={
+            "ProductImageInfo": {"main_text"},
+            "TrafficKeywordResult": {"traffic_keyword", "monthly_searches"},
+            "ProductBaseInfo": {
+                "name", "content", "material", "color", "size",
+                "packaging_size", "weight", "main_usage", "selling_point"
+            },
+            "CompetitorCrawlData": {"asin"}
+        }
+    )
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    @classmethod
+    def get_field_config(cls) -> FieldConfig:
+        return cls._config
+
+    @classmethod
+    def update_config(cls, new_config: FieldConfig):
+        cls._config = new_config
+
+def get_competitor_prompt_data(
+    product: Product,
+    field_config: FieldConfig = ConfigManager.get_field_config()
+) -> list:
+    """
+    获取竞品提示数据
+    
+    Args:
+        product: 产品对象
+        field_config: 字段配置
+        
+    Returns:
+        结构化竞品数据列表
+    """
+    competitor_crawl_data = product.competitor_crawl_data
+    list_data = []
+    
+    for asin, crawl_data in competitor_crawl_data.items():
+        if crawl_data.extra_result:
+            structured_result = {"asin": asin}
+            
+            if crawl_data.extra_result.product_info:
+                structured_result["product_info"] = field_config.filter_model_dump(
+                    crawl_data.extra_result.product_info,
+                    "ProductImageInfo"
+                )
+                
+            if crawl_data.extra_result.result_table:
+                structured_result["result_table"] = [
+                    field_config.filter_model_dump(item, "TrafficKeywordResult")
+                    for item in crawl_data.extra_result.result_table
+                ]
+                
+            logger.debug(f"Structured result for LLM: {json.dumps(structured_result, indent=4, ensure_ascii=False)}")
+            list_data.append(structured_result)
+            
+    return list_data
+
+class PromptFormatter:
+    """LLM提示词模板格式化器"""
+    def __init__(self, template: str, **kwargs):
+        self.template = template
+        self.kwargs = kwargs
+        self.partial_kwargs = {}
+        self.var_mappings = {}
+        self.function_mappings = {}
+
+    def partial_format(self, **kwargs) -> "PromptFormatter":
+        """部分格式化模板"""
+        self.partial_kwargs.update(kwargs)
+        return self
+
+    def map_variables(self, **mappings) -> "PromptFormatter":
+        """映射模板变量名"""
+        self.var_mappings.update(mappings)
+        return self
+
+    def map_functions(self, **functions) -> "PromptFormatter":
+        """映射模板处理函数"""
+        self.function_mappings.update(functions)
+        return self
+
+    def format(self, **kwargs) -> str:
+        """最终格式化提示词"""
+        # 合并所有参数
+        all_kwargs = {**self.partial_kwargs, **kwargs}
+        
+        # 应用变量名映射
+        mapped_kwargs = {}
+        for key, value in all_kwargs.items():
+            mapped_key = self.var_mappings.get(key, key)
+            mapped_kwargs[mapped_key] = value
+        
+        # 应用函数处理
+        for key, func in self.function_mappings.items():
+            if key in mapped_kwargs:
+                mapped_kwargs[key] = func(**mapped_kwargs)
+        
+        return self.template.format(**mapped_kwargs)
+
+class Formatter(ABC):
+    """格式化器抽象基类"""
+    def __init__(self, notes: Optional[dict] = None):
+        self.notes = notes or {}
+
+    @abstractmethod
+    def format(self, fields_desc: dict) -> str:
+        pass
+
+class JSONFormatter(Formatter):
+    """JSON格式处理器"""
+    def format(self, fields_desc: dict) -> str:
+        json_output = json.dumps(fields_desc, indent=2, ensure_ascii=False)
+        if self.notes.get('json'):
+            return f"```json\n{json_output}\n```\n{self.notes['json']}"
+        return f"```json\n{json_output}\n```"
+
+class HumanFormatter(Formatter):
+    """人类可读格式处理器"""
+    def format(self, fields_desc: dict) -> str:
+        def process_dict(d, indent=0):
+            lines = []
+            for key, value in d.items():
+                prefix = " " * indent
+                if isinstance(value, dict):
+                    lines.append(f"{prefix}{key}:")
+                    lines.append(process_dict(value, indent + 2))
+                else:
+                    lines.append(f"{prefix}{value}: {{{key}}}")
+            return "\n".join(lines)
+
+        result = process_dict(fields_desc)
+        if self.notes.get('human'):
+            result += f"\n{self.notes['human']}"
+        return result
+
+class FormatterFactory:
+    """格式化器工厂类"""
+    @staticmethod
+    def create_formatter(format_type: str, notes: Optional[dict] = None) -> Formatter:
+        if format_type == "json":
+            return JSONFormatter(notes)
+        elif format_type == "human":
+            return HumanFormatter(notes)
+        raise ValueError(f"Unsupported format type: {format_type}")
+
+
+class LLMService:
+    """LLM服务抽象类"""
+    @abstractmethod
+    async def analyze(self, prompt: str) -> Union[dict, str]:
+        pass
+
+class LiteLLMService(LLMService):
+    """LiteLLM实现"""
+    def __init__(self, model: str = "openai/deepseek-chat", max_retries: int = 3,
+                 retry_delay: float = 1.0, format_type: str = "json"):
+        self.model = model
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+        self.format_type = format_type
+
+    async def analyze(self, prompt: str) -> Union[dict, str]:
+        llm_kwargs = {}
+        if self.format_type == "json":
+            # if 'deepseek-r' not in self.model:
+                # llm_kwargs["additional_kwargs"] = {"response_format": {"type": "json_object"}}
+            prompt += "\n请确保输出的是有效的JSON对象。"
+        logger.info(f"{self.model} 调用参数: {llm_kwargs}")
+        llm = LiteLLM(model=self.model, **llm_kwargs)
+
+        for attempt in range(self.max_retries):
+            try:
+                logger.info(f"尝试第 {attempt + 1} 次LLM调用...")
+                completion = await llm.acomplete(prompt)
+                return self._process_response(completion.text)
+            except (json.JSONDecodeError, ValueError) as e:
+                if self.format_type == "json":
+                    logger.warning(f"JSON解析失败(尝试 {attempt + 1}/{self.max_retries}): {str(e)}")
+                    if attempt < self.max_retries - 1:
+                        await asyncio.sleep(self.retry_delay)
+                    else:
+                        raise ValueError(f"无法获取有效的JSON响应: {str(e)}")
+            except Exception as e:
+                logger.exception(f"LLM调用失败: {str(e)}")
+                raise
+
+    def _process_response(self, response_text: str) -> Union[dict, str]:
+        if self.format_type == "json":
+            if "```json" in response_text:
+                json_str = response_text.split("```json")[1].split("```")[0].strip()
+            else:
+                json_str = response_text
+
+            result = json.loads(json_str)
+            if not isinstance(result, dict):
+                raise ValueError("响应不是有效的JSON对象")
+
+            logger.debug(f"LLM响应验证通过: {json.dumps(result, indent=2, ensure_ascii=False)}")
+            return result
+        return response_text
+
+class AnalysisService:
+    """分析领域服务"""
+    def __init__(self, llm_service: LLMService, db_manager: BaseMongoManager):
+        self.llm_service:LiteLLMService = llm_service
+        self.db_manager = db_manager
+
+    async def execute_analysis(self, product:Product, format_type: str = "json", dry_run=False, template: Optional[AIPromptConfig] = None) -> tuple[dict, str]:
+        # if template:
+        #     formatter = PromptFormatter(template.template)
+        #     if template.keywords:
+        #         formatter.partial_format(**template.keywords)
+        #     prompt = formatter.format(
+        #         product=product,
+        #         format_type=format_type
+        #     )
+        # else:
+        #     prompt = await self._prepare_prompt(product, format_type)
+            
+        # logger.info(f"prompt: {prompt}")
+        # analysis_result = await self.llm_service.analyze(prompt)
+        # return analysis_result, prompt
+        pass
+
+    async def execute_marketing_analysis(self, product: Product, format_type: str = "json", template: Optional[AIPromptConfig] = None) -> tuple[MarketingInfo, str]:
+        """
+        执行营销文案分析
+        
+        Args:
+            product: 产品对象
+            format_type: 输出格式
+            template: 自定义提示模板
+            
+        Returns:
+            (分析结果, 使用的提示词)
+        """
+        if template:
+            formatter = PromptFormatter(template.template)
+            if template.keywords:
+                formatter.partial_format(**template.keywords)
+            prompt = formatter.format(
+                product=product,
+                format_type=format_type
+            )
+        else:
+            prompt = f'''我是亚马逊运营,请为产品 {product.basic_info.name} 生成营销文案。
+            
+产品信息:
+{product.basic_info.model_dump_json(indent=2)}
+
+要求:
+- 突出产品卖点: {', '.join(product.basic_info.selling_point)}
+- 适合日本市场风格
+- 包含吸引人的标题和详细描述'''
+
+        logger.info(f"营销分析提示词: {prompt}")
+        analysis_result = await self.llm_service.analyze(prompt)
+        
+        try:
+            marketing_info = MarketingInfo(**analysis_result)
+            return marketing_info, prompt
+        except Exception as e:
+            logger.error(f"营销分析结果解析失败: {str(e)}")
+            raise ValueError("营销分析结果格式不正确") from e
+
+    async def _prepare_competitor_prompt(self, product: Product, template: AIPromptConfig) -> str:
+        """使用llmaindex模板方法格式化提示词
+        
+        Args:
+            product: 产品对象
+            template: 提示模板配置
+            
+        Returns:
+            格式化后的提示词字符串
+        """
+        output_fields = get_field_descriptions(
+            AICompetitorAnalyzeMainKeywordsResult,
+            exclude=['results.crawl_result', 'results.created_at']
+            )
+        formatter = FormatterFactory.create_formatter(self.llm_service.format_type)
+        output_format = formatter.format(output_fields)
+
+        competitor_data = get_competitor_prompt_data(product)
+        basic_template =f'''各个字段说明:
+{get_field_descriptions(CompetitorCrawlData, include=['asin'])}
+{get_field_descriptions(ProductImageInfo, include=['main_text'])}
+{get_field_descriptions(TrafficKeywordResult, include=['traffic_keyword', 'monthly_searches'])}
+
+竞品数据:
+{competitor_data}
+
+我的产品信息如下:
+{product.basic_info.model_dump_json(indent=2)}
+
+返回格式:
+{output_format}
+----
+'''
+        template.template = basic_template + template.template
+        return template.template
+
+    @staticmethod
+    def convert_monthly_searches(value):
+        if value is None:
+            return None
+        if isinstance(value, str):
+            if not value.strip():
+                return None
+            return int(value.replace(',', ''))
+        return value
+    async def run_competitor_analysis(self, product: Product,
+        ai_analyze_compare_model: AIAnalyzeCompare,
+        format_type: str = 'json',):
+        prompt = await self._prepare_competitor_prompt(product, ai_analyze_compare_model.competitor_template)
+        logger.info(f"_prepare_competitor_prompt {prompt}")
+        analyze_result = await self.llm_service.analyze(prompt)
+        if 'results' in analyze_result:
+            for result in analyze_result['results']:
+                if 'monthly_searches' in result:
+                    result['monthly_searches'] = self.convert_monthly_searches(result['monthly_searches'])
+        
+        if 'tail_keys' in analyze_result:
+            for tail_key in analyze_result['tail_keys']:
+                if 'monthly_searches' in tail_key:
+                    tail_key['monthly_searches'] = self.convert_monthly_searches(tail_key['monthly_searches'])
+        return analyze_result
+    async def _prepare_prompt(self, product: Product, format_type: str = "json", main_key_num: int = 3, tail_key_num:int = 12) -> str:
+        competitor_data = get_competitor_prompt_data(product)
+        # 从数据模型获取输出字段描述
+        output_fields = get_field_descriptions(
+            AICompetitorAnalyzeMainKeywordsResult,
+            exclude=['results.crawl_result', 'results.created_at']
+            )
+        formatter = FormatterFactory.create_formatter(format_type)
+        output_format = formatter.format(output_fields)
+
+        return f'''各个字段说明:
+{get_field_descriptions(CompetitorCrawlData, include=['asin'])}
+{get_field_descriptions(ProductImageInfo, include=['main_text'])}
+{get_field_descriptions(TrafficKeywordResult, include=['traffic_keyword', 'monthly_searches'])}
+
+竞品数据:
+{competitor_data}
+
+我的产品信息如下:
+{product.basic_info.model_dump_json(indent=2)}
+----
+我是日本站的亚马逊运营,我在给产品名称为 {product.basic_info.name} 选主要关键词和长尾关键词。
+
+请根据以上 {len(competitor_data)} 个竞品数据,按以下规则分析:
+- 选出搜索量在1万以上的相同关键词作为主要关键词{main_key_num}个。
+- 如果竞品的搜索量都不足1万,则从排名前十的关键词中筛选 {main_key_num} 个搜索量最大且相关性最强的词。
+- 结合日本市场特点分析
+- 根据我的产品基本信息,从竞品的主要信息和同类竞品的相似关键词中,筛选出最符合我产品的长尾关键词 tail_keys {tail_key_num} 个以上
+
+筛选长尾词的示例:
+- 假设我的产品是电线保护,那么竞品关键词中,“隐藏排线管” 就不符合长尾词
+- 假设我的产品是“防老化、防动物咬”用途,你就不能在竞品数据中选择不属于我这个使用场景的长尾关键词。
+
+输出格式:
+{output_format}'''
+
+async def main():
+    logger.info(f"base url {os.environ.get('OPENAI_API_BASE')}")
+    db_manager = BaseMongoManager()
+    llm_service = LiteLLMService(format_type='json')
+    analysis_service = AnalysisService(llm_service, db_manager)
+
+    try:
+        result = await analysis_service.execute_analysis("电线保护套")
+        logger.info(f"分析结果: {json.dumps(result, indent=2, ensure_ascii=False)}")
+    except ValueError as e:
+        logger.error(f"分析失败: {str(e)}")
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 172 - 0
ai/demo/mongo_template_agent.py

@@ -0,0 +1,172 @@
+from typing import Optional, Dict, Any, List
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.openai import OpenAI
+from pydantic import BaseModel, Field
+from src.manager.template_manager import TemplateManager, TemplateService
+from src.models.template_model import Template, TemplateType
+import inspect
+from utils.logu import get_logger
+
+logger = get_logger('mongo_agent')
+
+class TemplateCreateRequest(BaseModel):
+    """模板创建参数"""
+    name: str = Field(..., min_length=3, description="唯一模板标识符")
+    template_str: str = Field(..., description="Jinja2模板内容")
+    template_type: TemplateType = Field(TemplateType.AGGREGATION,
+                                      description="模板类型")
+    description: Optional[str] = Field(None,
+                                     description="模板功能描述")
+    collection_name: Optional[str] = Field(None,
+                                        description="关联的MongoDB集合")
+
+class TemplateUpdateRequest(TemplateCreateRequest):
+    """模板更新参数"""
+    if_exists: str = Field("update", description="存在时处理方式:update/ignore")
+
+class TemplateExecuteRequest(BaseModel):
+    """模板执行参数"""
+    template_name: str = Field(..., description="要执行的模板名称")
+    context: Dict[str, Any] = Field(default_factory=dict,
+                                  description="模板变量上下文")
+    collection_name: Optional[str] = Field(None,
+                                         description="覆盖默认集合")
+
+class MongoTemplateAgent:
+    """MongoDB模板管理智能体,集成LLM功能"""
+    
+    def __init__(self, template_manager: TemplateManager, llm: OpenAI):
+        self.manager = template_manager
+        self.llm = llm
+        self._register_core_tools()
+
+    def _register_core_tools(self):
+        """注册核心模板管理工具"""
+        self.tools = [
+            self._create_tool(
+                fn=self.create_template,
+                name="create_mongo_template",
+                description="创建新的MongoDB操作模板"
+            ),
+            self._create_tool(
+                fn=self.execute_template,
+                name="execute_mongo_template",
+                description="执行MongoDB聚合模板"
+            ),
+            self._create_tool(
+                fn=self.list_templates,
+                name="list_mongo_templates",
+                description="列出可用模板"
+            ),
+            self._create_tool(
+                fn=self.update_template,
+                name="update_mongo_template",
+                description="更新现有模板"
+            ),
+            self._create_tool(
+                fn=self.delete_template,
+                name="delete_mongo_template",
+                description="删除指定模板"
+            )
+        ]
+
+    def _create_tool(self, fn, name: str, description: str) -> FunctionTool:
+        """工具工厂方法"""
+        schema = inspect.signature(fn).parameters['request'].annotation
+        return FunctionTool.from_defaults(
+            fn=fn,
+            name=name,
+            description=description,
+            fn_schema=schema
+        )
+
+    async def create_template(self, request: TemplateCreateRequest) -> Template:
+        """创建模板(带智能参数验证)"""
+        try:
+            return await self.manager.create_or_update_template(
+                name=request.name,
+                template_str=request.template_str,
+                template_type=request.template_type,
+                description=request.description,
+                collection_name=request.collection_name,
+                if_exists="update"
+            )
+        except Exception as e:
+            logger.error(f"Template creation failed: {str(e)}")
+            raise ValueError(f"创建失败: {e}") from e
+
+    async def update_template(self, request: TemplateUpdateRequest) -> Template:
+        """更新模板"""
+        return await self.manager.create_or_update_template(
+            **request.dict()
+        )
+
+    async def delete_template(self, template_name: str) -> bool:
+        """删除模板"""
+        return await self.manager.delete_template(template_name)
+
+    async def execute_template(self, request: TemplateExecuteRequest) -> list:
+        """执行模板(带参数校验)"""
+        try:
+            result = await self.manager.execute_template(
+                name=request.template_name,
+                context=request.context,
+                collection_name=request.collection_name
+            )
+            logger.info(f"Template {request.template_name} 执行成功,返回{len(result)}条结果")
+            return result
+        except Exception as e:
+            logger.error(f"模板执行失败: {str(e)}")
+            raise RuntimeError(f"执行失败: {e}") from e
+
+    async def list_templates(self, type_filter: Optional[str] = None) -> List[Template]:
+        """获取模板列表"""
+        template_type = TemplateType(type_filter) if type_filter else None
+        return await self.manager.list_templates(template_type)
+
+    async def predict_and_execute(self, query: str) -> Any:
+        """执行LLM预测并调用工具"""
+        response = await self.llm.apredict_and_call(
+            tools=self.tools,
+            query=query,
+            allow_parallel_tool_calls=False,
+            verbose=True
+        )
+        return self._format_response(response)
+
+    def _format_response(self, raw_response) -> Dict:
+        """标准化响应格式"""
+        return {
+            "operation": raw_response.sources[0].tool_name,
+            "result": str(raw_response.sources[0].raw_output),
+            "success": not any(s.error for s in raw_response.sources)
+        }
+
+    @classmethod
+    async def build(cls, llm_model: str = "gpt-4o-mini") -> "MongoTemplateAgent":
+        """构建实例的工厂方法"""
+        manager = await TemplateService.get_instance()
+        llm = OpenAI(model=llm_model, temperature=0.1, max_retries=3)
+        return cls(template_manager=manager, llm=llm)
+
+class AdvancedTemplateAgent(MongoTemplateAgent):
+    """增强版模板管理智能体"""
+    
+    def _register_core_tools(self):
+        super()._register_core_tools()
+        self.tools.extend([
+            self._create_tool(
+                fn=self.analyze_template_usage,
+                name="analyze_template_metrics",
+                description="分析模板使用指标"
+            )
+        ])
+
+    async def analyze_template_usage(self, template_name: str) -> Dict:
+        """分析模板使用模式"""
+        # 实现指标分析逻辑
+        return {
+            "template": template_name,
+            "usage_count": 0,
+            "last_executed": None
+        }

+ 44 - 0
ai/demo/run_service.py

@@ -0,0 +1,44 @@
+from src.models.product_model import Product
+from src.models.config_model import UserConfig
+from src.models.ai_execution_record import (
+    CompetitorKeywordAnalysis, MarketingContentGeneration, LLMConfig,
+    AICompetitorAnalyzeMainKeywordsResult,
+    MarketingInfo,)
+from src.manager.core.db_mongo import BaseMongoManager
+from src.ai.ai_executor import TaskRunner
+import asyncio
+from utils.logu import get_logger
+from src.models.field_config import FieldConfig, get_field_descriptions
+from dotenv import load_dotenv
+load_dotenv()
+logger = get_logger('ai')
+
+async def example_usage():
+    """更新后的示例调用代码"""
+    db_manager = BaseMongoManager()
+    await db_manager.initialize()
+    
+    # 获取产品及用户配置
+    product = await Product.find_one(Product.basic_info.name == "电线保护套")
+    llm_config = LLMConfig(model_name="openai/deepseek-chat")
+    
+    # 竞品分析任务
+    competitor_model = CompetitorKeywordAnalysis(
+        product=product, 
+        executor_config=llm_config,
+        prompting=user.prompting.get("竞品和长尾词分析")
+    )
+    await task_runner.run_task(competitor_model)
+    logger.info(f"竞品分析结果: {competitor_model.result}")
+    
+    # 营销文案生成任务
+    marketing_model = MarketingContentGeneration(
+        product=product,
+        executor_config=llm_config,
+        prompting=user.prompting.get("营销文案"), 
+    )
+    await task_runner.run_task(marketing_model)
+    logger.info(f"营销文案结果: {marketing_model.result}")
+
+if __name__ == "__main__":
+    asyncio.run(example_usage())

+ 3 - 0
ai/dev.md

@@ -0,0 +1,3 @@
+# 让 llama_index 生成结构化数据模型
+https://docs.llamaindex.ai/en/stable/examples/output_parsing/llm_program/
+https://docs.llamaindex.ai/en/stable/examples/output_parsing/guidance_pydantic_program/

+ 128 - 0
ai/marketting_agent.py

@@ -0,0 +1,128 @@
+import asyncio
+import aiofiles
+import os
+import sys
+from typing import List, Dict, Any
+from typing import Any, Dict, List, Optional, Union
+from pydantic import BaseModel
+from typing import List
+from llama_index.core.program import LLMTextCompletionProgram
+from llama_index.core.output_parsers import PydanticOutputParser
+from llama_index.llms.openai import OpenAI
+from llama_index.llms.litellm import LiteLLM
+from llama_index.core.llms.llm import LLM
+from src.models.product_model import Product
+from src.manager.template_manager import TemplateManager, TemplateService, TemplateType
+from src.models.ai_execution_record import MarketingInfo, LLMConfig, SuperPromptMixin, AgentConfig, AgentContent, AICompetitorAnalyzeMainKeywords, AICompetitorAnalyzeMainKeywordsResult, MarketingContentGeneration
+from config.settings import MONGO_URL, MONGO_DB_NAME,LITELLM_API_BASE, LITELLM_API_KEY
+from utils.logu import get_logger
+logger = get_logger("ai")
+
+class BaseAgent:
+    def __init__(self, llm:LiteLLM, template_manager:TemplateManager):
+        self.llm = llm
+        self.template_manager = template_manager
+    
+    def get_mainkeys_tailkeys(self, template_str: str):
+        pass
+
+class MarketingAgent(BaseAgent):
+    async def get_mainkeys_tailkeys_prompt(self, product_name, prompt: str='', verbose=False):
+        base_prompt = "{{product_info}}\n{{competitor_info}}\n"
+        prompt_mainkyes = prompt or '''\
+你是日本站的亚马逊运营,请你根据产品信息为用户选出主要关键词和长尾关键词。
+
+
+请根据竞品数据,按以下规则分析:
+- 选出搜索量在1万以上的相同关键词作为主要关键词3个。
+- 如果竞品的搜索量都不足1万,则从排名前十的关键词中筛选3个搜索量最大且相关性最强的词。
+- 结合日本市场特点分析
+- 根据我的产品基本信息,从竞品的主要信息和同类竞品的相似关键词中,筛选出最符合我产品的长尾关键词 10个以上
+
+
+筛选长尾词的示例:
+- 假设我的产品是电线保护,那么竞品关键词中,“隐藏排线管” 就不符合长尾词
+- 假设我的产品是“防老化、防动物咬”用途,你就不能在竞品数据中选择不属于我这个使用场景的长尾关键词。
+
+生成的内容满足以下要求:
+- reason 、 suggestions 必须写成中文
+- monthly_searches 必须是 int ,按照从大到小排序,别的字段按照源数据填写即可。
+'''
+        variables = {'product_name': product_name}
+        product_info = await self.template_manager.execute_template("product_info", variables)
+        competitor_info = await self.template_manager.execute_template("competitor_for_llm", variables)
+        program = LLMTextCompletionProgram.from_defaults(
+            output_parser=PydanticOutputParser(output_cls=AICompetitorAnalyzeMainKeywordsResult),
+            llm=self.llm,
+            prompt_template_str=base_prompt + prompt_mainkyes,
+            verbose=verbose,
+        )
+        competitor = program(product_info=product_info, competitor_info=competitor_info)
+        logger.info(f"{competitor}")
+        return competitor
+    async def gen_mainkeys_tailkeys(self, product_name, prompt: str='', verbose=False, overwrite=False):
+        agent_model = await AgentContent.find_one(AgentContent.model_name == self.llm._get_model_name() and AgentContent.product_name == product_name)
+        if not overwrite and agent_model:
+            logger.info(f"agent_model exist")
+            return agent_model
+        competitor = await self.get_mainkeys_tailkeys_prompt(product_name, prompt, verbose)
+        product_model = await Product.find_one(Product.basic_info.name == product_name)
+        agent_model = AgentContent(
+            model_name=self.llm._get_model_name(), 
+            product=product_model,
+            product_name=product_name,
+            competitor=competitor,
+        )
+        await agent_model.save() 
+        return agent_model
+
+    async def get_marketing_prompt(self, product_name, prompt: str='', verbose=False, llm=None):
+        prompt_marketing = prompt or '''\
+你是日本站的亚马逊运营,请你根据产品信息、竞品相似主关键词、竞品长尾词,为用户生成营销文案和5个卖点文案,需要用日文。
+
+请根据竞品数据,按以下规则分析:
+- 选出搜索量在1万以上的相同关键词作为主要关键词3个。
+- 如果竞品的搜索量都不足1万,则从排名前十的关键词中筛选3个搜索量最大且相关性最强的词。
+- 结合日本市场特点分析
+- 根据我的产品基本信息,从竞品的主要信息和同类竞品的相似关键词中,筛选出最符合我产品的长尾关键词 10个以上
+
+
+筛选长尾词的示例:
+- 假设我的产品是电线保护,那么竞品关键词中,“隐藏排线管” 就不符合长尾词
+- 假设我的产品是“防老化、防动物咬”用途,你就不能在竞品数据中选择不属于我这个使用场景的长尾关键词。
+
+生成的内容满足以下要求:
+- reason 、 suggestions 必须写成中文
+- monthly_searches 必须是 int ,按照从大到小排序,别的字段按照源数据填写即可。
+'''
+        all_keywords = self.template_manager.execute_template('agent.mainkeys_tailkeys')
+        variables = {'product_name': product_name}
+        product_info = await self.template_manager.execute_template("product_info", variables)
+        program = LLMTextCompletionProgram.from_defaults(
+            output_parser=PydanticOutputParser(output_cls=MarketingInfo),
+            llm=llm or self.llm,
+            prompt_template_str="{{product_info}}\n{{all_keywords}}\n" + prompt_marketing,
+            verbose=verbose,
+        )
+        competitor = program(product_info=product_info, all_keywords=all_keywords)
+        logger.info(f"{competitor}")
+        return competitor
+
+    async def gen_marketing_content(self, product_name, prompt: str='', llm=None, verbose=False):
+        pass
+
+async def task():
+    m = TemplateManager(MONGO_URL, MONGO_DB_NAME)
+    await m.initialize()
+    model = 'groq/groq/llama-3.1-8b-instant'
+    # model = 'groq/groq/qwen-2.5-coder-32b'
+    model = 'groq/groq/qwen-2.5-32b'
+    llm = LiteLLM(model=model, api_key=LITELLM_API_KEY, api_base=LITELLM_API_BASE)
+    agent = MarketingAgent(llm=llm, template_manager=m)
+    agent_model = await agent.gen_mainkeys_tailkeys(product_name='电线保护套')
+    logger.info(f"{agent_model.competitor}")
+def main():
+    asyncio.run(task())
+
+if __name__ == "__main__":
+    main()

+ 2 - 0
config/settings.py

@@ -19,6 +19,8 @@ BROWSER_CONFIG_DIR = CONFIG_DIR / 'dp_conf'
 TEMP_PAGE_DIR = OUTPUT_DIR / 'page' / 'temp'
 TEMP_PAGE_DIR.mkdir(parents=True, exist_ok=True)
 
+LITELLM_API_KEY = os.environ.get('LITELLM_API_KEY')
+LITELLM_API_BASE=os.environ.get('LITELLM_API_BASE')
 
 class Config(BaseModel):
     storage: str = "local"

+ 1 - 1
docs/gpt/agent_product.md

@@ -107,7 +107,7 @@ product.marketing = ai_analyze_marketing(model, prompt)
 保持简洁:虽然 200 个字符是允许的最大字符数,但我们建议您使用 80 个或更少字符,因为手机屏幕会缩短较长的商品名称避免冗余:请勿在商品名称中包含冗余信息、不必要的同义词或过多的关键词优化词序:仅包含有助于买家快速识别和了解商品的信息,将词语排序以优先展示最重要的商品信息。如果适用,您可以考虑以下顺序:品牌名称-口味/款式-商品类型名称-关键属性(即商品的唯一销售主张)-颜色-尺寸/包装数量-型号。
 用户可自行附加额外的内容:我一般会加用途,卖点。其实“商品的唯一销售主张”就是卖点。
 ----
-例如,在某个竞品的产品信息中(product_info.main_text)“MEL Chemistry大径 肉厚 ペットコード ペット 犬 猫 キャット ドッグ 噛みつき 防止 感電 保護 家電 チャージ コード 配線 プロテクター カバー 螺旋 スパイラル チューブ ラップ 被覆 破れ 防止 破損防止 補強 収納 収束 結束 まとめる TPU 約93cm (ブラック 黒)B0B658JC22”
+例如,在某个竞品的产品信息中“MEL Chemistry大径 肉厚 ペットコード ペット 犬 猫 キャット ドッグ 噛みつき 防止 感電 保護 家電 チャージ コード 配線 プロテクター カバー 螺旋 スパイラル チューブ ラップ 被覆 破れ 防止 破損防止 補強 収納 収束 結束 まとめる TPU 約93cm (ブラック 黒)B0B658JC22”
 - 属性词是: 大径 肉厚 噛みつき 防止 破損防止 螺旋
 
 # 模型字段

+ 65 - 0
tests/demo/get_field_desc.py

@@ -0,0 +1,65 @@
+import json
+from src.models.field_config import FieldConfig,get_field_descriptions
+from src.models.ai_execution_record import (
+    Product, MarketingContentGeneration, AICompetitorAnalyzeMainKeywords,
+    AICompetitorAnalyzeMainKeywordsResult,
+    SearchAmazoneKeyResult,MarketingInfo,
+)
+
+import asyncio
+import aiofiles
+import os
+import sys
+{
+    "main_keys": [
+        {
+            "asin": "商品(竞品)编号",
+            "main_key": "主要关键词",
+            "monthly_searches": "月搜索量",
+            "reason": "选择该主关键词原因",
+        }
+    ],
+    "supplement": "补充说明,非必填。如果你有额外的信息或建议,可以在这里添加。",
+    "tail_keys": [
+        {
+            "tail_key": "长尾关键词",
+            "monthly_searches": "月搜索量",
+            "reason": "选择该长尾关键词原因"
+        }
+    ]
+}
+{
+    "id": "MongoDB document ObjectID",
+    "revision_id": "",
+    "product": "",
+    "prompting": {
+        "name": "名称",
+        "introduce": "介绍",
+        "template": "llamaindex 提示模板",
+        "keywords": "llamaindex 模板中的变量"
+    },
+    "input_data": "完整的输入数据",
+    "output_data": "原始的AI输出结果",
+    "executor_config": "",
+    "helpful_level": "",
+    "created_at": "",
+    "task_type": "",
+    "result": {
+        "title": "",
+        "st_search": "",
+        "selling_point": [
+            ""
+        ],
+        "product_introduction": ""
+    }
+}
+async def task():
+    res = get_field_descriptions(AICompetitorAnalyzeMainKeywordsResult)
+    print(json.dumps(res, indent=4,ensure_ascii=False))
+    res = get_field_descriptions(MarketingContentGeneration)
+    print(json.dumps(res, indent=4,ensure_ascii=False))
+def main():
+    asyncio.run(task())
+
+if __name__ == "__main__":
+    main()

+ 72 - 0
tests/demo/template_mode.py

@@ -0,0 +1,72 @@
+import argparse
+import asyncio
+import json
+import re
+from typing import List, Dict, Any
+from pydantic import ValidationError
+from config.settings import MONGO_URL, MONGO_DB_NAME
+from utils.logu import get_logger
+from src.manager.template_manager import TemplateManager
+logger = get_logger('cli')
+import asyncio
+import aiofiles
+import os
+import sys
+template_str = '''[
+        {
+            "$match": {"product_name": "{{ product_name }}"}
+        }, 
+    {
+        '$project': {
+            'combined': {
+                '$concatArrays': [
+                    {
+                        '$map': {
+                            'input': '$competitor.results', 
+                            'as': 'result', 
+                            'in': {
+                                'asin': '$$result.asin', 
+                                'main_key': '$$result.main_key', 
+                                'monthly_searches': '$$result.monthly_searches', 
+                                'type': 'result'
+                            }
+                        }
+                    }, {
+                        '$map': {
+                            'input': '$competitor.tail_keys', 
+                            'as': 'tail', 
+                            'in': {
+                                'tail_key': '$$tail.tail_key', 
+                                'monthly_searches': '$$tail.monthly_searches', 
+                                'type': 'tail_key'
+                            }
+                        }
+                    }
+                ]
+            }
+        }
+    }, {
+        '$unwind': '$combined'
+    }, {
+        '$replaceRoot': {
+            'newRoot': '$combined'
+        }
+    }
+]'''
+async def task():
+    collections = ['Product', 'templates', ]
+    m = TemplateManager()
+    await m.initialize()
+    name = 'agent.mainkeys_tailkeys'
+    await m.create_or_update_template(
+        name=name,
+        collection_name='agent.product',
+        template_str=template_str, 
+    description='获取Agent有关的关键词信息,避免 reason 干扰下一步分析', )
+    res = await m.execute_template(name, context={'product_name': '电线保护套'})
+    print(res)
+def main():
+    asyncio.run(task())
+
+if __name__ == "__main__":
+    main()

+ 2 - 1
tests/mytest/llamaindex_t/t_llm_to_pydantic.py

@@ -1,2 +1,3 @@
 # https://docs.llamaindex.ai/en/stable/examples/metadata_extraction/PydanticExtractor/
-# https://docs.llamaindex.ai/en/stable/examples/output_parsing/guidance_pydantic_program/
+# https://docs.llamaindex.ai/en/stable/examples/output_parsing/guidance_pydantic_program/
+

+ 104 - 0
tests/mytest/models/t_pymongo_test.py

@@ -0,0 +1,104 @@
+import asyncio
+from collections.abc import Generator
+import json
+from typing import Any
+
+from pymongo import MongoClient
+from bson.objectid import ObjectId
+from datetime import datetime
+
+class FindTool():
+    def _invoke(self, tool_parameters: dict[str, Any]):
+        client = MongoClient(self.runtime.credentials["uri"],
+                    username=self.runtime.credentials["username"],
+                    password=self.runtime.credentials["password"])
+        db = client[tool_parameters["database_name"]]
+        collection = db[tool_parameters["collection_name"]]
+        print((tool_parameters["query"]))
+        items = collection.find(eval(tool_parameters["query"]))
+        items = list(items)
+        df_list = []
+        for item in items:
+            df_dict = {}
+            for key,value in item.items():
+                if isinstance(value, ObjectId):
+                    df_dict[key] = value.binary
+                elif isinstance(value, datetime):
+                    df_dict[key] = str(value)
+                elif isinstance(value, (str,int,bool)):
+                    df_dict[key] = value
+                elif isinstance(value, list):
+                    dl = []
+                    for v in value:
+                        if isinstance(v, ObjectId):
+                            dl.append(v.binary)
+                        else:
+                            dl.append(v)
+                    df_dict[key] = dl
+            df_list.append(df_dict)
+        mongo_json = {"mongodb": df_list}
+        yield self.create_text_message(str(mongo_json))
+
+async def main():
+    url = 'mongodb://sv-v2.lan:27017/amazone?authSource=amazone'
+    user_name = 'gpt'
+    password = 'gpt123'
+    client = MongoClient(url, username=user_name, password=password)
+    db = client['amazone']
+    collection = db['agent.product']
+    query =  '[{"$match": {"basic_info.name": "电线保护套"}}, {"$project": {"competitor_crawl_data": 1}}, {"$addFields": {"competitors": {"$objectToArray": "$competitor_crawl_data"}}}, {"$unwind": "$competitors"}, {"$project": {"_id": 0, "asin": "$competitors.k", "product_info": {"main_text": "$competitors.v.extra_result.product_info.main_text"}, "result_table": {"$map": {"input": "$competitors.v.extra_result.result_table", "as": "item", "in": {"traffic_keyword": "$$item.traffic_keyword", "monthly_searches": "$$item.monthly_searches"}}}}}]'
+    pipeline = [
+    {
+        '$project': {
+            'combined': {
+                '$concatArrays': [
+                    {
+                        '$map': {
+                            'input': '$competitor.results', 
+                            'as': 'result', 
+                            'in': {
+                                'asin': '$$result.asin', 
+                                'main_key': '$$result.main_key', 
+                                'monthly_searches': '$$result.monthly_searches', 
+                                'type': 'result'
+                            }
+                        }
+                    }, {
+                        '$map': {
+                            'input': '$competitor.tail_keys', 
+                            'as': 'tail', 
+                            'in': {
+                                'tail_key': '$$tail.tail_key', 
+                                'monthly_searches': '$$tail.monthly_searches', 
+                                'type': 'tail_key'
+                            }
+                        }
+                    }
+                ]
+            }
+        }
+    }, {
+        '$unwind': '$combined'
+    }, {
+        '$replaceRoot': {
+            'newRoot': '$combined'
+        }
+    }
+]
+    # query = '[]'
+    # query = json.loads(query)
+    # res = db.aggregate(json.loads(query))
+    res = collection.aggregate(pipeline).to_list()
+    print(f"{res}")
+    return
+    tool_parameters = {
+       "query": query, 
+    }
+    find_qurey = '{"basic_info.name": "电线保护套"}'
+    items = collection.find(eval(find_qurey))
+    items = list(items)
+    print(items)
+    df_list = []
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 5 - 3
utils/file.py

@@ -18,7 +18,7 @@ s3 = boto3.client(
     endpoint_url=CFG.s3_endpoint,
     config=Config(signature_version='s3v4', retries={'mode': 'standard'}),
 )
-resource = boto3.resource('s3')
+# resource = boto3.resource('s3')
 
 def s3_uri_to_http_url(s3_uri):
     """
@@ -86,6 +86,8 @@ def get_s3_uri_info(s3_uri:str):
 def upload_to_s3(content, filename:str, **extra_args):
     bucket_name, object_name, upload_args = get_s3_uri_info(filename)
     upload_args.update(extra_args)
+    if isinstance(content, dict):  # 处理字典类型
+        content = json.dumps(content, ensure_ascii=False)
     if isinstance(content, str):
         content = content.encode('utf-8')
     print(bucket_name, object_name)
@@ -125,12 +127,12 @@ def save_to_file(content, filename:Path, **extra_args):
             file.write(content)
     return filename
 
-def read_file(file_uri:str):
+def read_file(file_uri:str, mode='r'):
     # if str(file_uri).startswith('s3://'):
     #     bucket_name, object_name, upload_args = get_s3_uri_info(file_uri)
     #     response = s3.get_object(Bucket=bucket_name, Key=object_name)
     #     return response['Body'].read()
-    with open(file_uri, 'r', transport_params={'client': s3}) as f:
+    with open(file_uri, mode or 'r', transport_params={'client': s3}) as f:
         # 文件存在,继续操作
         return f.read()