|
|
@@ -1,44 +1,138 @@
|
|
|
+import json
|
|
|
+from typing import Optional
|
|
|
from llama_index.core import PromptTemplate
|
|
|
-
|
|
|
import asyncio
|
|
|
import aiofiles
|
|
|
import os
|
|
|
import sys
|
|
|
-from src.models.product_model import Product,CompetitorCrawlData,AICompetitorAnalyzeMainKeywords, SearchAmazoneKeyResult, ProductBaseInfo, Variant
|
|
|
+from pydantic import BaseModel
|
|
|
+from src.models.product_model import (
|
|
|
+ Product, CompetitorCrawlData, AICompetitorAnalyzeMainKeywords,
|
|
|
+ TrafficKeywordResult, ProductImageInfo,
|
|
|
+ SearchAmazoneKeyResult, ProductBaseInfo, Variant
|
|
|
+)
|
|
|
from src.manager.core.db_mongo import BaseMongoManager
|
|
|
from utils.logu import get_logger
|
|
|
+from src.models.field_config import FieldConfig
|
|
|
+
|
|
|
logger = get_logger('ai')
|
|
|
-async def task():
|
|
|
- db_mongo = BaseMongoManager()
|
|
|
- await db_mongo.initialize()
|
|
|
- product = await Product.find_one(Product.basic_info.name == "电线保护套")
|
|
|
+
|
|
|
# Default field whitelist: for each model name, the set of field names that
# are kept when that model is filtered for the prompt.
DEFAULT_FIELD_CONFIG = FieldConfig(
    include_fields={
        "ProductImageInfo": {
            "main_text",  # primary text shown on the product image
        },
        "TrafficKeywordResult": {
            "traffic_keyword",  # name of the traffic keyword
            "monthly_searches",  # monthly search volume of the keyword
        },
        "ProductBaseInfo": {
            "name",
            "content",
            "material",
            "color",
            "size",
            "packaging_size",
            "weight",
            "main_usage",
            "selling_point",
        },
        "CompetitorCrawlData": {
            "asin",
        },
    },
)
|
|
|
+
|
|
|
def get_competitor_prompt_data(
    product: Product,
    field_config: FieldConfig = DEFAULT_FIELD_CONFIG
) -> list:
    """Collect the competitor data that gets embedded in the LLM prompt.

    Walks ``product.competitor_crawl_data`` and, for every competitor that has
    an ``extra_result``, builds a dict holding the ASIN plus the filtered
    ``product_info`` and ``result_table`` (each only when present).

    Args:
        product: Product whose competitor crawl data is read.
        field_config: Field configuration used to filter each model dump via
            ``filter_model_dump``.

    Returns:
        A list with one dict per competitor that has an ``extra_result``.
        Every dict has an ``"asin"`` key; ``"product_info"`` and
        ``"result_table"`` are added only when the crawl result provides them.
        The list is empty when the product has no competitor crawl data.
    """
    # Guard: a missing/None mapping is treated as "no competitors" instead of
    # raising AttributeError on ``.items()``.
    competitor_crawl_data = product.competitor_crawl_data or {}
    list_data = []

    for asin, crawl_data in competitor_crawl_data.items():
        extra_result = crawl_data.extra_result
        if not extra_result:
            # Competitors without an extracted result contribute nothing.
            continue

        structured_result = {"asin": asin}

        if extra_result.product_info:
            structured_result["product_info"] = field_config.filter_model_dump(
                extra_result.product_info,
                "ProductImageInfo"
            )

        if extra_result.result_table:
            structured_result["result_table"] = [
                field_config.filter_model_dump(item, "TrafficKeywordResult")
                for item in extra_result.result_table
            ]

        logger.debug(f"Structured result for LLM: {json.dumps(structured_result, indent=4, ensure_ascii=False)}")
        list_data.append(structured_result)

    return list_data
|
|
|
+
|
|
|
def get_field_descriptions(
    model_class: type[BaseModel],
    field_config: FieldConfig = DEFAULT_FIELD_CONFIG,
    model_name: Optional[str] = None
) -> dict:
    """Return the field descriptions of a Pydantic model class.

    Thin wrapper that delegates to ``field_config.get_model_fields``.

    Args:
        model_class: The Pydantic model class itself (not an instance).
        field_config: Field configuration that performs the lookup.
        model_name: Model name used to look up the configuration; passed
            through unchanged, ``None`` included.

    Returns:
        The result of ``field_config.get_model_fields`` -- expected to be a
        dict mapping field name to description.
    """
    return field_config.get_model_fields(model_class, model_name)
|
|
|
|
|
|
async def test_product_mongo():
    """Build and log the main-keyword analysis prompt for one stored product.

    Initializes the Mongo manager, loads the product named "电线保护套",
    gathers its competitor data and the field descriptions of the related
    models, renders the prompt template with them and logs the result.
    Returns ``None``; when the product is not found an error is logged and
    nothing else happens.
    """
    db_mongo = BaseMongoManager()
    await db_mongo.initialize()
    product = await Product.find_one(Product.basic_info.name == "电线保护套")
    if product is None:
        # No matching document: stop here instead of failing with an
        # AttributeError on ``product.basic_info`` below.
        logger.error("product not found, cannot build the keyword prompt")
        return

    product_name = product.basic_info.name
    # Use the default field configuration for data and descriptions alike.
    competitor_data = get_competitor_prompt_data(product)
    competitor_desc = get_field_descriptions(CompetitorCrawlData)
    product_info_desc = get_field_descriptions(ProductImageInfo)
    keyword_result_desc = get_field_descriptions(TrafficKeywordResult)

    output_format = [{"asin": "", "main_key":"", "monthly_searches":"", "reason":""}]
    logger.info(f"competitor_desc {competitor_desc}")
    logger.info(f"product_info_desc {product_info_desc}")
    logger.info(f"keyword_result_desc {keyword_result_desc}")

    # The template text starts at column 0 on purpose: it is sent verbatim.
    analyz_main_keyword_template_str = '''
各个字段说明:
{desc}

竞品数据:
{competitor_data}
----
我是亚马逊运营,我在给产品名称为 {product_name} 选主要关键词,以上数据是我从同类竞品的关键词搜索量数据,总共有 {competitor_count} 个竞品数据。
请帮我分析这些竞品数据,选出搜索量在1万以上的相同关键词作为主要关键词3个。
如果竞品的搜索量都不足1万,则从排名前十的关键词中筛选三个搜索量最大且相关性最强的词。
输出格式:
{output_format}
'''
    text_qa_template = PromptTemplate(analyz_main_keyword_template_str)
    # Render first, log second: a multi-line expression inside a
    # single-quoted f-string only parses on Python 3.12+.
    prompt = text_qa_template.format(
        desc=(competitor_desc, product_info_desc, keyword_result_desc),
        product_name=product_name,
        competitor_data=competitor_data,
        competitor_count=len(competitor_data),
        output_format=output_format,
    )
    logger.info(f"{prompt}")
|
|
|
def main():
    """Synchronous entry point: run ``test_product_mongo`` to completion."""
    coroutine = test_product_mongo()
    asyncio.run(coroutine)


if __name__ == "__main__":
    main()
|