|
|
@@ -0,0 +1,254 @@
|
|
|
+import base64
|
|
|
+from openpyxl.worksheet.worksheet import Worksheet
|
|
|
+from openpyxl.styles import Font, PatternFill, Alignment
|
|
|
+from openpyxl.utils import get_column_letter
|
|
|
+from pathlib import Path
|
|
|
+from typing import Dict, Any,List
|
|
|
+import json
|
|
|
+import pandas as pd
|
|
|
+from openpyxl.drawing.image import Image
|
|
|
+from openpyxl.formatting.rule import CellIsRule
|
|
|
+from io import BytesIO
|
|
|
+from utils.file import read_file
|
|
|
+from utils.logu import get_logger
|
|
|
+from openpyxl import load_workbook,Workbook
|
|
|
+from .base_writer import ExcelWriterBase
|
|
|
+
|
|
|
+logger = get_logger('excel')
|
|
|
+
|
|
|
class ProductDataProcessor:
    """Normalise one product's keyword-research JSON payload.

    The payload is a dict that must contain a ``result_table`` entry (the
    keyword rows) and may contain ``product_info`` and ``unique_words``.

    Args:
        json_data: Parsed JSON payload for a single product.
        asin: Identifier of the product the payload belongs to.

    Raises:
        ValueError: If ``json_data`` has no ``result_table`` key.
    """

    # Columns that ``get_sorted_dataframe`` needs in ``result_table``.
    _REQUIRED_COLUMNS = ('traffic_keyword', 'monthly_searches')

    def __init__(self, json_data: Dict, asin: str):
        self.json_data = json_data
        self.asin = asin
        self._validate_data()

    def _validate_data(self):
        """Check that the payload carries the mandatory ``result_table``.

        Raises:
            ValueError: If the ``result_table`` key is absent.
        """
        if 'result_table' not in self.json_data:
            raise ValueError("Missing required 'result_table' in JSON data")

    @staticmethod
    def _parse_search_volume(value: Any) -> int:
        """Convert one raw ``monthly_searches`` cell to an ``int``.

        Missing values (``None``, ``pd.NA``, NaN) and any other falsy value
        map to ``0``. Thousands separators (``','``) are stripped, and
        decimal representations such as ``5000.0`` or ``'1,234.0'`` are
        truncated to their integer part.

        Raises:
            ValueError: If the value is neither missing nor numeric.
        """
        if value is None or value is pd.NA:
            return 0
        # NaN is truthy, so it has to be detected before the falsy check.
        if isinstance(value, float) and value != value:
            return 0
        if not value:
            return 0

        text = str(value).replace(',', '').strip()
        if not text:
            return 0
        try:
            return int(text)
        except ValueError:
            pass
        try:
            # Handles '1234.0' and float inputs; garbage still raises
            # ValueError from float().
            return int(float(text))
        except OverflowError as exc:
            # int(inf) raises OverflowError; keep a single exception type.
            raise ValueError(
                f"Invalid monthly_searches value: {value!r}"
            ) from exc

    def get_sorted_dataframe(self) -> pd.DataFrame:
        """Return the keyword rows sorted by search volume, highest first.

        ``monthly_searches`` is converted to integers, rows without a
        ``traffic_keyword`` are dropped, and the index is reset to
        ``0..n-1``. An empty ``result_table`` yields an empty frame that
        still has the required columns.

        Raises:
            ValueError: If a non-empty ``result_table`` lacks a required
                column, or a search volume cannot be parsed.
        """
        columns = list(self._REQUIRED_COLUMNS)
        df = pd.DataFrame(self.json_data['result_table'])

        if df.empty:
            # Nothing to clean; keep the schema so callers can still index
            # the expected columns.
            return pd.DataFrame(columns=columns)

        missing = [name for name in columns if name not in df.columns]
        if missing:
            raise ValueError(
                f"'result_table' is missing required column(s): {missing}"
            )

        # Clean-up and type conversion.
        df['monthly_searches'] = df['monthly_searches'].apply(
            self._parse_search_volume
        )

        # Drop rows without a keyword, then sort. mergesort is a stable
        # algorithm, which makes the order of equal volumes deterministic.
        has_keyword = df['traffic_keyword'].notna()
        df = df[has_keyword].sort_values(
            by='monthly_searches',
            ascending=False,
            kind='mergesort',
        )
        return df.reset_index(drop=True)

    @property
    def product_info(self) -> Dict:
        """Product metadata dict; empty when absent or null in the payload."""
        return self.json_data.get('product_info') or {}

    @property
    def unique_words(self) -> List[str]:
        """Stripped ``word`` values from the payload's ``unique_words`` list.

        Entries that are not dicts or that lack a ``word`` key are skipped.
        """
        entries = self.json_data.get('unique_words') or []
        return [
            str(entry['word']).strip()
            for entry in entries
            if isinstance(entry, dict) and 'word' in entry
        ]
|
|
|
+
|
|
|
class CompetitiveAnalysisWriter(ExcelWriterBase):
    """Write competitor keyword tables side by side into one worksheet.

    Each product occupies ``COLUMN_SPACING`` columns: a keyword column, a
    search-volume column and one empty spacer column. Row 1 holds the
    product image, row 2 the header and the keyword rows start at row 3.
    Below the tables, each product gets a "product info" text cell and a
    list of unique words in its keyword column.

    NOTE(review): ``self.wb``, ``self.sheet_index`` and ``self.sheet_name``
    are presumably set by ``ExcelWriterBase.__init__`` — confirm against the
    base class, which is not visible here.
    """

    COLUMN_SPACING = 3
    HEADER_FILL = PatternFill(start_color='4F81BD', fill_type='solid')
    HEADER_FONT = Font(bold=True, color='FFFFFF')
    RED_FILL = PatternFill(start_color='FF0000',end_color="FF0000", fill_type='solid')

    # Search volumes strictly above this value are highlighted in red.
    HIGH_VOLUME_THRESHOLD = 10000

    # Fixed vertical layout of every product table.
    _HEADER_ROW = 2
    _FIRST_DATA_ROW = 3

    def __init__(self, work_book:Workbook, sheet_index: int=0, sheet_name: str="竞品关键词调研"):
        """Create the writer and reset its layout bookkeeping.

        Args:
            work_book: Workbook the sheet lives in.
            sheet_index: Position the sheet should have in the workbook.
            sheet_name: Title of the sheet to reuse or create.
        """
        super().__init__(work_book, sheet_index, sheet_name)
        self.current_col = 1    # first column of the next product
        self.product_cols = []  # first column of every product written
        self.max_data_rows = 0  # largest row number used by any table

    def _init_worksheet(self):
        """Bind ``self.ws`` to the target sheet, creating it if necessary.

        An existing sheet with the configured name is moved to
        ``self.sheet_index``; otherwise a new sheet is created there.
        """
        if self.sheet_name in self.wb.sheetnames:
            self.ws = self.wb[self.sheet_name]
            current_index = self.wb.index(self.ws)
            # move_sheet takes a relative offset, not an absolute index.
            offset = self.sheet_index - current_index
            self.wb.move_sheet(self.ws, offset=offset)
        else:
            self.ws = self.wb.create_sheet(self.sheet_name, index=self.sheet_index)
            logger.info(f"新建工作表: {self.sheet_name}")

    def add_data(self, data: List[Dict[str, Any]]):
        """Write every product in ``data`` and then apply the formatting.

        Args:
            data: One dict per product with the keys ``asin`` and
                ``extra_result_data`` (the payload given to
                ``ProductDataProcessor``).
        """
        for product_data in data:
            logger.info(f"{product_data['asin']}, 处理中...")
            self.add_product(product_data['extra_result_data'], product_data['asin'])

        self.apply_formatting()

    def add_product(self, data: dict, asin: str):
        """Write one product's table, extra info and image.

        A product whose payload is invalid is logged and skipped. The
        column cursor and ``product_cols`` are only updated once the product
        has been written, so a skipped product leaves no stale column behind
        for the conditional formatting.

        Args:
            data: Parsed JSON payload of the product.
            asin: Identifier written into the table header.
        """
        start_col = self.current_col
        try:
            processor = ProductDataProcessor(data, asin)
            self._write_main_table(processor, asin)
            self._write_additional_info(processor)
            self._insert_product_image(processor.product_info)
        except (json.JSONDecodeError, ValueError, KeyError) as e:
            # Log only the ASIN: the payload can embed a base64 image and
            # would flood the log. KeyError covers rows that lack an
            # expected field, so one bad product cannot abort the batch.
            logger.error(f'Error processing {asin}: {e}')
            return

        self.product_cols.append(start_col)
        # Move on to the next group of columns.
        self.current_col += self.COLUMN_SPACING

    def _write_main_table(self, processor: ProductDataProcessor, asin: str):
        """Write the header row and the keyword / search-volume rows.

        Args:
            processor: Source of the sorted keyword frame.
            asin: Text of the left header cell.
        """
        df = processor.get_sorted_dataframe()
        keyword_col = self.current_col
        volume_col = self.current_col + 1
        centered = Alignment(horizontal='center', vertical='center')

        # Header row (row 1 is reserved for the product image).
        asin_cell = self.ws.cell(self._HEADER_ROW, keyword_col, asin)
        asin_cell.font = Font(bold=True, color='0000FF', underline='single')
        asin_cell.fill = self.HEADER_FILL
        asin_cell.alignment = centered

        search_volume_cell = self.ws.cell(self._HEADER_ROW, volume_col, "搜索量")
        search_volume_cell.font = self.HEADER_FONT
        search_volume_cell.fill = self.HEADER_FILL
        search_volume_cell.alignment = centered

        # Data rows start directly below the header.
        has_links = 'amazon_search_link' in df.columns
        link_font = Font(color='0000FF', underline='single')
        for offset, (_, row) in enumerate(df.iterrows()):
            data_row = self._FIRST_DATA_ROW + offset

            # Keyword, hyperlinked when a usable link is present.
            kw_cell = self.ws.cell(data_row, keyword_col, row['traffic_keyword'])
            link = row['amazon_search_link'] if has_links else None
            if isinstance(link, str) and link.strip():
                kw_cell.hyperlink = link
                kw_cell.font = link_font

            # Search volume, stored as a plain integer.
            search_cell = self.ws.cell(data_row, volume_col, int(row['monthly_searches']))
            search_cell.number_format = 'General'

        if len(df):
            # Row number of the last data row of this table.
            last_row = self._FIRST_DATA_ROW + len(df) - 1
            self.max_data_rows = max(self.max_data_rows, last_row)

        # Fixed initial column widths.
        self.ws.column_dimensions[get_column_letter(keyword_col)].width = 35
        self.ws.column_dimensions[get_column_letter(volume_col)].width = 15

    def _write_additional_info(self, processor: ProductDataProcessor):
        """Write the product description and unique words below the tables.

        The block starts three rows below ``self.max_data_rows`` as it
        stands when this product is written.

        NOTE(review): because ``max_data_rows`` grows while products are
        added, the blocks of different products can start on different
        rows — confirm that this is acceptable.
        """
        start_row = self.max_data_rows + 3
        col = self.current_col
        info = processor.product_info or {}

        # Product description, optionally followed by the product link.
        self.ws.cell(start_row, col, "产品信息").font = Font(bold=True)
        info_text = str(info.get('main_text') or '')
        if info.get('goto_amazon'):
            info_text += f"\n产品链接: {info['goto_amazon']}"
        info_cell = self.ws.cell(start_row+1, col, info_text)
        info_cell.alignment = Alignment(wrap_text=True, vertical='top')
        self.ws.column_dimensions[get_column_letter(col)].width = 35

        # Unique words, one per row under their heading.
        words_row = start_row + 4
        self.ws.cell(words_row, col, "唯一词").font = Font(bold=True)
        for idx, word in enumerate(processor.unique_words, start=1):
            self.ws.cell(words_row+idx, col, word)

    def _insert_product_image(self, product_info: Dict):
        """Anchor the product image at row 1 of the product's first column.

        Best effort: a missing image is ignored and any failure while
        decoding or inserting it is logged without interrupting the export.

        Args:
            product_info: Product metadata; the image is read from its
                ``imgbase64`` entry, with or without a ``data:`` URI prefix.
        """
        img_base64 = (product_info or {}).get('imgbase64')
        if not img_base64:
            return

        try:
            # A data URI ("data:image/png;base64,<payload>") is not valid
            # base64 as a whole, so keep only the part after the comma.
            if isinstance(img_base64, str) and img_base64.startswith('data:'):
                img_base64 = img_base64.partition(',')[2]

            img_data = base64.b64decode(img_base64)
            img = Image(BytesIO(img_data))

            # The image sits in row 1, above the header row.
            img_row = 1
            img.anchor = f'{get_column_letter(self.current_col)}{img_row}'
            self.ws.add_image(img)

            # Make the row tall enough for the picture.
            self.ws.row_dimensions[img_row].height = 150
            # Keep a minimum extent of five rows for the formatting range
            # and for the blocks written below later tables.
            self.max_data_rows = max(self.max_data_rows, 5)
        except Exception as e:
            logger.error(f'图片插入失败: {e}')

    def apply_formatting(self):
        """Apply the final formatting passes to the sheet.

        ``_adjust_column_widths`` is not called here; calling it would
        overwrite the fixed widths set by ``_write_main_table``.
        """
        self._apply_conditional_formatting()
        self._set_global_alignment()

    def _apply_conditional_formatting(self):
        """Highlight search volumes above ``HIGH_VOLUME_THRESHOLD`` in red.

        The rule is added to the search-volume column of every written
        product, from the first data row down to ``self.max_data_rows``.
        """
        if self.max_data_rows < self._FIRST_DATA_ROW:
            # No data rows were written; a range such as "B3:B0" is invalid.
            return

        red_rule = CellIsRule(
            operator='greaterThan',
            formula=[str(self.HIGH_VOLUME_THRESHOLD)],
            stopIfTrue=True,
            fill=self.RED_FILL
        )

        for start_col in self.product_cols:
            # The search-volume column is the one right of the keyword column.
            col_letter = get_column_letter(start_col + 1)
            cell_range = (
                f"{col_letter}{self._FIRST_DATA_ROW}:"
                f"{col_letter}{self.max_data_rows}"
            )
            self.ws.conditional_formatting.add(cell_range, red_rule)

    def _adjust_column_widths(self):
        """Size each used column from the longest value it contains.

        Empty cells count as zero characters.
        """
        for col in range(1, self.current_col):
            col_letter = get_column_letter(col)
            max_length = max(
                (
                    len(str(cell.value))
                    for cell in self.ws[col_letter]
                    if cell.value is not None
                ),
                default=0,
            )
            self.ws.column_dimensions[col_letter].width = (max_length + 2) * 1.2

    def _set_global_alignment(self):
        """Set the alignment of every cell in the sheet.

        Cells in a product's first (keyword) column are left-aligned and all
        other cells are centred; every cell is vertically centred with text
        wrapping. The assignment replaces any alignment set earlier.
        """
        # Two shared style objects instead of one new object per cell.
        left = Alignment(horizontal='left', vertical='center', wrap_text=True)
        center = Alignment(horizontal='center', vertical='center', wrap_text=True)

        for row in self.ws.iter_rows():
            for cell in row:
                is_keyword_col = cell.column % self.COLUMN_SPACING == 1
                cell.alignment = left if is_keyword_col else center
|