|
|
@@ -0,0 +1,174 @@
|
|
|
+from dataclasses import dataclass
|
|
|
+import json
|
|
|
+from openpyxl import Workbook
|
|
|
+from openpyxl.drawing.image import Image
|
|
|
+from openpyxl.formatting.rule import CellIsRule
|
|
|
+from openpyxl.styles import PatternFill, Font, Alignment
|
|
|
+from openpyxl.utils import get_column_letter
|
|
|
+from io import BytesIO
|
|
|
+import base64
|
|
|
+from typing import Dict, List, Any
|
|
|
+from pathlib import Path
|
|
|
+from utils.file import read_file
|
|
|
+from utils.logu import logger
|
|
|
+
|
|
|
# Solid red background; applied to search-volume cells above the highlight threshold.
RED_FILL = PatternFill(fill_type='solid', start_color='FFFF0000', end_color='FFFF0000')

# Bold white font shared by the header cells in row 1.
HEADER_FONT = Font(color='FFFFFF', bold=True)

# Solid blue background for header cells.
HEADER_FILL = PatternFill(patternType='solid', start_color='4F81BD')
|
|
|
+
|
|
|
@dataclass
class DataProcessor:
    """Turn the parsed content of one JSON file into values ready for writing.

    The document is read defensively: a section that is missing *or* present
    with a JSON ``null`` value is treated as empty instead of raising.
    """

    # Parsed JSON document; the keys read here are 'result_table',
    # 'product_info' and 'unique_words'.
    json_data: Dict[str, Any]
    # ASIN label associated with this document.
    asin: str

    def process_table_data(self) -> List[Dict[str, Any]]:
        """Return the keyword rows of ``result_table``.

        Rows without a truthy ``traffic_keyword`` are skipped.

        Returns:
            One dict per kept row with the keys ``traffic_keyword``,
            ``amazon_search_link`` and ``monthly_searches``. The latter is
            always a string with thousands separators and surrounding
            whitespace removed; it is ``'0'`` when the source value is
            missing or ``null``.
        """
        rows = []
        # `or []` also covers an explicit null, which `.get(key, [])` would not.
        for item in self.json_data.get('result_table') or []:
            if not item.get('traffic_keyword'):
                continue
            raw_searches = item.get('monthly_searches')
            if raw_searches is None:
                raw_searches = '0'
            rows.append({
                'traffic_keyword': item.get('traffic_keyword', ''),
                'amazon_search_link': item.get('amazon_search_link', ''),
                # str() first: the value may arrive as a number rather than
                # a formatted string such as '12,345'.
                'monthly_searches': str(raw_searches).replace(',', '').strip(),
            })
        return rows

    @property
    def product_info(self) -> Dict[str, str]:
        """Return the ``product_info`` section, or an empty dict if absent/null."""
        return self.json_data.get('product_info') or {}

    @property
    def unique_words(self) -> List[str]:
        """Return the ``word`` value of every ``unique_words`` entry.

        Entries without a ``word`` key contribute an empty string.
        """
        return [
            entry.get('word', '')
            for entry in self.json_data.get('unique_words') or []
        ]
|
|
|
+
|
|
|
class ExcelWriter:
    """Write several JSON data sets side by side into one worksheet.

    Every data set occupies two columns (keyword, search volume) starting at
    ``current_col``. After a data set is written ``current_col`` advances by
    three, which leaves one empty spacer column between neighbouring sets.
    """

    # Upper bound applied to computed column widths (Excel's documented
    # maximum column width is 255 characters).
    _MAX_COLUMN_WIDTH = 255

    def __init__(self, output_path: str):
        """Create an empty workbook that will be saved to ``output_path``."""
        self.wb = Workbook()
        self.ws = self.wb.active
        self.output_path = Path(output_path)
        self.current_col = 1  # 1-based column where the next data set starts

    def add_json_data(self, json_path: str, asin: str):
        """Load one JSON file and append its content as a new column group.

        Args:
            json_path: Path handed to ``read_file``; its content must be JSON.
            asin: Label written into the header cell of the group.

        Raises:
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        str_data = read_file(json_path)
        data = json.loads(str_data)
        processor = DataProcessor(data, asin)

        self._write_header(processor)
        self._write_table_data(processor)
        self._write_additional_info(processor)

        self.current_col += 3  # two data columns + one spacer column

    def _write_header(self, processor: DataProcessor):
        """Write the row-1 header cells and, if available, the product image."""
        # ASIN title above the keyword column.
        header_cell = self.ws.cell(
            row=1,
            column=self.current_col,
            value=processor.asin
        )
        header_cell.font = HEADER_FONT
        header_cell.fill = HEADER_FILL

        # Embed the Base64 image; only payloads starting with '/9j/'
        # (the Base64 form of the JPEG magic bytes) are attempted.
        img_data = processor.product_info.get('imgbase64', '')
        if img_data and img_data.startswith('/9j/'):
            # Best effort: a broken image must not abort the whole export.
            try:
                img = Image(BytesIO(base64.b64decode(img_data)))
                img.anchor = f'{get_column_letter(self.current_col)}2'
                self.ws.add_image(img)
            except Exception as e:
                logger.error(f'图片插入失败: {e}')

        # Search-volume title. It needs the header fill as well: HEADER_FONT
        # is white and would be unreadable on the default background.
        volume_header = self.ws.cell(
            row=1,
            column=self.current_col + 1,
            value='搜索量'
        )
        volume_header.font = HEADER_FONT
        volume_header.fill = HEADER_FILL

    def _write_table_data(self, processor: DataProcessor):
        """Write one worksheet row per keyword, starting at row 3."""
        for row_idx, item in enumerate(processor.process_table_data(), start=3):
            keyword_cell = self.ws.cell(
                row=row_idx,
                column=self.current_col,
                value=item['traffic_keyword']
            )
            # Only attach a hyperlink when there is a target to link to.
            link = item['amazon_search_link']
            if link:
                keyword_cell.hyperlink = link

            # isdecimal() only accepts characters that int() can parse;
            # isdigit() also accepts e.g. superscript digits, which int() rejects.
            search_volume = str(item['monthly_searches'])
            volume = int(search_volume) if search_volume.isdecimal() else 0
            cell = self.ws.cell(
                row=row_idx,
                column=self.current_col + 1,
                value=volume
            )

            # Highlight search volumes above 10,000 in red.
            if volume > 10000:
                cell.fill = RED_FILL

    def _write_additional_info(self, processor: DataProcessor):
        """Write product text and unique words below the existing content.

        The block starts two rows below the worksheet's current ``max_row``,
        so its position depends on everything written to the sheet so far,
        not only on the current column group.
        """
        max_row = self.ws.max_row
        base_row = max_row + 2

        # Product information.
        self.ws.cell(base_row, self.current_col, '产品信息:').font = Font(bold=True)
        self.ws.cell(base_row + 1, self.current_col, processor.product_info.get('main_text', ''))

        # Unique word list, one word per row under its caption.
        self.ws.cell(base_row + 3, self.current_col, '唯一词:').font = Font(bold=True)
        for idx, word in enumerate(processor.unique_words, start=1):
            self.ws.cell(base_row + 3 + idx, self.current_col, word)

    def apply_styles(self):
        """Fit column widths to their content and centre the header row."""
        for col in self.ws.columns:
            lengths = [
                len(str(cell.value)) for cell in col
                if cell.value is not None
            ]
            # Spacer columns (and an untouched sheet) contain no values;
            # max() on an empty sequence would raise ValueError.
            if not lengths:
                continue
            width = min(max(lengths) + 2, self._MAX_COLUMN_WIDTH)
            self.ws.column_dimensions[get_column_letter(col[0].column)].width = width

        # Centre every cell of the header row.
        for row in self.ws.iter_rows(min_row=1, max_row=1):
            for cell in row:
                cell.alignment = Alignment(horizontal='center')

    def save(self):
        """Create the output directory if needed and write the workbook."""
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.wb.save(self.output_path)
        logger.info(f'Excel文件已保存至: {self.output_path}')
|
|
|
+
|
|
|
def main():
    """Export the configured JSON extracts into a single Excel workbook."""
    workbook_path = r"G:\code\amazone\copywriting_production\output\multi_data.xlsx"
    sources = [
        (r"G:\code\amazone\copywriting_production\output\page\debug\B0B658JC22_extract.json", "B0B658JC22"),
        # Add further (json_path, asin) pairs here, for example:
        # (r"path\to\other.json", "ASIN123")
    ]
    logger.info(f"{sources}")

    excel = ExcelWriter(workbook_path)
    for source in sources:
        # Each entry unpacks to the positional (json_path, asin) arguments.
        excel.add_json_data(*source)

    excel.apply_styles()
    excel.save()


if __name__ == "__main__":
    main()
|