|
|
@@ -4,7 +4,7 @@ import sys
|
|
|
import logging
|
|
|
from pathlib import Path
|
|
|
from mylib.pdfzh_translator import OpenAITranslator
|
|
|
-from brand_add_url_link import create_hyperlink
|
|
|
+from brand_add_url_link import create_hyperlink, create_asin_link
|
|
|
from mylib.logging_config import setup_logging
|
|
|
|
|
|
# Setup custom logging
|
|
|
@@ -43,40 +43,57 @@ def read_csv(file_path):
|
|
|
logger.error("Failed to read file with all attempted encodings")
|
|
|
sys.exit(1)
|
|
|
|
|
|
def insert_empty_columns(data, column_indices):
    """Insert an empty cell after each given column position in every row.

    Args:
        data: List of rows, each a list of cell values. Rows are modified
            in place.
        column_indices: Column positions, expressed against the layout the
            rows have before any insertion, after which an empty string
            cell is added. Any iterable of ints is accepted and it is left
            unmodified.

    Returns:
        The same ``data`` list, with the new cells inserted.

    On any error the failure is logged and the process exits with status 1.
    """
    try:
        # Iterate over a sorted copy, highest index first: inserting at a
        # high position does not shift the lower positions still to be
        # handled, and the caller's sequence is not reordered as a side
        # effect.
        descending = sorted(column_indices, reverse=True)
        for row in data:
            for index in descending:
                row.insert(index + 1, '')
        return data
    except Exception as e:
        logger.error(f"Error inserting empty columns: {e}")
        sys.exit(1)
|
|
|
|
|
|
def process_batch_translations(data, search_term_index, category_indices):
    """Translate the search-term and category columns and write the results back.

    The first row of ``data`` is treated as a header and is not touched.
    For every other row the translation of the search term is stored at
    ``search_term_index + 1``, the search term itself is replaced by a
    hyperlink to an Amazon search for it, and the translation of each
    category cell is stored at ``cat_index + 1``.

    Args:
        data: List of rows (lists of cell values), header first. Rows are
            modified in place; the cells at ``search_term_index + 1`` and at
            each ``cat_index + 1`` are overwritten, so they must already
            exist.
        search_term_index: Position of the search-term column.
        category_indices: Positions of the category columns.

    Returns:
        The same ``data`` list after the rows have been updated.

    On any error the failure is logged and the process exits with status 1.
    """
    try:
        # Initialise the translator.
        translator = OpenAITranslator("openai", "zh-CN", "en", "gpt-3.5-turbo")

        rows = data[1:]
        row_count = len(rows)

        # Collect every text that needs translating. Categories are gathered
        # column by column: all rows of the first category column, then all
        # rows of the second, and so on.
        search_terms = [row[search_term_index] for row in rows]
        categories = [row[index] for index in category_indices for row in rows]

        # Translate each batch in a single call.
        # NOTE(review): assumes translate() returns one result per input, in
        # input order — confirm against OpenAITranslator.
        logger.info("Starting batch translations...")
        search_translations = translator.translate(search_terms)
        category_translations = translator.translate(categories)
        logger.info("Batch translations completed")

        # Write the results back into the rows.
        for i, row in enumerate(rows, start=1):
            try:
                # Search-term translation goes in the cell right after the term.
                row[search_term_index + 1] = search_translations[i - 1]

                # Replace the search term with a link to an Amazon search for it.
                amazon_url = f"https://www.amazon.co.jp/s?k={row[search_term_index]}"
                row[search_term_index] = create_hyperlink(row[search_term_index], amazon_url)

                # Category translations: the batch is laid out column-major,
                # so this row's entry for the n-th category column sits at
                # n * row_count + (row position).
                for position, cat_index in enumerate(category_indices):
                    offset = position * row_count + (i - 1)
                    row[cat_index + 1] = category_translations[offset]

            except Exception as e:
                logger.error(f"Error processing row {i}: {e}")
                sys.exit(1)

        return data
    except Exception as e:
        logger.error(f"Error in batch translation: {e}")
        sys.exit(1)
|
|
|
|
|
|
def add_brand_asin_links(data, brand_indices, asin_indices):
    """Turn the brand and ASIN cells of every data row into links.

    The first row of ``data`` is treated as a header and skipped. A cell is
    left unchanged when it is empty or when its index lies beyond the end of
    the row.

    Args:
        data: List of rows (lists of cell values), header first. Rows are
            modified in place.
        brand_indices: Positions of the brand columns; each non-empty cell is
            replaced by the result of ``create_hyperlink`` pointing at an
            Amazon search for that brand.
        asin_indices: Positions of the ASIN columns; each non-empty cell is
            replaced by the result of ``create_asin_link``.

    Returns:
        The same ``data`` list after the rows have been updated.

    On any error the failure is logged and the process exits with status 1.
    """
    try:
        for row in data[1:]:
            # Brand columns.
            for index in brand_indices:
                if index < len(row) and row[index]:
                    brand = row[index]
                    # The brand has to be the query term of the search URL;
                    # a bare "s?k=" would link every brand to an empty search.
                    brand_url = f"https://www.amazon.co.jp/s?k={brand}"
                    row[index] = create_hyperlink(brand, brand_url)

            # ASIN columns.
            for index in asin_indices:
                if index < len(row) and row[index]:
                    row[index] = create_asin_link(row[index])
        return data
    except Exception as e:
        logger.error(f"Error adding brand/ASIN links: {e}")
        sys.exit(1)
|
|
|
+
|
|
|
def save_csv(data, file_path):
|
|
|
try:
|
|
|
with open(file_path, 'w', encoding='utf-8-sig', newline='') as f:
|
|
|
@@ -100,17 +135,28 @@ def main(input_file, output_file):
|
|
|
# Read CSV with proper encoding
|
|
|
data = read_csv(input_file)
|
|
|
|
|
|
- # Insert empty column for translations after search term column
|
|
|
- search_term_index = 1 # Search term is in second column
|
|
|
- data = insert_empty_column(data, search_term_index)
|
|
|
+ # 定义需要处理的列索引
|
|
|
+ search_term_index = 1 # 搜索词列
|
|
|
+ brand_indices = [2, 3, 4] # 品牌列
|
|
|
+ asin_indices = [7, 11, 15] # ASIN列
|
|
|
+ category_indices = [5, 6, 7] # 类别列
|
|
|
+
|
|
|
+ # 插入空列用于翻译
|
|
|
+ insert_indices = [search_term_index] + category_indices
|
|
|
+ data = insert_empty_columns(data, insert_indices)
|
|
|
+
|
|
|
+ # 更新标题行
|
|
|
+ data[0][search_term_index + 1] = "中文翻译"
|
|
|
+ for index in category_indices:
|
|
|
+ data[0].insert(index + 1, "中文翻译")
|
|
|
|
|
|
- # Update header row with new column name
|
|
|
- data[0].insert(search_term_index + 1, "中文翻译")
|
|
|
+ # 处理翻译
|
|
|
+ data = process_batch_translations(data, search_term_index, category_indices)
|
|
|
|
|
|
- # Process translations in batch
|
|
|
- data = process_batch_translations(data, search_term_index)
|
|
|
+ # 添加品牌和ASIN链接
|
|
|
+ data = add_brand_asin_links(data, brand_indices, asin_indices)
|
|
|
|
|
|
- # Save processed data
|
|
|
+ # 保存处理后的数据
|
|
|
save_csv(data, output_file)
|
|
|
logger.info(f"Successfully processed and saved to {output_file}")
|
|
|
|