1 年之前 · 964f7042ce
--- a/mylib/translate_utils.py
+++ b/mylib/translate_utils.py
@@ -0,0 +1,70 @@
 
				+import os
			
 
				+import logging
			
 
				+from typing import List
			
 
				+from mylib.pdfzh_translator import OpenAITranslator
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+def insert_empty_columns(data: List[List[str]], column_indices: List[int]) -> List[List[str]]:
			
 
				+    """在指定列之后插入空列"""
			
 
				+    try:
			
 
				+        # 按从大到小排序，防止插入影响后续索引
			
 
				+        column_indices.sort(reverse=True)
			
 
				+        for row in data:
			
 
				+            for index in column_indices:
			
 
				+                row.insert(index + 1, '')
			
 
				+        return data
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error inserting empty columns: {e}")
			
 
				+        raise
			
 
				+
			
 
				+def process_batch_translations(data: List[List[str]], search_term_index: int, category_indices: List[int]) -> List[List[str]]:
			
 
				+    """批量处理翻译"""
			
 
				+    try:
			
 
				+        # 初始化翻译器
			
 
				+        translator = OpenAITranslator("openai", "zh-CN", "en", "gpt-3.5-turbo")
			
 
				+        
			
 
				+        # 收集所有需要翻译的文本
			
 
				+        translation_batches = {
			
 
				+            'search_terms': [row[search_term_index] for row in data[2:]],  # 从第三行开始
			
 
				+            'categories': []
			
 
				+        }
			
 
				+        
			
 
				+        # 收集类别翻译
			
 
				+        for index in category_indices:
			
 
				+            translation_batches['categories'].extend([row[index] for row in data[2:]])  # 从第三行开始
			
 
				+        
			
 
				+        # 批量翻译
			
 
				+        logger.info("Starting batch translations...")
			
 
				+        
			
 
				+        if os.getenv('DEBUG', '').lower() in ('true', '1', 't'):
			
 
				+            # DEBUG模式：使用模拟翻译
			
 
				+            search_translations = [f"{text} 翻译测试" for text in translation_batches['search_terms']]
			
 
				+            category_translations = [f"{text} 翻译测试" for text in translation_batches['categories']]
			
 
				+        else:
			
 
				+            # 正常模式：调用真实翻译
			
 
				+            search_translations = translator.translate(translation_batches['search_terms'])
			
 
				+            category_translations = translator.translate(translation_batches['categories'])
			
 
				+        
			
 
				+        logger.info("Batch translations completed")
			
 
				+        
			
 
				+        # 更新数据
			
 
				+        for i, row in enumerate(data[2:], start=2):  # 从第三行开始处理
			
 
				+            try:
			
 
				+                # 更新搜索词翻译列
			
 
				+                row[search_term_index + 1] = search_translations[i-2]
			
 
				+                
			
 
				+                # 更新类别翻译
			
 
				+                category_trans_index = (i-2) * len(category_indices)
			
 
				+                for cat_index in category_indices:
			
 
				+                    row[cat_index + 1] = category_translations[category_trans_index]
			
 
				+                    category_trans_index += 1
			
 
				+                
			
 
				+            except Exception as e:
			
 
				+                logger.error(f"Error processing row {i}: {e}")
			
 
				+                raise
			
 
				+                
			
 
				+        return data
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error in batch translation: {e}")
			
 
				+        raise
			
--- a/process_data.py
+++ b/process_data.py
@@ -3,7 +3,7 @@ import chardet
 
				 import sys
			
 
				 import logging
			
 
				 from pathlib import Path
			
 
				-from mylib.pdfzh_translator import OpenAITranslator
			
 
				+from mylib.translate_utils import insert_empty_columns, process_batch_translations
			
 
				 from brand_add_url_link import create_hyperlink, create_asin_link
			
 
				 from mylib.logging_config import setup_logging
			
 
				 
			
@@ -43,66 +43,6 @@ def read_csv(file_path):
 
				     logger.error("Failed to read file with all attempted encodings")
			
 
				     sys.exit(1)
			
 
				 
			
 
				-def insert_empty_columns(data, column_indices):
			
 
				-    """在指定列之后插入空列"""
			
 
				-    try:
			
 
				-        # 按从大到小排序，防止插入影响后续索引
			
 
				-        column_indices.sort(reverse=True)
			
 
				-        for row in data:
			
 
				-            for index in column_indices:
			
 
				-                row.insert(index + 1, '')
			
 
				-        return data
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"Error inserting empty columns: {e}")
			
 
				-        sys.exit(1)
			
 
				-
			
 
				-def process_batch_translations(data, search_term_index, category_indices):
			
 
				-    """批量处理翻译"""
			
 
				-    try:
			
 
				-        # 初始化翻译器
			
 
				-        translator = OpenAITranslator("openai", "zh-CN", "en", "gpt-3.5-turbo")
			
 
				-        
			
 
				-        # 收集所有需要翻译的文本
			
 
				-        translation_batches = {
			
 
				-            'search_terms': [row[search_term_index] for row in data[2:]],  # 从第三行开始
			
 
				-            'categories': []
			
 
				-        }
			
 
				-        
			
 
				-        # 收集类别翻译
			
 
				-        for index in category_indices:
			
 
				-            translation_batches['categories'].extend([row[index] for row in data[2:]])  # 从第三行开始
			
 
				-        
			
 
				-        # 批量翻译
			
 
				-        logger.info("Starting batch translations...")
			
 
				-        search_translations = translator.translate(translation_batches['search_terms'])
			
 
				-        category_translations = translator.translate(translation_batches['categories'])
			
 
				-        logger.info("Batch translations completed")
			
 
				-        
			
 
				-        # 更新数据
			
 
				-        for i, row in enumerate(data[2:], start=2):  # 从第三行开始处理
			
 
				-            try:
			
 
				-                # 更新搜索词翻译列
			
 
				-                row[search_term_index + 1] = search_translations[i-2]
			
 
				-                
			
 
				-                # 添加亚马逊搜索链接（跳过标题行）
			
 
				-                amazon_url = f"https://www.amazon.co.jp/s?k={row[search_term_index]}"
			
 
				-                row[search_term_index] = create_hyperlink(row[search_term_index], amazon_url)
			
 
				-                
			
 
				-                # 更新类别翻译
			
 
				-                category_trans_index = (i-2) * len(category_indices)
			
 
				-                for cat_index in category_indices:
			
 
				-                    row[cat_index + 1] = category_translations[category_trans_index]
			
 
				-                    category_trans_index += 1
			
 
				-                
			
 
				-            except Exception as e:
			
 
				-                logger.error(f"Error processing row {i}: {e}")
			
 
				-                sys.exit(1)
			
 
				-                
			
 
				-        return data
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"Error in batch translation: {e}")
			
 
				-        sys.exit(1)
			
 
				-
			
 
				 def add_brand_asin_links(data, brand_indices, asin_indices):
			
 
				     """为品牌和ASIN列添加链接"""
			
 
				     try: