Jelajahi Sumber

feat: Add sample data extraction before batch translations

mrh (aider) 1 tahun lalu
induk
melakukan
d5eca791da
1 file diubah dengan 22 tambahan dan 4 penghapusan
  1. 22 4
      mylib/translate_utils.py

+ 22 - 4
mylib/translate_utils.py

@@ -1,6 +1,6 @@
 import os
 import logging
-from typing import List
+from typing import List, Tuple
 from mylib.pdfzh_translator import OpenAITranslator
 
 logger = logging.getLogger(__name__)
@@ -18,9 +18,27 @@ def insert_empty_columns(data: List[List[str]], column_indices: List[int]) -> Li
         logger.error(f"Error inserting empty columns: {e}")
         raise
 
-def process_batch_translations(data: List[List[str]], search_term_index: int, category_indices: List[int], start_row: int = 3) -> List[List[str]]:
+def extract_sample_data(data: List[List[str]], n: int = 2, m: int = 2) -> List[List[str]]:
+    """Return the first n rows of data, each truncated to its first m columns, for inspection."""
+    try:
+        sample = []
+        for row in data[:n]:
+            sample.append(row[:m])  # slicing copies, so the rows in `data` are left untouched
+        return sample
+    except Exception as e:
+        logger.error(f"Error extracting sample data: {e}")
+        raise  # re-raise after logging so the caller still sees the original exception
+
+def process_batch_translations(data: List[List[str]], 
+                             search_term_index: int, 
+                             category_indices: List[int], 
+                             start_row: int = 3) -> Tuple[List[List[str]], List[List[str]]]:
     """批量处理翻译"""
     try:
+        # 首先提取样本数据用于检查
+        sample_data = extract_sample_data(data)
+        logger.info(f"Sample data extracted for inspection:\n{sample_data}")
+        
         # 初始化翻译器
         translator = OpenAITranslator()
         
@@ -64,7 +82,7 @@ def process_batch_translations(data: List[List[str]], search_term_index: int, ca
                 logger.error(f"Error processing row {i}: {e}")
                 raise
                 
-        return data
+        return data, sample_data
     except Exception as e:
         logger.error(f"Error in batch translation: {e}")
         raise
@@ -73,4 +91,4 @@ def main():
     pass
 
 if __name__ == "__main__":
-    main()
+    main()