Просмотр исходного кода

feat: improve encoding detection and translation handling

Your Name (aider) 1 год назад
Родитель
Коммит
e5a4d4c7e5
1 изменённый файл: 29 добавлений и 20 удалений
  1. 29 20
      translate_new_col.py

+ 29 - 20
translate_new_col.py

@@ -1,37 +1,47 @@
 import os
 import os
 import csv
 import csv
-from ai_trans import translate_sentences
 from urllib.parse import quote
 from urllib.parse import quote
-
-# 设置 OpenAI API 配置
-os.environ['OPENAI_API_KEY'] = 'sk-NscqaCD1PfVm7soEF3C3E6297bE14d7fB595Be8f17F39aFf'
-os.environ['OPENAI_API_BASE'] = 'https://aiapi.magong.site/v1'
+from ai_trans import translate_sentences
 
 
 def create_search_link(value):
 def create_search_link(value):
     """为搜索词创建亚马逊搜索链接"""
     """为搜索词创建亚马逊搜索链接"""
     return f'=HYPERLINK("https://www.amazon.co.jp/s?k={quote(value)}", "{value}")'
     return f'=HYPERLINK("https://www.amazon.co.jp/s?k={quote(value)}", "{value}")'
 
 
+def detect_encoding(file_path):
+    """检测文件编码"""
+    import chardet
+    with open(file_path, 'rb') as f:
+        raw_data = f.read()
+        result = chardet.detect(raw_data)
+        return result['encoding']
+
 def read_csv(file_path):
 def read_csv(file_path):
     """读取CSV文件并返回数据列表"""
     """读取CSV文件并返回数据列表"""
-    encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16']  # 添加日文常用编码
+    encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932']
+    detected_encoding = detect_encoding(file_path)
+    if detected_encoding:
+        encodings.insert(0, detected_encoding)
+    
     for encoding in encodings:
     for encoding in encodings:
         try:
         try:
             with open(file_path, mode='r', encoding=encoding) as file:
             with open(file_path, mode='r', encoding=encoding) as file:
                 reader = csv.reader(file)
                 reader = csv.reader(file)
                 data = [row for row in reader]
                 data = [row for row in reader]
-            return data
+                # 验证第一行是否包含有效数据
+                if len(data) > 0 and len(data[0]) > 0:
+                    return data, encoding
         except UnicodeDecodeError:
         except UnicodeDecodeError:
             continue
             continue
         except Exception as e:
         except Exception as e:
             print(f"Error reading CSV file with encoding {encoding}: {e}")
             print(f"Error reading CSV file with encoding {encoding}: {e}")
-            return []
+            continue
     print("Failed to read CSV file with any of the specified encodings.")
     print("Failed to read CSV file with any of the specified encodings.")
-    return []
+    return [], None
 
 
 def insert_empty_column(data, column_index):
 def insert_empty_column(data, column_index):
     """在指定列之后插入一个空列"""
     """在指定列之后插入一个空列"""
     for row in data:
     for row in data:
-        row.insert(column_index + 1, '')  # 插入在目标列后面
+        row.insert(column_index + 1, '')
     return data
     return data
 
 
 def translate_column(data, column_index, start_row=0, target_language='zh'):
 def translate_column(data, column_index, start_row=0, target_language='zh'):
@@ -48,15 +58,12 @@ def translate_column(data, column_index, start_row=0, target_language='zh'):
     if sentences_to_translate:
     if sentences_to_translate:
         try:
         try:
             print(f"Translating {len(sentences_to_translate)} sentences to {target_language}.")
             print(f"Translating {len(sentences_to_translate)} sentences to {target_language}.")
-            print(f"Input sentences: {sentences_to_translate}")
-            translated_output = translate_sentences(sentences_to_translate, target_language, api_key='YOUR_API_KEY')
-            print(f"Translated output: {translated_output}")
+            translated_output = translate_sentences(sentences_to_translate, target_language)
             translations = translated_output.get("translations", [])
             translations = translated_output.get("translations", [])
             
             
             for i, row in enumerate(data[start_row:], start=start_row):
             for i, row in enumerate(data[start_row:], start=start_row):
                 if len(row) > column_index and i - start_row < len(translations):
                 if len(row) > column_index and i - start_row < len(translations):
-                    row[column_index + 1] = translations[i - start_row]  # 写入到下一列
-                    print(f"Translated row {i}: {row[column_index]} -> {row[column_index + 1]}")
+                    row[column_index + 1] = translations[i - start_row]
         except Exception as e:
         except Exception as e:
             print(f"Error translating rows: {e}")
             print(f"Error translating rows: {e}")
     
     
@@ -69,10 +76,10 @@ def add_search_links(data, column_index, start_row=0):
             row[column_index] = create_search_link(row[column_index])
             row[column_index] = create_search_link(row[column_index])
     return data
     return data
 
 
-def save_csv(data, file_path):
+def save_csv(data, file_path, encoding='utf-8-sig'):
     """将数据保存为CSV文件"""
     """将数据保存为CSV文件"""
     try:
     try:
-        with open(file_path, mode='w', newline='', encoding='utf-8-sig') as file:  # 使用utf-8-sig编码
+        with open(file_path, mode='w', newline='', encoding=encoding) as file:
             writer = csv.writer(file)
             writer = csv.writer(file)
             writer.writerows(data)
             writer.writerows(data)
     except Exception as e:
     except Exception as e:
@@ -80,7 +87,9 @@ def save_csv(data, file_path):
 
 
 def process_csv(input_file, output_file, column_index, start_row=0, target_language='zh'):
 def process_csv(input_file, output_file, column_index, start_row=0, target_language='zh'):
     """处理CSV文件的主要函数"""
     """处理CSV文件的主要函数"""
-    data = read_csv(input_file)
+    data, detected_encoding = read_csv(input_file)
+    if not data:
+        return
 
 
     # 插入空列
     # 插入空列
     data = insert_empty_column(data, column_index)
     data = insert_empty_column(data, column_index)
@@ -91,8 +100,8 @@ def process_csv(input_file, output_file, column_index, start_row=0, target_langu
     # 为搜索词添加超链接
     # 为搜索词添加超链接
     data = add_search_links(data, column_index, start_row)
     data = add_search_links(data, column_index, start_row)
 
 
-    # 保存为新文件
-    save_csv(data, output_file)
+    # 保存为新文件,使用检测到的编码或默认utf-8-sig
+    save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     input_file = "测试.csv"
     input_file = "测试.csv"