Просмотр исходного кода

refactor: Improve encoding detection and error handling in CSV processing

Your Name (aider) 1 год назад
Родитель
Коммит
38be57e8b7
1 изменённый файл: 49 добавлений и 15 удалений
  1. 49 15
      translate_new_col.py

+ 49 - 15
translate_new_col.py

@@ -10,7 +10,7 @@ def create_search_link(value):
 def detect_encoding(file_path):
     """检测文件编码"""
     # 常见日文编码列表
-    encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932']
+    encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932', 'iso-2022-jp']
     
     # 尝试读取文件
     for encoding in encodings:
@@ -20,21 +20,42 @@ def detect_encoding(file_path):
                 return encoding
         except UnicodeDecodeError:
             continue
-    return 'utf-8-sig'  # 默认返回utf-8-sig
+    
+    # 如果常见编码都失败,尝试二进制读取检测BOM
+    with open(file_path, 'rb') as f:
+        bom = f.read(4)
+        if bom.startswith(b'\xef\xbb\xbf'):
+            return 'utf-8-sig'
+        elif bom.startswith(b'\xff\xfe'):
+            return 'utf-16'
+        elif bom.startswith(b'\xfe\xff'):
+            return 'utf-16-be'
+    
+    return 'cp932'  # 默认返回日文常用编码
 
 def read_csv(file_path):
     """读取CSV文件并返回数据列表"""
     encoding = detect_encoding(file_path)
+    print(f"Detected encoding: {encoding}")
     
     try:
         with open(file_path, mode='r', encoding=encoding) as file:
-            reader = csv.reader(file)
+            # 使用csv.Sniffer检测分隔符
+            dialect = csv.Sniffer().sniff(file.read(1024))
+            file.seek(0)
+            
+            reader = csv.reader(file, dialect)
             data = [row for row in reader]
+            
             # 验证第一行是否包含有效数据
             if len(data) > 0 and len(data[0]) > 0:
                 return data, encoding
+            else:
+                raise ValueError("Empty or invalid CSV file")
+                
     except Exception as e:
         print(f"Error reading CSV file: {e}")
+        raise
     
     return [], None
 
@@ -66,6 +87,7 @@ def translate_column(data, column_index, start_row=0, target_language='zh'):
                     row[column_index + 1] = translations[i - start_row]
         except Exception as e:
             print(f"Error translating rows: {e}")
+            raise
     
     return data
 
@@ -84,24 +106,32 @@ def save_csv(data, file_path, encoding='utf-8-sig'):
             writer.writerows(data)
     except Exception as e:
         print(f"Error saving CSV file: {e}")
+        raise
 
 def process_csv(input_file, output_file, column_index, start_row=0, target_language='zh'):
     """处理CSV文件的主要函数"""
-    data, detected_encoding = read_csv(input_file)
-    if not data:
-        return
+    try:
+        data, detected_encoding = read_csv(input_file)
+        if not data:
+            raise ValueError("No data found in CSV file")
 
-    # 插入空列
-    data = insert_empty_column(data, column_index)
+        # 插入空列
+        data = insert_empty_column(data, column_index)
 
-    # 翻译第二列的文本并保存到下一列
-    data = translate_column(data, column_index, start_row, target_language)
+        # 翻译第二列的文本并保存到下一列
+        data = translate_column(data, column_index, start_row, target_language)
 
-    # 为搜索词添加超链接
-    data = add_search_links(data, column_index, start_row)
+        # 为搜索词添加超链接
+        data = add_search_links(data, column_index, start_row)
 
-    # 保存为新文件,使用检测到的编码或默认utf-8-sig
-    save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
+        # 保存为新文件,使用检测到的编码或默认utf-8-sig
+        save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
+        
+        print(f"Successfully processed and saved to {output_file}")
+        
+    except Exception as e:
+        print(f"Error processing CSV file: {e}")
+        raise
 
 if __name__ == "__main__":
     input_file = "测试.csv"
@@ -109,4 +139,8 @@ if __name__ == "__main__":
     column_index = 1  # 插入空列的列索引(第2列)
     start_row = 2  # 从第2行开始翻译(通常第0行是标题)
 
-    process_csv(input_file, output_file, column_index, start_row)
+    try:
+        process_csv(input_file, output_file, column_index, start_row)
+    except Exception as e:
+        print(f"Fatal error: {e}")
+        exit(1)