před 1 rokem · ad43844bf3
--- a/translate_new_col.py
+++ b/translate_new_col.py
@@ -2,6 +2,7 @@ import os
 
															 import csv
														
 
															 from urllib.parse import quote
														
 
															 from ai_trans import translate_sentences
														
 
															+import chardet
														
 
															 def create_search_link(value):
														
 
															     """为搜索词创建亚马逊搜索链接"""
														
@@ -9,29 +10,25 @@ def create_search_link(value):
 
															 def detect_encoding(file_path):
														
 
															     """检测文件编码"""
														
 
															-    # 常见日文编码列表
														
 
															-    encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932', 'iso-2022-jp']
														
 
															-    
														
 
															-    # 尝试读取文件
														
 
															-    for encoding in encodings:
														
 
															-        try:
														
 
															-            with open(file_path, 'r', encoding=encoding) as f:
														
 
															-                f.read(1024)  # 读取前1024字节测试
														
 
															-                return encoding
														
 
															-        except UnicodeDecodeError:
														
 
															-            continue
														
 
															-    
														
 
															-    # 如果常见编码都失败，尝试二进制读取检测BOM
														
 
															+    # 使用chardet进行更可靠的编码检测
														
 
															     with open(file_path, 'rb') as f:
														
 
															-        bom = f.read(4)
														
 
															-        if bom.startswith(b'\xef\xbb\xbf'):
														
 
															-            return 'utf-8-sig'
														
 
															-        elif bom.startswith(b'\xff\xfe'):
														
 
															-            return 'utf-16'
														
 
															-        elif bom.startswith(b'\xfe\xff'):
														
 
															-            return 'utf-16-be'
														
 
															-    
														
 
															-    return 'cp932'  # 默认返回日文常用编码
														
 
															+        raw_data = f.read(10000)  # 读取前10000字节用于检测
														
 
															+        result = chardet.detect(raw_data)
														
 
															+        encoding = result['encoding']
														
 
															+        
														
 
															+        # 处理一些常见的不准确检测结果
														
 
															+        if encoding == 'SHIFT_JIS':
														
 
															+            return 'shift_jis'
														
 
															+        elif encoding == 'EUC-JP':
														
 
															+            return 'euc-jp'
														
 
															+        elif encoding == 'ISO-8859-1':
														
 
															+            # 可能是UTF-8被误判为ISO-8859-1
														
 
															+            try:
														
 
															+                raw_data.decode('utf-8')
														
 
															+                return 'utf-8'
														
 
															+            except:
														
 
															+                return 'cp932'
														
 
															+        return encoding or 'utf-8'
														
 
															 def read_csv(file_path):
														
 
															     """读取CSV文件并返回数据列表"""
														
@@ -39,11 +36,17 @@ def read_csv(file_path):
 
															     print(f"Detected encoding: {encoding}")
														
 
															     try:
														
 
															-        with open(file_path, mode='r', encoding=encoding) as file:
														
 
															+        with open(file_path, mode='r', encoding=encoding, errors='replace') as file:
														
 
															             # 使用csv.Sniffer检测分隔符
														
 
															-            dialect = csv.Sniffer().sniff(file.read(1024))
														
 
															+            sample = file.read(1024)
														
 
															             file.seek(0)
														
 
															+            try:
														
 
															+                dialect = csv.Sniffer().sniff(sample)
														
 
															+            except:
														
 
															+                # 如果无法自动检测，使用默认设置
														
 
															+                dialect = csv.excel
														
 
															+                
														
 
															             reader = csv.reader(file, dialect)
														
 
															             data = [row for row in reader]