|
|
@@ -10,7 +10,7 @@ def create_search_link(value):
|
|
|
def detect_encoding(file_path):
|
|
|
"""检测文件编码"""
|
|
|
# 常见日文编码列表
|
|
|
- encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932']
|
|
|
+ encodings = ['utf-8-sig', 'shift_jis', 'euc-jp', 'utf-16', 'cp932', 'iso-2022-jp']
|
|
|
|
|
|
# 尝试读取文件
|
|
|
for encoding in encodings:
|
|
|
@@ -20,21 +20,42 @@ def detect_encoding(file_path):
|
|
|
return encoding
|
|
|
except UnicodeDecodeError:
|
|
|
continue
|
|
|
- return 'utf-8-sig' # 默认返回utf-8-sig
|
|
|
+
|
|
|
+ # 如果常见编码都失败,尝试二进制读取检测BOM
|
|
|
+ with open(file_path, 'rb') as f:
|
|
|
+ bom = f.read(4)
|
|
|
+ if bom.startswith(b'\xef\xbb\xbf'):
|
|
|
+ return 'utf-8-sig'
|
|
|
+ elif bom.startswith(b'\xff\xfe'):
|
|
|
+ return 'utf-16'
|
|
|
+ elif bom.startswith(b'\xfe\xff'):
|
|
|
+ return 'utf-16-be'
|
|
|
+
|
|
|
+ return 'cp932' # 默认返回日文常用编码
|
|
|
|
|
|
def read_csv(file_path):
|
|
|
"""读取CSV文件并返回数据列表"""
|
|
|
encoding = detect_encoding(file_path)
|
|
|
+ print(f"Detected encoding: {encoding}")
|
|
|
|
|
|
try:
|
|
|
with open(file_path, mode='r', encoding=encoding) as file:
|
|
|
- reader = csv.reader(file)
|
|
|
+ # 使用csv.Sniffer检测分隔符
|
|
|
+ dialect = csv.Sniffer().sniff(file.read(1024))
|
|
|
+ file.seek(0)
|
|
|
+
|
|
|
+ reader = csv.reader(file, dialect)
|
|
|
data = [row for row in reader]
|
|
|
+
|
|
|
# 验证第一行是否包含有效数据
|
|
|
if len(data) > 0 and len(data[0]) > 0:
|
|
|
return data, encoding
|
|
|
+ else:
|
|
|
+ raise ValueError("Empty or invalid CSV file")
|
|
|
+
|
|
|
except Exception as e:
|
|
|
print(f"Error reading CSV file: {e}")
|
|
|
+ raise
|
|
|
|
|
|
return [], None
|
|
|
|
|
|
@@ -66,6 +87,7 @@ def translate_column(data, column_index, start_row=0, target_language='zh'):
|
|
|
row[column_index + 1] = translations[i - start_row]
|
|
|
except Exception as e:
|
|
|
print(f"Error translating rows: {e}")
|
|
|
+ raise
|
|
|
|
|
|
return data
|
|
|
|
|
|
@@ -84,24 +106,32 @@ def save_csv(data, file_path, encoding='utf-8-sig'):
|
|
|
writer.writerows(data)
|
|
|
except Exception as e:
|
|
|
print(f"Error saving CSV file: {e}")
|
|
|
+ raise
|
|
|
|
|
|
def process_csv(input_file, output_file, column_index, start_row=0, target_language='zh'):
|
|
|
"""处理CSV文件的主要函数"""
|
|
|
- data, detected_encoding = read_csv(input_file)
|
|
|
- if not data:
|
|
|
- return
|
|
|
+ try:
|
|
|
+ data, detected_encoding = read_csv(input_file)
|
|
|
+ if not data:
|
|
|
+ raise ValueError("No data found in CSV file")
|
|
|
|
|
|
- # 插入空列
|
|
|
- data = insert_empty_column(data, column_index)
|
|
|
+ # 插入空列
|
|
|
+ data = insert_empty_column(data, column_index)
|
|
|
|
|
|
- # 翻译第二列的文本并保存到下一列
|
|
|
- data = translate_column(data, column_index, start_row, target_language)
|
|
|
+ # 翻译第二列的文本并保存到下一列
|
|
|
+ data = translate_column(data, column_index, start_row, target_language)
|
|
|
|
|
|
- # 为搜索词添加超链接
|
|
|
- data = add_search_links(data, column_index, start_row)
|
|
|
+ # 为搜索词添加超链接
|
|
|
+ data = add_search_links(data, column_index, start_row)
|
|
|
|
|
|
- # 保存为新文件,使用检测到的编码或默认utf-8-sig
|
|
|
- save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
|
|
|
+ # 保存为新文件,使用检测到的编码或默认utf-8-sig
|
|
|
+ save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
|
|
|
+
|
|
|
+ print(f"Successfully processed and saved to {output_file}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Error processing CSV file: {e}")
|
|
|
+ raise
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
input_file = "测试.csv"
|
|
|
@@ -109,4 +139,8 @@ if __name__ == "__main__":
|
|
|
column_index = 1 # 插入空列的列索引(第2列)
|
|
|
start_row = 2 # 从第2行开始翻译(通常第0行是标题)
|
|
|
|
|
|
- process_csv(input_file, output_file, column_index, start_row)
|
|
|
+ try:
|
|
|
+ process_csv(input_file, output_file, column_index, start_row)
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Fatal error: {e}")
|
|
|
+ exit(1)
|