# translate_new_col.py (5.3 KB)
  1. import os
  2. import csv
  3. from urllib.parse import quote
  4. from ai_trans import translate_sentences
  5. import chardet
  6. def create_search_link(value):
  7. """为搜索词创建亚马逊搜索链接"""
  8. return f'=HYPERLINK("https://www.amazon.co.jp/s?k={quote(value)}", "{value}")'
  9. def detect_encoding(file_path):
  10. """检测文件编码"""
  11. # 使用chardet进行更可靠的编码检测
  12. with open(file_path, 'rb') as f:
  13. raw_data = f.read(10000) # 读取前10000字节用于检测
  14. result = chardet.detect(raw_data)
  15. encoding = result['encoding']
  16. # 处理一些常见的不准确检测结果
  17. if encoding == 'SHIFT_JIS':
  18. return 'shift_jis'
  19. elif encoding == 'EUC-JP':
  20. return 'euc-jp'
  21. elif encoding == 'ISO-8859-1':
  22. # 可能是UTF-8被误判为ISO-8859-1
  23. try:
  24. raw_data.decode('utf-8')
  25. return 'utf-8'
  26. except:
  27. return 'cp932'
  28. return encoding or 'utf-8'
  29. def read_csv(file_path):
  30. """读取CSV文件并返回数据列表"""
  31. encoding = detect_encoding(file_path)
  32. print(f"Detected encoding: {encoding}")
  33. try:
  34. with open(file_path, mode='r', encoding=encoding, errors='replace') as file:
  35. # 使用csv.Sniffer检测分隔符
  36. sample = file.read(1024)
  37. file.seek(0)
  38. try:
  39. dialect = csv.Sniffer().sniff(sample)
  40. except:
  41. # 如果无法自动检测,使用默认设置
  42. dialect = csv.excel
  43. reader = csv.reader(file, dialect)
  44. data = [row for row in reader]
  45. # 验证第一行是否包含有效数据
  46. if len(data) > 0 and len(data[0]) > 0:
  47. return data, encoding
  48. else:
  49. raise ValueError("Empty or invalid CSV file")
  50. except Exception as e:
  51. print(f"Error reading CSV file: {e}")
  52. raise
  53. return [], None
  54. def insert_empty_column(data, column_index):
  55. """在指定列之后插入一个空列"""
  56. for row in data:
  57. row.insert(column_index + 1, '')
  58. return data
  59. def translate_column(data, column_index, start_row=0, target_language='zh'):
  60. """
  61. 翻译指定列的文本,并将结果保存到下一列
  62. :param data: CSV数据列表
  63. :param column_index: 需要翻译的列索引
  64. :param start_row: 开始翻译的行索引,默认为第0行
  65. :param target_language: 目标语言,默认为中文
  66. :return: 翻译后的数据列表
  67. """
  68. sentences_to_translate = [row[column_index] for i, row in enumerate(data[start_row:], start=start_row) if len(row) > column_index]
  69. if sentences_to_translate:
  70. try:
  71. print(f"Translating {len(sentences_to_translate)} sentences to {target_language}.")
  72. translated_output = translate_sentences(sentences_to_translate, target_language)
  73. translations = translated_output.get("translations", [])
  74. for i, row in enumerate(data[start_row:], start=start_row):
  75. if len(row) > column_index and i - start_row < len(translations):
  76. row[column_index + 1] = translations[i - start_row]
  77. except Exception as e:
  78. print(f"Error translating rows: {e}")
  79. raise
  80. return data
  81. def add_search_links(data, column_index, start_row=0):
  82. """为搜索词列添加超链接"""
  83. for i, row in enumerate(data[start_row:], start=start_row):
  84. if len(row) > column_index:
  85. row[column_index] = create_search_link(row[column_index])
  86. return data
  87. def save_csv(data, file_path, encoding='utf-8-sig'):
  88. """将数据保存为CSV文件"""
  89. try:
  90. with open(file_path, mode='w', newline='', encoding=encoding) as file:
  91. writer = csv.writer(file)
  92. writer.writerows(data)
  93. except Exception as e:
  94. print(f"Error saving CSV file: {e}")
  95. raise
  96. def process_csv(input_file, output_file, column_index, start_row=0, target_language='zh'):
  97. """处理CSV文件的主要函数"""
  98. try:
  99. data, detected_encoding = read_csv(input_file)
  100. if not data:
  101. raise ValueError("No data found in CSV file")
  102. # 插入空列
  103. data = insert_empty_column(data, column_index)
  104. # 翻译第二列的文本并保存到下一列
  105. data = translate_column(data, column_index, start_row, target_language)
  106. # 为搜索词添加超链接
  107. data = add_search_links(data, column_index, start_row)
  108. # 保存为新文件,使用检测到的编码或默认utf-8-sig
  109. save_csv(data, output_file, encoding=detected_encoding or 'utf-8-sig')
  110. print(f"Successfully processed and saved to {output_file}")
  111. except Exception as e:
  112. print(f"Error processing CSV file: {e}")
  113. raise
  114. if __name__ == "__main__":
  115. input_file = "测试.csv"
  116. output_file = "测试_processed.csv"
  117. column_index = 1 # 插入空列的列索引(第2列)
  118. start_row = 2 # 从第2行开始翻译(通常第0行是标题)
  119. try:
  120. process_csv(input_file, output_file, column_index, start_row)
  121. except Exception as e:
  122. print(f"Fatal error: {e}")
  123. exit(1)