process_data.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import csv
  2. import chardet
  3. from ai_trans import translate_sentences
  4. from brand_add_url_link import create_hyperlink
  5. def detect_encoding(file_path):
  6. with open(file_path, 'rb') as f:
  7. raw_data = f.read()
  8. result = chardet.detect(raw_data)
  9. return result['encoding']
  10. def read_csv(file_path):
  11. encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
  12. detected_encoding = detect_encoding(file_path)
  13. print(f"Detected encoding: {detected_encoding}")
  14. if detected_encoding:
  15. encodings_to_try.insert(0, detected_encoding)
  16. for encoding in encodings_to_try:
  17. try:
  18. with open(file_path, 'r', encoding=encoding) as f:
  19. reader = csv.reader(f)
  20. return list(reader)
  21. except UnicodeDecodeError:
  22. continue
  23. except Exception as e:
  24. print(f"Error with encoding {encoding}: {e}")
  25. continue
  26. raise Exception("Failed to read file with all attempted encodings")
  27. def process_row(row, search_term_index):
  28. # Add translation column after search term
  29. search_term = row[search_term_index]
  30. try:
  31. print(f"Translating: {search_term}")
  32. translations = translate_sentences([search_term])
  33. print(f"Translation result: {translations}")
  34. if not translations or len(translations) == 0:
  35. translated = "翻译失败(无结果)"
  36. elif isinstance(translations, int): # Handle case where function returns error code
  37. translated = f"翻译失败(错误码:{translations})"
  38. else:
  39. translated = translations[0]
  40. except Exception as e:
  41. print(f"Translation error for '{search_term}': {str(e)}")
  42. translated = f"翻译失败(异常:{str(e)})"
  43. row.insert(search_term_index + 1, translated)
  44. # Add Amazon search link
  45. amazon_url = f"https://www.amazon.co.jp/s?k={search_term}"
  46. row[search_term_index] = create_hyperlink(search_term, amazon_url)
  47. return row
  48. def save_csv(data, file_path):
  49. with open(file_path, 'w', encoding='utf-8-sig', newline='') as f:
  50. writer = csv.writer(f)
  51. writer.writerows(data)
  52. def main(input_file, output_file):
  53. try:
  54. # Read CSV with proper encoding
  55. data = read_csv(input_file)
  56. # Process each row (skip header row)
  57. search_term_index = 1 # Search term is in second column
  58. for i, row in enumerate(data[1:], start=1):
  59. try:
  60. print(f"\nProcessing row {i}")
  61. data[i] = process_row(row, search_term_index)
  62. print(f"Processed row {i} successfully")
  63. except Exception as e:
  64. print(f"Error processing row {i}: {str(e)}")
  65. # Insert empty translation column to maintain structure
  66. row.insert(search_term_index + 1, "翻译失败(处理错误)")
  67. data[i] = row
  68. break
  69. # Save processed data
  70. save_csv(data, output_file)
  71. print(f"Successfully processed and saved to {output_file}")
  72. except Exception as e:
  73. print(f"Error processing file: {e}")
  74. raise
  75. if __name__ == "__main__":
  76. input_file = "测试.csv"
  77. output_file = "processed_测试.csv"
  78. main(input_file, output_file)