# process_data.py
import csv
import logging
import sys
from pathlib import Path
from urllib.parse import quote_plus

import chardet

from ai_trans import translate_sentences
from brand_add_url_link import create_hyperlink
  8. # Configure logging
  9. logging.basicConfig(
  10. level=logging.INFO,
  11. format='%(asctime)s - %(levelname)s - %(message)s',
  12. handlers=[
  13. logging.FileHandler('process_data.log'),
  14. logging.StreamHandler()
  15. ]
  16. )
  17. logger = logging.getLogger(__name__)
  18. def detect_encoding(file_path):
  19. try:
  20. with open(file_path, 'rb') as f:
  21. raw_data = f.read()
  22. result = chardet.detect(raw_data)
  23. return result['encoding']
  24. except Exception as e:
  25. logger.error(f"Error detecting encoding for {file_path}: {e}")
  26. sys.exit(1)
  27. def read_csv(file_path):
  28. encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
  29. detected_encoding = detect_encoding(file_path)
  30. logger.info(f"Detected encoding: {detected_encoding}")
  31. if detected_encoding:
  32. encodings_to_try.insert(0, detected_encoding)
  33. for encoding in encodings_to_try:
  34. try:
  35. with open(file_path, 'r', encoding=encoding) as f:
  36. reader = csv.reader(f)
  37. return list(reader)
  38. except UnicodeDecodeError:
  39. continue
  40. except Exception as e:
  41. logger.error(f"Error with encoding {encoding}: {e}")
  42. continue
  43. logger.error("Failed to read file with all attempted encodings")
  44. sys.exit(1)
  45. def insert_empty_column(data, column_index):
  46. """在指定列之后插入一个空列"""
  47. try:
  48. for row in data:
  49. row.insert(column_index + 1, '')
  50. return data
  51. except Exception as e:
  52. logger.error(f"Error inserting empty column at index {column_index}: {e}")
  53. sys.exit(1)
  54. def process_row(row, search_term_index):
  55. try:
  56. # Add translation column after search term
  57. search_term = row[search_term_index]
  58. logger.info(f"Translating: {search_term}")
  59. translation_result = translate_sentences([search_term])
  60. logger.info(f"Translation result: {translation_result}")
  61. # Handle translation result
  62. if not translation_result or 'translations' not in translation_result:
  63. translated = "翻译失败(无结果)"
  64. logger.error(f"Translation failed for '{search_term}': Invalid result format")
  65. sys.exit(1)
  66. translations = translation_result['translations']
  67. if not translations or len(translations) == 0:
  68. translated = "翻译失败(无结果)"
  69. logger.error(f"Translation failed for '{search_term}': No translations in result")
  70. sys.exit(1)
  71. else:
  72. translated = translations[0]
  73. # Update the row with translation in the new column
  74. row[search_term_index + 1] = translated
  75. # Add Amazon search link
  76. amazon_url = f"https://www.amazon.co.jp/s?k={search_term}"
  77. row[search_term_index] = create_hyperlink(search_term, amazon_url)
  78. return row
  79. except Exception as e:
  80. logger.error(f"Error processing row: {e}")
  81. sys.exit(1)
  82. def save_csv(data, file_path):
  83. try:
  84. with open(file_path, 'w', encoding='utf-8-sig', newline='') as f:
  85. writer = csv.writer(f)
  86. writer.writerows(data)
  87. except Exception as e:
  88. logger.error(f"Error saving CSV to {file_path}: {e}")
  89. sys.exit(1)
  90. def main(input_file, output_file):
  91. try:
  92. # Read CSV with proper encoding
  93. data = read_csv(input_file)
  94. # Insert empty column for translations after search term column
  95. search_term_index = 1 # Search term is in second column
  96. data = insert_empty_column(data, search_term_index)
  97. # Update header row with new column name
  98. data[0].insert(search_term_index + 1, "中文翻译")
  99. # Process each row (skip header row)
  100. for i, row in enumerate(data[1:], start=1):
  101. try:
  102. logger.info(f"\nProcessing row {i}")
  103. data[i] = process_row(row, search_term_index)
  104. logger.info(f"Processed row {i} successfully")
  105. except Exception as e:
  106. logger.error(f"Error processing row {i}: {str(e)}")
  107. sys.exit(1)
  108. # Save processed data
  109. save_csv(data, output_file)
  110. logger.info(f"Successfully processed and saved to {output_file}")
  111. except Exception as e:
  112. logger.error(f"Error processing file: {e}")
  113. sys.exit(1)
  114. if __name__ == "__main__":
  115. output_dir = Path('temp')
  116. input_file = output_dir/"测试.csv"
  117. output_file = output_dir/"processed_测试.csv"
  118. main(input_file, output_file)