process_data.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import csv
  2. import chardet
  3. import sys
  4. import logging
  5. from pathlib import Path
  6. from mylib.new_col_translate import insert_empty_columns, process_batch_translations
  7. from brand_add_url_link import create_hyperlink, create_asin_link
  8. from mylib.logging_config import setup_logging
  9. # Setup custom logging
  10. setup_logging()
  11. logger = logging.getLogger(__name__)
  def add_brand_asin_links(data, brand_indices, asin_indices):
      """Replace brand and ASIN cells with links, row by row.

      The first two rows of ``data`` are left untouched; every later row is
      modified in place. A cell is rewritten only when its column index lies
      inside the row and the cell value is truthy.

      Args:
          data: Sliceable sequence of rows; each row is indexed and assigned
              by column position.
          brand_indices: Column indices whose cells are passed to
              ``create_hyperlink`` together with the amazon.co.jp search
              URL prefix.
          asin_indices: Column indices whose cells are passed to
              ``create_asin_link``.

      Returns:
          The same ``data`` object that was passed in.

      Any exception raised while processing is logged and the process is
      terminated with ``sys.exit(1)``.
      """
      try:
          for record in data[2:]:  # processing starts at the third row
              # Brand columns
              for col in brand_indices:
                  if not (col < len(record) and record[col]):
                      continue
                  record[col] = create_hyperlink(record[col], 'https://www.amazon.co.jp/s?k=')
              # ASIN columns
              for col in asin_indices:
                  if not (col < len(record) and record[col]):
                      continue
                  record[col] = create_asin_link(record[col])
      except Exception as e:
          logger.error(f"Error adding brand/ASIN links: {e}")
          sys.exit(1)
      else:
          return data
  28. def save_csv(data, file_path):
  29. try:
  30. with open(file_path, 'w', encoding='utf-8-sig', newline='') as f:
  31. writer = csv.writer(f)
  32. writer.writerows(data)
  33. except Exception as e:
  34. logger.error(f"Error saving CSV to {file_path}: {e}")
  35. sys.exit(1)
  def _read_csv(file_path):
      """Load a CSV file into a list of rows, detecting its text encoding.

      The raw bytes are sniffed with ``chardet``. When no encoding can be
      detected (for example an empty file), ``utf-8-sig`` is used, which is
      the encoding ``save_csv`` writes.

      Args:
          file_path: Path of the CSV file to read.

      Returns:
          A list with one list of cell strings per CSV record.
      """
      with open(file_path, 'rb') as f:
          raw = f.read()
      encoding = chardet.detect(raw).get('encoding') or 'utf-8-sig'
      # newline='' is what the csv module expects so that line breaks inside
      # quoted fields are parsed correctly.
      with open(file_path, 'r', encoding=encoding, newline='') as f:
          return list(csv.reader(f))


  def main(input_file, output_file):
      """Read a CSV, add translation columns and links, and save the result.

      Steps, in order: read ``input_file``; insert empty columns next to the
      search-term and category columns; label those columns in the header
      row; run the batch translations; turn brand/ASIN cells into links;
      write everything to ``output_file``.

      Args:
          input_file: Path of the CSV file to read.
          output_file: Path the processed CSV is written to.

      Any exception raised along the way is logged and the process is
      terminated with ``sys.exit(1)``.
      """
      try:
          # Read CSV with proper encoding. This previously called
          # ``read_csv``, a name this module neither defines nor imports, so
          # every run failed with a NameError.
          data = _read_csv(input_file)
          # Column indices to process
          search_term_index = 1  # search-term column
          brand_indices = [2, 3, 4]  # brand columns
          asin_indices = [7, 11, 15]  # ASIN columns
          category_indices = [5, 6, 7]  # category columns
          # NOTE(review): index 7 is listed as both an ASIN and a category
          # column, and none of these indices are shifted after the columns
          # below are inserted -- confirm against the real sheet layout.
          # Insert empty columns to hold the translations
          insert_indices = [search_term_index] + category_indices
          data = insert_empty_columns(data, insert_indices)
          # Update the header row
          # NOTE(review): the search-term header cell is overwritten while the
          # category headers are inserted -- confirm whether
          # insert_empty_columns already widens row 0.
          data[0][search_term_index + 1] = "中文翻译"
          for index in category_indices:
              data[0].insert(index + 1, "中文翻译")
          # Run the translations
          data = process_batch_translations(data, search_term_index, category_indices)
          # Add brand and ASIN links
          data = add_brand_asin_links(data, brand_indices, asin_indices)
          # Save the processed data
          save_csv(data, output_file)
          logger.info(f"Successfully processed and saved to {output_file}")
      except Exception as e:
          logger.error(f"Error processing file: {e}")
          sys.exit(1)
  if __name__ == "__main__":
      # Sample run: the input and the processed output both live in the
      # relative "temp" directory.
      base_dir = Path('temp')
      main(
          input_file=base_dir / "测试.csv",
          output_file=base_dir / "processed_测试.csv",
      )
  66. main(input_file, output_file)