# brand_add_url_link.py
from pathlib import Path
from urllib.parse import quote

import pandas as pd
  4. def create_hyperlink(value, base_url):
  5. """为给定的值创建亚马逊搜索页面的超链接"""
  6. return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
  7. def create_asin_link(asin):
  8. """为ASIN创建产品详情页链接"""
  9. return f'=HYPERLINK("https://www.amazon.co.jp/dp/{asin}", "{asin}")'
  10. def process_row(row, indices, processor):
  11. """为指定索引的单元格应用处理器函数"""
  12. for index in indices:
  13. if index < len(row): # 确保索引在范围内
  14. row[index] = processor(row[index])
  15. return row
  16. def column_letter_to_index(col_letter):
  17. """将Excel列字母转换为0基索引"""
  18. return sum((ord(c) - ord('A') + 1) * (26 ** i) for i, c in enumerate(reversed(col_letter.upper()))) - 1
  19. def read_and_process_need_search_url_csv(input_file, output_file, columns, use_letters, base_url='', start_row=0):
  20. """读取CSV文件并处理需要添加搜索链接的列,然后写入新文件"""
  21. # 确保输出目录存在
  22. output_dir = Path(output_file).parent
  23. output_dir.mkdir(parents=True, exist_ok=True)
  24. # 使用pandas读取CSV文件
  25. df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
  26. # 获取要处理的列的索引
  27. if use_letters: # 如果使用列字母
  28. indices = [column_letter_to_index(col) for col in columns]
  29. else: # 如果使用列索引(假设从0开始)
  30. indices = [int(col) for col in columns]
  31. # 遍历每一行并处理
  32. for i in range(start_row, len(df)):
  33. row = df.iloc[i].tolist()
  34. processed_row = process_row(row, indices, lambda value: create_hyperlink(value, base_url))
  35. df.iloc[i] = processed_row
  36. # 将处理后的数据写入新的CSV文件
  37. df.to_csv(output_file, index=False, encoding='utf-8')
  38. def read_and_process_asin_csv(input_file, output_file, columns, use_letters, start_row=0):
  39. """读取CSV文件并处理ASIN列,然后写入新文件"""
  40. # 确保输出目录存在
  41. output_dir = Path(output_file).parent
  42. output_dir.mkdir(parents=True, exist_ok=True)
  43. # 使用pandas读取CSV文件
  44. df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
  45. # 获取要处理的列的索引
  46. if use_letters: # 如果使用列字母
  47. indices = [column_letter_to_index(col) for col in columns]
  48. else: # 如果使用列索引(假设从0开始)
  49. indices = [int(col) for col in columns]
  50. # 遍历每一行并处理
  51. for i in range(start_row, len(df)):
  52. row = df.iloc[i].tolist()
  53. processed_row = process_row(row, indices, create_asin_link)
  54. df.iloc[i] = processed_row
  55. # 将处理后的数据写入新的CSV文件
  56. df.to_csv(output_file, index=False, encoding='utf-8')
  57. # 定义要处理的CSV文件路径、输出文件路径以及需要添加超链接的列索引或列字母和亚马逊搜索的基础URL
  58. output_dir = Path('temp')
  59. input_csv_path = Path('/home/mrh/code/excel_tool/temp/测试.csv')
  60. need_search_url_output_csv_path = output_dir / '测试_need_search_url_processed.csv'
  61. asin_output_csv_path = output_dir / '测试_asin_processed.csv'
  62. need_search_url_columns = ['B', 'C', 'D', 'E'] # 列
  63. asin_columns = ['I', 'M', 'Q'] # ASIN列
  64. use_letters = True # 是否使用列字母代替索引
  65. amazon_search_base_url = 'https://www.amazon.co.jp/s?k='
  66. start_row = 1 # 从第2行开始处理(索引从0开始)
  67. # 执行CSV处理
  68. read_and_process_need_search_url_csv(input_csv_path, need_search_url_output_csv_path, need_search_url_columns, use_letters, amazon_search_base_url, start_row)
  69. # 使用处理后的文件作为输入来处理ASIN列
  70. read_and_process_asin_csv(need_search_url_output_csv_path, asin_output_csv_path, asin_columns, use_letters, start_row=start_row)