| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- import pandas as pd
- from urllib.parse import quote
- from pathlib import Path
- def create_hyperlink(value, base_url):
- """为给定的值创建亚马逊搜索页面的超链接"""
- return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
- def create_asin_link(asin):
- """为ASIN创建产品详情页链接"""
- return f'=HYPERLINK("https://www.amazon.co.jp/dp/{asin}", "{asin}")'
- def process_row(row, indices, processor):
- """为指定索引的单元格应用处理器函数"""
- for index in indices:
- if index < len(row): # 确保索引在范围内
- row[index] = processor(row[index])
- return row
- def column_letter_to_index(col_letter):
- """将Excel列字母转换为0基索引"""
- return sum((ord(c) - ord('A') + 1) * (26 ** i) for i, c in enumerate(reversed(col_letter.upper()))) - 1
- def process_csv(df, columns, use_letters, processor, start_row=0):
- """处理CSV数据框中的指定列"""
- # 获取要处理的列的索引
- if use_letters: # 如果使用列字母
- indices = [column_letter_to_index(col) for col in columns]
- else: # 如果使用列索引(假设从0开始)
- indices = [int(col) for col in columns]
- # 遍历每一行并处理
- for i in range(start_row, len(df)):
- row = df.iloc[i].tolist()
- processed_row = process_row(row, indices, processor)
- df.iloc[i] = processed_row
- def read_and_process_csv(input_file, output_file, need_search_url_columns, asin_columns, use_letters, base_url, start_row=0):
- """读取CSV文件,处理需要添加搜索链接的列和ASIN列,然后写入新文件"""
- # 确保输出目录存在
- output_dir = Path(output_file).parent
- output_dir.mkdir(parents=True, exist_ok=True)
-
- # 使用pandas读取CSV文件
- df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
-
- # 处理需要添加搜索链接的列
- process_csv(df, need_search_url_columns, use_letters, lambda value: create_hyperlink(value, base_url), start_row)
-
- # 处理ASIN列
- process_csv(df, asin_columns, use_letters, create_asin_link, start_row + 1)
-
- # 将处理后的数据写入新的CSV文件
- df.to_csv(output_file, index=False, encoding='utf-8')
- # 定义要处理的CSV文件路径、输出文件路径以及需要添加超链接的列索引或列字母和亚马逊搜索的基础URL
- output_dir = Path('temp')
- input_csv_path = Path('/home/mrh/code/excel_tool/temp/测试.csv')
- output_csv_path = output_dir / '测试_processed.csv'
- need_search_url_columns = ['B', 'C', 'D', 'E'] # 列
- asin_columns = ['I', 'M', 'Q'] # ASIN列
- use_letters = True # 是否使用列字母代替索引
- amazon_search_base_url = 'https://www.amazon.co.jp/s?k='
- start_row = 1 # 从第2行开始处理(索引从0开始)
- # 执行CSV处理
- read_and_process_csv(input_csv_path, output_csv_path, need_search_url_columns, asin_columns, use_letters, amazon_search_base_url, start_row)
|