Răsfoiți Sursa

refactor: Combine CSV processing functions into a single function

mrh (aider) 1 an în urmă
părinte
commit
5847ce4133
1 fișier modificat, cu 14 adăugiri și 34 ștergeri
  1. 14 34
      brand_add_url_link.py

+ 14 - 34
brand_add_url_link.py

@@ -21,15 +21,8 @@ def column_letter_to_index(col_letter):
     """将Excel列字母转换为0基索引"""
     return sum((ord(c) - ord('A') + 1) * (26 ** i) for i, c in enumerate(reversed(col_letter.upper()))) - 1
 
-def read_and_process_need_search_url_csv(input_file, output_file, columns, use_letters, base_url='', start_row=0):
-    """读取CSV文件并处理需要添加搜索链接的列,然后写入新文件"""
-    # 确保输出目录存在
-    output_dir = Path(output_file).parent
-    output_dir.mkdir(parents=True, exist_ok=True)
-    
-    # 使用pandas读取CSV文件
-    df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
-    
+def process_csv(df, columns, use_letters, processor):
+    """处理CSV数据框中的指定列"""
     # 获取要处理的列的索引
     if use_letters:  # 如果使用列字母
         indices = [column_letter_to_index(col) for col in columns]
@@ -37,16 +30,13 @@ def read_and_process_need_search_url_csv(input_file, output_file, columns, use_l
         indices = [int(col) for col in columns]
 
     # 遍历每一行并处理
-    for i in range(start_row, len(df)):
+    for i in range(len(df)):
         row = df.iloc[i].tolist()
-        processed_row = process_row(row, indices, lambda value: create_hyperlink(value, base_url))
+        processed_row = process_row(row, indices, processor)
         df.iloc[i] = processed_row
 
-    # 将处理后的数据写入新的CSV文件
-    df.to_csv(output_file, index=False, encoding='utf-8')
-
-def read_and_process_asin_csv(input_file, output_file, columns, use_letters, start_row=0):
-    """读取CSV文件并处理ASIN列,然后写入新文件"""
+def read_and_process_csv(input_file, output_file, need_search_url_columns, asin_columns, use_letters, base_url, start_row=0):
+    """读取CSV文件,处理需要添加搜索链接的列和ASIN列,然后写入新文件"""
     # 确保输出目录存在
     output_dir = Path(output_file).parent
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -54,26 +44,19 @@ def read_and_process_asin_csv(input_file, output_file, columns, use_letters, sta
     # 使用pandas读取CSV文件
     df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
     
-    # 获取要处理的列的索引
-    if use_letters:  # 如果使用列字母
-        indices = [column_letter_to_index(col) for col in columns]
-    else:  # 如果使用列索引(假设从0开始)
-        indices = [int(col) for col in columns]
-
-    # 遍历每一行并处理
-    for i in range(start_row, len(df)):
-        row = df.iloc[i].tolist()
-        processed_row = process_row(row, indices, create_asin_link)
-        df.iloc[i] = processed_row
-
+    # 处理需要添加搜索链接的列
+    process_csv(df, need_search_url_columns, use_letters, lambda value: create_hyperlink(value, base_url))
+    
+    # 处理ASIN列
+    process_csv(df, asin_columns, use_letters, create_asin_link)
+    
     # 将处理后的数据写入新的CSV文件
     df.to_csv(output_file, index=False, encoding='utf-8')
 
 # 定义要处理的CSV文件路径、输出文件路径以及需要添加超链接的列索引或列字母和亚马逊搜索的基础URL
 output_dir = Path('temp')
 input_csv_path = Path('/home/mrh/code/excel_tool/temp/测试.csv')
-need_search_url_output_csv_path = output_dir / '测试_need_search_url_processed.csv'
-asin_output_csv_path = output_dir / '测试_asin_processed.csv'
+output_csv_path = output_dir / '测试_processed.csv'
 need_search_url_columns = ['B', 'C', 'D', 'E']  # 列
 asin_columns = ['I', 'M', 'Q']   # ASIN列
 use_letters = True  # 是否使用列字母代替索引
@@ -81,7 +64,4 @@ amazon_search_base_url = 'https://www.amazon.co.jp/s?k='
 start_row = 1  # 从第2行开始处理(索引从0开始)
 
 # 执行CSV处理
-read_and_process_need_search_url_csv(input_csv_path, need_search_url_output_csv_path, need_search_url_columns, use_letters, amazon_search_base_url, start_row)
-
-# 使用处理后的文件作为输入来处理ASIN列
-read_and_process_asin_csv(need_search_url_output_csv_path, asin_output_csv_path, asin_columns, use_letters, start_row=start_row)
+read_and_process_csv(input_csv_path, output_csv_path, need_search_url_columns, asin_columns, use_letters, amazon_search_base_url, start_row)