Переглянути джерело

refactor: Split read_and_process_csv into two functions

mrh (aider) 1 рік тому
батько
коміт
88b6785ff2
1 змінений файл: 29 додано та 5 видалено
  1. 29 5
      brand_add_url_link.py

+ 29 - 5
brand_add_url_link.py

@@ -21,8 +21,8 @@ def column_letter_to_index(col_letter):
     """将Excel列字母转换为0基索引"""
     return sum((ord(c) - ord('A') + 1) * (26 ** i) for i, c in enumerate(reversed(col_letter.upper()))) - 1
 
-def read_and_process_csv(input_file, output_file, columns, use_letters, processor, base_url='', start_row=0):
-    """读取CSV文件并处理指定列的数据,然后写入新文件"""
+def read_and_process_need_search_url_csv(input_file, output_file, columns, use_letters, base_url='', start_row=0):
+    """读取CSV文件并处理需要添加搜索链接的列,然后写入新文件"""
     # 确保输出目录存在
     output_dir = Path(output_file).parent
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -39,7 +39,31 @@ def read_and_process_csv(input_file, output_file, columns, use_letters, processo
     # 遍历每一行并处理
     for i in range(start_row, len(df)):
         row = df.iloc[i].tolist()
-        processed_row = process_row(row, indices, processor)
+        processed_row = process_row(row, indices, lambda value: create_hyperlink(value, base_url))
+        df.iloc[i] = processed_row
+
+    # 将处理后的数据写入新的CSV文件
+    df.to_csv(output_file, index=False, encoding='utf-8')
+
+def read_and_process_asin_csv(input_file, output_file, columns, use_letters, start_row=0):
+    """读取CSV文件并处理ASIN列,然后写入新文件"""
+    # 确保输出目录存在
+    output_dir = Path(output_file).parent
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # 使用pandas读取CSV文件
+    df = pd.read_csv(input_file, encoding='cp936', keep_default_na=False)
+    
+    # 获取要处理的列的索引
+    if use_letters:  # 如果使用列字母
+        indices = [column_letter_to_index(col) for col in columns]
+    else:  # 如果使用列索引(假设从0开始)
+        indices = [int(col) for col in columns]
+
+    # 遍历每一行并处理
+    for i in range(start_row, len(df)):
+        row = df.iloc[i].tolist()
+        processed_row = process_row(row, indices, create_asin_link)
         df.iloc[i] = processed_row
 
     # 将处理后的数据写入新的CSV文件
@@ -57,7 +81,7 @@ amazon_search_base_url = 'https://www.amazon.co.jp/s?k='
 start_row = 1  # 从第2行开始处理(索引从0开始)
 
 # 执行CSV处理
-read_and_process_csv(input_csv_path, need_search_url_output_csv_path, need_search_url_columns, use_letters, lambda value: create_hyperlink(value, amazon_search_base_url), amazon_search_base_url, start_row)
+read_and_process_need_search_url_csv(input_csv_path, need_search_url_output_csv_path, need_search_url_columns, use_letters, amazon_search_base_url, start_row)
 
 # 使用处理后的文件作为输入来处理ASIN列
-read_and_process_csv(need_search_url_output_csv_path, asin_output_csv_path, asin_columns, use_letters, create_asin_link, start_row=start_row)
+read_and_process_asin_csv(need_search_url_output_csv_path, asin_output_csv_path, asin_columns, use_letters, start_row=start_row)