Browse Source

refactor: Use pandas to read and write CSV files

mrh (aider) 1 year ago
parent
commit
867c9fd9ad
1 changed file with 20 additions and 26 deletions
  1. 20 26
      brand_add_url_link.py

+ 20 - 26
brand_add_url_link.py

@@ -1,13 +1,13 @@
-import csv
+import pandas as pd
 from urllib.parse import quote
 from pathlib import Path
 
 def create_hyperlink(value, base_url):
-    """为给定的值创建亚马逊搜索页面的超链接"""
+    """为给定的值创建亚马逊搜索页面的超链接"""
     return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
 
 def create_asin_link(asin):
-    """为ASIN创建产品详情页链接"""
+    """为ASIN创建产品详情页链接"""
     return f'=HYPERLINK("https://www.amazon.co.jp/dp/{asin}", "{asin}")'
 
 def process_row(row, brand_indices, asin_indices, base_url):
@@ -34,31 +34,25 @@ def read_and_process_csv(input_file, output_file, brand_columns, asin_columns, u
     output_dir = Path(output_file).parent
     output_dir.mkdir(parents=True, exist_ok=True)
     
-    with open(input_file, 'r', encoding='utf-8', errors='ignore') as csvfile, \
-            open(output_file, 'w', newline='', encoding='utf-8') as new_csvfile:
-
-        reader = csv.reader(csvfile)
-        writer = csv.writer(new_csvfile)
-        headers = next(reader)  # 读取标题行
-
-        # 写入标题行到新的CSV文件
-        writer.writerow(headers)
+    # 使用pandas读取CSV文件
+    df = pd.read_csv(input_file, encoding='utf-8', keep_default_na=False)
+    
+    # 获取要处理的列的索引
+    if use_letters:  # 如果使用列字母
+        brand_indices = [column_letter_to_index(col) for col in brand_columns]
+        asin_indices = [column_letter_to_index(col) for col in asin_columns]
+    else:  # 如果使用列索引(假设从0开始)
+        brand_indices = [int(col) for col in brand_columns]
+        asin_indices = [int(col) for col in asin_columns]
 
-        # 获取要处理的列的索引
-        if use_letters:  # 如果使用列字母
-            brand_indices = [column_letter_to_index(col) for col in brand_columns]
-            asin_indices = [column_letter_to_index(col) for col in asin_columns]
-        else:  # 如果使用列索引(假设从0开始)
-            brand_indices = [int(col) for col in brand_columns]
-            asin_indices = [int(col) for col in asin_columns]
+    # 遍历每一行并处理
+    for i in range(start_row, len(df)):
+        row = df.iloc[i].tolist()
+        processed_row = process_row(row, brand_indices, asin_indices, base_url)
+        df.iloc[i] = processed_row
 
-        # 遍历每一行并处理
-        for i, row in enumerate(reader):
-            if i < start_row:
-                writer.writerow(row)  # 直接写入未处理的行
-            else:
-                processed_row = process_row(row, brand_indices, asin_indices, base_url)
-                writer.writerow(processed_row)
+    # 将处理后的数据写入新的CSV文件
+    df.to_csv(output_file, index=False, encoding='utf-8')
 
 # 定义要处理的CSV文件路径、输出文件路径以及需要添加超链接的列索引或列字母和亚马逊搜索的基础URL
 output_dir = Path('temp')