瀏覽代碼

feat: add CSV processing with translation and Amazon link generation

Your Name (aider) 1 年之前
父節點
當前提交
0c5845a90a
共有 1 個文件被更改,包括 71 次插入和 0 次删除
  1. 71 0
      process_data.py

+ 71 - 0
process_data.py

@@ -0,0 +1,71 @@
+import csv
+from urllib.parse import quote_plus
+
+import chardet
+
+from ai_trans import translate_sentences
+from brand_add_url_link import create_hyperlink
+
+def detect_encoding(file_path):
+    """Guess the text encoding of the file at ``file_path``.
+
+    The whole file is read as raw bytes and passed to ``chardet.detect``.
+
+    Args:
+        file_path: Path of the file to inspect.
+
+    Returns:
+        The value stored under the ``'encoding'`` key of chardet's result.
+        NOTE(review): callers in this file treat a falsy value as "nothing
+        detected" -- confirm against the chardet documentation.
+    """
+    with open(file_path, 'rb') as stream:
+        detection = chardet.detect(stream.read())
+    return detection['encoding']
+
+def read_csv(file_path):
+    """Read a CSV file into a list of rows, trying several encodings.
+
+    The encoding reported by ``detect_encoding`` (when there is one) is tried
+    first, followed by a fixed list of fallbacks. The first encoding that
+    decodes and parses the whole file wins; a clean decode does not prove
+    that the encoding was actually the right one.
+
+    Args:
+        file_path: Path of the CSV file to read.
+
+    Returns:
+        A list of rows, each row being the list of strings produced by
+        ``csv.reader``.
+
+    Raises:
+        OSError: If the file cannot be opened. This is not retried, because
+            switching encodings cannot fix an I/O problem.
+        ValueError: If none of the candidate encodings could read the file.
+    """
+    detected_encoding = detect_encoding(file_path)
+    print(f"Detected encoding: {detected_encoding}")
+
+    candidates = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
+    if detected_encoding:
+        candidates.insert(0, detected_encoding)
+
+    already_tried = set()
+    for encoding in candidates:
+        # The detected encoding may duplicate a fallback (possibly with a
+        # different letter case); do not read the file twice for it.
+        normalized = encoding.lower()
+        if normalized in already_tried:
+            continue
+        already_tried.add(normalized)
+
+        try:
+            # newline='' lets the csv module handle line endings itself, so
+            # quoted fields containing newlines are parsed correctly.
+            with open(file_path, 'r', encoding=encoding, newline='') as f:
+                return list(csv.reader(f))
+        except UnicodeDecodeError:
+            # Wrong guess; silently move on to the next candidate.
+            continue
+        except (UnicodeError, LookupError, csv.Error) as e:
+            # Unknown codec name or a file that does not parse under this
+            # encoding: report it and keep trying the remaining candidates.
+            print(f"Error with encoding {encoding}: {e}")
+
+    raise ValueError("Failed to read file with all attempted encodings")
+
+def process_row(row, search_term_index):
+    """Add a translation column and an Amazon search link to one CSV row.
+
+    The row is modified in place: the translation returned by
+    ``translate_sentences`` is inserted right after the search-term column,
+    and the search-term cell itself is replaced by the result of
+    ``create_hyperlink(search_term, amazon_url)``.
+
+    Args:
+        row: List of cell values for one CSV row.
+        search_term_index: Index of the column holding the search term.
+
+    Returns:
+        The same ``row`` list. Rows too short to contain the search-term
+        column (for example blank lines, which ``csv.reader`` yields as
+        ``[]``) are returned unchanged.
+    """
+    # Blank or truncated rows have no search term to work on; indexing them
+    # would raise IndexError and abort the whole run.
+    if len(row) <= search_term_index:
+        return row
+
+    # Add translation column after search term
+    search_term = row[search_term_index]
+    translated = translate_sentences([search_term])[0]
+    row.insert(search_term_index + 1, translated)
+
+    # Add Amazon search link. The term is percent-encoded so that spaces,
+    # '&', '#' and non-ASCII text cannot break or truncate the query string.
+    amazon_url = f"https://www.amazon.co.jp/s?k={quote_plus(search_term)}"
+    row[search_term_index] = create_hyperlink(search_term, amazon_url)
+
+    return row
+
+def save_csv(data, file_path):
+    """Write the rows in ``data`` to ``file_path`` as a CSV file.
+
+    The file is encoded as ``utf-8-sig`` (UTF-8 with a byte-order mark),
+    presumably so that spreadsheet tools recognise the encoding -- confirm
+    with the consumers of the output. ``newline=''`` leaves line-ending
+    handling to the csv writer.
+
+    Args:
+        data: Iterable of rows, each row an iterable of cell values.
+        file_path: Destination path; an existing file is overwritten.
+    """
+    with open(file_path, 'w', encoding='utf-8-sig', newline='') as out_file:
+        csv.writer(out_file).writerows(data)
+
+def main(input_file, output_file):
+    """Run the full pipeline: read a CSV, enrich its rows, save the result.
+
+    Every row after the first (header) row is passed through
+    ``process_row``; the header row is written back untouched.
+
+    Args:
+        input_file: Path of the CSV file to read.
+        output_file: Path the processed CSV is written to.
+
+    Raises:
+        Exception: Any error from reading, processing or saving is printed
+            and then re-raised unchanged.
+    """
+    search_term_index = 1  # Search term is in second column
+    try:
+        # Read CSV with proper encoding
+        data = read_csv(input_file)
+
+        # Rewrite everything after the header row with its processed form
+        data[1:] = [process_row(row, search_term_index) for row in data[1:]]
+
+        # Save processed data
+        save_csv(data, output_file)
+        print(f"Successfully processed and saved to {output_file}")
+    except Exception as e:
+        print(f"Error processing file: {e}")
+        raise
+
+if __name__ == "__main__":
+    # Script entry point: process the hard-coded sample file and write the
+    # result next to it under a "processed_" prefix.
+    input_file, output_file = "测试.csv", "processed_测试.csv"
+    main(input_file, output_file)