| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import csv
- import chardet
- import logging
- import sys
- from pathlib import Path
- from ai_trans import translate_sentences
- from brand_add_url_link import create_hyperlink
# Configure logging: every record goes both to a log file and to the console.
# The file handler is pinned to UTF-8 because the messages include the
# (non-ASCII) search terms and their translations; relying on the locale's
# default codec can make those records fail to encode.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('process_data.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
def detect_encoding(file_path):
    """Return chardet's encoding guess for the file at ``file_path``.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    ``'encoding'`` entry of its result is returned as-is.

    Any failure while reading or detecting is logged and terminates the
    process with exit status 1.
    """
    try:
        with open(file_path, 'rb') as handle:
            guess = chardet.detect(handle.read())
        return guess['encoding']
    except Exception as e:
        logger.error(f"Error detecting encoding for {file_path}: {e}")
        sys.exit(1)
def read_csv(file_path):
    """Read a CSV file into a list of rows, trying several encodings.

    The encoding reported by ``detect_encoding`` (when there is one) is tried
    first, followed by utf-8-sig, gb18030, shift_jis and euc-jp. The first
    encoding that reads the whole file without error wins.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of rows, each row being the list of cells produced by
        ``csv.reader``.

    If no candidate encoding works, the failure is logged and the process
    terminates with exit status 1.
    """
    encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
    detected_encoding = detect_encoding(file_path)
    logger.info(f"Detected encoding: {detected_encoding}")

    if detected_encoding:
        encodings_to_try.insert(0, detected_encoding)

    already_tried = set()
    for encoding in encodings_to_try:
        # The detected encoding may duplicate a fallback (possibly in a
        # different letter case); retrying it cannot give a new outcome.
        key = encoding.lower()
        if key in already_tried:
            continue
        already_tried.add(key)

        try:
            # newline='' lets the csv module do its own line-ending handling,
            # which is required for quoted fields that contain line breaks.
            with open(file_path, 'r', encoding=encoding, newline='') as f:
                reader = csv.reader(f)
                return list(reader)
        except UnicodeDecodeError:
            continue
        except Exception as e:
            logger.error(f"Error with encoding {encoding}: {e}")
            continue

    logger.error("Failed to read file with all attempted encodings")
    sys.exit(1)
def insert_empty_column(data, column_index):
    """Insert an empty cell right after ``column_index`` in every row.

    Rows are modified in place and the same ``data`` object is returned.
    Any failure is logged and terminates the process with exit status 1.
    """
    try:
        for cells in data:
            cells.insert(column_index + 1, '')
    except Exception as e:
        logger.error(f"Error inserting empty column at index {column_index}: {e}")
        sys.exit(1)
    else:
        return data
def process_row(row, search_term_index):
    """Translate one row's search term and turn the term into an Amazon link.

    Args:
        row: The row's cells. The slot at ``search_term_index + 1`` is
            overwritten with the translation, so it must already exist.
        search_term_index: Position of the search term within ``row``.

    Returns:
        The same ``row`` object, modified in place: the slot after the search
        term holds the first translation, and the search-term cell holds the
        value returned by ``create_hyperlink``.

    Any failure, including an empty translation result or one without a
    ``'translations'`` entry, is logged and terminates the process with exit
    status 1.
    """
    from urllib.parse import quote_plus

    try:
        search_term = row[search_term_index]
        logger.info(f"Translating: {search_term}")
        translation_result = translate_sentences([search_term])
        logger.info(f"Translation result: {translation_result}")

        if not translation_result or 'translations' not in translation_result:
            logger.error(f"Translation failed for '{search_term}': Invalid result format")
            sys.exit(1)

        translations = translation_result['translations']
        if not translations:
            logger.error(f"Translation failed for '{search_term}': No translations in result")
            sys.exit(1)

        # Store the translation in the column reserved for it.
        row[search_term_index + 1] = translations[0]

        # Percent-encode the term so spaces, '&', '#' and non-ASCII characters
        # stay inside the ``k`` query parameter instead of breaking the URL.
        # NOTE(review): assumes create_hyperlink does not encode the URL
        # itself; confirm, since encoding twice would corrupt the link.
        amazon_url = f"https://www.amazon.co.jp/s?k={quote_plus(search_term)}"
        row[search_term_index] = create_hyperlink(search_term, amazon_url)

        return row
    except Exception as e:
        logger.error(f"Error processing row: {e}")
        sys.exit(1)
def save_csv(data, file_path):
    """Write ``data`` (an iterable of rows) to ``file_path`` as CSV.

    The file is written as UTF-8 with a byte-order mark (``utf-8-sig``).
    Any failure is logged and terminates the process with exit status 1.
    """
    try:
        with open(file_path, 'w', encoding='utf-8-sig', newline='') as out:
            csv.writer(out).writerows(data)
    except Exception as e:
        logger.error(f"Error saving CSV to {file_path}: {e}")
        sys.exit(1)
def main(input_file, output_file):
    """Translate the search-term column of a CSV and save the enriched copy.

    Reads ``input_file``, adds a "中文翻译" column immediately after the
    search-term column (the second column), fills it row by row through
    ``process_row`` (which also replaces the search term with a hyperlink),
    and writes the result to ``output_file``.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path the processed CSV is written to.

    Any failure is logged and terminates the process with exit status 1.
    """
    try:
        # Read CSV with proper encoding
        data = read_csv(input_file)

        if not data:
            logger.error(f"No rows found in {input_file}")
            sys.exit(1)

        # Insert empty column for translations after search term column
        search_term_index = 1  # Search term is in second column
        data = insert_empty_column(data, search_term_index)

        # insert_empty_column widened the header row as well, so label that
        # new slot in place. Inserting another cell here would leave the
        # header one column wider than the data and shift every later header
        # away from its column.
        data[0][search_term_index + 1] = "中文翻译"

        # Process each row (skip header row)
        for i, row in enumerate(data[1:], start=1):
            try:
                logger.info(f"\nProcessing row {i}")
                data[i] = process_row(row, search_term_index)
                logger.info(f"Processed row {i} successfully")
            except Exception as e:
                logger.error(f"Error processing row {i}: {str(e)}")
                sys.exit(1)

        # Save processed data
        save_csv(data, output_file)
        logger.info(f"Successfully processed and saved to {output_file}")

    except Exception as e:
        logger.error(f"Error processing file: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # The input CSV and its processed copy both live under ./temp.
    output_dir = Path('temp')
    input_file = output_dir.joinpath("测试.csv")
    output_file = output_dir.joinpath("processed_测试.csv")
    main(input_file, output_file)
|