import csv
import logging
import sys
from typing import List, Optional

import chardet

logger = logging.getLogger(__name__)


def detect_encoding(file_path) -> Optional[str]:
    """Guess the text encoding of a file with chardet.

    The whole file is read in binary mode and handed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect (anything ``open`` accepts).

    Returns:
        The value of the ``'encoding'`` entry in chardet's result. It may be
        falsy when chardet has no guess; callers should handle that.

    Raises:
        SystemExit: With status 1 if the file cannot be read or detection
            fails for any reason; the error is logged first.
    """
    try:
        with open(file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        return result['encoding']
    except Exception as e:
        logger.error("Error detecting encoding for %s: %s", file_path, e)
        sys.exit(1)


def read_csv(file_path, to_encode: str = '') -> List[List[str]]:
    """Read a CSV file into a list of rows, trying several encodings.

    The encoding reported by ``detect_encoding`` (if any) is tried first,
    followed by a fixed fallback list. The first encoding that decodes the
    whole file without error wins.

    Args:
        file_path: Path of the CSV file to read.
        to_encode: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        Every row of the file as a list of string fields, in file order.

    Raises:
        SystemExit: With status 1 if encoding detection fails or if no
            candidate encoding can read the file; the error is logged first.
    """
    encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
    detected_encoding = detect_encoding(file_path)
    logger.info("Detected encoding: %s", detected_encoding)
    if detected_encoding:
        # NOTE(review): the detected encoding takes priority over the
        # fallbacks; a wrong guess that still decodes cleanly will be used
        # as-is -- confirm that is the intended trade-off.
        encodings_to_try.insert(0, detected_encoding)

    # The detector may report a codec that is already in the fallback list
    # (possibly in a different letter case); retrying it would fail the same
    # way, so each codec name is attempted only once.
    attempted = set()
    for encoding in encodings_to_try:
        normalized = encoding.lower()
        if normalized in attempted:
            continue
        attempted.add(normalized)

        try:
            # newline='' is required by the csv module: without it, universal
            # newline translation alters line breaks embedded in quoted
            # fields before the reader ever sees them.
            with open(file_path, 'r', encoding=encoding, newline='') as f:
                return list(csv.reader(f))
        except UnicodeDecodeError:
            # Wrong codec for this file; quietly move on to the next one.
            continue
        except Exception as e:
            logger.error("Error with encoding %s: %s", encoding, e)
            continue

    logger.error("Failed to read file with all attempted encodings")
    sys.exit(1)