|
|
@@ -1,5 +1,4 @@
|
|
|
import csv
|
|
|
-import csv
|
|
|
import chardet
|
|
|
import sys
|
|
|
import logging
|
|
|
@@ -18,8 +17,7 @@ def detect_encoding(file_path):
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
-
|
|
|
-def read_csv(file_path, to_encode=''):
|
|
|
+def read_csv(file_path, to_encode='utf-8'):
|
|
|
encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
|
|
|
detected_encoding = detect_encoding(file_path)
|
|
|
logger.info(f"Detected encoding: {detected_encoding}")
|
|
|
@@ -31,7 +29,16 @@ def read_csv(file_path, to_encode=''):
|
|
|
try:
|
|
|
with open(file_path, 'r', encoding=encoding) as f:
|
|
|
reader = csv.reader(f)
|
|
|
- return list(reader)
|
|
|
+ data = list(reader)
|
|
|
+
|
|
|
+ # Convert to UTF-8 if needed
|
|
|
+ if encoding.lower() != to_encode.lower():
|
|
|
+ data = [
|
|
|
+ [cell.encode('utf-8').decode('utf-8') if isinstance(cell, str) else cell
|
|
|
+ for cell in row]
|
|
|
+ for row in data
|
|
|
+ ]
|
|
|
+ return data
|
|
|
except UnicodeDecodeError:
|
|
|
continue
|
|
|
except Exception as e:
|
|
|
@@ -39,4 +46,4 @@ def read_csv(file_path, to_encode=''):
|
|
|
continue
|
|
|
|
|
|
logger.error("Failed to read file with all attempted encodings")
|
|
|
- sys.exit(1)
|
|
|
+ sys.exit(1)
|