|
@@ -1,7 +1,7 @@
|
|
|
import csv
|
|
import csv
|
|
|
import chardet
|
|
import chardet
|
|
|
import logging
|
|
import logging
|
|
|
-from typing import List
|
|
|
|
|
|
|
+from typing import List, Optional
|
|
|
import os
|
|
import os
|
|
|
from mylib.logging_config import setup_logging
|
|
from mylib.logging_config import setup_logging
|
|
|
setup_logging()
|
|
setup_logging()
|
|
@@ -46,11 +46,12 @@ def detect_encoding(file_path: str, sample_size: int = 100000) -> str:
|
|
|
return 'shift_jis' # 返回日文编码作为默认值
|
|
return 'shift_jis' # 返回日文编码作为默认值
|
|
|
|
|
|
|
|
|
|
|
|
|
-def read_csv(file_path: str) -> List[List[str]]:
|
|
|
|
|
|
|
+def read_csv(file_path: str, specified_encoding: Optional[str] = None) -> List[List[str]]:
|
|
|
"""读取CSV文件并转换为utf-8编码
|
|
"""读取CSV文件并转换为utf-8编码
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|
|
|
file_path: 文件路径
|
|
file_path: 文件路径
|
|
|
|
|
+ specified_encoding: 用户指定的编码方式
|
|
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
|
包含CSV数据的二维列表
|
|
包含CSV数据的二维列表
|
|
@@ -70,10 +71,14 @@ def read_csv(file_path: str) -> List[List[str]]:
|
|
|
'latin1'
|
|
'latin1'
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
- # 先尝试检测编码
|
|
|
|
|
- detected_encoding = detect_encoding(file_path)
|
|
|
|
|
- if detected_encoding:
|
|
|
|
|
- encodings_to_try.insert(0, detected_encoding)
|
|
|
|
|
|
|
+ # 如果用户指定了编码,优先使用
|
|
|
|
|
+ if specified_encoding:
|
|
|
|
|
+ encodings_to_try.insert(0, specified_encoding)
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 先尝试检测编码
|
|
|
|
|
+ detected_encoding = detect_encoding(file_path)
|
|
|
|
|
+ if detected_encoding:
|
|
|
|
|
+ encodings_to_try.insert(0, detected_encoding)
|
|
|
|
|
|
|
|
# 尝试用不同编码读取文件
|
|
# 尝试用不同编码读取文件
|
|
|
for encoding in encodings_to_try:
|
|
for encoding in encodings_to_try:
|