read_encoding_cvs.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import csv
  2. import csv
  3. import chardet
  4. import sys
  5. import logging
  6. logger = logging.getLogger(__name__)
  7. def detect_encoding(file_path):
  8. try:
  9. with open(file_path, 'rb') as f:
  10. raw_data = f.read()
  11. result = chardet.detect(raw_data)
  12. return result['encoding']
  13. except Exception as e:
  14. logger.error(f"Error detecting encoding for {file_path}: {e}")
  15. sys.exit(1)
  16. def read_csv(file_path, to_encode=''):
  17. encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
  18. detected_encoding = detect_encoding(file_path)
  19. logger.info(f"Detected encoding: {detected_encoding}")
  20. if detected_encoding:
  21. encodings_to_try.insert(0, detected_encoding)
  22. for encoding in encodings_to_try:
  23. try:
  24. with open(file_path, 'r', encoding=encoding) as f:
  25. reader = csv.reader(f)
  26. return list(reader)
  27. except UnicodeDecodeError:
  28. continue
  29. except Exception as e:
  30. logger.error(f"Error with encoding {encoding}: {e}")
  31. continue
  32. logger.error("Failed to read file with all attempted encodings")
  33. sys.exit(1)