test_encoding.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. import csv
  2. import chardet
  def detect_encoding(file_path):
      """Guess the text encoding of a file from its raw bytes.

      The whole file is read in binary mode and passed to ``chardet.detect``.

      Args:
          file_path: Path of the file to inspect.

      Returns:
          The value stored under the ``'encoding'`` key of chardet's result.
      """
      with open(file_path, 'rb') as stream:
          guess = chardet.detect(stream.read())
      return guess['encoding']
  def read_csv(file_path):
      """Read a CSV file, trying several text encodings until one works.

      The encoding reported by ``detect_encoding`` (if any) is tried first,
      followed by a fixed list of fallbacks. The first five rows of the
      successfully parsed file are printed as a preview.

      Args:
          file_path: Path of the CSV file to read.

      Returns:
          A list of rows, each row being the list of strings produced by
          ``csv.reader``.

      Raises:
          ValueError: If none of the candidate encodings could decode and
              parse the file.
          OSError: If the file cannot be opened or read; I/O failures are not
              encoding problems, so they are not retried with other codecs.
      """
      detected_encoding = detect_encoding(file_path)
      print(f"Detected encoding: {detected_encoding}")

      # Detected encoding goes to the front of the list; fallbacks follow.
      candidates = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
      if detected_encoding:
          candidates.insert(0, detected_encoding)

      # The detector may spell a codec differently from the fallback list
      # (case, '_' versus '-'); compare on a normalized spelling so the same
      # codec is not attempted twice.
      seen = set()
      encodings_to_try = []
      for name in candidates:
          key = name.lower().replace('_', '-')
          if key not in seen:
              seen.add(key)
              encodings_to_try.append(name)

      for encoding in encodings_to_try:
          print(f"Trying encoding: {encoding}")
          try:
              # newline='' is required by the csv module so that line breaks
              # embedded in quoted fields are interpreted correctly.
              with open(file_path, 'r', encoding=encoding, newline='') as f:
                  rows = list(csv.reader(f))
          except UnicodeDecodeError:
              print(f"Failed with encoding: {encoding}")
              continue
          except (LookupError, UnicodeError, csv.Error) as e:
              # LookupError: the codec name is unknown to Python.
              # csv.Error: the decoded text could not be parsed as CSV.
              print(f"Error with encoding {encoding}: {e}")
              continue

          # Print first 5 rows. This happens outside the try block so that a
          # console encoding error while printing is not mistaken for a
          # failure to decode the file.
          for i, row in enumerate(rows[:5]):
              print(f"Row {i}: {row}")
          return rows

      raise ValueError("Failed to read file with all attempted encodings")
  if __name__ == "__main__":
      # Script entry point: try to load the sample CSV and report the outcome.
      input_file = "测试.csv"
      print(f"Testing file: {input_file}")

      try:
          data = read_csv(input_file)
          print("\nFile read successfully!")
      except Exception as err:
          # Any failure is reported on stdout instead of as a traceback.
          print(f"\nError reading file: {err}")