Browse Source

feat: add CSV encoding detection and reading test script

Your Name (aider) 11 months ago
parent
commit
d74f76368b
1 changed file with 45 additions and 0 deletions
  1. 45 0
      test_encoding.py

+ 45 - 0
test_encoding.py

@@ -0,0 +1,45 @@
+import csv
+import chardet
+
+def detect_encoding(file_path):
+    with open(file_path, 'rb') as f:
+        raw_data = f.read()
+        result = chardet.detect(raw_data)
+        return result['encoding']
+
+def read_csv(file_path):
+    encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
+    detected_encoding = detect_encoding(file_path)
+    print(f"Detected encoding: {detected_encoding}")
+    
+    # Add detected encoding to the front of the list
+    if detected_encoding:
+        encodings_to_try.insert(0, detected_encoding)
+    
+    for encoding in encodings_to_try:
+        try:
+            print(f"Trying encoding: {encoding}")
+            with open(file_path, 'r', encoding=encoding) as f:
+                reader = csv.reader(f)
+                rows = list(reader)
+                # Print first 5 rows
+                for i, row in enumerate(rows[:5]):
+                    print(f"Row {i}: {row}")
+                return rows
+        except UnicodeDecodeError:
+            print(f"Failed with encoding: {encoding}")
+            continue
+        except Exception as e:
+            print(f"Error with encoding {encoding}: {e}")
+            continue
+    
+    raise Exception("Failed to read file with all attempted encodings")
+
+if __name__ == "__main__":
+    input_file = "测试.csv"
+    print(f"Testing file: {input_file}")
+    try:
+        data = read_csv(input_file)
+        print("\nFile read successfully!")
+    except Exception as e:
+        print(f"\nError reading file: {e}")