|
@@ -103,45 +103,10 @@ def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], st
|
|
|
raise
|
|
raise
|
|
|
|
|
|
|
|
def test_column_extraction():
|
|
def test_column_extraction():
|
|
|
- """测试列提取功能"""
|
|
|
|
|
- # 创建测试数据
|
|
|
|
|
- test_data = [
|
|
|
|
|
- ['', 'Ignore this row'], # 第1行
|
|
|
|
|
- ['Col1', 'Col2', 'Col3'], # 第2行(标题行)
|
|
|
|
|
- ['1a', '2a', '3a'], # 第3行
|
|
|
|
|
- ['1b', '2b', '3b'], # 第4行
|
|
|
|
|
- ['1c', '2c', '3c'] # 第5行
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- # 创建临时目录
|
|
|
|
|
output_dir = Path('temp')
|
|
output_dir = Path('temp')
|
|
|
- output_dir.mkdir(exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- # 保存测试文件
|
|
|
|
|
- test_file = output_dir / 'test.csv'
|
|
|
|
|
- with open(test_file, 'w', encoding='utf-8-sig') as f:
|
|
|
|
|
- writer = csv.writer(f)
|
|
|
|
|
- writer.writerows(test_data)
|
|
|
|
|
-
|
|
|
|
|
- try:
|
|
|
|
|
- # 读取CSV文件,指定标题行在第2行(索引1)
|
|
|
|
|
- df = read_csv_with_header(test_file, header_row=1)
|
|
|
|
|
-
|
|
|
|
|
- # 提取第2列(索引1),从第3行(索引2)开始
|
|
|
|
|
- column_data = extract_column_data(df, column_identifier=1, start_row=2)
|
|
|
|
|
-
|
|
|
|
|
- # 验证结果
|
|
|
|
|
- expected = pd.Series(['2b', '2c'], index=[3, 4], name='Col2')
|
|
|
|
|
- pd.testing.assert_series_equal(column_data, expected)
|
|
|
|
|
-
|
|
|
|
|
- print("测试通过!")
|
|
|
|
|
- print("提取的列数据:")
|
|
|
|
|
- print(column_data)
|
|
|
|
|
-
|
|
|
|
|
- finally:
|
|
|
|
|
- # 清理测试文件
|
|
|
|
|
- test_file.unlink()
|
|
|
|
|
- output_dir.rmdir()
|
|
|
|
|
|
|
+ input_file = output_dir/"测试.csv"
|
|
|
|
|
+ output_file = output_dir/"processed_测试.csv"
|
|
|
|
|
+
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
|
test_column_extraction()
|
|
test_column_extraction()
|