|
|
@@ -103,40 +103,10 @@ def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], st
|
|
|
raise
|
|
|
|
|
|
def test_column_extraction():
|
|
|
- """测试列数据提取功能"""
|
|
|
- # 创建测试数据
|
|
|
- test_data = [
|
|
|
- ['', 'Ignore this row'], # 第1行
|
|
|
- ['Col1', 'Col2', 'Col3'], # 第2行(标题行)
|
|
|
- ['1a', '2a', '3a'], # 第3行
|
|
|
- ['1b', '2b', '3b'], # 第4行
|
|
|
- ['1c', '2c', '3c'] # 第5行
|
|
|
- ]
|
|
|
-
|
|
|
- # 创建DataFrame
|
|
|
- df = pd.DataFrame(test_data[2:], columns=test_data[1])
|
|
|
-
|
|
|
- try:
|
|
|
- # 测试提取第二列(Col2),从第三行开始
|
|
|
- result = extract_column_data(df, column_identifier=1, start_row=1, header_row=1)
|
|
|
-
|
|
|
- # 预期结果
|
|
|
- expected = pd.Series(['2b', '2c'], name='Col2')
|
|
|
-
|
|
|
- # 验证结果
|
|
|
- if result.equals(expected):
|
|
|
- print("测试通过!")
|
|
|
- print("提取结果:")
|
|
|
- print(result)
|
|
|
- else:
|
|
|
- print("测试失败!")
|
|
|
- print("预期结果:")
|
|
|
- print(expected)
|
|
|
- print("实际结果:")
|
|
|
- print(result)
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- print(f"测试失败:{e}")
|
|
|
+ output_dir = Path('temp')
|
|
|
+ input_file = output_dir/"测试.csv"
|
|
|
+ output_file = output_dir/"processed_测试.csv"
|
|
|
+
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
test_column_extraction()
|