|
|
@@ -1,5 +1,5 @@
|
|
|
import pandas as pd
|
|
|
-from typing import List
|
|
|
+from typing import List, Optional
|
|
|
import logging
|
|
|
from mylib.logging_config import setup_logging
|
|
|
|
|
|
@@ -21,19 +21,23 @@ class ExcelProcessor:
|
|
|
index = index * 26 + (ord(char) - ord('A') + 1)
|
|
|
return index - 1
|
|
|
|
|
|
- def read_file(self, file_path: str) -> pd.DataFrame:
|
|
|
+ def read_file(self, file_path: str, header_row: Optional[int] = 0) -> pd.DataFrame:
|
|
|
"""
|
|
|
读取文件并返回DataFrame
|
|
|
支持Excel和CSV文件
|
|
|
+
|
|
|
+ Args:
|
|
|
+ file_path (str): 文件路径
|
|
|
+ header_row (Optional[int]): 表头所在行号,从0开始计数; 传入 None 表示文件没有表头行(列名将自动编号). Defaults to 0.
|
|
|
"""
|
|
|
try:
|
|
|
if file_path.endswith('.csv'):
|
|
|
- df = pd.read_csv(file_path)
|
|
|
- logger.info(f"成功读取CSV文件: {file_path}")
|
|
|
+ df = pd.read_csv(file_path, header=header_row)
|
|
|
+ logger.info(f"成功读取CSV文件: {file_path}, 表头行: {header_row}")
|
|
|
else:
|
|
|
# 对于Excel文件,指定engine参数
|
|
|
- df = pd.read_excel(file_path, engine='openpyxl')
|
|
|
- logger.info(f"成功读取Excel文件: {file_path}")
|
|
|
+ df = pd.read_excel(file_path, engine='openpyxl', header=header_row)
|
|
|
+ logger.info(f"成功读取Excel文件: {file_path}, 表头行: {header_row}")
|
|
|
return df
|
|
|
except Exception as e:
|
|
|
logger.error(f"读取文件失败: {file_path}, 错误: {str(e)}")
|
|
|
@@ -95,6 +99,6 @@ if __name__ == '__main__':
|
|
|
# 测试代码
|
|
|
processor = ExcelProcessor()
|
|
|
# 测试CSV文件
|
|
|
- df = processor.read_file('/home/mrh/code/excel_tool/temp/测试.csv.utf8.csv')
|
|
|
+ df = processor.read_file('/home/mrh/code/excel_tool/temp/测试.csv.utf8.csv', header_row=1)
|
|
|
df = processor.insert_column(df, 'B', 'New Column')
|
|
|
processor.save_file(df, '/home/mrh/code/excel_tool/temp/测试_process.csv')
|