瀏覽代碼

feat: add header_row parameter to specify table header row in read_file

mrh (aider) 1 年之前
父節點
當前提交
c68ddb7eab
共有 1 個文件被更改,包括 11 次插入和 7 次删除
  1. 11 7
      function_calling/excel_processor.py

+ 11 - 7
function_calling/excel_processor.py

@@ -1,5 +1,5 @@
 import pandas as pd
-from typing import List
+from typing import List, Optional
 import logging
 from mylib.logging_config import setup_logging
 
@@ -21,19 +21,23 @@ class ExcelProcessor:
             index = index * 26 + (ord(char) - ord('A') + 1)
         return index - 1
 
-    def read_file(self, file_path: str) -> pd.DataFrame:
+    def read_file(self, file_path: str, header_row: Optional[int] = 0) -> pd.DataFrame:
         """
         读取文件并返回DataFrame
         支持Excel和CSV文件
+        
+        Args:
+            file_path (str): 文件路径
+            header_row (int, optional): 表头所在行号,从0开始计数. Defaults to 0.
         """
         try:
             if file_path.endswith('.csv'):
-                df = pd.read_csv(file_path)
-                logger.info(f"成功读取CSV文件: {file_path}")
+                df = pd.read_csv(file_path, header=header_row)
+                logger.info(f"成功读取CSV文件: {file_path}, 表头行: {header_row}")
             else:
                 # 对于Excel文件,指定engine参数
-                df = pd.read_excel(file_path, engine='openpyxl')
-                logger.info(f"成功读取Excel文件: {file_path}")
+                df = pd.read_excel(file_path, engine='openpyxl', header=header_row)
+                logger.info(f"成功读取Excel文件: {file_path}, 表头行: {header_row}")
             return df
         except Exception as e:
             logger.error(f"读取文件失败: {file_path}, 错误: {str(e)}")
@@ -95,6 +99,6 @@ if __name__ == '__main__':
     # 测试代码
     processor = ExcelProcessor()
     # 测试CSV文件
-    df = processor.read_file('/home/mrh/code/excel_tool/temp/测试.csv.utf8.csv')
+    df = processor.read_file('/home/mrh/code/excel_tool/temp/测试.csv.utf8.csv', header_row=1)
     df = processor.insert_column(df, 'B', 'New Column')
     processor.save_file(df, '/home/mrh/code/excel_tool/temp/测试_process.csv')