Parcourir la source

feat: add search_headers method to ExcelProcessor for keyword search in headers

mrh (aider) il y a 1 an
Parent
commit
764ecd3a50
1 fichier modifié avec 47 ajouts et 1 suppression
  1. 47 1
      function_calling/excel_processor.py

+ 47 - 1
function_calling/excel_processor.py

@@ -1,5 +1,5 @@
 import pandas as pd
-from typing import List, Optional
+from typing import List, Optional, Union, Tuple
 import logging
 from mylib.logging_config import setup_logging
 
@@ -23,6 +23,48 @@ class ExcelProcessor:
             index = index * 26 + (ord(char) - ord('A') + 1)
         return index - 1
 
+    def _index_to_column_letter(self, index: int) -> str:
+        """Convert a 0-based column index to its Excel column letters (0=A, 1=B, ..., 25=Z, 26=AA); raises ValueError if index is negative."""
+        if index < 0:
+            raise ValueError("列索引不能为负数")
+        letters = []
+        while index >= 0:
+            letters.append(chr(ord('A') + (index % 26)))  # least-significant letter first
+            index = index // 26 - 1  # the -1 is needed because there is no "zero" letter: 26 must map to AA, not BA
+        return ''.join(reversed(letters))
+
+    def search_headers(self, keywords: Union[str, List[str]]) -> List[Tuple[str, str]]:
+        """
+        Search the column headers of self.df for keywords and return each matching column with its Excel column letter.
+        
+        Args:
+            keywords (Union[str, List[str]]): A single keyword or a list of keywords to look for.
+            
+        Returns:
+            List[Tuple[str, str]]: Matching columns as [(column name, column letter), ...], in column order.
+        """
+        try:
+            # Normalize a single string into a one-element list
+            if isinstance(keywords, str):
+                keywords = [keywords]
+                
+            # Collect all column labels
+            columns = self.df.columns.tolist()
+            
+            # Keep columns whose header contains any keyword (case-insensitive substring match)
+            matches = []
+            for idx, col in enumerate(columns):
+                if any(keyword.lower() in str(col).lower() for keyword in keywords):
+                    col_letter = self._index_to_column_letter(idx)  # letter is derived from the column's position
+                    matches.append((col, col_letter))  # the original label is stored, not its str() form
+                    
+            logger.info(f"搜索关键词 {keywords} 找到 {len(matches)} 个匹配列")
+            return matches
+            
+        except Exception as e:
+            logger.error(f"搜索表头失败: {str(e)}")
+            raise  # re-raised after logging, so callers still see the original exception
+
     def read_file(self, file_path: str, header_row: Optional[int] = 0) -> pd.DataFrame:
         """
         读取文件并返回DataFrame
@@ -105,4 +147,8 @@ if __name__ == '__main__':
     # 测试CSV文件
     processor.insert_column('B', '翻译结果')
     
+    # 测试搜索表头
+    matches = processor.search_headers(['名称', '描述'])
+    print(f"找到的匹配列: {matches}")
+    
     # processor.save_file(df, '/home/mrh/code/excel_tool/temp/测试_process.csv')