Browse Source

feat: Add header_row parameter to specify title row in CSV processing functions

mrh (aider) 1 year ago
parent
commit
5946e895de
1 changed file with 10 additions and 9 deletions
  1. 10 9
      mylib/translate_utils.py

+ 10 - 9
mylib/translate_utils.py

@@ -29,13 +29,14 @@ def column_letter_to_index(col_letter: str) -> int:
         logger.error(f"列字母转换时出错: {e}")
         raise
 
-def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], start_row: int = 2) -> pd.Series:
+def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], start_row: int = 2, header_row: int = 1) -> pd.Series:
     """提取指定列的数据,默认从第3行开始
     
     Args:
         df: pandas DataFrame
         column_identifier: 要提取的列名或列号(从0开始),也可以是列字母(如 'A', 'B')
         start_row: 开始提取的行号,默认为2(第3行)
+        header_row: 标题行号,默认为1(第2行)
     
     Returns:
         包含指定列数据的Series
@@ -70,7 +71,7 @@ def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], st
         logger.error(f"提取列数据时出错: {e}")
         raise
 
-def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]]) -> pd.DataFrame:
+def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]], header_row: int = 1) -> pd.DataFrame:
     """在指定列之后插入空列"""
     try:
         # 按从大到小排序,防止插入影响后续索引
@@ -95,7 +96,7 @@ def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]])
         logger.error(f"插入空列时出错: {e}")
         raise
 
-def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str = None, n: int = 3) -> pd.DataFrame:
+def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str = None, n: int = 3, header_row: int = 1) -> pd.DataFrame:
     """提取指定行和列开始的样本数据"""
     try:
         # 确保不超过数据范围
@@ -108,7 +109,7 @@ def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str =
         logger.error(f"提取样本数据时出错: {e}")
         raise
 
-def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2):
+def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2, header_row: int = 1):
     """记录数据详细信息"""
     try:
         # 记录行号和列号
@@ -125,15 +126,15 @@ def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2)
 
 def process_batch_translations(df: pd.DataFrame, 
                              search_term_col: str,
-                             start_row: int = 2) -> Tuple[pd.DataFrame, pd.DataFrame]:
+                             start_row: int = 2, header_row: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """批量处理搜索词翻译"""
     try:
         # 首先提取样本数据用于检查
-        sample_data = extract_sample_data(df, start_row, search_term_col)
+        sample_data = extract_sample_data(df, start_row, search_term_col, header_row=header_row)
         logger.info(f"从第{start_row}行{search_term_col}列开始的样本数据:\n{sample_data}")
         
         # 记录数据详细信息
-        log_data_details(df, search_term_col, start_row)
+        log_data_details(df, search_term_col, start_row, header_row)
         
         # 初始化翻译器
         translator = OpenAITranslator()
@@ -172,10 +173,10 @@ def main():
     df = pd.DataFrame(data[1:], columns=data[0])
     
     # 提取列数据
-    extract_column_data(df, 'B', start_row=2)  # 示例:从第3行开始提取第2列(即'B'列)的数据
+    extract_column_data(df, 'B', start_row=2, header_row=1)  # 示例:从第3行开始提取第2列(即'B'列)的数据
     
     # 插入空列
-    df = insert_empty_columns(df, ['B'])  # 示例:在'B'列后插入空列
+    df = insert_empty_columns(df, ['B'], header_row=1)  # 示例:在'B'列后插入空列
     
     # 处理翻译
     # df, _ = process_batch_translations(df, '搜索词')