|
|
@@ -29,13 +29,14 @@ def column_letter_to_index(col_letter: str) -> int:
|
|
|
logger.error(f"列字母转换时出错: {e}")
|
|
|
raise
|
|
|
|
|
|
-def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], start_row: int = 2) -> pd.Series:
|
|
|
+def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], start_row: int = 2, header_row: int = 1) -> pd.Series:
|
|
|
"""提取指定列的数据,默认从第3行开始
|
|
|
|
|
|
Args:
|
|
|
df: pandas DataFrame
|
|
|
column_identifier: 要提取的列名或列号(从0开始),也可以是列字母(如 'A', 'B')
|
|
|
start_row: 开始提取的行号,默认为2(第3行)
|
|
|
+ header_row: 标题行号,默认为1(第2行)
|
|
|
|
|
|
Returns:
|
|
|
包含指定列数据的Series
|
|
|
@@ -70,7 +71,7 @@ def extract_column_data(df: pd.DataFrame, column_identifier: Union[str, int], st
|
|
|
logger.error(f"提取列数据时出错: {e}")
|
|
|
raise
|
|
|
|
|
|
-def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]]) -> pd.DataFrame:
|
|
|
+def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]], header_row: int = 1) -> pd.DataFrame:
|
|
|
"""在指定列之后插入空列"""
|
|
|
try:
|
|
|
# 按从大到小排序,防止插入影响后续索引
|
|
|
@@ -95,7 +96,7 @@ def insert_empty_columns(df: pd.DataFrame, column_names: List[Union[str, int]])
|
|
|
logger.error(f"插入空列时出错: {e}")
|
|
|
raise
|
|
|
|
|
|
-def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str = None, n: int = 3) -> pd.DataFrame:
|
|
|
+def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str = None, n: int = 3, header_row: int = 1) -> pd.DataFrame:
|
|
|
"""提取指定行和列开始的样本数据"""
|
|
|
try:
|
|
|
# 确保不超过数据范围
|
|
|
@@ -108,7 +109,7 @@ def extract_sample_data(df: pd.DataFrame, start_row: int = 0, column_name: str =
|
|
|
logger.error(f"提取样本数据时出错: {e}")
|
|
|
raise
|
|
|
|
|
|
-def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2):
|
|
|
+def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2, header_row: int = 1):
|
|
|
"""记录数据详细信息"""
|
|
|
try:
|
|
|
# 记录行号和列号
|
|
|
@@ -125,15 +126,15 @@ def log_data_details(df: pd.DataFrame, search_term_col: str, start_row: int = 2)
|
|
|
|
|
|
def process_batch_translations(df: pd.DataFrame,
|
|
|
search_term_col: str,
|
|
|
- start_row: int = 2) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
|
+ start_row: int = 2, header_row: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
|
"""批量处理搜索词翻译"""
|
|
|
try:
|
|
|
# 首先提取样本数据用于检查
|
|
|
- sample_data = extract_sample_data(df, start_row, search_term_col)
|
|
|
+ sample_data = extract_sample_data(df, start_row, search_term_col, header_row=header_row)
|
|
|
logger.info(f"从第{start_row}行{search_term_col}列开始的样本数据:\n{sample_data}")
|
|
|
|
|
|
# 记录数据详细信息
|
|
|
- log_data_details(df, search_term_col, start_row)
|
|
|
+ log_data_details(df, search_term_col, start_row, header_row)
|
|
|
|
|
|
# 初始化翻译器
|
|
|
translator = OpenAITranslator()
|
|
|
@@ -172,10 +173,10 @@ def main():
|
|
|
df = pd.DataFrame(data[1:], columns=data[0])
|
|
|
|
|
|
# 提取列数据
|
|
|
- extract_column_data(df, 'B', start_row=2) # 示例:从第3行开始提取第2列(即'B'列)的数据
|
|
|
+ extract_column_data(df, 'B', start_row=2, header_row=1) # 示例:从第3行开始提取第2列(即'B'列)的数据
|
|
|
|
|
|
# 插入空列
|
|
|
- df = insert_empty_columns(df, ['B']) # 示例:在'B'列后插入空列
|
|
|
+ df = insert_empty_columns(df, ['B'], header_row=1) # 示例:在'B'列后插入空列
|
|
|
|
|
|
# 处理翻译
|
|
|
# df, _ = process_batch_translations(df, '搜索词')
|