| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- import os
- import logging
- from typing import List, Tuple
- from mylib.pdfzh_translator import OpenAITranslator
- logger = logging.getLogger(__name__)
def insert_empty_columns(data: List[List[str]], column_indices: List[int]) -> List[List[str]]:
    """Insert an empty-string cell right after each given column in every row.

    Args:
        data: Table rows. Each row list is modified in place.
        column_indices: 0-based column positions; an empty cell is inserted at
            ``index + 1`` for each of them. This list is left untouched.

    Returns:
        The same ``data`` object, after the insertions.

    Raises:
        Exception: Any error raised during insertion is logged and re-raised.
    """
    try:
        # Iterate over a sorted copy so the caller's list keeps its order.
        # Going from the highest index down means an insertion never shifts
        # the positions that are still waiting to be processed.
        descending = sorted(column_indices, reverse=True)
        for row in data:
            for index in descending:
                row.insert(index + 1, '')
        return data
    except Exception as e:
        logger.error(f"Error inserting empty columns: {e}")
        raise
def extract_sample_data(data: List[List[str]], n: int = 2, m: int = 2) -> List[List[str]]:
    """Return the first ``m`` cells of each of the first ``n`` rows for inspection.

    The result is built from slices, so it is a new list of new row lists and
    ``data`` itself is not modified. Any exception is logged and re-raised.
    """
    try:
        return [cells[:m] for cells in data[:n]]
    except Exception as e:
        logger.error(f"Error extracting sample data: {e}")
        raise
def process_batch_translations(data: List[List[str]],
                               search_term_index: int,
                               category_indices: List[int],
                               start_row: int = 3) -> Tuple[List[List[str]], List[List[str]]]:
    """Translate the search-term and category columns of ``data`` in batch.

    Rows from ``start_row`` (1-based) onward are processed. For each such row
    the cell at ``search_term_index`` and the cells at every index in
    ``category_indices`` are translated, and each translation is written into
    the cell immediately to the right of its source (``index + 1``). Those
    target cells must already exist; their previous content is overwritten.

    When the ``DEBUG`` environment variable is ``true``, ``1`` or ``t``
    (case-insensitive), no translator is created; every text simply gets a
    fixed test suffix appended instead.

    Args:
        data: Table rows. Processed rows are updated in place.
        search_term_index: 0-based column holding the search term.
        category_indices: 0-based columns holding the category texts.
        start_row: 1-based number of the first row to translate.

    Returns:
        A ``(data, sample_data)`` tuple: the same ``data`` object after the
        update, and the sample returned by ``extract_sample_data`` before any
        cell was modified.

    Raises:
        Exception: Any error is logged and re-raised.
    """
    try:
        # Take the inspection sample before anything is overwritten.
        sample_data = extract_sample_data(data)
        logger.info(f"Sample data extracted for inspection:\n{sample_data}")

        first = start_row - 1
        rows = data[first:]  # new list, but the row objects are shared with data
        per_row = len(category_indices)

        search_terms = [row[search_term_index] for row in rows]

        # Collect categories row by row so that position
        # ``offset * per_row + k`` holds the k-th category of row ``offset``,
        # which is exactly how the write-back loop below reads them.
        categories = [row[index] for row in rows for index in category_indices]

        logger.info("Starting batch translations...")

        if os.getenv('DEBUG', '').lower() in ('true', '1', 't'):
            # DEBUG mode: mock translations, no translator needed.
            search_translations = [f"{text} 翻译测试" for text in search_terms]
            category_translations = [f"{text} 翻译测试" for text in categories]
        else:
            # Normal mode: build the translator only when it is really used.
            # Assumes translate() returns one result per input, in the same
            # order -- TODO confirm against OpenAITranslator.
            translator = OpenAITranslator()
            search_translations = translator.translate(search_terms)
            category_translations = translator.translate(categories)

        logger.info("Batch translations completed")

        for offset, row in enumerate(rows):
            try:
                row[search_term_index + 1] = search_translations[offset]

                base = offset * per_row
                for position, cat_index in enumerate(category_indices):
                    row[cat_index + 1] = category_translations[base + position]
            except Exception as e:
                # Report the 0-based position of the row within data.
                logger.error(f"Error processing row {first + offset}: {e}")
                raise

        return data, sample_data
    except Exception as e:
        logger.error(f"Error in batch translation: {e}")
        raise
def main():
    """Script entry point; currently a placeholder that performs no work."""
    return None
# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
|