| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import pandas as pd
- from dotenv import load_dotenv
- from mylib.pdfzh_translator import OpenAITranslator
- import time
- import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
def translate_csv(input_file, chunk_size=100, max_retries=5, first_row=0):
    """Translate the second column of a CSV file chunk by chunk.

    The file is read with pandas, an empty column named ``'C'`` is inserted
    at position 2, and that column is filled with the result of
    ``OpenAITranslator._batch_translate`` applied to the values of the column
    at position 1. After every chunk the whole frame is written to
    ``<stem>_translated<suffix>`` in the same directory as the input, so
    partial progress is kept if a later chunk fails.

    Args:
        input_file: Path of the CSV file to translate.
        chunk_size: Number of rows passed to the translator per batch.
        max_retries: Number of times a failing chunk is retried before the
            error is re-raised.
        first_row: Positional index of the first data row to translate.
            ``pd.read_csv`` already consumes the header line, so the default
            of 0 is the first data row. Pass 1 to skip one extra row.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``first_row`` is
            negative.
        Exception: The error from the last attempt, once a single chunk has
            failed more than ``max_retries`` times in a row.
    """
    from pathlib import Path

    # A non-positive chunk size would never advance the loop below.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if first_row < 0:
        raise ValueError(f"first_row must be non-negative, got {first_row}")

    # Load environment variables (e.g. from a .env file).
    load_dotenv()

    # Read the CSV file; the first line becomes the column header.
    df = pd.read_csv(input_file)

    # Insert an empty column to the right of the second column.
    df.insert(2, 'C', '')

    # Create the translator.
    translator = OpenAITranslator()

    # Build the output path from the final suffix only, so names such as
    # "x.csv.utf8.csv" are handled and the input file is never overwritten.
    source = Path(input_file)
    output_file = source.with_name(f"{source.stem}_translated{source.suffix}")

    total_rows = len(df)
    start_row = first_row
    failed_attempts = 0  # consecutive failures of the current chunk

    while start_row < total_rows:
        end_row = min(start_row + chunk_size, total_rows)
        logger.info("Processing rows %s to %s", start_row, end_row)

        try:
            # Values of the source column for the current chunk.
            data_to_translate = df.iloc[start_row:end_row, 1].tolist()

            # Translate the whole chunk in one call.
            translated_texts = translator._batch_translate(data_to_translate)

            # Store the translations in the new column.
            df.iloc[start_row:end_row, 2] = translated_texts

            # Save intermediate results after every chunk.
            df.to_csv(output_file, index=False)
        except Exception as e:
            failed_attempts += 1
            logger.error(
                "Error processing rows %s-%s: %s", start_row, end_row, e
            )
            # Give up instead of looping forever on a permanent error.
            if failed_attempts > max_retries:
                raise
            logger.info("Retrying after 5 seconds...")
            time.sleep(5)
            continue

        # Chunk succeeded: move on and reset the failure counter.
        failed_attempts = 0
        start_row = end_row

        # Pause between chunks to avoid rate limiting; not needed after the
        # last one.
        if start_row < total_rows:
            time.sleep(1)

    logger.info("Translation completed successfully")
if __name__ == '__main__':
    import sys

    # Take the CSV path from the first command-line argument when given;
    # otherwise fall back to the original development path.
    default_input = '/home/mrh/code/excel_tool/temp/测试.csv.utf8.csv'
    input_file = sys.argv[1] if len(sys.argv) > 1 else default_input
    translate_csv(input_file)
|