translate_utils.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import os
  2. import logging
  3. from typing import List, Tuple
  4. from mylib.pdfzh_translator import OpenAITranslator
  5. logger = logging.getLogger(__name__)
  6. def insert_empty_columns(data: List[List[str]], column_indices: List[int]) -> List[List[str]]:
  7. """在指定列之后插入空列"""
  8. try:
  9. # 按从大到小排序,防止插入影响后续索引
  10. column_indices.sort(reverse=True)
  11. for row in data:
  12. for index in column_indices:
  13. row.insert(index + 1, '')
  14. return data
  15. except Exception as e:
  16. logger.error(f"Error inserting empty columns: {e}")
  17. raise
  18. def extract_sample_data(data: List[List[str]], n: int = 2, m: int = 2) -> List[List[str]]:
  19. """提取前n行m列数据用于检查"""
  20. try:
  21. sample = []
  22. for row in data[:n]:
  23. sample.append(row[:m])
  24. return sample
  25. except Exception as e:
  26. logger.error(f"Error extracting sample data: {e}")
  27. raise
  28. def process_batch_translations(data: List[List[str]],
  29. search_term_index: int,
  30. category_indices: List[int],
  31. start_row: int = 3) -> Tuple[List[List[str]], List[List[str]]]:
  32. """批量处理翻译"""
  33. try:
  34. # 首先提取样本数据用于检查
  35. sample_data = extract_sample_data(data)
  36. logger.info(f"Sample data extracted for inspection:\n{sample_data}")
  37. # 初始化翻译器
  38. translator = OpenAITranslator()
  39. # 直接提取需要翻译的搜索词
  40. search_terms = [row[search_term_index] for row in data[start_row-1:]]
  41. # 直接提取需要翻译的类别
  42. categories = []
  43. for index in category_indices:
  44. categories.extend([row[index] for row in data[start_row-1:]])
  45. # 批量翻译
  46. logger.info("Starting batch translations...")
  47. if os.getenv('DEBUG', '').lower() in ('true', '1', 't'):
  48. # DEBUG模式:使用模拟翻译
  49. search_translations = [f"{text} 翻译测试" for text in search_terms]
  50. category_translations = [f"{text} 翻译测试" for text in categories]
  51. else:
  52. # 正常模式:调用真实翻译
  53. search_translations = translator.translate(search_terms)
  54. category_translations = translator.translate(categories)
  55. logger.info("Batch translations completed")
  56. # 更新数据
  57. for i, row in enumerate(data[start_row-1:], start=start_row-1):
  58. try:
  59. # 更新搜索词翻译列
  60. row[search_term_index + 1] = search_translations[i-(start_row-1)]
  61. # 更新类别翻译
  62. category_trans_index = (i-(start_row-1)) * len(category_indices)
  63. for cat_index in category_indices:
  64. row[cat_index + 1] = category_translations[category_trans_index]
  65. category_trans_index += 1
  66. except Exception as e:
  67. logger.error(f"Error processing row {i}: {e}")
  68. raise
  69. return data, sample_data
  70. except Exception as e:
  71. logger.error(f"Error in batch translation: {e}")
  72. raise
  73. def main():
  74. pass
  75. if __name__ == "__main__":
  76. main()