# Adapted from: https://github.com/Byaidu/PDFMathTranslate/blob/main/pdf2zh/translator.py
import html
import logging
import os
import re
from json import dumps, loads

from dotenv import load_dotenv
import openai
import requests

# Load environment variables (read further down through os.getenv).
load_dotenv()


class BaseTranslator:
    """Common state shared by the translation backends in this module.

    Attributes are stored verbatim from the constructor arguments:
    ``service``, ``lang_out``, ``lang_in`` and ``model``.
    """

    def __init__(self, service, lang_out, lang_in, model):
        self.service = service
        self.lang_out = lang_out
        self.lang_in = lang_in
        self.model = model

    def translate(self, text) -> str:
        """Translate *text*; subclasses override this placeholder.

        The base implementation has an empty body and therefore returns
        ``None``.
        """
        ...

    def __str__(self):
        return f"{self.service} {self.lang_out} {self.lang_in}"


class GoogleTranslator(BaseTranslator):
    """Translator that scrapes the result out of a Google Translate page."""

    def __init__(self, service, lang_out, lang_in, model):
        # "auto" is replaced by concrete defaults: Chinese out, English in.
        lang_out = "zh-CN" if lang_out == "auto" else lang_out
        lang_in = "en" if lang_in == "auto" else lang_in
        super().__init__(service, lang_out, lang_in, model)
        self.session = requests.Session()
        # NOTE(review): plain http:// endpoint, so the text to translate is
        # sent unencrypted on this hop -- confirm whether https:// is usable.
        self.base_link = "http://translate.google.com/m"
        self.headers = {
            "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"  # noqa: E501
        }

    def translate(self, text):
        """Return the translation of *text* extracted from the response HTML.

        Only the first 5000 characters of *text* are sent.

        Returns:
            The HTML-unescaped translation, or the literal string
            ``"IRREPARABLE TRANSLATION ERROR"`` when the server answers
            with HTTP 400.

        Raises:
            ValueError: If no translation element is found in the response.
        """
        text = text[:5000]  # google translate max length
        # NOTE(review): no timeout is passed, so a stalled connection can
        # block indefinitely -- consider adding one.
        response = self.session.get(
            self.base_link,
            params={"tl": self.lang_out, "sl": self.lang_in, "q": text},
            headers=self.headers,
        )
        # A 400 is reported through a sentinel string instead of an
        # exception; check it first so the body is not parsed needlessly.
        if response.status_code == 400:
            return "IRREPARABLE TRANSLATION ERROR"

        matches = re.findall(
            r'(?s)class="(?:t0|result-container)">(.*?)<', response.text
        )
        if not matches:
            raise ValueError("Empty translation result")
        return html.unescape(matches[0])


class OpenAITranslator(BaseTranslator):
    """Translator backed by the OpenAI chat-completions API."""

    def __init__(self, service, lang_out, lang_in, model):
        # "auto" is replaced by concrete defaults: Chinese out, English in.
        lang_out = "zh-CN" if lang_out == "auto" else lang_out
        lang_in = "en" if lang_in == "auto" else lang_in
        super().__init__(service, lang_out, lang_in, model)
        self.options = {"temperature": 0}  # random sampling may break the formula markers
        # Configure OpenAI client with environment variables
        self.client = openai.OpenAI(
            api_key=os.getenv("OPENAI_API_KEY")
        )

    def translate(self, text) -> str:
        """Ask the chat model to translate *text* into ``self.lang_out``.

        The model name comes from the ``LLM_MODEL`` environment variable
        when it is set, otherwise from ``self.model``.

        Returns:
            The content of the first choice, stripped of surrounding
            whitespace.

        Raises:
            ValueError: If the first choice carries no text content.
        """
        response = self.client.chat.completions.create(
            model=os.getenv("LLM_MODEL", self.model),  # Use env var or fallback to default
            **self.options,
            messages=[
                {
                    "role": "system",
                    "content": "You are a professional,authentic machine translation engine.",
                },
                {
                    "role": "user",
                    "content": f"Translate the following markdown source text to {self.lang_out}. Keep the formula notation $v*$ unchanged. Output translation directly without any additional text.\nSource Text: {text}\nTranslated Text:",  # noqa: E501
                },
            ],
        )
        content = response.choices[0].message.content
        # The content field may be None; fail with the same error the
        # Google backend uses rather than an AttributeError on .strip().
        if content is None:
            raise ValueError("Empty translation result")
        return content.strip()