Byaidu před 1 rokem
rodič
revize
f631a5014d
Změněny 3 soubory, ve kterých bylo provedeno 19 přidání a 16 odebrání
  1. 4 1
      pdf2zh/converter.py
  2. 2 0
      pdf2zh/gui.py
  3. 13 15
      pdf2zh/translator.py

+ 4 - 1
pdf2zh/converter.py

@@ -19,6 +19,7 @@ from pdf2zh import cache
 from pdf2zh.translator import (
     BaseTranslator,
     GoogleTranslator,
+    BingTranslator,
     DeepLTranslator,
     DeepLXTranslator,
     OllamaTranslator,
@@ -138,7 +139,7 @@ class TranslateConverter(PDFConverterEx):
         param = service.split(":", 1)
         service_name = param[0]
         service_model = param[1] if len(param) > 1 else None
-        for translator in [GoogleTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, OpenAITranslator, AzureTranslator, TencentTranslator]:
+        for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, OpenAITranslator, AzureTranslator, TencentTranslator]:
             if service_name == translator.name:
                 self.translator = translator(service, lang_out, lang_in, service_model)
         if not self.translator:
@@ -320,6 +321,8 @@ class TranslateConverter(PDFConverterEx):
 
         @retry(wait=wait_fixed(1))
         def worker(s: str):  # 多线程翻译
+            if re.match(r"^\$v\d+\$$", s):  # 公式不翻译
+                return s
             try:
                 hash_key_paragraph = cache.deterministic_hash(
                     (s, str(self.translator))

+ 2 - 0
pdf2zh/gui.py

@@ -6,6 +6,7 @@ from pdf2zh.pdf2zh import extract_text
 from pdf2zh.translator import (
     BaseTranslator,
     GoogleTranslator,
+    BingTranslator,
     DeepLTranslator,
     DeepLXTranslator,
     OllamaTranslator,
@@ -25,6 +26,7 @@ import cgi
 # five value, padding with None
 service_map: dict[str, BaseTranslator] = {
     "Google": GoogleTranslator,
+    "Bing": BingTranslator,
     "DeepL": DeepLTranslator,
     "DeepLX": DeepLXTranslator,
     "Ollama": OllamaTranslator,

+ 13 - 15
pdf2zh/translator.py

@@ -85,33 +85,31 @@ class BingTranslator(BaseTranslator):
     name = "bing"
 
     def __init__(self, service, lang_out, lang_in, model):
-        lang_out = "zh" if lang_out == "auto" else lang_out
+        lang_out = "zh-Hans" if lang_out == "auto" else lang_out
         lang_in = "en" if lang_in == "auto" else lang_in
         super().__init__(service, lang_out, lang_in, model)
         self.session = requests.Session()
-        self.endpoint = "https://www.bing.com/ttranslatev3?isVertical=1"
+        self.endpoint = "https://www.bing.com/ttranslatev3"
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",  # noqa: E501
+        }
 
     def fineSID(self):
         resp = self.session.get("https://www.bing.com/translator")
-        result = re.findall(
+        ig = re.findall(r"\"ig\":\"(.*?)\"", resp.text)[0]
+        iid = re.findall(r"data-iid=\"(.*?)\"", resp.text)[-1]
+        key, token = re.findall(
             r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", resp.text
         )[0]
-        return result
+        return ig, iid, key, token
 
     def translate(self, text):
-        sid = self.fineSID()
+        ig, iid, key, token = self.fineSID()
         resp = self.session.post(
-            self.endpoint,
-            data={
-                "fromLang": self.lang_in,
-                "text": text,
-                "to": self.lang_out,
-                "tryFetchingGenderDebiasedTranslations": True,
-                "token": sid[1],
-                "key": sid[0],
-            },
+            f"{self.endpoint}?IG={ig}&IID={iid}",
+            data={"fromLang": self.lang_in, "to": self.lang_out, "text": text, "token": token, "key": key},
+            headers=self.headers,
         )
-        print(resp.json())
         return resp.json()[0]["translations"][0]["text"]