Parcourir la source

feat (translator): add support for DeepLX

Rongxin il y a 1 an
Parent
commit
88b54565d9
2 fichiers modifiés avec 67 ajouts et 6 suppressions
  1. 5 0
      pdf2zh/converter.py
  2. 62 6
      pdf2zh/translator.py

+ 5 - 0
pdf2zh/converter.py

@@ -23,6 +23,7 @@ from pdf2zh.translator import (
     BaseTranslator,
     GoogleTranslator,
     DeepLTranslator,
+    DeepLXTranslator,
     OllamaTranslator,
     OpenAITranslator,
     AzureTranslator,
@@ -375,6 +376,10 @@ class TextConverter(PDFConverter[AnyIO]):
             self.translator: BaseTranslator = DeepLTranslator(
                 service, lang_out, lang_in, None
             )
+        elif param[0] == "deeplx":
+            self.translator: BaseTranslator = DeepLXTranslator(
+                service, lang_out, lang_in, None
+            )
         elif param[0] == "ollama":
             self.translator: BaseTranslator = OllamaTranslator(
                 service, lang_out, lang_in, param[1]

+ 62 - 6
pdf2zh/translator.py

@@ -1,11 +1,13 @@
 import html
-import re
+import logging
 import os
+import re
+from json import dumps, loads
+
+import deepl
 import ollama
-import logging
-import requests
 import openai
-import deepl
+import requests
 from azure.ai.translation.text import TextTranslationClient
 from azure.core.credentials import AzureKeyCredential
 
@@ -29,8 +31,8 @@ class BaseTranslator:
 
 class GoogleTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
-        lang_out='zh-CN' if lang_out=='auto' else lang_out
-        lang_in='en' if lang_in=='auto' else lang_in
+        lang_out = "zh-CN" if lang_out == "auto" else lang_out
+        lang_in = "en" if lang_in == "auto" else lang_in
         super().__init__(service, lang_out, lang_in, model)
         self.session = requests.Session()
         self.base_link = "http://translate.google.com/m"
@@ -55,6 +57,60 @@ class GoogleTranslator(BaseTranslator):
         return result
 
 
+class DeepLXTranslator(BaseTranslator):
+    """Translator that sends text to a DeepLX HTTP endpoint.
+
+    Configuration is read from the environment when the instance is created:
+
+    * ``DEEPLX_SERVER_URL`` -- base URL of the server; falls back to
+      ``https://api.deeplx.org`` when unset or empty.
+    * ``DEEPLX_AUTH_KEY`` -- key placed as a path segment in front of
+      ``/translate``; left out of the URL when unset or empty.
+    """
+
+    def __init__(self, service, lang_out, lang_in, model):
+        """Resolve default languages and build the translate endpoint URL.
+
+        ``"auto"`` is mapped to ``"zh"`` for the output language and to
+        ``"en"`` for the input language before delegating to the base class.
+        """
+        lang_out = "zh" if lang_out == "auto" else lang_out
+        lang_in = "en" if lang_in == "auto" else lang_in
+        super().__init__(service, lang_out, lang_in, model)
+
+        # os.getenv returns None for a missing variable (it never raises
+        # KeyError), so missing values are handled explicitly here.
+        server_url = os.getenv("DEEPLX_SERVER_URL") or "https://api.deeplx.org"
+        auth_key = os.getenv("DEEPLX_AUTH_KEY")
+
+        # Avoid a double slash when the configured URL ends with "/".
+        server_url = server_url.rstrip("/")
+
+        self.session = requests.Session()
+        if auth_key:
+            self.base_link = f"{server_url}/{auth_key}/translate"
+        else:
+            # Without a key, do not embed the literal text "None" in the path.
+            # NOTE(review): assumes key-less servers expose "/translate"
+            # directly -- confirm against the DeepLX deployment in use.
+            self.base_link = f"{server_url}/translate"
+        self.headers = {
+            "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"
+        }
+
+    def translate(self, text):
+        """Translate ``text`` and return the ``"data"`` field of the reply.
+
+        Only the first 5000 characters of ``text`` are sent. The request body
+        is a JSON object with ``target_lang`` (``self.lang_out``) and ``text``.
+
+        Raises:
+            ValueError: if the HTTP status is not 200, or the decoded JSON
+                response has no ``"data"`` key.
+        """
+        # Length cap carried over from the Google translator implementation.
+        # TODO(review): confirm the actual DeepLX request size limit.
+        text = text[:5000]
+        response = self.session.post(
+            self.base_link,
+            dumps(
+                {
+                    "target_lang": self.lang_out,
+                    "text": text,
+                }
+            ),
+            headers=self.headers,
+        )
+        # 1. Status code test
+        if response.status_code != 200:
+            raise ValueError("HTTP error: " + str(response.status_code))
+        # 2. Result test
+        result = loads(response.text)
+        try:
+            return result["data"]
+        except KeyError as err:
+            raise ValueError("No valid key in DeepLX's response") from err
+
+
 class DeepLTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
         lang_out='ZH' if lang_out=='auto' else lang_out