Browse Source

chore: check http status

Byaidu 1 year ago
parent
commit
b8817ffe21
4 changed files with 41 additions and 38 deletions
  1. 1 1
      README.md
  2. 1 1
      README_zh-CN.md
  3. 1 1
      pdf2zh/converter.py
  4. 38 35
      pdf2zh/translator.py

+ 1 - 1
README.md

@@ -162,7 +162,7 @@ In the following table, we list all advanced options for reference:
 | `-t`  | [Multi-threads](#threads) | `pdf2zh example.pdf -t 1` |
 | `-o`  | Output dir | `pdf2zh example.pdf -o output` |
 | `-f`, `-c` | [Exceptions](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
-| `--share` | get gradio public link | `pdf2zh -i --share` |
+| `--share` | Get gradio public link | `pdf2zh -i --share` |
 
 <h3 id="partial">Full / partial document translation</h3>
 

+ 1 - 1
README_zh-CN.md

@@ -162,7 +162,7 @@
 | `-t`  | [多线程](#threads) | `pdf2zh example.pdf -t 1` |
 | `-o`  | 输出目录 | `pdf2zh example.pdf -o output` |
 | `-f`, `-c` | [例外规则](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
-| `--share` | 获取gradio公开链接 | `pdf2zh -i --share` |
+| `--share` | 获取 gradio 公开链接 | `pdf2zh -i --share` |
 
 <h3 id="partial">全文或部分文档翻译</h3>
 

+ 1 - 1
pdf2zh/converter.py

@@ -144,7 +144,7 @@ class TranslateConverter(PDFConverterEx):
         for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, AzureOpenAITranslator,
                            OpenAITranslator, ZhipuTranslator, SiliconTranslator, AzureTranslator, TencentTranslator]:
             if service_name == translator.name:
-                self.translator = translator(lang_out, lang_in, service_model)
+                self.translator = translator(lang_in, lang_out, service_model)
         if not self.translator:
             raise ValueError("Unsupported translation service")
 

+ 38 - 35
pdf2zh/translator.py

@@ -25,11 +25,11 @@ class BaseTranslator:
     envs = {}
     lang_map = {}
 
-    def __init__(self, lang_out: str, lang_in: str, model):
-        lang_out = self.lang_map.get(lang_out.lower(), lang_out)
+    def __init__(self, lang_in, lang_out, model):
         lang_in = self.lang_map.get(lang_in.lower(), lang_in)
-        self.lang_out = lang_out
+        lang_out = self.lang_map.get(lang_out.lower(), lang_out)
         self.lang_in = lang_in
+        self.lang_out = lang_out
         self.model = model
 
     def translate(self, text):
@@ -48,15 +48,15 @@ class BaseTranslator:
         ]
 
     def __str__(self):
-        return f"{self.name} {self.lang_out} {self.lang_in} {self.model}"
+        return f"{self.name} {self.lang_in} {self.lang_out} {self.model}"
 
 
 class GoogleTranslator(BaseTranslator):
     name = "google"
     lang_map = {"zh": "zh-CN"}
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         self.session = requests.Session()
         self.endpoint = "http://translate.google.com/m"
         self.headers = {
@@ -76,6 +76,7 @@ class GoogleTranslator(BaseTranslator):
         if response.status_code == 400:
             result = "IRREPARABLE TRANSLATION ERROR"
         else:
+            response.raise_for_status()
             result = html.unescape(re_result[0])
         return remove_control_characters(result)
 
@@ -85,8 +86,8 @@ class BingTranslator(BaseTranslator):
     name = "bing"
     lang_map = {"zh": "zh-Hans"}
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         self.session = requests.Session()
         self.endpoint = "https://www.bing.com/ttranslatev3"
         self.headers = {
@@ -94,18 +95,19 @@ class BingTranslator(BaseTranslator):
         }
 
     def fineSID(self):
-        resp = self.session.get("https://www.bing.com/translator")
-        ig = re.findall(r"\"ig\":\"(.*?)\"", resp.text)[0]
-        iid = re.findall(r"data-iid=\"(.*?)\"", resp.text)[-1]
+        response = self.session.get("https://www.bing.com/translator")
+        response.raise_for_status()
+        ig = re.findall(r"\"ig\":\"(.*?)\"", response.text)[0]
+        iid = re.findall(r"data-iid=\"(.*?)\"", response.text)[-1]
         key, token = re.findall(
-            r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", resp.text
+            r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", response.text
         )[0]
         return ig, iid, key, token
 
     def translate(self, text):
         text = text[:1000]  # bing translate max length
         ig, iid, key, token = self.fineSID()
-        resp = self.session.post(
+        response = self.session.post(
             f"{self.endpoint}?IG={ig}&IID={iid}",
             data={
                 "fromLang": self.lang_in,
@@ -116,7 +118,8 @@ class BingTranslator(BaseTranslator):
             },
             headers=self.headers,
         )
-        return resp.json()[0]["translations"][0]["text"]
+        response.raise_for_status()
+        return response.json()[0]["translations"][0]["text"]
 
 
 class DeepLTranslator(BaseTranslator):
@@ -128,9 +131,8 @@ class DeepLTranslator(BaseTranslator):
     }
     lang_map = {"zh": "zh-Hans"}
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
-        self.session = requests.Session()
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         server_url = os.getenv("DEEPL_SERVER_URL", self.envs["DEEPL_SERVER_URL"])
         auth_key = os.getenv("DEEPL_AUTH_KEY")
         self.client = deepl.Translator(auth_key, server_url=server_url)
@@ -150,13 +152,13 @@ class DeepLXTranslator(BaseTranslator):
     }
     lang_map = {"zh": "zh-Hans"}
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         self.endpoint = os.getenv("DEEPLX_ENDPOINT", self.envs["DEEPLX_ENDPOINT"])
         self.session = requests.Session()
 
     def translate(self, text):
-        resp = self.session.post(
+        response = self.session.post(
             self.endpoint,
             json={
                 "source_lang": self.lang_in,
@@ -164,7 +166,8 @@ class DeepLXTranslator(BaseTranslator):
                 "text": text,
             },
         )
-        return resp.json()["data"]
+        response.raise_for_status()
+        return response.json()["data"]
 
 
 class OllamaTranslator(BaseTranslator):
@@ -175,10 +178,10 @@ class OllamaTranslator(BaseTranslator):
         "OLLAMA_MODEL": "gemma2",
     }
 
-    def __init__(self, lang_out, lang_in, model):
+    def __init__(self, lang_in, lang_out, model):
         if not model:
             model = os.getenv("OLLAMA_MODEL", self.envs["OLLAMA_MODEL"])
-        super().__init__(lang_out, lang_in, model)
+        super().__init__(lang_in, lang_out, model)
         self.options = {"temperature": 0}  # 随机采样可能会打断公式标记
         self.client = ollama.Client()
 
@@ -200,10 +203,10 @@ class OpenAITranslator(BaseTranslator):
         "OPENAI_MODEL": "gpt-4o-mini",
     }
 
-    def __init__(self, lang_out, lang_in, model, base_url=None, api_key=None):
+    def __init__(self, lang_in, lang_out, model, base_url=None, api_key=None):
         if not model:
             model = os.getenv("OPENAI_MODEL", self.envs["OPENAI_MODEL"])
-        super().__init__(lang_out, lang_in, model)
+        super().__init__(lang_in, lang_out, model)
         self.options = {"temperature": 0}  # 随机采样可能会打断公式标记
         self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
 
@@ -224,14 +227,14 @@ class AzureOpenAITranslator(BaseTranslator):
         "AZURE_OPENAI_MODEL": "gpt-4o-mini",
     }
 
-    def __init__(self, lang_out, lang_in, model, base_url=None, api_key=None):
+    def __init__(self, lang_in, lang_out, model, base_url=None, api_key=None):
         base_url = os.getenv(
             "AZURE_OPENAI_BASE_URL", self.envs["AZURE_OPENAI_BASE_URL"]
         )
         api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-06-01")
         if not model:
             model = os.getenv("AZURE_OPENAI_MODEL", self.envs["AZURE_OPENAI_MODEL"])
-        super().__init__(lang_out, lang_in, model)
+        super().__init__(lang_in, lang_out, model)
         self.options = {"temperature": 0}
         self.client = openai.AzureOpenAI(
             azure_endpoint=base_url,
@@ -257,12 +260,12 @@ class ZhipuTranslator(OpenAITranslator):
         "ZHIPU_MODEL": "glm-4-flash",
     }
 
-    def __init__(self, lang_out, lang_in, model):
+    def __init__(self, lang_in, lang_out, model):
         base_url = "https://open.bigmodel.cn/api/paas/v4"
         api_key = os.getenv("ZHIPU_API_KEY")
         if not model:
             model = os.getenv("ZHIPU_MODEL", self.envs["ZHIPU_MODEL"])
-        super().__init__(lang_out, lang_in, model, base_url=base_url, api_key=api_key)
+        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
 
 
 class SiliconTranslator(OpenAITranslator):
@@ -273,12 +276,12 @@ class SiliconTranslator(OpenAITranslator):
         "SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
     }
 
-    def __init__(self, lang_out, lang_in, model):
+    def __init__(self, lang_in, lang_out, model):
         base_url = "https://api.siliconflow.cn/v1"
         api_key = os.getenv("SILICON_API_KEY")
         if not model:
             model = os.getenv("SILICON_MODEL", self.envs["SILICON_MODEL"])
-        super().__init__(lang_out, lang_in, model, base_url=base_url, api_key=api_key)
+        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
 
 
 class AzureTranslator(BaseTranslator):
@@ -290,8 +293,8 @@ class AzureTranslator(BaseTranslator):
     }
     lang_map = {"zh": "zh-Hans"}
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         endpoint = os.getenv("AZURE_ENDPOINT", self.envs["AZURE_ENDPOINT"])
         api_key = os.getenv("AZURE_API_KEY")
         credential = AzureKeyCredential(api_key)
@@ -320,8 +323,8 @@ class TencentTranslator(BaseTranslator):
         "TENCENTCLOUD_SECRET_KEY": None,
     }
 
-    def __init__(self, lang_out, lang_in, model):
-        super().__init__(lang_out, lang_in, model)
+    def __init__(self, lang_in, lang_out, model):
+        super().__init__(lang_in, lang_out, model)
         cred = credential.DefaultCredentialProvider().get_credential()
         self.client = TmtClient(cred, "ap-beijing")
         self.req = TextTranslateRequest()