Просмотр исходного кода

Merge pull request #58 from Hanaasagi/azure

Byaidu 1 год назад
Родитель
Сommit
a5780f68af
4 измененных файлов с 61 добавлено и 3 удалено
  1. 14 0
      README.md
  2. 5 0
      pdf2zh/converter.py
  3. 41 3
      pdf2zh/translator.py
  4. 1 0
      setup.py

+ 14 - 0
README.md

@@ -98,6 +98,20 @@ Set ENVs to construct an endpoint like: `{OPENAI_BASE_URL}/chat/completions`
 pdf2zh example.pdf -s openai:gpt-4o
 ```
 
+### Translate with Azure Text Translation
+
+See [What is Azure Text Translation?](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview)
+
+The following ENVs are required:
+- `AZURE_APIKEY`, e.g., `export AZURE_APIKEY=xxx`
+- `AZURE_ENDPOINT`, e.g., `export AZURE_ENDPOINT=https://api.translator.azure.cn/`
+- `AZURE_REGION`, e.g., `export AZURE_REGION=chinaeast2`
+
+
+```bash
+pdf2zh example.pdf -s azure
+```
+
 ### Use regex to specify formula fonts and characters that need to be preserved
 
 ```bash

+ 5 - 0
pdf2zh/converter.py

@@ -25,6 +25,7 @@ from pdf2zh.translator import (
     DeepLTranslator,
     OllamaTranslator,
     OpenAITranslator,
+    AzureTranslator,
 )
 def remove_control_characters(s):
     return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C")
@@ -382,6 +383,10 @@ class TextConverter(PDFConverter[AnyIO]):
             self.translator: BaseTranslator = OpenAITranslator(
                 service, lang_out, lang_in, param[1]
             )
+        elif param[0] == 'azure':
+            self.translator: BaseTranslator = AzureTranslator(
+                service, lang_out, lang_in, None
+            )
         else:
             raise ValueError("Unsupported translation service")
 

+ 41 - 3
pdf2zh/translator.py

@@ -2,9 +2,13 @@ import html
 import re
 import os
 import ollama
+import logging
 import requests
 import openai
 import deepl
+from azure.ai.translation.text import TextTranslationClient
+from azure.core.credentials import AzureKeyCredential
+
 
 class BaseTranslator:
     def __init__(self, service, lang_out, lang_in, model):
@@ -13,8 +17,8 @@ class BaseTranslator:
         self.lang_in = lang_in
         self.model = model
 
-    def translate(self, text):
-        pass
+    def translate(self, text) -> str:
+        ...
 
     def __str__(self):
         pass
@@ -106,7 +110,7 @@ class OpenAITranslator(BaseTranslator):
         # OPENAI_API_KEY
         self.client = openai.OpenAI()
 
-    def translate(self, text):
+    def translate(self, text) -> str:
         response = self.client.chat.completions.create(
             model=self.model,
             **self.options,
@@ -122,3 +126,37 @@ class OpenAITranslator(BaseTranslator):
             ],
         )
         return response.choices[0].message.content.strip()
+
+
+class AzureTranslator(BaseTranslator):
+    def __init__(self, service, lang_out, lang_in, model):
+        lang_out='zh-Hans' if lang_out=='auto' else lang_out
+        lang_in='en' if lang_in=='auto' else lang_in
+        super().__init__(service, lang_out, lang_in, model)
+
+        try:
+            api_key = os.environ["AZURE_APIKEY"]
+            endpoint = os.environ["AZURE_ENDPOINT"]
+            region = os.environ["AZURE_REGION"]
+        except KeyError as e:
+            missing_var = e.args[0]
+            raise ValueError(f"The environment variable '{missing_var}' is required but not set.") from e
+
+        credential = AzureKeyCredential(api_key)
+        self.client = TextTranslationClient(
+            endpoint=endpoint, credential=credential, region=region
+        )
+
+        # https://github.com/Azure/azure-sdk-for-python/issues/9422
+        logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
+        logger.setLevel(logging.WARNING)
+
+    def translate(self, text) -> str:
+        response = self.client.translate(
+            body=[text],
+            from_language=self.lang_in,
+            to_language=[self.lang_out],
+        )
+
+        translated_text = response[0].translations[0].text
+        return translated_text

+ 1 - 0
setup.py

@@ -29,6 +29,7 @@ setup(
         "ollama",
         "deepl<1.19.1",
         "openai",
+        "azure-ai-translation-text<=1.0.1",
     ],
     classifiers=[
         "Programming Language :: Python :: 3",