Ver Fonte

Merge pull request #107 from hellofinch/dev-guide

添加腾讯机器翻译接口
Byaidu há 1 ano atrás
pai
commit
b17759dfde
1 ficheiros alterados com 100 adições e 2 exclusões
  1. 100 2
      pdf2zh/translator.py

+ 100 - 2
pdf2zh/translator.py

@@ -11,6 +11,10 @@ import requests
 from azure.ai.translation.text import TextTranslationClient
 from azure.core.credentials import AzureKeyCredential
 
+import hmac
+import hashlib
+import time
+from datetime import datetime,UTC
 
 class BaseTranslator:
     def __init__(self, service, lang_out, lang_in, model):
@@ -58,6 +62,96 @@ class GoogleTranslator(BaseTranslator):
             result = html.unescape(re_result[0])
         return result
 
+class TencentTranslator(BaseTranslator):
+    """Translate text through Tencent Cloud Machine Translation (TextTranslate).
+
+    Credentials come from the ``Tencent_SECRET_ID`` and ``Tencent_SECRET_KEY``
+    environment variables; every request is signed with TC3-HMAC-SHA256.
+    """
+
+    def sign(self, key, msg):
+        """Return the raw HMAC-SHA256 digest of str ``msg`` under bytes ``key``."""
+        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
+
+    def __init__(self, service, lang_out, lang_in, model):
+        """Load credentials and create the HTTP session.
+
+        ``"auto"`` is mapped to ``"zh"`` for the output language and to
+        ``"en"`` for the input language before calling the base constructor.
+
+        Raises:
+            ValueError: if a credential environment variable is not set.
+        """
+        lang_out = "zh" if lang_out == "auto" else lang_out
+        lang_in = "en" if lang_in == "auto" else lang_in
+        super().__init__(service, lang_out, lang_in, model)
+        try:
+            # os.environ[...] raises KeyError when the variable is unset;
+            # os.getenv() returned None, so this handler could never fire and
+            # the failure only surfaced later while signing a request.
+            self.secret_id = os.environ["Tencent_SECRET_ID"]
+            self.secret_key = os.environ["Tencent_SECRET_KEY"]
+        except KeyError as e:
+            missing_var = e.args[0]
+            raise ValueError(
+                f"The environment variable '{missing_var}' is required but not set."
+            ) from e
+
+        self.session = requests.Session()
+        self.base_link = "tmt.tencentcloudapi.com"
+
+    def _authorization(self, payload, timestamp):
+        """Build the TC3-HMAC-SHA256 ``Authorization`` header value.
+
+        ``payload`` is the exact JSON string that will be sent as the request
+        body; ``timestamp`` is the integer Unix time also sent in
+        ``X-TC-Timestamp``.
+        """
+        algorithm = "TC3-HMAC-SHA256"
+        signed_headers = "content-type;host;x-tc-action"
+        # Canonical request: method, URI, query string, canonical headers,
+        # signed header names, and the hex SHA-256 of the request body.
+        canonical_request = "\n".join([
+            "POST",
+            "/",
+            "",
+            "content-type:application/json; charset=utf-8\n"
+            f"host:{self.base_link}\nx-tc-action:texttranslate\n",
+            signed_headers,
+            hashlib.sha256(payload.encode("utf-8")).hexdigest(),
+        ])
+        date = datetime.fromtimestamp(timestamp, UTC).strftime("%Y-%m-%d")
+        credential_scope = date + "/tmt/tc3_request"
+        string_to_sign = "\n".join([
+            algorithm,
+            str(timestamp),
+            credential_scope,
+            hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
+        ])
+        # Derive the signing key: secret key -> date -> service -> "tc3_request".
+        secret_date = self.sign(("TC3" + self.secret_key).encode("utf-8"), date)
+        secret_service = self.sign(secret_date, "tmt")
+        secret_signing = self.sign(secret_service, "tc3_request")
+        signature = hmac.new(
+            secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
+        ).hexdigest()
+        return (
+            f"{algorithm} Credential={self.secret_id}/{credential_scope}, "
+            f"SignedHeaders={signed_headers}, Signature={signature}"
+        )
+
+    def translate(self, text):
+        """Return the translation of ``text``.
+
+        Only the first 5000 characters of ``text`` are sent.
+
+        Raises:
+            ValueError: on a non-200 HTTP status, on a response without
+                ``Response.TargetText``, or on an empty translation.
+        """
+        data = {
+            "SourceText": text[:5000],
+            "Source": self.lang_in,
+            "Target": self.lang_out,
+            "ProjectId": 0,
+        }
+        payload = dumps(data)
+        timestamp = int(time.time())
+        self.headers = {
+            "Authorization": self._authorization(payload, timestamp),
+            "Content-Type": "application/json; charset=utf-8",
+            "Host": self.base_link,
+            "X-TC-Action": "TextTranslate",
+            "X-TC-Region": "ap-beijing",
+            "X-TC-Timestamp": str(timestamp),
+            "X-TC-Version": "2018-03-21",
+        }
+        # Send the exact bytes that were hashed into the signature instead of
+        # letting requests re-serialize the dict (json=...), which could
+        # produce a different body and invalidate the signature.
+        response = self.session.post(
+            "https://" + self.base_link,
+            data=payload.encode("utf-8"),
+            headers=self.headers,
+        )
+        # 1. Status code test
+        if response.status_code != 200:
+            raise ValueError("HTTP error: " + str(response.status_code))
+        body = loads(response.text)
+        # 2. Result test
+        try:
+            result = body["Response"]["TargetText"]
+        except KeyError as e:
+            # Include any "Error" entry from the response so the cause of the
+            # failure is visible to the caller.
+            error = body.get("Response", {}).get("Error")
+            detail = f": {error}" if error else ""
+            raise ValueError("No valid key in Tencent's response" + detail) from e
+        # 3. Result length check (previously unreachable after the early return)
+        if not result:
+            raise ValueError("Empty translation result")
+        return result
 
 class DeepLXTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
@@ -78,7 +172,11 @@ class DeepLXTranslator(BaseTranslator):
             ) from e
 
         self.session = requests.Session()
-        self.base_link = f"{server_url}/{auth_key}/translate"
+        server_url=server_url.rstrip('/')
+        if auth_key:
+            self.base_link = f"{server_url}/{auth_key}/translate"
+        else:
+            self.base_link = f"{server_url}/translate"
         self.headers = {
             "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"
         }
@@ -112,7 +210,6 @@ class DeepLXTranslator(BaseTranslator):
             raise ValueError("Empty translation result")
         return result
 
-
 class DeepLTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
         lang_out='ZH' if lang_out=='auto' else lang_out
@@ -218,3 +315,4 @@ class AzureTranslator(BaseTranslator):
 
         translated_text = response[0].translations[0].text
         return translated_text
+