|
|
@@ -1,13 +1,7 @@
|
|
|
-import hashlib
|
|
|
-import hmac
|
|
|
import html
|
|
|
import logging
|
|
|
import os
|
|
|
import re
|
|
|
-import time
|
|
|
-from datetime import timezone, datetime
|
|
|
-
|
|
|
-from json import dumps, loads
|
|
|
import unicodedata
|
|
|
|
|
|
import deepl
|
|
|
@@ -16,6 +10,10 @@ import openai
|
|
|
import requests
|
|
|
from azure.ai.translation.text import TextTranslationClient
|
|
|
from azure.core.credentials import AzureKeyCredential
|
|
|
+from tencentcloud.common import credential
|
|
|
+from tencentcloud.tmt.v20180321.tmt_client import TmtClient
|
|
|
+from tencentcloud.tmt.v20180321.models import TextTranslateRequest
|
|
|
+from tencentcloud.tmt.v20180321.models import TextTranslateResponse
|
|
|
|
|
|
|
|
|
def remove_control_characters(s):
|
|
|
@@ -23,13 +21,16 @@ def remove_control_characters(s):
|
|
|
|
|
|
|
|
|
class BaseTranslator:
    """Common state and interface shared by every translation backend.

    Concrete backends subclass this, normalise their language codes in
    ``__init__`` and override :meth:`translate`.
    """

    # Environment variable names a backend relies on, mapped to their default
    # value (``None`` when there is no default). This dict lives on the class
    # and is shared by all instances, so subclasses should replace it with
    # their own dict instead of mutating it in place.
    envs = {}

    def __init__(self, service, lang_out, lang_in, model):
        """Store the backend configuration.

        Args:
            service: Name of the translation service.
            lang_out: Target language code.
            lang_in: Source language code.
            model: Model identifier; stored as-is and not used by this class.
        """
        self.service = service
        self.lang_out = lang_out
        self.lang_in = lang_in
        self.model = model

    def translate(self, text):
        """Translate ``text`` from ``lang_in`` to ``lang_out``.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        # Fail loudly instead of silently returning None when a backend
        # forgets to provide its own implementation.
        raise NotImplementedError(
            f"{type(self).__name__} does not implement translate()"
        )

    def __str__(self):
        """Return ``"<service> <lang_out> <lang_in>"``."""
        return f"{self.service} {self.lang_out} {self.lang_in}"
|
|
|
@@ -41,7 +42,7 @@ class GoogleTranslator(BaseTranslator):
|
|
|
lang_in = "en" if lang_in == "auto" else lang_in
|
|
|
super().__init__(service, lang_out, lang_in, model)
|
|
|
self.session = requests.Session()
|
|
|
- self.base_link = "http://translate.google.com/m"
|
|
|
+ self.endpoint = "http://translate.google.com/m"
|
|
|
self.headers = {
|
|
|
"User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)" # noqa: E501
|
|
|
}
|
|
|
@@ -49,7 +50,7 @@ class GoogleTranslator(BaseTranslator):
|
|
|
def translate(self, text):
|
|
|
text = text[:5000] # google translate max length
|
|
|
response = self.session.get(
|
|
|
- self.base_link,
|
|
|
+ self.endpoint,
|
|
|
params={"tl": self.lang_out, "sl": self.lang_in, "q": text},
|
|
|
headers=self.headers,
|
|
|
)
|
|
|
@@ -58,195 +59,74 @@ class GoogleTranslator(BaseTranslator):
|
|
|
)
|
|
|
if response.status_code == 400:
|
|
|
result = "IRREPARABLE TRANSLATION ERROR"
|
|
|
- elif len(re_result) == 0:
|
|
|
- raise ValueError("Empty translation result")
|
|
|
else:
|
|
|
result = html.unescape(re_result[0])
|
|
|
return remove_control_characters(result)
|
|
|
|
|
|
|
|
|
class TencentTranslator(BaseTranslator):
    """Translation backend built on the Tencent Cloud SDK ``TmtClient``.

    SDK: https://github.com/TencentCloud/tencentcloud-sdk-python
    """

    # Credential variables for this backend; neither has a default.
    # NOTE(review): presumably these are what DefaultCredentialProvider picks
    # up from the environment - confirm against the SDK documentation.
    envs = {
        "TENCENTCLOUD_SECRET_ID": None,
        "TENCENTCLOUD_SECRET_KEY": None,
    }

    def __init__(self, service, lang_out, lang_in, model):
        """Create the SDK client and a request template.

        ``"auto"`` is mapped to ``"zh"`` for the target language and ``"en"``
        for the source language.
        """
        lang_out = "zh" if lang_out == "auto" else lang_out
        lang_in = "en" if lang_in == "auto" else lang_in
        super().__init__(service, lang_out, lang_in, model)
        cred = credential.DefaultCredentialProvider().get_credential()
        self.client = TmtClient(cred, "ap-beijing")
        # Template carrying the per-instance settings. translate() copies
        # these fields into a fresh request and never mutates the template.
        self.req = TextTranslateRequest()
        self.req.Source = self.lang_in
        self.req.Target = self.lang_out
        self.req.ProjectId = 0

    def translate(self, text):
        """Translate ``text`` and return the ``TargetText`` of the response."""
        # Build a new request per call: writing SourceText into one shared
        # request object would let overlapping calls on the same instance
        # overwrite each other's text before it is sent.
        req = TextTranslateRequest()
        req.Source = self.req.Source
        req.Target = self.req.Target
        req.ProjectId = self.req.ProjectId
        req.SourceText = text
        resp: TextTranslateResponse = self.client.TextTranslate(req)
        return resp.TargetText
|
|
|
|
|
|
|
|
|
class DeepLXTranslator(BaseTranslator):
    """Translation backend that POSTs JSON to a DeepLX-style HTTP endpoint.

    Endpoint reference: https://deeplx.owo.network/endpoints/free.html
    """

    # Endpoint variable and the URL used when it is unset or empty.
    envs = {
        "DEEPLX_ENDPOINT": "https://api.deepl.com/v2/translate",
    }

    def __init__(self, service, lang_out, lang_in, model):
        """Resolve the endpoint URL and open an HTTP session.

        ``"auto"`` is mapped to ``"zh"`` for the target language and ``"en"``
        for the source language.
        """
        lang_out = "zh" if lang_out == "auto" else lang_out
        lang_in = "en" if lang_in == "auto" else lang_in
        super().__init__(service, lang_out, lang_in, model)
        # Fall back to the declared default: without it an unset variable
        # leaves the endpoint as None and every request fails on the URL.
        self.endpoint = os.getenv("DEEPLX_ENDPOINT") or self.envs["DEEPLX_ENDPOINT"]
        self.session = requests.Session()

    def translate(self, text):
        """Translate ``text`` and return the ``"data"`` field of the reply.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            KeyError: If the JSON reply has no ``"data"`` field.
        """
        resp = self.session.post(
            self.endpoint,
            json={
                "source_lang": self.lang_in,
                "target_lang": self.lang_out,
                "text": text,
            },
        )
        # Report HTTP failures as such rather than as a confusing JSON or
        # missing-key error further down.
        resp.raise_for_status()
        return resp.json()["data"]
|
|
|
|
|
|
|
|
|
class DeepLTranslator(BaseTranslator):
|
|
|
+ # https://github.com/DeepLcom/deepl-python
|
|
|
+ envs = {
|
|
|
+ "DEEPL_SERVER_URL": "https://api.deepl.com",
|
|
|
+ "DEEPL_AUTH_KEY": None,
|
|
|
+ }
|
|
|
+
|
|
|
def __init__(self, service, lang_out, lang_in, model):
|
|
|
- lang_out = "ZH" if lang_out == "auto" else lang_out
|
|
|
- lang_in = "EN" if lang_in == "auto" else lang_in
|
|
|
+ lang_out = "zh" if lang_out == "auto" else lang_out
|
|
|
+ lang_in = "en" if lang_in == "auto" else lang_in
|
|
|
super().__init__(service, lang_out, lang_in, model)
|
|
|
self.session = requests.Session()
|
|
|
- auth_key = os.getenv("DEEPL_AUTH_KEY")
|
|
|
server_url = os.getenv("DEEPL_SERVER_URL")
|
|
|
+ auth_key = os.getenv("DEEPL_AUTH_KEY")
|
|
|
self.client = deepl.Translator(auth_key, server_url=server_url)
|
|
|
|
|
|
def translate(self, text):
|
|
|
@@ -257,12 +137,16 @@ class DeepLTranslator(BaseTranslator):
|
|
|
|
|
|
|
|
|
class OllamaTranslator(BaseTranslator):
|
|
|
+ # https://github.com/ollama/ollama-python
|
|
|
+ envs = {
|
|
|
+ "OLLAMA_HOST": "http://127.0.0.1:11434",
|
|
|
+ }
|
|
|
+
|
|
|
def __init__(self, service, lang_out, lang_in, model):
|
|
|
lang_out = "zh-CN" if lang_out == "auto" else lang_out
|
|
|
lang_in = "en" if lang_in == "auto" else lang_in
|
|
|
super().__init__(service, lang_out, lang_in, model)
|
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
|
- # OLLAMA_HOST
|
|
|
self.client = ollama.Client()
|
|
|
|
|
|
def translate(self, text):
|
|
|
@@ -284,13 +168,17 @@ class OllamaTranslator(BaseTranslator):
|
|
|
|
|
|
|
|
|
class OpenAITranslator(BaseTranslator):
|
|
|
+ # https://github.com/openai/openai-python
|
|
|
+ envs = {
|
|
|
+ "OPENAI_BASE_URL": "https://api.openai.com/v1",
|
|
|
+ "OPENAI_API_KEY": None,
|
|
|
+ }
|
|
|
+
|
|
|
def __init__(self, service, lang_out, lang_in, model):
|
|
|
lang_out = "zh-CN" if lang_out == "auto" else lang_out
|
|
|
lang_in = "en" if lang_in == "auto" else lang_in
|
|
|
super().__init__(service, lang_out, lang_in, model)
|
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
|
- # OPENAI_BASE_URL
|
|
|
- # OPENAI_API_KEY
|
|
|
self.client = openai.OpenAI()
|
|
|
|
|
|
def translate(self, text) -> str:
|
|
|
@@ -312,26 +200,22 @@ class OpenAITranslator(BaseTranslator):
|
|
|
|
|
|
|
|
|
class AzureTranslator(BaseTranslator):
|
|
|
+ # https://github.com/Azure/azure-sdk-for-python
|
|
|
+ envs = {
|
|
|
+ "AZURE_ENDPOINT": "https://api.translator.azure.cn",
|
|
|
+ "AZURE_APIKEY": None,
|
|
|
+ }
|
|
|
+
|
|
|
def __init__(self, service, lang_out, lang_in, model):
|
|
|
lang_out = "zh-Hans" if lang_out == "auto" else lang_out
|
|
|
lang_in = "en" if lang_in == "auto" else lang_in
|
|
|
super().__init__(service, lang_out, lang_in, model)
|
|
|
-
|
|
|
- try:
|
|
|
- api_key = os.environ["AZURE_APIKEY"]
|
|
|
- endpoint = os.environ["AZURE_ENDPOINT"]
|
|
|
- region = os.environ["AZURE_REGION"]
|
|
|
- except KeyError as e:
|
|
|
- missing_var = e.args[0]
|
|
|
- raise ValueError(
|
|
|
- f"The environment variable '{missing_var}' is required but not set."
|
|
|
- ) from e
|
|
|
-
|
|
|
+ endpoint = os.environ["AZURE_ENDPOINT"]
|
|
|
+ api_key = os.environ["AZURE_APIKEY"]
|
|
|
credential = AzureKeyCredential(api_key)
|
|
|
self.client = TextTranslationClient(
|
|
|
- endpoint=endpoint, credential=credential, region=region
|
|
|
+ endpoint=endpoint, credential=credential, region="chinaeast2"
|
|
|
)
|
|
|
-
|
|
|
# https://github.com/Azure/azure-sdk-for-python/issues/9422
|
|
|
logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
|
|
|
logger.setLevel(logging.WARNING)
|
|
|
@@ -342,6 +226,5 @@ class AzureTranslator(BaseTranslator):
|
|
|
from_language=self.lang_in,
|
|
|
to_language=[self.lang_out],
|
|
|
)
|
|
|
-
|
|
|
translated_text = response[0].translations[0].text
|
|
|
return translated_text
|