| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460 |
- import html
- import logging
- import os
- import re
- import unicodedata
- import deepl
- import ollama
- import openai
- import requests
- from azure.ai.translation.text import TextTranslationClient
- from azure.core.credentials import AzureKeyCredential
- from tencentcloud.common import credential
- from tencentcloud.tmt.v20180321.tmt_client import TmtClient
- from tencentcloud.tmt.v20180321.models import TextTranslateRequest
- from tencentcloud.tmt.v20180321.models import TextTranslateResponse
- import json
def remove_control_characters(s):
    """Return ``s`` without characters whose Unicode category starts with "C".

    The general category is looked up per character with
    ``unicodedata.category``; every character in a ``C*`` category is dropped
    and the remaining characters are joined back in their original order.
    """
    kept = [ch for ch in s if not unicodedata.category(ch).startswith("C")]
    return "".join(kept)
class BaseTranslator:
    """Common base class for the translation backends in this module.

    Class attributes:
        name: short identifier of the backend.
        envs: environment variable names (with defaults) the backend declares.
        lang_map: mapping from lower-cased language codes to the codes the
            backend expects.
    """

    name = "base"
    envs = {}
    lang_map = {}

    def __init__(self, lang_in, lang_out, model):
        """Store the language pair and model.

        Each language code is looked up in ``lang_map`` by its lower-cased
        form; codes without a mapping are stored exactly as given.
        """
        self.lang_in = self.lang_map.get(lang_in.lower(), lang_in)
        self.lang_out = self.lang_map.get(lang_out.lower(), lang_out)
        self.model = model

    def translate(self, text):
        """Translate ``text``; the base implementation does nothing and returns None."""
        return None

    def prompt(self, text):
        """Build the two-message (system + user) chat prompt asking for a translation of ``text``."""
        system_message = {
            "role": "system",
            "content": "You are a professional,authentic machine translation engine.",
        }
        user_message = {
            "role": "user",
            "content": f"Translate the following markdown source text to {self.lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: {text}\nTranslated Text:",  # noqa: E501
        }
        return [system_message, user_message]

    def __str__(self):
        """Return ``"<name> <lang_in> <lang_out> <model>"``."""
        return "{} {} {} {}".format(
            self.name, self.lang_in, self.lang_out, self.model
        )
class GoogleTranslator(BaseTranslator):
    """Backend that queries the Google Translate mobile web page and scrapes the result."""

    name = "google"
    lang_map = {"zh": "zh-CN"}

    def __init__(self, lang_in, lang_out, model):
        """Create the HTTP session and fix the endpoint and request headers."""
        super().__init__(lang_in, lang_out, model)
        self.session = requests.Session()
        self.endpoint = "http://translate.google.com/m"
        self.headers = {
            "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"  # noqa: E501
        }

    def translate(self, text):
        """Translate ``text`` (truncated to 5000 characters) and return the cleaned result.

        An HTTP 400 answer yields the literal "IRREPARABLE TRANSLATION ERROR";
        any other error status raises through ``raise_for_status``.
        """
        # google translate max length
        query = {"tl": self.lang_out, "sl": self.lang_in, "q": text[:5000]}
        reply = self.session.get(
            self.endpoint, params=query, headers=self.headers
        )
        if reply.status_code == 400:
            return remove_control_characters("IRREPARABLE TRANSLATION ERROR")
        reply.raise_for_status()
        # The translation is the text of the first matching result element.
        matches = re.findall(
            r'(?s)class="(?:t0|result-container)">(.*?)<', reply.text
        )
        return remove_control_characters(html.unescape(matches[0]))
class BingTranslator(BaseTranslator):
    """Backend that scrapes session parameters from the Bing Translator page and posts to it."""

    # https://github.com/immersive-translate/old-immersive-translate/blob/6df13da22664bea2f51efe5db64c63aca59c4e79/src/background/translationService.js
    name = "bing"
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model):
        """Create the HTTP session and fix the endpoint and request headers."""
        super().__init__(lang_in, lang_out, model)
        self.session = requests.Session()
        self.endpoint = "https://www.bing.com/translator"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",  # noqa: E501
        }

    def findSID(self):
        """Fetch the translator page and extract ``(url, ig, iid, key, token)`` from it.

        Raises through ``raise_for_status`` on an HTTP error and ``IndexError``
        when one of the patterns is not present in the page.
        """
        page = self.session.get(self.endpoint)
        page.raise_for_status()
        markup = page.text
        # Drop the last 10 characters of the final URL — presumably the
        # trailing "translator" path segment; confirm against the live page.
        base_url = page.url[:-10]
        ig_matches = re.findall(r"\"ig\":\"(.*?)\"", markup)
        iid_matches = re.findall(r"data-iid=\"(.*?)\"", markup)
        helper_matches = re.findall(
            r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", markup
        )
        ig = ig_matches[0]
        iid = iid_matches[-1]  # the last data-iid occurrence is the one used
        key, token = helper_matches[0]
        return base_url, ig, iid, key, token

    def translate(self, text):
        """Translate ``text`` (truncated to 1000 characters) and return the translated string."""
        snippet = text[:1000]  # bing translate max length
        base_url, ig, iid, key, token = self.findSID()
        form = {
            "fromLang": self.lang_in,
            "to": self.lang_out,
            "text": snippet,
            "token": token,
            "key": key,
        }
        reply = self.session.post(
            f"{base_url}ttranslatev3?IG={ig}&IID={iid}",
            data=form,
            headers=self.headers,
        )
        reply.raise_for_status()
        first_entry = reply.json()[0]
        return first_entry["translations"][0]["text"]
class DeepLTranslator(BaseTranslator):
    """Backend built on the official ``deepl`` Python client."""

    # https://github.com/DeepLcom/deepl-python
    name = "deepl"
    envs = {
        "DEEPL_AUTH_KEY": None,
    }
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model):
        """Create the DeepL client from the ``DEEPL_AUTH_KEY`` environment variable."""
        super().__init__(lang_in, lang_out, model)
        self.client = deepl.Translator(os.getenv("DEEPL_AUTH_KEY"))

    def translate(self, text):
        """Translate ``text`` from ``lang_in`` to ``lang_out`` and return the result text."""
        result = self.client.translate_text(
            text,
            target_lang=self.lang_out,
            source_lang=self.lang_in,
        )
        return result.text
class DeepLXTranslator(BaseTranslator):
    """Backend that posts JSON to a DeepLX-style HTTP endpoint."""

    # https://deeplx.owo.network/endpoints/free.html
    name = "deeplx"
    envs = {
        "DEEPLX_ENDPOINT": "https://api.deepl.com/translate",
    }
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model):
        """Read the endpoint from ``DEEPLX_ENDPOINT`` (default from ``envs``) and open a session."""
        super().__init__(lang_in, lang_out, model)
        default_endpoint = self.envs["DEEPLX_ENDPOINT"]
        self.endpoint = os.getenv("DEEPLX_ENDPOINT", default_endpoint)
        self.session = requests.Session()

    def translate(self, text):
        """Post ``text`` with the language pair and return the ``"data"`` field of the JSON reply."""
        body = {
            "source_lang": self.lang_in,
            "target_lang": self.lang_out,
            "text": text,
        }
        reply = self.session.post(self.endpoint, json=body)
        reply.raise_for_status()
        return reply.json()["data"]
class OllamaTranslator(BaseTranslator):
    """Backend that sends the translation prompt to an Ollama chat model."""

    # https://github.com/ollama/ollama-python
    name = "ollama"
    envs = {
        "OLLAMA_HOST": "http://127.0.0.1:11434",
        "OLLAMA_MODEL": "gemma2",
    }

    def __init__(self, lang_in, lang_out, model):
        """Pick the model (argument, else ``OLLAMA_MODEL``, else the default) and create the client."""
        chosen_model = model or os.getenv("OLLAMA_MODEL", self.envs["OLLAMA_MODEL"])
        super().__init__(lang_in, lang_out, chosen_model)
        # Random sampling may break the formula markers, so sample deterministically.
        self.options = {"temperature": 0}
        self.client = ollama.Client()

    def translate(self, text):
        """Return the stripped content of the chat reply for the translation prompt."""
        messages = self.prompt(text)
        reply = self.client.chat(
            model=self.model,
            options=self.options,
            messages=messages,
        )
        content = reply["message"]["content"]
        return content.strip()
class OpenAITranslator(BaseTranslator):
    """Backend that sends the translation prompt through the ``openai`` chat completions client."""

    # https://github.com/openai/openai-python
    name = "openai"
    envs = {
        "OPENAI_BASE_URL": "https://api.openai.com/v1",
        "OPENAI_API_KEY": None,
        "OPENAI_MODEL": "gpt-4o-mini",
    }

    def __init__(self, lang_in, lang_out, model, base_url=None, api_key=None):
        """Pick the model (argument, else ``OPENAI_MODEL``, else the default) and create the client.

        ``base_url`` and ``api_key`` are handed to ``openai.OpenAI`` unchanged.
        """
        chosen_model = model or os.getenv("OPENAI_MODEL", self.envs["OPENAI_MODEL"])
        super().__init__(lang_in, lang_out, chosen_model)
        # Random sampling may break the formula markers, so sample deterministically.
        self.options = {"temperature": 0}
        self.client = openai.OpenAI(base_url=base_url, api_key=api_key)

    def translate(self, text) -> str:
        """Return the stripped content of the first completion choice for the translation prompt."""
        completion = self.client.chat.completions.create(
            model=self.model,
            **self.options,
            messages=self.prompt(text),
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
class AzureOpenAITranslator(BaseTranslator):
    """Backend that sends the translation prompt through the ``openai.AzureOpenAI`` client."""

    name = "azure-openai"
    envs = {
        "AZURE_OPENAI_BASE_URL": None,  # e.g. "https://xxx.openai.azure.com"
        "AZURE_OPENAI_API_KEY": None,
        "AZURE_OPENAI_MODEL": "gpt-4o-mini",
    }

    def __init__(self, lang_in, lang_out, model, base_url=None, api_key=None):
        """Resolve endpoint and model, then create the Azure OpenAI client.

        Args:
            lang_in: source language code.
            lang_out: target language code.
            model: deployment/model name; when falsy, ``AZURE_OPENAI_MODEL``
                (default from ``envs``) is used.
            base_url: Azure endpoint; when falsy, ``AZURE_OPENAI_BASE_URL``
                (default from ``envs``) is used.
            api_key: passed to the client unchanged.
        """
        # An explicitly supplied endpoint wins; the environment is only a
        # fallback (previously the argument was always overwritten).
        if not base_url:
            base_url = os.getenv(
                "AZURE_OPENAI_BASE_URL", self.envs["AZURE_OPENAI_BASE_URL"]
            )
        if not model:
            model = os.getenv("AZURE_OPENAI_MODEL", self.envs["AZURE_OPENAI_MODEL"])
        super().__init__(lang_in, lang_out, model)
        self.options = {"temperature": 0}
        self.client = openai.AzureOpenAI(
            azure_endpoint=base_url,
            azure_deployment=model,
            api_version="2024-06-01",
            api_key=api_key,
        )

    def translate(self, text) -> str:
        """Return the stripped content of the first completion choice for the translation prompt."""
        response = self.client.chat.completions.create(
            model=self.model,
            **self.options,
            messages=self.prompt(text),
        )
        return response.choices[0].message.content.strip()
class ModelScopeTranslator(OpenAITranslator):
    """OpenAI-compatible backend pointed at the ModelScope inference API."""

    name = "modelscope"
    envs = {
        "MODELSCOPE_BASE_URL": "https://api-inference.modelscope.cn/v1",
        "MODELSCOPE_API_KEY": None,
        "MODELSCOPE_MODEL": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }

    def __init__(self, lang_in, lang_out, model, base_url=None, api_key=None):
        """Resolve endpoint, key and model, then delegate to ``OpenAITranslator``.

        Each value is taken from the argument when given, otherwise from the
        matching ``MODELSCOPE_*`` environment variable, otherwise from the
        default declared in ``envs``. Previously the ``base_url`` and
        ``api_key`` arguments were discarded and ``MODELSCOPE_BASE_URL`` was
        never read.
        """
        if not base_url:
            # ``or`` also covers an environment variable set to an empty string.
            base_url = (
                os.getenv("MODELSCOPE_BASE_URL") or self.envs["MODELSCOPE_BASE_URL"]
            )
        if not api_key:
            api_key = os.getenv("MODELSCOPE_API_KEY")
        if not model:
            model = os.getenv("MODELSCOPE_MODEL", self.envs["MODELSCOPE_MODEL"])
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
class ZhipuTranslator(OpenAITranslator):
    """OpenAI-compatible backend pointed at the Zhipu (bigmodel.cn) API."""

    # https://bigmodel.cn/dev/api/thirdparty-frame/openai-sdk
    name = "zhipu"
    envs = {
        "ZHIPU_API_KEY": None,
        "ZHIPU_MODEL": "glm-4-flash",
    }

    def __init__(self, lang_in, lang_out, model):
        """Read key and model from ``ZHIPU_*`` variables and delegate to ``OpenAITranslator``."""
        base_url = "https://open.bigmodel.cn/api/paas/v4"
        api_key = os.getenv("ZHIPU_API_KEY")
        if not model:
            model = os.getenv("ZHIPU_MODEL", self.envs["ZHIPU_MODEL"])
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)

    @staticmethod
    def _error_code(error):
        """Return the provider error code carried by an openai API error as a string, or None."""
        code = getattr(error, "code", None)
        if code is None:
            # Fall back to the parsed response body, which may be either the
            # error object itself or a wrapper with an "error" member.
            body = getattr(error, "body", None)
            if isinstance(body, dict):
                detail = body.get("error")
                if not isinstance(detail, dict):
                    detail = body
                code = detail.get("code")
        return None if code is None else str(code)

    def translate(self, text) -> str:
        """Translate ``text``; return a fixed marker when the API rejects it with code 1301.

        Returns:
            The stripped completion text, or "IRREPARABLE TRANSLATION ERROR"
            for a bad-request error whose code is "1301" (presumably a
            content-filter rejection — confirm against the Zhipu error table).

        Raises:
            openai.BadRequestError: for any other bad-request error.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                **self.options,
                messages=self.prompt(text),
            )
        except openai.BadRequestError as e:
            # ``response`` is unbound here because create() raised, so the
            # error code has to come from the exception itself.
            if self._error_code(e) == "1301":
                return "IRREPARABLE TRANSLATION ERROR"
            raise
        return response.choices[0].message.content.strip()
class SiliconTranslator(OpenAITranslator):
    """OpenAI-compatible backend pointed at the SiliconFlow API."""

    # https://docs.siliconflow.cn/quickstart
    name = "silicon"
    envs = {
        "SILICON_API_KEY": None,
        "SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
    }

    def __init__(self, lang_in, lang_out, model):
        """Read key and model from ``SILICON_*`` variables and delegate to ``OpenAITranslator``."""
        chosen_model = model or os.getenv("SILICON_MODEL", self.envs["SILICON_MODEL"])
        super().__init__(
            lang_in,
            lang_out,
            chosen_model,
            base_url="https://api.siliconflow.cn/v1",
            api_key=os.getenv("SILICON_API_KEY"),
        )
class GeminiTranslator(OpenAITranslator):
    """OpenAI-compatible backend pointed at the Gemini OpenAI-compatibility endpoint."""

    # https://ai.google.dev/gemini-api/docs/openai
    name = "gemini"
    envs = {
        "GEMINI_API_KEY": None,
        "GEMINI_MODEL": "gemini-1.5-flash",
    }

    def __init__(self, lang_in, lang_out, model):
        """Read key and model from ``GEMINI_*`` variables and delegate to ``OpenAITranslator``."""
        chosen_model = model or os.getenv("GEMINI_MODEL", self.envs["GEMINI_MODEL"])
        super().__init__(
            lang_in,
            lang_out,
            chosen_model,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=os.getenv("GEMINI_API_KEY"),
        )
class AzureTranslator(BaseTranslator):
    """Backend built on the Azure ``TextTranslationClient``."""

    # https://github.com/Azure/azure-sdk-for-python
    name = "azure"
    envs = {
        "AZURE_ENDPOINT": "https://api.translator.azure.cn",
        "AZURE_API_KEY": None,
    }
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model):
        """Create the client from ``AZURE_ENDPOINT``/``AZURE_API_KEY`` and quiet HTTP logging."""
        super().__init__(lang_in, lang_out, model)
        service_endpoint = os.getenv("AZURE_ENDPOINT", self.envs["AZURE_ENDPOINT"])
        key_credential = AzureKeyCredential(os.getenv("AZURE_API_KEY"))
        self.client = TextTranslationClient(
            endpoint=service_endpoint,
            credential=key_credential,
            region="chinaeast2",
        )
        # Raise the SDK's HTTP logging policy logger to WARNING.
        # https://github.com/Azure/azure-sdk-for-python/issues/9422
        logging.getLogger(
            "azure.core.pipeline.policies.http_logging_policy"
        ).setLevel(logging.WARNING)

    def translate(self, text) -> str:
        """Translate ``text`` and return the text of the first translation of the first item."""
        results = self.client.translate(
            body=[text],
            from_language=self.lang_in,
            to_language=[self.lang_out],
        )
        first_item = results[0]
        return first_item.translations[0].text
class TencentTranslator(BaseTranslator):
    """Backend built on the Tencent Cloud machine translation (TMT) client."""

    # https://github.com/TencentCloud/tencentcloud-sdk-python
    name = "tencent"
    envs = {
        "TENCENTCLOUD_SECRET_ID": None,
        "TENCENTCLOUD_SECRET_KEY": None,
    }

    def __init__(self, lang_in, lang_out, model):
        """Create the TMT client and a request pre-filled with the language pair."""
        super().__init__(lang_in, lang_out, model)
        provider = credential.DefaultCredentialProvider()
        self.client = TmtClient(provider.get_credential(), "ap-beijing")
        # One request object is prepared here and reused by every translate() call.
        request = TextTranslateRequest()
        request.Source = self.lang_in
        request.Target = self.lang_out
        request.ProjectId = 0
        self.req = request

    def translate(self, text):
        """Set ``text`` on the shared request, send it and return ``TargetText`` of the reply."""
        self.req.SourceText = text
        reply = self.client.TextTranslate(self.req)
        return reply.TargetText
class AnythingLLMTranslator(BaseTranslator):
    """Backend that posts the translation prompt to an AnythingLLM chat endpoint."""

    name = "anythingllm"
    envs = {
        "AnythingLLM_URL": None,
        "AnythingLLM_APIKEY": "api_key",
    }

    def __init__(self, lang_in, lang_out, model):
        """Read URL and API key from the ``AnythingLLM_*`` variables and build the headers.

        The parameter order now matches every other backend
        (``lang_in, lang_out, model``). The names were previously swapped, so
        keyword callers had their languages reversed; positional callers are
        unaffected.
        """
        super().__init__(lang_in, lang_out, model)
        self.api_url = os.getenv("AnythingLLM_URL", self.envs["AnythingLLM_URL"])
        self.api_key = os.getenv("AnythingLLM_APIKEY", self.envs["AnythingLLM_APIKEY"])
        self.headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def translate(self, text):
        """Post the prompt for ``text`` and return the stripped ``"textResponse"`` field.

        Returns None when the JSON reply has no ``"textResponse"`` key.
        Raises through ``raise_for_status`` on an HTTP error.
        """
        messages = self.prompt(text)
        payload = {
            "message": messages,
            "mode": "chat",
            "sessionId": "translation_expert",
        }
        response = requests.post(
            self.api_url, headers=self.headers, data=json.dumps(payload)
        )
        response.raise_for_status()
        data = response.json()
        if "textResponse" in data:
            return data["textResponse"].strip()
        return None
class DifyTranslator(BaseTranslator):
    """Backend that posts the text and language pair to a Dify API endpoint."""

    name = "dify"
    envs = {
        "DIFY_API_URL": None,  # fill in the actual Dify API URL
        "DIFY_API_KEY": "api_key",  # replace with the real API key
    }

    def __init__(self, lang_in, lang_out, model):
        """Read URL and API key from the ``DIFY_*`` environment variables.

        The parameter order now matches every other backend
        (``lang_in, lang_out, model``). The names were previously swapped, so
        keyword callers had their languages reversed; positional callers are
        unaffected.
        """
        super().__init__(lang_in, lang_out, model)
        self.api_url = os.getenv("DIFY_API_URL", self.envs["DIFY_API_URL"])
        self.api_key = os.getenv("DIFY_API_KEY", self.envs["DIFY_API_KEY"])

    def translate(self, text):
        """Send ``text`` in blocking mode and return ``data.outputs.text`` from the JSON reply.

        Falls back to an empty list when any of those keys is missing.
        Raises through ``raise_for_status`` on an HTTP error.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "inputs": {
                "lang_out": self.lang_out,
                "lang_in": self.lang_in,
                "text": text,
            },
            "response_mode": "blocking",
            "user": "translator-service",
        }
        # Send the request to the Dify server.
        response = requests.post(
            self.api_url, headers=headers, data=json.dumps(payload)
        )
        response.raise_for_status()
        response_data = response.json()
        # Parse the response.
        return response_data.get("data", {}).get("outputs", {}).get("text", [])
|