|
@@ -17,7 +17,6 @@ from tencentcloud.tmt.v20180321.models import TextTranslateResponse
|
|
|
|
|
|
|
|
import json
|
|
import json
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def remove_control_characters(s):
    """Return *s* with every Unicode control character (category "C*") removed."""
    kept = [ch for ch in s if not unicodedata.category(ch).startswith("C")]
    return "".join(kept)
|
|
|
|
|
|
|
@@ -49,17 +48,25 @@ class BaseTranslator:
|
|
|
def translate(self, text):
    """Translate *text*; no-op hook that concrete translator subclasses override."""
    pass
|
|
|
|
|
|
|
|
def prompt(self, text, prompt=None):
    """Build the chat message list used to translate *text*.

    Args:
        text: Source text to translate.
        prompt: Optional ``string.Template`` whose substitution (with
            ``lang_in``, ``lang_out`` and ``text``) must evaluate to a
            message list. Defaults to ``None`` so existing
            ``self.prompt(text)`` callers keep working with the
            built-in prompt.

    Returns:
        A list of ``{"role": ..., "content": ...}`` message dicts.
    """
    if prompt:
        context = {
            "lang_in": self.lang_in,
            "lang_out": self.lang_out,
            "text": text,
        }
        # SECURITY NOTE(review): eval() executes arbitrary code contained in
        # the user-supplied template — only accept templates from trusted
        # configuration, or switch to ast.literal_eval if templates are
        # guaranteed to be plain literals.
        return eval(prompt.safe_substitute(context))
    return [
        {
            "role": "system",
            "content": "You are a professional,authentic machine translation engine.",
        },
        {
            "role": "user",
            "content": f"Translate the following markdown source text to {self.lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: {text}\nTranslated Text:",  # noqa: E501
        },
    ]
|
|
|
|
|
|
|
|
def __str__(self):
|
|
def __str__(self):
|
|
|
return f"{self.name} {self.lang_in} {self.lang_out} {self.model}"
|
|
return f"{self.name} {self.lang_in} {self.lang_out} {self.model}"
|
|
@@ -145,7 +152,7 @@ class DeepLTranslator(BaseTranslator):
|
|
|
}
|
|
}
|
|
|
lang_map = {"zh": "zh-Hans"}
|
|
lang_map = {"zh": "zh-Hans"}
|
|
|
|
|
|
|
|
- def __init__(self, lang_in, lang_out, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_in, lang_out, model)
|
|
super().__init__(lang_in, lang_out, model)
|
|
|
auth_key = self.envs["DEEPL_AUTH_KEY"]
|
|
auth_key = self.envs["DEEPL_AUTH_KEY"]
|
|
@@ -166,7 +173,7 @@ class DeepLXTranslator(BaseTranslator):
|
|
|
}
|
|
}
|
|
|
lang_map = {"zh": "zh-Hans"}
|
|
lang_map = {"zh": "zh-Hans"}
|
|
|
|
|
|
|
|
- def __init__(self, lang_in, lang_out, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_in, lang_out, model)
|
|
super().__init__(lang_in, lang_out, model)
|
|
|
self.endpoint = self.envs["DEEPLX_ENDPOINT"]
|
|
self.endpoint = self.envs["DEEPLX_ENDPOINT"]
|
|
@@ -193,19 +200,23 @@ class OllamaTranslator(BaseTranslator):
|
|
|
"OLLAMA_MODEL": "gemma2",
|
|
"OLLAMA_MODEL": "gemma2",
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
    """Set up an Ollama-backed translator.

    Args:
        lang_in: Source language code.
        lang_out: Target language code.
        model: Ollama model name; falls back to the OLLAMA_MODEL env value.
        envs: Optional environment overrides passed to set_envs().
        prompt: Optional prompt template, stored for later use by prompt().
    """
    self.set_envs(envs)
    model = model or self.envs["OLLAMA_MODEL"]
    super().__init__(lang_in, lang_out, model)
    # Deterministic sampling — random sampling may break the formula markers.
    self.options = {"temperature": 0}
    self.client = ollama.Client()
    self.prompttext = prompt
|
|
|
|
|
|
|
|
def translate(self, text):
    """Translate *text* through the configured Ollama chat model.

    Returns the stripped content of the model's reply.
    """
    # Build the message list once; the removed debug print() calls each
    # re-invoked self.prompt(), doing the same work four times per call.
    messages = self.prompt(text, self.prompttext)
    response = self.client.chat(
        model=self.model,
        options=self.options,
        messages=messages,
    )
    return response["message"]["content"].strip()
|
|
|
|
|
|
|
@@ -220,7 +231,7 @@ class OpenAITranslator(BaseTranslator):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(
|
|
def __init__(
|
|
|
- self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None
|
|
|
|
|
|
|
+ self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None,prompt=None
|
|
|
):
|
|
):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
if not model:
|
|
if not model:
|
|
@@ -228,12 +239,13 @@ class OpenAITranslator(BaseTranslator):
|
|
|
super().__init__(lang_in, lang_out, model)
|
|
super().__init__(lang_in, lang_out, model)
|
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
|
self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
|
|
self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
|
|
|
|
|
+ self.prompttext=prompt
|
|
|
|
|
|
|
|
def translate(self, text) -> str:
    """Translate *text* via the OpenAI chat-completions endpoint."""
    messages = self.prompt(text, self.prompttext)
    completion = self.client.chat.completions.create(
        model=self.model,
        **self.options,
        messages=messages,
    )
    return completion.choices[0].message.content.strip()
|
|
|
|
|
|
|
@@ -247,7 +259,7 @@ class AzureOpenAITranslator(BaseTranslator):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(
|
|
def __init__(
|
|
|
- self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None
|
|
|
|
|
|
|
+ self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None,prompt=None
|
|
|
):
|
|
):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
base_url = self.envs["AZURE_OPENAI_BASE_URL"]
|
|
base_url = self.envs["AZURE_OPENAI_BASE_URL"]
|
|
@@ -261,12 +273,13 @@ class AzureOpenAITranslator(BaseTranslator):
|
|
|
api_version="2024-06-01",
|
|
api_version="2024-06-01",
|
|
|
api_key=api_key,
|
|
api_key=api_key,
|
|
|
)
|
|
)
|
|
|
|
|
+ self.prompttext=prompt
|
|
|
|
|
|
|
|
def translate(self, text) -> str:
    """Translate *text* via the Azure OpenAI chat-completions endpoint."""
    messages = self.prompt(text, self.prompttext)
    completion = self.client.chat.completions.create(
        model=self.model,
        **self.options,
        messages=messages,
    )
    return completion.choices[0].message.content.strip()
|
|
|
|
|
|
|
@@ -280,7 +293,7 @@ class ModelScopeTranslator(OpenAITranslator):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(
|
|
def __init__(
|
|
|
- self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None
|
|
|
|
|
|
|
+ self, lang_in, lang_out, model, base_url=None, api_key=None, envs=None,prompt=None
|
|
|
):
|
|
):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
base_url = "https://api-inference.modelscope.cn/v1"
|
|
base_url = "https://api-inference.modelscope.cn/v1"
|
|
@@ -288,6 +301,7 @@ class ModelScopeTranslator(OpenAITranslator):
|
|
|
if not model:
|
|
if not model:
|
|
|
model = self.envs["MODELSCOPE_MODEL"]
|
|
model = self.envs["MODELSCOPE_MODEL"]
|
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
|
|
|
+ self.prompttext=prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
class ZhipuTranslator(OpenAITranslator):
|
|
class ZhipuTranslator(OpenAITranslator):
|
|
@@ -298,20 +312,21 @@ class ZhipuTranslator(OpenAITranslator):
|
|
|
"ZHIPU_MODEL": "glm-4-flash",
|
|
"ZHIPU_MODEL": "glm-4-flash",
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
    """Configure a Zhipu (bigmodel.cn) translator via the OpenAI client.

    Args:
        lang_in: Source language code.
        lang_out: Target language code.
        model: Model name; falls back to the ZHIPU_MODEL env value.
        envs: Optional environment overrides passed to set_envs().
        prompt: Optional prompt template, stored for later use by prompt().
    """
    self.set_envs(envs)
    base_url = "https://open.bigmodel.cn/api/paas/v4"
    api_key = self.envs["ZHIPU_API_KEY"]
    model = model or self.envs["ZHIPU_MODEL"]
    super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
    self.prompttext = prompt
|
|
|
|
|
|
|
|
def translate(self, text) -> str:
|
|
def translate(self, text) -> str:
|
|
|
try:
|
|
try:
|
|
|
response = self.client.chat.completions.create(
|
|
response = self.client.chat.completions.create(
|
|
|
model=self.model,
|
|
model=self.model,
|
|
|
**self.options,
|
|
**self.options,
|
|
|
- messages=self.prompt(text),
|
|
|
|
|
|
|
+ messages=self.prompt(text,self.prompttext),
|
|
|
)
|
|
)
|
|
|
except openai.BadRequestError as e:
|
|
except openai.BadRequestError as e:
|
|
|
if (
|
|
if (
|
|
@@ -331,13 +346,14 @@ class SiliconTranslator(OpenAITranslator):
|
|
|
"SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
|
|
"SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
    """Configure a SiliconFlow translator via the OpenAI client.

    Args:
        lang_in: Source language code.
        lang_out: Target language code.
        model: Model name; falls back to the SILICON_MODEL env value.
        envs: Optional environment overrides passed to set_envs().
        prompt: Optional prompt template, stored for later use by prompt().
    """
    self.set_envs(envs)
    base_url = "https://api.siliconflow.cn/v1"
    api_key = self.envs["SILICON_API_KEY"]
    model = model or self.envs["SILICON_MODEL"]
    super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
    self.prompttext = prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
class GeminiTranslator(OpenAITranslator):
|
|
class GeminiTranslator(OpenAITranslator):
|
|
@@ -348,14 +364,14 @@ class GeminiTranslator(OpenAITranslator):
|
|
|
"GEMINI_MODEL": "gemini-1.5-flash",
|
|
"GEMINI_MODEL": "gemini-1.5-flash",
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
    """Configure a Gemini translator via its OpenAI-compatible endpoint.

    Args:
        lang_in: Source language code.
        lang_out: Target language code.
        model: Model name; falls back to the GEMINI_MODEL env value.
        envs: Optional environment overrides passed to set_envs().
        prompt: Optional prompt template, stored for later use by prompt().
    """
    self.set_envs(envs)
    base_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
    api_key = self.envs["GEMINI_API_KEY"]
    model = model or self.envs["GEMINI_MODEL"]
    super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
    self.prompttext = prompt
|
|
|
|
|
|
|
|
class AzureTranslator(BaseTranslator):
|
|
class AzureTranslator(BaseTranslator):
|
|
|
# https://github.com/Azure/azure-sdk-for-python
|
|
# https://github.com/Azure/azure-sdk-for-python
|
|
@@ -366,7 +382,7 @@ class AzureTranslator(BaseTranslator):
|
|
|
}
|
|
}
|
|
|
lang_map = {"zh": "zh-Hans"}
|
|
lang_map = {"zh": "zh-Hans"}
|
|
|
|
|
|
|
|
- def __init__(self, lang_in, lang_out, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_in, lang_out, model)
|
|
super().__init__(lang_in, lang_out, model)
|
|
|
endpoint = self.envs["AZURE_ENDPOINT"]
|
|
endpoint = self.envs["AZURE_ENDPOINT"]
|
|
@@ -397,7 +413,7 @@ class TencentTranslator(BaseTranslator):
|
|
|
"TENCENTCLOUD_SECRET_KEY": None,
|
|
"TENCENTCLOUD_SECRET_KEY": None,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def __init__(self, lang_in, lang_out, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_in, lang_out, model)
|
|
super().__init__(lang_in, lang_out, model)
|
|
|
cred = credential.DefaultCredentialProvider().get_credential()
|
|
cred = credential.DefaultCredentialProvider().get_credential()
|
|
@@ -420,7 +436,7 @@ class AnythingLLMTranslator(BaseTranslator):
|
|
|
"AnythingLLM_APIKEY": None,
|
|
"AnythingLLM_APIKEY": None,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def __init__(self, lang_out, lang_in, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_out, lang_in, model, envs=None,prompt=None):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_out, lang_in, model)
|
|
super().__init__(lang_out, lang_in, model)
|
|
|
self.api_url = self.envs["AnythingLLM_URL"]
|
|
self.api_url = self.envs["AnythingLLM_URL"]
|
|
@@ -430,9 +446,10 @@ class AnythingLLMTranslator(BaseTranslator):
|
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
|
"Content-Type": "application/json",
|
|
"Content-Type": "application/json",
|
|
|
}
|
|
}
|
|
|
|
|
+ self.prompttext=prompt
|
|
|
|
|
|
|
|
def translate(self, text):
|
|
def translate(self, text):
|
|
|
- messages = self.prompt(text)
|
|
|
|
|
|
|
+ messages = self.prompt(text,self.prompttext)
|
|
|
payload = {
|
|
payload = {
|
|
|
"message": messages,
|
|
"message": messages,
|
|
|
"mode": "chat",
|
|
"mode": "chat",
|
|
@@ -456,7 +473,7 @@ class DifyTranslator(BaseTranslator):
|
|
|
"DIFY_API_KEY": None, # 替换为实际 API 密钥
|
|
"DIFY_API_KEY": None, # 替换为实际 API 密钥
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def __init__(self, lang_out, lang_in, model, envs=None):
|
|
|
|
|
|
|
+ def __init__(self, lang_out, lang_in, model, envs=None, **kwargs):
|
|
|
self.set_envs(envs)
|
|
self.set_envs(envs)
|
|
|
super().__init__(lang_out, lang_in, model)
|
|
super().__init__(lang_out, lang_in, model)
|
|
|
self.api_url = self.envs["DIFY_API_URL"]
|
|
self.api_url = self.envs["DIFY_API_URL"]
|