|
@@ -8,6 +8,7 @@ import deepl
|
|
|
import ollama
|
|
import ollama
|
|
|
import openai
|
|
import openai
|
|
|
import requests
|
|
import requests
|
|
|
|
|
+from pdf2zh.cache import TranslationCache
|
|
|
from azure.ai.translation.text import TextTranslationClient
|
|
from azure.ai.translation.text import TextTranslationClient
|
|
|
from azure.core.credentials import AzureKeyCredential
|
|
from azure.core.credentials import AzureKeyCredential
|
|
|
from tencentcloud.common import credential
|
|
from tencentcloud.common import credential
|
|
@@ -29,6 +30,7 @@ class BaseTranslator:
|
|
|
envs = {}
|
|
envs = {}
|
|
|
lang_map = {}
|
|
lang_map = {}
|
|
|
CustomPrompt = False
|
|
CustomPrompt = False
|
|
|
|
|
+ ignore_cache = False
|
|
|
|
|
|
|
|
def __init__(self, lang_in, lang_out, model):
|
|
def __init__(self, lang_in, lang_out, model):
|
|
|
lang_in = self.lang_map.get(lang_in.lower(), lang_in)
|
|
lang_in = self.lang_map.get(lang_in.lower(), lang_in)
|
|
@@ -37,6 +39,15 @@ class BaseTranslator:
|
|
|
self.lang_out = lang_out
|
|
self.lang_out = lang_out
|
|
|
self.model = model
|
|
self.model = model
|
|
|
|
|
|
|
|
|
|
+ self.cache = TranslationCache(
|
|
|
|
|
+ self.name,
|
|
|
|
|
+ {
|
|
|
|
|
+ "lang_in": lang_in,
|
|
|
|
|
+ "lang_out": lang_out,
|
|
|
|
|
+ "model": model,
|
|
|
|
|
+ },
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
def set_envs(self, envs):
|
|
def set_envs(self, envs):
|
|
|
# Detach from self.__class__.envs
|
|
# Detach from self.__class__.envs
|
|
|
# Cannot use self.envs = copy(self.__class__.envs)
|
|
# Cannot use self.envs = copy(self.__class__.envs)
|
|
@@ -49,8 +60,36 @@ class BaseTranslator:
|
|
|
for key in envs:
|
|
for key in envs:
|
|
|
self.envs[key] = envs[key]
|
|
self.envs[key] = envs[key]
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
- pass
|
|
|
|
|
|
|
def add_cache_impact_parameters(self, k: str, v):
    """
    Register an extra parameter on this translator's cache.

    Use this for settings that influence the translation output, so that
    results produced under different settings are kept apart in the cache.

    :param k: parameter name
    :param v: parameter value
    """
    translation_cache = self.cache
    translation_cache.add_params(k, v)
|
|
|
|
|
+
|
|
|
|
|
def translate(self, text, ignore_cache=False):
    """
    Translate ``text``, consulting the translation cache first.

    This is the entry point the rest of the code should call; the
    backend-specific work is delegated to ``do_translate``.

    :param text: text to translate
    :param ignore_cache: when true (or when ``self.ignore_cache`` is true),
        skip the cache lookup and always call ``do_translate``; the fresh
        result is still written back to the cache
    :return: translated text
    """
    lookup_allowed = not self.ignore_cache and not ignore_cache
    if lookup_allowed:
        cached = self.cache.get(text)
        if cached is not None:
            return cached

    # Cache miss, or lookup bypassed: translate and store the result.
    result = self.do_translate(text)
    self.cache.set(text, result)
    return result
|
|
|
|
|
+
|
|
|
|
|
def do_translate(self, text):
    """
    Perform the actual translation; subclasses must override this method.

    :param text: text to translate
    :return: translated text
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError()
|
|
|
|
|
|
|
|
def prompt(self, text, prompt):
|
|
def prompt(self, text, prompt):
|
|
|
if prompt:
|
|
if prompt:
|
|
@@ -88,7 +127,7 @@ class GoogleTranslator(BaseTranslator):
|
|
|
"User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)" # noqa: E501
|
|
"User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)" # noqa: E501
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
text = text[:5000] # google translate max length
|
|
text = text[:5000] # google translate max length
|
|
|
response = self.session.get(
|
|
response = self.session.get(
|
|
|
self.endpoint,
|
|
self.endpoint,
|
|
@@ -119,7 +158,7 @@ class BingTranslator(BaseTranslator):
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", # noqa: E501
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", # noqa: E501
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def findSID(self):
|
|
|
|
|
|
|
+ def find_sid(self):
|
|
|
response = self.session.get(self.endpoint)
|
|
response = self.session.get(self.endpoint)
|
|
|
response.raise_for_status()
|
|
response.raise_for_status()
|
|
|
url = response.url[:-10]
|
|
url = response.url[:-10]
|
|
@@ -130,9 +169,9 @@ class BingTranslator(BaseTranslator):
|
|
|
)[0]
|
|
)[0]
|
|
|
return url, ig, iid, key, token
|
|
return url, ig, iid, key, token
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
text = text[:1000] # bing translate max length
|
|
text = text[:1000] # bing translate max length
|
|
|
- url, ig, iid, key, token = self.findSID()
|
|
|
|
|
|
|
+ url, ig, iid, key, token = self.find_sid()
|
|
|
response = self.session.post(
|
|
response = self.session.post(
|
|
|
f"{url}ttranslatev3?IG={ig}&IID={iid}",
|
|
f"{url}ttranslatev3?IG={ig}&IID={iid}",
|
|
|
data={
|
|
data={
|
|
@@ -162,7 +201,7 @@ class DeepLTranslator(BaseTranslator):
|
|
|
auth_key = self.envs["DEEPL_AUTH_KEY"]
|
|
auth_key = self.envs["DEEPL_AUTH_KEY"]
|
|
|
self.client = deepl.Translator(auth_key)
|
|
self.client = deepl.Translator(auth_key)
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
response = self.client.translate_text(
|
|
response = self.client.translate_text(
|
|
|
text, target_lang=self.lang_out, source_lang=self.lang_in
|
|
text, target_lang=self.lang_out, source_lang=self.lang_in
|
|
|
)
|
|
)
|
|
@@ -183,7 +222,7 @@ class DeepLXTranslator(BaseTranslator):
|
|
|
self.endpoint = self.envs["DEEPLX_ENDPOINT"]
|
|
self.endpoint = self.envs["DEEPLX_ENDPOINT"]
|
|
|
self.session = requests.Session()
|
|
self.session = requests.Session()
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
response = self.session.post(
|
|
response = self.session.post(
|
|
|
self.endpoint,
|
|
self.endpoint,
|
|
|
json={
|
|
json={
|
|
@@ -213,8 +252,11 @@ class OllamaTranslator(BaseTranslator):
|
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
|
self.client = ollama.Client()
|
|
self.client = ollama.Client()
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ self.add_cache_impact_parameters("temperature", self.options["temperature"])
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
maxlen = max(2000, len(text) * 5)
|
|
maxlen = max(2000, len(text) * 5)
|
|
|
for model in self.model.split(";"):
|
|
for model in self.model.split(";"):
|
|
|
try:
|
|
try:
|
|
@@ -263,8 +305,11 @@ class OpenAITranslator(BaseTranslator):
|
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
self.options = {"temperature": 0} # 随机采样可能会打断公式标记
|
|
|
self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
|
|
self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ self.add_cache_impact_parameters("temperature", self.options["temperature"])
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
- def translate(self, text) -> str:
|
|
|
|
|
|
|
+ def do_translate(self, text) -> str:
|
|
|
response = self.client.chat.completions.create(
|
|
response = self.client.chat.completions.create(
|
|
|
model=self.model,
|
|
model=self.model,
|
|
|
**self.options,
|
|
**self.options,
|
|
@@ -305,8 +350,11 @@ class AzureOpenAITranslator(BaseTranslator):
|
|
|
api_key=api_key,
|
|
api_key=api_key,
|
|
|
)
|
|
)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ self.add_cache_impact_parameters("temperature", self.options["temperature"])
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
- def translate(self, text) -> str:
|
|
|
|
|
|
|
+ def do_translate(self, text) -> str:
|
|
|
response = self.client.chat.completions.create(
|
|
response = self.client.chat.completions.create(
|
|
|
model=self.model,
|
|
model=self.model,
|
|
|
**self.options,
|
|
**self.options,
|
|
@@ -341,6 +389,8 @@ class ModelScopeTranslator(OpenAITranslator):
|
|
|
model = self.envs["MODELSCOPE_MODEL"]
|
|
model = self.envs["MODELSCOPE_MODEL"]
|
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
class ZhipuTranslator(OpenAITranslator):
|
|
class ZhipuTranslator(OpenAITranslator):
|
|
@@ -360,8 +410,10 @@ class ZhipuTranslator(OpenAITranslator):
|
|
|
model = self.envs["ZHIPU_MODEL"]
|
|
model = self.envs["ZHIPU_MODEL"]
|
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
- def translate(self, text) -> str:
|
|
|
|
|
|
|
+ def do_translate(self, text) -> str:
|
|
|
try:
|
|
try:
|
|
|
response = self.client.chat.completions.create(
|
|
response = self.client.chat.completions.create(
|
|
|
model=self.model,
|
|
model=self.model,
|
|
@@ -395,6 +447,8 @@ class SiliconTranslator(OpenAITranslator):
|
|
|
model = self.envs["SILICON_MODEL"]
|
|
model = self.envs["SILICON_MODEL"]
|
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
class GeminiTranslator(OpenAITranslator):
|
|
class GeminiTranslator(OpenAITranslator):
|
|
@@ -414,6 +468,8 @@ class GeminiTranslator(OpenAITranslator):
|
|
|
model = self.envs["GEMINI_MODEL"]
|
|
model = self.envs["GEMINI_MODEL"]
|
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
class AzureTranslator(BaseTranslator):
|
|
class AzureTranslator(BaseTranslator):
|
|
@@ -438,7 +494,7 @@ class AzureTranslator(BaseTranslator):
|
|
|
logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
|
|
logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
|
|
|
logger.setLevel(logging.WARNING)
|
|
logger.setLevel(logging.WARNING)
|
|
|
|
|
|
|
|
- def translate(self, text) -> str:
|
|
|
|
|
|
|
+ def do_translate(self, text) -> str:
|
|
|
response = self.client.translate(
|
|
response = self.client.translate(
|
|
|
body=[text],
|
|
body=[text],
|
|
|
from_language=self.lang_in,
|
|
from_language=self.lang_in,
|
|
@@ -466,7 +522,7 @@ class TencentTranslator(BaseTranslator):
|
|
|
self.req.Target = self.lang_out
|
|
self.req.Target = self.lang_out
|
|
|
self.req.ProjectId = 0
|
|
self.req.ProjectId = 0
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
self.req.SourceText = text
|
|
self.req.SourceText = text
|
|
|
resp: TextTranslateResponse = self.client.TextTranslate(self.req)
|
|
resp: TextTranslateResponse = self.client.TextTranslate(self.req)
|
|
|
return resp.TargetText
|
|
return resp.TargetText
|
|
@@ -491,8 +547,10 @@ class AnythingLLMTranslator(BaseTranslator):
|
|
|
"Content-Type": "application/json",
|
|
"Content-Type": "application/json",
|
|
|
}
|
|
}
|
|
|
self.prompttext = prompt
|
|
self.prompttext = prompt
|
|
|
|
|
+ if prompt:
|
|
|
|
|
+ self.add_cache_impact_parameters("prompt", prompt)
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
messages = self.prompt(text, self.prompttext)
|
|
messages = self.prompt(text, self.prompttext)
|
|
|
payload = {
|
|
payload = {
|
|
|
"message": messages,
|
|
"message": messages,
|
|
@@ -523,7 +581,7 @@ class DifyTranslator(BaseTranslator):
|
|
|
self.api_url = self.envs["DIFY_API_URL"]
|
|
self.api_url = self.envs["DIFY_API_URL"]
|
|
|
self.api_key = self.envs["DIFY_API_KEY"]
|
|
self.api_key = self.envs["DIFY_API_KEY"]
|
|
|
|
|
|
|
|
- def translate(self, text):
|
|
|
|
|
|
|
+ def do_translate(self, text):
|
|
|
headers = {
|
|
headers = {
|
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
|
"Content-Type": "application/json",
|
|
"Content-Type": "application/json",
|