|
|
@@ -12,11 +12,13 @@ from string import Template
|
|
|
from typing import List, Optional
|
|
|
|
|
|
from pdf2zh import __version__, log
|
|
|
-from pdf2zh.high_level import translate
|
|
|
+from pdf2zh.high_level import translate, download_remote_fonts
|
|
|
from pdf2zh.doclayout import OnnxModel, ModelInstance
|
|
|
import os
|
|
|
|
|
|
from pdf2zh.config import ConfigManager
|
|
|
+from yadt.translation_config import TranslationConfig as YadtConfig
|
|
|
+from yadt.high_level import translate as yadt_translate
|
|
|
|
|
|
|
|
|
def create_parser() -> argparse.ArgumentParser:
|
|
|
@@ -164,6 +166,13 @@ def create_parser() -> argparse.ArgumentParser:
|
|
|
help="config file.",
|
|
|
)
|
|
|
|
|
|
+ parse_params.add_argument(
|
|
|
+ "--yadt",
|
|
|
+ default=False,
|
|
|
+ action="store_true",
|
|
|
+ help="Use experimental backend yadt.",
|
|
|
+ )
|
|
|
+
|
|
|
return parser
|
|
|
|
|
|
|
|
|
@@ -178,6 +187,7 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
|
|
|
pages.extend(range(int(start) - 1, int(end)))
|
|
|
else:
|
|
|
pages.append(int(p) - 1)
|
|
|
+ parsed_args.raw_pages = parsed_args.pages
|
|
|
parsed_args.pages = pages
|
|
|
|
|
|
return parsed_args
|
|
|
@@ -255,6 +265,8 @@ def main(args: Optional[List[str]] = None) -> int:
|
|
|
raise ValueError("prompt error.")
|
|
|
|
|
|
print(parsed_args)
|
|
|
+ if parsed_args.yadt:
|
|
|
+ return yadt_main(parsed_args)
|
|
|
if parsed_args.dir:
|
|
|
untranlate_file = find_all_files_in_directory(parsed_args.files[0])
|
|
|
parsed_args.files = untranlate_file
|
|
|
@@ -265,5 +277,106 @@ def main(args: Optional[List[str]] = None) -> int:
|
|
|
return 0
|
|
|
|
|
|
|
|
|
+def yadt_main(parsed_args) -> int:
|
|
|
+ if parsed_args.dir:
|
|
|
+ untranlate_file = find_all_files_in_directory(parsed_args.files[0])
|
|
|
+ else:
|
|
|
+ untranlate_file = parsed_args.files
|
|
|
+ lang_in = parsed_args.lang_in
|
|
|
+ lang_out = parsed_args.lang_out
|
|
|
+ outputdir = None
|
|
|
+ if parsed_args.output:
|
|
|
+ outputdir = parsed_args.output
|
|
|
+ font_path = download_remote_fonts(lang_out.lower())
|
|
|
+
|
|
|
+ param = parsed_args.service.split(":", 1)
|
|
|
+ service_name = param[0]
|
|
|
+ service_model = param[1] if len(param) > 1 else None
|
|
|
+
|
|
|
+ envs = {}
|
|
|
+ prompt = []
|
|
|
+
|
|
|
+ if parsed_args.prompt:
|
|
|
+ try:
|
|
|
+ with open(parsed_args.prompt, "r", encoding="utf-8") as file:
|
|
|
+ content = file.read()
|
|
|
+ prompt = Template(content)
|
|
|
+ except Exception:
|
|
|
+ raise ValueError("prompt error.")
|
|
|
+
|
|
|
+ from pdf2zh.translator import (
|
|
|
+ AzureOpenAITranslator,
|
|
|
+ GoogleTranslator,
|
|
|
+ BingTranslator,
|
|
|
+ DeepLTranslator,
|
|
|
+ DeepLXTranslator,
|
|
|
+ OllamaTranslator,
|
|
|
+ OpenAITranslator,
|
|
|
+ ZhipuTranslator,
|
|
|
+ ModelScopeTranslator,
|
|
|
+ SiliconTranslator,
|
|
|
+ GeminiTranslator,
|
|
|
+ AzureTranslator,
|
|
|
+ TencentTranslator,
|
|
|
+ DifyTranslator,
|
|
|
+ AnythingLLMTranslator,
|
|
|
+ XinferenceTranslator,
|
|
|
+ ArgosTranslator,
|
|
|
+ GorkTranslator,
|
|
|
+ GroqTranslator,
|
|
|
+ DeepseekTranslator,
|
|
|
+ OpenAIlikedTranslator,
|
|
|
+ )
|
|
|
+
|
|
|
+ for translator in [
|
|
|
+ GoogleTranslator,
|
|
|
+ BingTranslator,
|
|
|
+ DeepLTranslator,
|
|
|
+ DeepLXTranslator,
|
|
|
+ OllamaTranslator,
|
|
|
+ XinferenceTranslator,
|
|
|
+ AzureOpenAITranslator,
|
|
|
+ OpenAITranslator,
|
|
|
+ ZhipuTranslator,
|
|
|
+ ModelScopeTranslator,
|
|
|
+ SiliconTranslator,
|
|
|
+ GeminiTranslator,
|
|
|
+ AzureTranslator,
|
|
|
+ TencentTranslator,
|
|
|
+ DifyTranslator,
|
|
|
+ AnythingLLMTranslator,
|
|
|
+ ArgosTranslator,
|
|
|
+ GorkTranslator,
|
|
|
+ GroqTranslator,
|
|
|
+ DeepseekTranslator,
|
|
|
+ OpenAIlikedTranslator,
|
|
|
+ ]:
|
|
|
+ if service_name == translator.name:
|
|
|
+ translator = translator(
|
|
|
+ lang_in, lang_out, service_model, envs=envs, prompt=prompt
|
|
|
+ )
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ raise ValueError("Unsupported translation service")
|
|
|
+
|
|
|
+ for file in untranlate_file:
|
|
|
+ file = file.strip("\"'")
|
|
|
+ yadt_config = YadtConfig(
|
|
|
+ input_file=file,
|
|
|
+ font=font_path,
|
|
|
+ pages=",".join((str(x) for x in parsed_args.raw_pages)),
|
|
|
+ output_dir=outputdir,
|
|
|
+ translator=translator,
|
|
|
+ debug=parsed_args.debug,
|
|
|
+ lang_in=lang_in,
|
|
|
+ lang_out=lang_out,
|
|
|
+ no_dual=False,
|
|
|
+ no_mono=False,
|
|
|
+ qps=parsed_args.thread,
|
|
|
+ )
|
|
|
+ yadt_translate(yadt_config)
|
|
|
+ return 0
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
|
|
|
sys.exit(main())
|