소스 검색

准备standalone

hellofinch 1 년 전
부모
커밋
cd6aae45ce
3개의 변경된 파일, 127개의 추가 및 4개의 삭제
  1. 1 0
      .gitignore
  2. 112 0
      pdf2zh/interface.py
  3. 14 4
      pdf2zh/pdf2zh.py

+ 1 - 0
.gitignore

@@ -135,6 +135,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+pdf2zh-dev/
 
 # Spyder project settings
 .spyderproject

+ 112 - 0
pdf2zh/interface.py

@@ -0,0 +1,112 @@
+import os
+import shutil
+from pathlib import Path
+from pdf2zh import __version__
+from pdf2zh.pdf2zh import extract_text
+from pdf2zh.translator import (
+    BaseTranslator,
+    GoogleTranslator,
+    BingTranslator,
+    DeepLTranslator,
+    DeepLXTranslator,
+    OllamaTranslator,
+    OpenAITranslator,
+    ZhipuTranslator,
+    SiliconTranslator,
+    AzureTranslator,
+    TencentTranslator,
+)
+
+import gradio as gr
+
+# Display name -> translator class. The values are the classes themselves,
+# not instances, hence ``type[BaseTranslator]``.
+service_map: dict[str, type[BaseTranslator]] = {
+    "Google": GoogleTranslator,
+    "Bing": BingTranslator,
+    "DeepL": DeepLTranslator,
+    "DeepLX": DeepLXTranslator,
+    "Ollama": OllamaTranslator,
+    "OpenAI": OpenAITranslator,
+    "Zhipu": ZhipuTranslator,
+    "Silicon": SiliconTranslator,
+    "Azure": AzureTranslator,
+    "Tencent": TencentTranslator,
+}
+# Display name -> short language code.
+lang_map: dict[str, str] = {
+    "Chinese": "zh",
+    "English": "en",
+    "French": "fr",
+    "German": "de",
+    "Japanese": "ja",
+    "Korean": "ko",
+    "Russian": "ru",
+    "Spanish": "es",
+    "Italian": "it",
+}
+# Display name -> page selection: a list of page indices ("First" is [0]),
+# or None for the "All" choice.
+page_map = {
+    "All": None,
+    "First": [0],
+    "First 5 pages": list(range(0, 5)),
+}
+
+def translate(
+    file_input,
+    service,
+    lang_from,
+    lang_to,
+    page_range,
+    *envs,
+):
+    """Translate an uploaded PDF and return the paths of the generated files.
+
+    Args:
+        file_input: Path of the uploaded file; it is copied into the
+            ``pdf2zh_files`` working directory.
+        service: Key of ``service_map`` selecting the translator class.
+        lang_from: Key of ``lang_map`` for the source language.
+        lang_to: Key of ``lang_map`` for the target language.
+        page_range: Key of ``page_map`` selecting the pages to process.
+        *envs: Values for the selected translator's ``envs`` entries, matched
+            by position. Missing or ``None`` values leave the corresponding
+            environment variable untouched.
+
+    Returns:
+        A tuple of two strings: the paths of ``<name>-zh.pdf`` and
+        ``<name>-dual.pdf`` inside the working directory.
+
+    Raises:
+        gr.Error: If no file was given, if an option is not a key of its
+            map, or if the expected output files are missing after the run.
+    """
+    if not file_input:
+        raise gr.Error("No input")
+
+    # Resolve every choice before touching the filesystem so that a bad
+    # option is reported as a readable error instead of a bare KeyError.
+    try:
+        translator = service_map[service]
+        selected_page = page_map[page_range]
+        lang_in = lang_map[lang_from]
+        lang_out = lang_map[lang_to]
+    except KeyError as err:
+        raise gr.Error(f"Unsupported option: {err}") from err
+
+    output = Path("pdf2zh_files")
+    output.mkdir(parents=True, exist_ok=True)
+    file_path = shutil.copy(file_input, output)
+
+    filename = Path(file_path).stem
+    file_en = output / f"{filename}.pdf"
+    file_zh = output / f"{filename}-zh.pdf"
+    file_dual = output / f"{filename}-dual.pdf"
+
+    # zip() stops at the shorter sequence, so passing fewer values than the
+    # translator declares no longer raises IndexError. None is skipped
+    # because os.environ only accepts strings.
+    for env_name, env_value in zip(translator.envs, envs):
+        if env_value is not None:
+            os.environ[env_name] = env_value
+
+    print(f"Files before translation: {os.listdir(output)}")
+
+    param = {
+        "files": [file_en],
+        "pages": selected_page,
+        "lang_in": lang_in,
+        "lang_out": lang_out,
+        "service": f"{translator.name}",
+        "output": output,
+        "thread": 4,
+    }
+    print(param)
+    extract_text(**param)
+    print(f"Files after translation: {os.listdir(output)}")
+
+    # Both output files must exist for the run to count as successful.
+    if not file_zh.exists() or not file_dual.exists():
+        raise gr.Error("No output")
+
+    return (
+        str(file_zh),
+        str(file_dual)
+    )
+
+# Gradio passes one value per input component, positionally, to ``fn``.
+# The components therefore follow translate()'s parameter order:
+# file_input, service, lang_from, lang_to, page_range.
+# No credential inputs are wired here, so translate() receives no *envs.
+demo = gr.Interface(
+    fn=translate,
+    inputs=[
+        gr.File(label="File"),
+        gr.Dropdown(choices=list(service_map), value="Google", label="Service"),
+        gr.Dropdown(choices=list(lang_map), value="English", label="Translate from"),
+        gr.Dropdown(choices=list(lang_map), value="Chinese", label="Translate to"),
+        gr.Dropdown(choices=list(page_map), value="All", label="Pages"),
+    ],
+    outputs=["file", "file"],
+    api_name="translate",
+)
+
+def setup_interface(share=False):
+    """Launch the module-level ``demo`` Gradio app.
+
+    Args:
+        share: Forwarded to ``demo.launch`` as its ``share`` argument.
+    """
+    # Listens on all interfaces (0.0.0.0) with debug mode switched on.
+    launch_kwargs = {"server_name": "0.0.0.0", "debug": True, "share": share}
+    demo.launch(**launch_kwargs)
+
+
+# Entry point when this module is run directly (e.g. for auto-reloading while
+# developing); launches the interface with the default share=False.
+if __name__ == "__main__":
+    setup_interface()

+ 14 - 4
pdf2zh/pdf2zh.py

@@ -285,6 +285,12 @@ def create_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Enable Gradio Share",
     )
+    parse_params.add_argument(
+        "--interface",
+        "-X",
+        action="store_true",
+        help="test interface.",
+    )
 
     return parser
 
@@ -314,11 +320,15 @@ def main(args: Optional[List[str]] = None) -> int:
         for file in missing_files:
             print(f"  {file}", file=sys.stderr)
         return -1
-    if parsed_args.interactive:
-        from pdf2zh.gui import setup_gui
+    
+    from pdf2zh.gui import setup_gui
+
+    setup_gui(parsed_args.share)
+    # if parsed_args.interface:
+    #     from pdf2zh.interface import setup_interface
 
-        setup_gui(parsed_args.share)
-        return 0
+    #     setup_interface(parsed_args.share)
+    #     return 0
 
     extract_text(**vars(parsed_args))
     return 0