yuze.zyz преди 1 година
родител
ревизия
cc27355ad7
променени са 6 файла, в които са добавени 18 реда и са изтрити 10 реда
  1. + 3 - 0
      pdf2zh/__init__.py
  2. + 1 - 1
      pdf2zh/converter.py
  3. + 11 - 4
      pdf2zh/doclayout.py
  4. + 1 - 2
      pdf2zh/gui.py
  5. + 1 - 2
      pdf2zh/pdf2zh.py
  6. + 1 - 1
      pyproject.toml

+ 3 - 0
pdf2zh/__init__.py

@@ -1,5 +1,8 @@
 import logging
+from pdf2zh.high_level import translate, translate_stream
+
 log = logging.getLogger(__name__)
 
 __version__ = "1.8.8"
 __author__ = "Byaidu"
+__all__ = ["translate", "translate_stream"]

+ 1 - 1
pdf2zh/converter.py

@@ -1,4 +1,4 @@
-from typing import List, Dict
+from typing import Dict
 
 from pdfminer.pdfinterp import PDFGraphicState, PDFResourceManager
 from pdfminer.pdffont import PDFCIDFont

+ 11 - 4
pdf2zh/doclayout.py

@@ -13,7 +13,7 @@ class DocLayoutModel(abc.ABC):
     @staticmethod
     def load_onnx():
         model = OnnxModel.from_pretrained(
-            repo_id='AI-ModelScope/DocLayout-YOLO-DocStructBench-onnx',
+            repo_id="wybxc/DocLayout-YOLO-DocStructBench-onnx",
             filename="doclayout_yolo_docstructbench_imgsz1024.onnx",
         )
         return model
@@ -72,9 +72,16 @@ class OnnxModel(DocLayoutModel):
 
     @staticmethod
     def from_pretrained(repo_id: str, filename: str):
-        from modelscope import snapshot_download
-        model_dir = snapshot_download(repo_id)
-        pth = os.path.join(model_dir, filename)
+        if os.environ.get("USE_MODELSCOPE", "0") == "1":
+            repo_mapping = {
+                # Edit here to add more models
+                "wybxc/DocLayout-YOLO-DocStructBench-onnx": "AI-ModelScope/DocLayout-YOLO-DocStructBench-onnx"
+            }
+            from modelscope import snapshot_download
+            model_dir = snapshot_download(repo_mapping[repo_id])
+            pth = os.path.join(model_dir, filename)
+        else:
+            pth = hf_hub_download(repo_id=repo_id, filename=filename, etag_timeout=1)
         return OnnxModel(pth)
 
     @property

+ 1 - 2
pdf2zh/gui.py

@@ -292,7 +292,7 @@ with gr.Blocks(
             service = gr.Dropdown(
                 label="Service",
                 choices=service_map.keys(),
-                value="ModelScope",
+                value="Google",
             )
             envs = []
             for i in range(3):
@@ -440,7 +440,6 @@ with gr.Blocks(
         stop_translate_file,
         inputs=[state],
     )
-    demo.load(on_select_service, service, envs)
 
 
 def readuserandpasswd(file_path):

+ 1 - 2
pdf2zh/entrance.py → pdf2zh/pdf2zh.py

@@ -9,11 +9,11 @@ import argparse
 import sys
 import logging
 from typing import List, Optional
+from pdf2zh import __version__, log
 from pdf2zh.high_level import translate
 
 
 def create_parser() -> argparse.ArgumentParser:
-    from pdf2zh import __version__
     parser = argparse.ArgumentParser(description=__doc__, add_help=True)
     parser.add_argument(
         "files",
@@ -144,7 +144,6 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
 
 
 def main(args: Optional[List[str]] = None) -> int:
-    from pdf2zh import log
     logging.basicConfig()
 
     parsed_args = parse_args(args)

+ 1 - 1
pyproject.toml

@@ -50,4 +50,4 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project.scripts]
-pdf2zh = "pdf2zh.entrance:main"
+pdf2zh = "pdf2zh.pdf2zh:main"