浏览代码

chore: use huggingface_hub

Byaidu 1 年之前
父节点
当前提交
60a3264acc
共有 3 个文件被更改,包括 10 次插入和 8 次删除
  1. 1 1
      pdf2zh/__init__.py
  2. 7 7
      pdf2zh/pdf2zh.py
  3. 2 0
      setup.py

+ 1 - 1
pdf2zh/__init__.py

@@ -1,2 +1,2 @@
-__version__ = "1.7.4"
+__version__ = "1.7.5"
 __author__ = "Byaidu"

+ 7 - 7
pdf2zh/pdf2zh.py

@@ -10,12 +10,10 @@ import logging
 import os
 import sys
 import pymupdf
-import tempfile
-import urllib.request
+from huggingface_hub import hf_hub_download
 
 from pdf2zh import __version__
 from pdf2zh.pdfexceptions import PDFValueError
-from pdf2zh.gui import setup_gui
 from typing import Any, Container, Iterable, List, Optional, TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -82,10 +80,11 @@ def extract_text(
                 output_type = alttype
 
     outfp: AnyIO = sys.stdout
-    pth = os.path.join(tempfile.gettempdir(), 'doclayout_yolo_docstructbench_imgsz1024.pt')
-    if not os.path.exists(pth):
-        print('Downloading...')
-        urllib.request.urlretrieve("http://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench/resolve/main/doclayout_yolo_docstructbench_imgsz1024.pt",pth)
+    # pth = os.path.join(tempfile.gettempdir(), 'doclayout_yolo_docstructbench_imgsz1024.pt')
+    # if not os.path.exists(pth):
+    #     print('Downloading...')
+    #     urllib.request.urlretrieve("http://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench/resolve/main/doclayout_yolo_docstructbench_imgsz1024.pt",pth)
+    pth = hf_hub_download(repo_id="juliozhao/DocLayout-YOLO-DocStructBench", filename="doclayout_yolo_docstructbench_imgsz1024.pt")
     model = doclayout_yolo.YOLOv10(pth)
 
     for file in files:
@@ -255,6 +254,7 @@ def main(args: Optional[List[str]] = None) -> int:
             print(f"  {file}", file=sys.stderr)
         return -1
     if parsed_args.interactive:
+        from pdf2zh.gui import setup_gui
         setup_gui()
         return 0
 

+ 2 - 0
setup.py

@@ -31,6 +31,8 @@ setup(
         "openai",
         "azure-ai-translation-text<=1.0.1",
         "gradio",
+        "huggingface_hub",
+        "torch",
     ],
     classifiers=[
         "Programming Language :: Python :: 3",