1 年之前 · 679a4b25dd
--- a/pdf2zh/doclayout.py
+++ b/pdf2zh/doclayout.py
@@ -206,5 +206,7 @@ class OnnxModel(DocLayoutModel):
 
															         # Postprocess predictions
														
 
															         preds = preds[preds[..., 4] > 0.25]
														
 
															-        preds[..., :4] = self.scale_boxes((new_h, new_w), preds[..., :4], (orig_h, orig_w))
														
 
															+        preds[..., :4] = self.scale_boxes(
														
 
															+            (new_h, new_w), preds[..., :4], (orig_h, orig_w)
														
 
															+        )
														
 
															         return [YoloResult(boxes=preds, names=self._names)]
														
--- a/pdf2zh/high_level.py
+++ b/pdf2zh/high_level.py
@@ -4,7 +4,6 @@ import logging
 
															 import sys
														
 
															 from io import StringIO
														
 
															 from typing import Any, BinaryIO, Container, Iterator, Optional, cast
														
 
															-import torch
														
 
															 import numpy as np
														
 
															 import tqdm
														
 
															 from pymupdf import Document
														
@@ -22,7 +21,7 @@ from pdf2zh.pdfdevice import PDFDevice, TagExtractor
 
															 from pdf2zh.pdfexceptions import PDFValueError
														
 
															 from pdf2zh.pdfinterp import PDFPageInterpreter, PDFResourceManager
														
 
															 from pdf2zh.pdfpage import PDFPage
														
 
															-from pdf2zh.utils import AnyIO, FileOrName, open_filename
														
 
															+from pdf2zh.utils import AnyIO, FileOrName, open_filename, get_device
														
 
															 def extract_text_to_fp(
														
@@ -176,11 +175,7 @@ def extract_text_to_fp(
 
															                 pix.height, pix.width, 3
														
 
															             )[:, :, ::-1]
														
 
															             page_layout = model.predict(
														
 
															-                image,
														
 
															-                imgsz=int(pix.height / 32) * 32,
														
 
															-                device=(
														
 
															-                    "cuda:0" if torch.cuda.is_available() else "cpu"
														
 
															-                ),  # Auto-select GPU if available
														
 
															+                image, imgsz=int(pix.height / 32) * 32, device=get_device()
														
 
															             )[0]
														
 
															             # kdtree 是不可能 kdtree 的，不如直接渲染成图片，用空间换时间
														
 
															             box = np.ones((pix.height, pix.width))
														
--- a/pdf2zh/utils.py
+++ b/pdf2zh/utils.py
@@ -819,3 +819,16 @@ def format_int_alpha(value: int) -> str:
 
															     result.reverse()
														
 
															     return "".join(result)
														
 
															+
														
 
															+
														
 
															+def get_device():
														
 
															+    """Return "cuda:0" when torch is importable and CUDA is available, else "cpu"."""
														
 
															+    try:
														
 
															+        import torch
														
 
															+
														
 
															+        if torch.cuda.is_available():
														
 
															+            return "cuda:0"
														
 
															+    except ImportError:
														
 
															+        pass  # torch is not installed: fall through to the CPU default
														
 
															+
														
 
															+    return "cpu"
														
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "Latex PDF Translator"
 
															 authors = [{ name = "Byaidu", email = "byaidux@gmail.com" }]
														
 
															 license = "AGPL-3.0"
														
 
															 readme = "README.md"
														
 
															-requires-python = ">=3.8,<3.13"
														
 
															+requires-python = ">=3.9,<3.13"
														
 
															 classifiers = [
														
 
															     "Programming Language :: Python :: 3",
														
 
															     "Operating System :: OS Independent",
														
@@ -17,7 +17,6 @@ dependencies = [
 
															     "pymupdf",
														
 
															     "tqdm",
														
 
															     "tenacity",
														
 
															-    "doclayout-yolo",
														
 
															     "numpy",
														
 
															     "ollama",
														
 
															     "deepl<1.19.1",
														
@@ -25,10 +24,16 @@ dependencies = [
 
															     "azure-ai-translation-text<=1.0.1",
														
 
															     "gradio",
														
 
															     "huggingface_hub",
														
 
															-    "torch",
														
 
															+    "onnx",
														
 
															+    "onnxruntime",
														
 
															+    "opencv-python-headless",
														
 
															 ]
														
 
															 [project.optional-dependencies]
														
 
															+torch = [
														
 
															+    "doclayout-yolo",
														
 
															+    "torch",
														
 
															+]
														
 
															 dev = [
														
 
															     "black",
														
 
															     "flake8",