Просмотр исходного кода

feat(main) : add directory option.

hellofinch 1 год назад
Родитель
Коммит
b85f884793
1 измененных файлов с 38 добавлено и 0 удалено
  1. 38 0
      pdf2zh/pdf2zh.py

+ 38 - 0
pdf2zh/pdf2zh.py

@@ -14,6 +14,7 @@ from typing import List, Optional
 from pdf2zh import __version__, log
 from pdf2zh.high_level import translate
 from pdf2zh.doclayout import OnnxModel
+import os
 
 
 def create_parser() -> argparse.ArgumentParser:
@@ -149,6 +150,12 @@ def create_parser() -> argparse.ArgumentParser:
         help="custom WebUI port.",
     )
 
+    parse_params.add_argument(
+        "--dir",
+        action="store_true",
+        help="translate directory.",
+    )
+
     return parser
 
 
@@ -167,6 +174,28 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
 
     return parsed_args
 
+def find_all_files_in_directory(directory_path):
+    """Recursively collect the paths of all PDF files under a directory.
+
+    :param directory_path: str, path of the directory to search
+    :return: list of PDF file paths, each joined onto the directory it was found in
+    :raises ValueError: if directory_path is not a directory
+    """
+    # Reject anything that is not a directory before walking it
+    if not os.path.isdir(directory_path):
+        raise ValueError(f"The provided path '{directory_path}' is not a directory.")
+
+    file_paths = []
+
+    # os.walk yields every subdirectory, so nested folders are searched too
+    for root, _, files in os.walk(directory_path):
+        for file in files:
+            # Only PDFs are kept (despite the function name); the match ignores case
+            if file.lower().endswith('.pdf'):
+                # Store the file name joined onto the directory that contains it
+                file_paths.append(os.path.join(root, file))
+
+    return file_paths
 
 def main(args: Optional[List[str]] = None) -> int:
     logging.basicConfig()
@@ -206,11 +235,20 @@ def main(args: Optional[List[str]] = None) -> int:
             parsed_args.prompt = Template(content)
         except Exception:
             raise ValueError("prompt error.")
+    
     model = None
     if parsed_args.onnx:
         model = OnnxModel(parsed_args.onnx)
     else:
         model = OnnxModel.load_available()
+
+    if parsed_args.dir:
+        untranlate_file=find_all_files_in_directory(parsed_args.files[0])
+        parsed_args.files = untranlate_file
+        print(parsed_args)
+        translate(model=model,**vars(parsed_args))
+        return 0
+    # print(parsed_args)
     translate(model=model, **vars(parsed_args))
     return 0