فهرست منبع

Merge commit '72cab51476d9f2e4bc661ed1db251e6577d598bc' into feat/modelscope-gradio

* commit '72cab51476d9f2e4bc661ed1db251e6577d598bc':
  Fix bug where the font size could become too large.
  Support Traditional Chinese
  Fix error when using the `mat` method with NumPy version >= 2.0.0.
  readme 更新
  添加网页认证选项。
yuze.zyz 1 سال پیش
والد
کامیت
83db82fb5f
7 فایلهای تغییر یافته به همراه 91 افزوده شده و 18 حذف شده
  1. 2 1
      README.md
  2. 2 1
      README_ja-JP.md
  3. 2 1
      README_zh-CN.md
  4. 1 1
      pdf2zh/converter.py
  5. 9 1
      pdf2zh/entrance.py
  6. 70 12
      pdf2zh/gui.py
  7. 5 1
      pdf2zh/pdfinterp.py

+ 2 - 1
README.md

@@ -164,7 +164,8 @@ In the following table, we list all advanced options for reference:
 | `-t`  | [Multi-threads](#threads) | `pdf2zh example.pdf -t 1` |
 | `-o`  | Output dir | `pdf2zh example.pdf -o output` |
 | `-f`, `-c` | [Exceptions](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
-| `--share` | Get gradio public link | `pdf2zh -i --share` |
+| `--share` | [Get gradio public link] | `pdf2zh -i --share` |
+| `-a` | [add authorization and custom login page] | `pdf2zh -i -a users.txt [auth.html]` |
 
 <h3 id="partial">Full / partial document translation</h3>
 

+ 2 - 1
README_ja-JP.md

@@ -164,7 +164,8 @@ Python環境を事前にインストールする必要はありません
 | `-t`  | [マルチスレッド](#threads) | `pdf2zh example.pdf -t 1` |
 | `-o`  | 出力ディレクトリ | `pdf2zh example.pdf -o output` |
 | `-f`, `-c` | [例外](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
-| `--share` | gradio公開リンクを取得 | `pdf2zh -i --share` |
+| `--share` | [gradio公開リンクを取得] | `pdf2zh -i --share` |
+| `-a` | [ウェブ認証とカスタム認証ページの追加] | `pdf2zh -i -a users.txt [auth.html]` |
 
 <h3 id="partial">全文または部分的なドキュメント翻訳</h3>
 

+ 2 - 1
README_zh-CN.md

@@ -164,7 +164,8 @@
 | `-t`  | [多线程](#threads) | `pdf2zh example.pdf -t 1` |
 | `-o`  | 输出目录 | `pdf2zh example.pdf -o output` |
 | `-f`, `-c` | [例外规则](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
-| `--share` | 获取 gradio 公开链接 | `pdf2zh -i --share` |
+| `--share` | [获取 gradio 公开链接] | `pdf2zh -i --share` |
+| `-a` | [添加网页认证和自定义认证页] | `pdf2zh -i -a users.txt [auth.html]` |
 
 <h3 id="partial">全文或部分文档翻译</h3>
 

+ 1 - 1
pdf2zh/converter.py

@@ -279,7 +279,7 @@ class TranslateConverter(PDFConverterEx):
                     if (                                                    # 根据当前字符修正段落属性
                         child.size > pstk[-1].size / 0.79                   # 1. 当前字符显著比段落字体大
                         or len(sstk[-1].strip()) == 1                       # 2. 当前字符为段落第二个文字(考虑首字母放大的情况)
-                    ):
+                    ) and child.get_text() != " ":                          # 3. 当前字符不是空格
                         pstk[-1].y -= child.size - pstk[-1].size            # 修正段落初始纵坐标,假设两个不同大小字符的上边界对齐
                         pstk[-1].size = child.size
                     sstk[-1] += child.get_text()

+ 9 - 1
pdf2zh/entrance.py

@@ -115,6 +115,14 @@ def create_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="celery",
     )
+    parse_params.add_argument(
+        "--authorized",
+        "-a",
+        type=str,
+        nargs="+",
+        default=["./users.txt", "./auth.html"],
+        help="user name and password.",
+    )
 
     return parser
 
@@ -147,7 +155,7 @@ def main(args: Optional[List[str]] = None) -> int:
     if parsed_args.interactive:
         from pdf2zh.gui import setup_gui
 
-        setup_gui(parsed_args.share)
+        setup_gui(parsed_args.share, parsed_args.authorized)
         return 0
 
     if parsed_args.flask:

+ 70 - 12
pdf2zh/gui.py

@@ -49,7 +49,8 @@ service_map: dict[str, BaseTranslator] = {
     "AnythingLLM": AnythingLLMTranslator,
 }
 lang_map = {
-    "Chinese": "zh",
+    "Simplified Chinese": "zh",
+    "Traditional Chinese": "zh-TW",
     "English": "en",
     "French": "fr",
     "German": "de",
@@ -310,7 +311,7 @@ with gr.Blocks(
                 lang_to = gr.Dropdown(
                     label="Translate to",
                     choices=lang_map.keys(),
-                    value="Chinese",
+                    value="Simplified Chinese",
                 )
             page_range = gr.Radio(
                 choices=page_map.keys(),
@@ -442,25 +443,82 @@ with gr.Blocks(
     demo.load(on_select_service, service, envs)
 
 
-def setup_gui(share=False):
+def readuserandpasswd(file_path):
+    tuple_list = []
+    content = ""
+    if len(file_path) == 2:
+        try:
+            with open(file_path[1], "r", encoding="utf-8") as file:
+                content = file.read()
+        except FileNotFoundError:
+            print(f"Error: File '{file_path[1]}' not found.")
+    try:
+        with open(file_path[0], "r", encoding="utf-8") as file:
+            tuple_list = [
+                tuple(line.strip().split(",")) for line in file if line.strip()
+            ]
+    except FileNotFoundError:
+        print(f"Error: File '{file_path[0]}' not found.")
+    return tuple_list, content
+
+
+def setup_gui(share=False, authfile=["", ""]):
+    userlist, html = readuserandpasswd(authfile)
     if flag_demo:
         demo.launch(server_name="0.0.0.0", max_file_size="5mb", inbrowser=True)
     else:
-        try:
-            demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=share)
-        except Exception:
-            print(
-                "Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software."
-            )
+        if len(userlist) == 0:
             try:
                 demo.launch(
-                    server_name="127.0.0.1", debug=True, inbrowser=True, share=share
+                    server_name="0.0.0.0", debug=True, inbrowser=True, share=share
                 )
             except Exception:
                 print(
-                    "Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software."
+                    "Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software."
                 )
-                demo.launch(debug=True, inbrowser=True, share=True)
+                try:
+                    demo.launch(
+                        server_name="127.0.0.1", debug=True, inbrowser=True, share=share
+                    )
+                except Exception:
+                    print(
+                        "Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software."
+                    )
+                    demo.launch(debug=True, inbrowser=True, share=True)
+        else:
+            try:
+                demo.launch(
+                    server_name="0.0.0.0",
+                    debug=True,
+                    inbrowser=True,
+                    share=share,
+                    auth=userlist,
+                    auth_message=html,
+                )
+            except Exception:
+                print(
+                    "Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software."
+                )
+                try:
+                    demo.launch(
+                        server_name="127.0.0.1",
+                        debug=True,
+                        inbrowser=True,
+                        share=share,
+                        auth=userlist,
+                        auth_message=html,
+                    )
+                except Exception:
+                    print(
+                        "Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software."
+                    )
+                    demo.launch(
+                        debug=True,
+                        inbrowser=True,
+                        share=True,
+                        auth=userlist,
+                        auth_message=html,
+                    )
 
 
 # For auto-reloading while developing

+ 5 - 1
pdf2zh/pdfinterp.py

@@ -229,7 +229,11 @@ class PDFPageInterpreterEx(PDFPageInterpreter):
                 self.device.fontmap = interpreter.fontmap
                 ops_new = self.device.end_figure(xobjid)
                 ctm_inv = np.linalg.inv(np.array(ctm[:4]).reshape(2, 2))
-                pos_inv = -np.mat(ctm[4:]) * ctm_inv
+                np_version = np.__version__
+                if np_version.split(".")[0] >= "2":
+                    pos_inv = -np.asmatrix(ctm[4:]) * ctm_inv
+                else:
+                    pos_inv = -np.mat(ctm[4:]) * ctm_inv
                 a, b, c, d = ctm_inv.reshape(4).tolist()
                 e, f = pos_inv.tolist()[0]
                 self.obj_patch[self.xobjmap[xobjid].objid] = (