Jelajahi Sumber

doc : format and doc update.

hellofinch 1 tahun lalu
induk
melakukan
478e4f4a0b

+ 43 - 0
docs/ADVANCED.md

@@ -191,3 +191,46 @@ example auth.html
 [⬆️ Back to top](#toc)
 
 ---
+
+<h3 id="cofig">Custom configuration file</h3>
+
+Use `--config` to specify the configuration file that PDFMathTranslate should load:
+
+```bash
+pdf2zh example.pdf --config config.json
+```
+
+```bash
+pdf2zh -i --config config.json
+```
+
+Example `config.json`:
+```json
+{
+    "USE_MODELSCOPE": "0",
+    "PDF2ZH_LANG_FROM": "English",
+    "PDF2ZH_LANG_TO": "Simplified Chinese",
+    "NOTO_FONT_PATH": "/app/SourceHanSerifCN-Regular.ttf",
+    "translators": [
+        {
+            "name": "deeplx",
+            "envs": {
+                "DEEPLX_ENDPOINT": "http://localhost:1188/translate/",
+                "DEEPLX_ACCESS_TOKEN": null
+            }
+        },
+        {
+            "name": "ollama",
+            "envs": {
+                "OLLAMA_HOST": "http://127.0.0.1:11434",
+                "OLLAMA_MODEL": "gemma2"
+            }
+        }
+    ]
+}
+```
+By default, the config file is saved at `~/.config/PDFMathTranslate/config.json`. On startup, the program first reads the contents of `config.json` and then reads the environment variables. When an environment variable is set, its value takes precedence over the value in the file, and the file is updated with that value.
+
+[⬆️ Back to top](#toc)
+
+---

+ 1 - 0
docs/README_ja-JP.md

@@ -180,6 +180,7 @@ Python環境を事前にインストールする必要はありません
 | `--onnx` | [カスタムDocLayout-YOLO ONNXモデルの使用] | `pdf2zh --onnx [onnx/model/path]` |
 | `--serverport` | [カスタムWebUIポートを使用する] | `pdf2zh --serverport 7860` |
 | `--dir` | [batch translate] | `pdf2zh --dir /path/to/translate/` |
+| `--config` | [configuration file](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` |
 
 <h3 id="partial">全文または部分的なドキュメント翻訳</h3>
 

+ 2 - 0
docs/README_zh-CN.md

@@ -182,6 +182,8 @@ set HF_ENDPOINT=https://hf-mirror.com
 | `--onnx` | [使用自定义的 DocLayout-YOLO ONNX 模型] | `pdf2zh --onnx [onnx/model/path]` |
 | `--serverport` | [使用自定义的 WebUI 端口] | `pdf2zh --serverport 7860` |
 | `--dir` | [文件夹翻译] | `pdf2zh --dir /path/to/translate/` |
+| `--config` | [持久化定义配置文件](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` |
+
 
 <h3 id="partial">全文或部分文档翻译</h3>
 

+ 11 - 3
pdf2zh/config.py

@@ -4,6 +4,7 @@ from threading import RLock  # 改成 RLock
 import os
 import copy
 
+
 class ConfigManager:
     _instance = None
     _lock = RLock()  # 用 RLock 替换 Lock,允许在同一个线程中重复获取锁
@@ -144,7 +145,9 @@ class ConfigManager:
                     translator["envs"] = copy.deepcopy(new_translator_envs)
                     instance._save_config()
                     return
-            translators.append({"name": name, "envs": copy.deepcopy(new_translator_envs)})
+            translators.append(
+                {"name": name, "envs": copy.deepcopy(new_translator_envs)}
+            )
             instance._config_data["translators"] = translators
             instance._save_config()
 
@@ -162,7 +165,7 @@ class ConfigManager:
                         translator["envs"][name] = default
                         instance._save_config()
                         return default
-                    
+
         with instance._lock:
             translators = instance._config_data.get("translators", [])
             for translator in translators:
@@ -170,7 +173,12 @@ class ConfigManager:
                     translator["envs"][name] = default
                     instance._save_config()
                     return default
-            translators.append({"name": translater_name.name, "envs": copy.deepcopy(translater_name.envs)})
+            translators.append(
+                {
+                    "name": translater_name.name,
+                    "envs": copy.deepcopy(translater_name.envs),
+                }
+            )
             instance._config_data["translators"] = translators
             instance._save_config()
             return default

+ 0 - 1
pdf2zh/doclayout.py

@@ -62,7 +62,6 @@ class YoloBox:
 
 
 class OnnxModel(DocLayoutModel):
-
     def __init__(self, model_path: str):
         self.model_path = model_path
 

+ 5 - 1
pdf2zh/gui.py

@@ -448,7 +448,11 @@ with gr.Blocks(
                     _envs.append(gr.update(visible=False, value=""))
                 for i, env in enumerate(translator.envs.items()):
                     _envs[i] = gr.update(
-                        visible=True, label=env[0], value=ConfigManager.get_env_by_translatername(translator,env[0],env[1])
+                        visible=True,
+                        label=env[0],
+                        value=ConfigManager.get_env_by_translatername(
+                            translator, env[0], env[1]
+                        ),
                     )
                 _envs[-1] = gr.update(visible=translator.CustomPrompt)
                 return _envs

+ 3 - 2
pdf2zh/pdf2zh.py

@@ -157,7 +157,7 @@ def create_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="translate directory.",
     )
-    
+
     parse_params.add_argument(
         "--config",
         type=str,
@@ -206,6 +206,7 @@ def find_all_files_in_directory(directory_path):
 
     return file_paths
 
+
 def main(args: Optional[List[str]] = None) -> int:
     logging.basicConfig()
 
@@ -259,7 +260,7 @@ def main(args: Optional[List[str]] = None) -> int:
         parsed_args.files = untranlate_file
         translate(model=ModelInstance.value, **vars(parsed_args))
         return 0
-    
+
     translate(model=ModelInstance.value, **vars(parsed_args))
     return 0
 

+ 6 - 6
pdf2zh/pdfinterp.py

@@ -236,9 +236,9 @@ class PDFPageInterpreterEx(PDFPageInterpreter):
                     pos_inv = -np.mat(ctm[4:]) * ctm_inv
                 a, b, c, d = ctm_inv.reshape(4).tolist()
                 e, f = pos_inv.tolist()[0]
-                self.obj_patch[self.xobjmap[xobjid].objid] = (
-                    f"q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}"
-                )
+                self.obj_patch[
+                    self.xobjmap[xobjid].objid
+                ] = f"q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}"
             except Exception:
                 pass
         elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
@@ -269,9 +269,9 @@ class PDFPageInterpreterEx(PDFPageInterpreter):
         self.device.fontmap = self.fontmap
         ops_new = self.device.end_page(page)
         # 上面渲染的时候会根据 cropbox 减掉页面偏移得到真实坐标,这里输出的时候需要用 cm 把页面偏移加回来
-        self.obj_patch[page.page_xref] = (
-            f"q {ops_base}Q 1 0 0 1 {x0} {y0} cm {ops_new}"  # ops_base 里可能有图,需要让 ops_new 里的文字覆盖在上面,使用 q/Q 重置位置矩阵
-        )
+        self.obj_patch[
+            page.page_xref
+        ] = f"q {ops_base}Q 1 0 0 1 {x0} {y0} cm {ops_new}"  # ops_base 里可能有图,需要让 ops_new 里的文字覆盖在上面,使用 q/Q 重置位置矩阵
         for obj in page.contents:
             self.obj_patch[obj.objid] = ""
 

+ 6 - 3
pdf2zh/translator.py

@@ -22,6 +22,7 @@ import argostranslate.translate
 import json
 from pdf2zh.config import ConfigManager
 
+
 def remove_control_characters(s):
     return "".join(ch for ch in s if unicodedata.category(ch)[0] != "C")
 
@@ -56,17 +57,17 @@ class BaseTranslator:
         self.envs = copy(self.envs)
         if ConfigManager.get_translator_by_name(self.name):
             self.envs = ConfigManager.get_translator_by_name(self.name)
-        needUpdate=False
+        needUpdate = False
         for key in self.envs:
             if key in os.environ:
                 self.envs[key] = os.environ[key]
                 needUpdate = True
         if needUpdate:
-            ConfigManager.set_translator_by_name(self.name,self.envs)
+            ConfigManager.set_translator_by_name(self.name, self.envs)
         if envs is not None:
             for key in envs:
                 self.envs[key] = envs[key]
-            ConfigManager.set_translator_by_name(self.name,self.envs)
+            ConfigManager.set_translator_by_name(self.name, self.envs)
 
     def add_cache_impact_parameters(self, k: str, v):
         """
@@ -734,6 +735,7 @@ class GorkTranslator(OpenAITranslator):
         if prompt:
             self.add_cache_impact_parameters("prompt", prompt.template)
 
+
 class GroqTranslator(OpenAITranslator):
     name = "groq"
     envs = {
@@ -753,6 +755,7 @@ class GroqTranslator(OpenAITranslator):
         if prompt:
             self.add_cache_impact_parameters("prompt", prompt.template)
 
+
 class DeepseekTranslator(OpenAITranslator):
     name = "deepseek"
     envs = {