1 năm trước cách đây · b8d5854bb0
--- a/README.md
+++ b/README.md
@@ -40,6 +40,12 @@ pdf2zh example.pdf
 
				 pdf2zh example.pdf -p 1-3,5
			
 
				 ```
			
 
				 
			
 
				+### Translate with the specified language
			
 
				+
			
 
				+```bash
			
 
				+pdf2zh example.pdf -li en -lo ja
			
 
				+```
			
 
				+
			
 
				 ### Use regex to specify formula fonts and characters that need to be preserved
			
 
				 
			
 
				 Hint: Starting from `\ufb00` is English style ligature.
			
--- a/pdf2zh/__init__.py
+++ b/pdf2zh/__init__.py
@@ -1,2 +1,2 @@
 
				-__version__ = "1.3.2"
			
 
				+__version__ = "1.3.3"
			
 
				 __author__ = "Byaidu"
			
--- a/pdf2zh/converter.py
+++ b/pdf2zh/converter.py
@@ -347,6 +347,8 @@ class TextConverter(PDFConverter[AnyIO]):
 
				         vchar: str = None,
			
 
				         thread: int = 0,
			
 
				         layout = {},
			
 
				+        lang_in: str = "",
			
 
				+        lang_out: str = "",
			
 
				     ) -> None:
			
 
				         super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
			
 
				         self.showpageno = showpageno
			
@@ -355,6 +357,8 @@ class TextConverter(PDFConverter[AnyIO]):
 
				         self.vchar = vchar
			
 
				         self.thread = thread
			
 
				         self.layout = layout
			
 
				+        self.lang_in = lang_in
			
 
				+        self.lang_out = lang_out
			
 
				 
			
 
				     def write_text(self, text: str) -> None:
			
 
				         text = utils.compatible_encode_method(text, self.codec, "ignore")
			
@@ -520,10 +524,10 @@ class TextConverter(PDFConverter[AnyIO]):
 
				             def worker(s): # 多线程翻译
			
 
				                 try:
			
 
				                     if sum(map(str.islower,s))>1: # 包含小写字母
			
 
				-                        hash_key_paragraph = cache.deterministic_hash(s)
			
 
				+                        hash_key_paragraph = cache.deterministic_hash((s,self.lang_in,self.lang_out))
			
 
				                         new = cache.load_paragraph(hash_key, hash_key_paragraph) # 查询缓存
			
 
				                         if new is None:
			
 
				-                            new=translator.translate(s,'zh-CN','en')
			
 
				+                            new=translator.translate(s,self.lang_out,self.lang_in)
			
 
				                             new=remove_control_characters(new)
			
 
				                             cache.write_paragraph(hash_key, hash_key_paragraph, new)
			
 
				                     else:
			
--- a/pdf2zh/high_level.py
+++ b/pdf2zh/high_level.py
@@ -46,6 +46,8 @@ def extract_text_to_fp(
 
				     thread: int = 0,
			
 
				     doc_en = None,
			
 
				     model = None,
			
 
				+    lang_in: str = "",
			
 
				+    lang_out: str = "",
			
 
				     **kwargs: Any,
			
 
				 ) -> None:
			
 
				     """Parses text from inf-file and writes to outfp file-like object.
			
@@ -102,6 +104,8 @@ def extract_text_to_fp(
 
				             vchar=vchar,
			
 
				             thread=thread,
			
 
				             layout=layout,
			
 
				+            lang_in=lang_in,
			
 
				+            lang_out=lang_out,
			
 
				         )
			
 
				 
			
 
				     elif output_type == "xml":
			
--- a/pdf2zh/pdf2zh.py
+++ b/pdf2zh/pdf2zh.py
@@ -51,6 +51,8 @@ def extract_text(
 
				     vfont: str = "",
			
 
				     vchar: str = "",
			
 
				     thread: int = 0,
			
 
				+    lang_in: str = "",
			
 
				+    lang_out: str = "",
			
 
				     **kwargs: Any,
			
 
				 ) -> AnyIO:
			
 
				     if not files:
			
@@ -152,6 +154,20 @@ def create_parser() -> argparse.ArgumentParser:
 
				         default="",
			
 
				         help="The regex to math character of formula.",
			
 
				     )
			
 
				+    parse_params.add_argument(
			
 
				+        "--lang-in",
			
 
				+        "-li",
			
 
				+        type=str,
			
 
				+        default="en",
			
 
				+        help="The code of source language.",
			
 
				+    )
			
 
				+    parse_params.add_argument(
			
 
				+        "--lang-out",
			
 
				+        "-lo",
			
 
				+        type=str,
			
 
				+        default="zh-CN",
			
 
				+        help="The code of target language.",
			
 
				+    )
			
 
				     parse_params.add_argument(
			
 
				         "--thread",
			
 
				         "-t",