Bläddra i källkod

fix: val name

timel 1 år sedan
förälder
incheckning
5f24432a2b
1 ändrade filer med 24 tillägg och 20 borttagningar
  1. 24 20
      pdf2zh/converter.py

+ 24 - 20
pdf2zh/converter.py

@@ -1,4 +1,5 @@
 from typing import Dict, List
+from enum import Enum
 
 from pdfminer.pdfinterp import PDFGraphicState, PDFResourceManager
 from pdfminer.pdffont import PDFCIDFont
@@ -373,11 +374,11 @@ class TranslateConverter(PDFConverterEx):
                 return "".join(["%02x" % ord(c) for c in cstk])
 
         # 根据目标语言获取默认行距
-        lang_space = {
+        LANG_LINEHEIGHT_MAP = {
             "zh-cn": 1.4, "zh-tw": 1.4, "zh-hans": 1.4, "zh-hant": 1.4, "zh": 1.4,
             "ja": 1.1, "ko": 1.2, "en": 1.2, "ar": 1.0, "ru": 0.8, "uk": 0.8, "ta": 0.8
         }
-        default_line_spacing = lang_space.get(self.translator.lang_out.lower(), 1.1)
+        default_line_height = LANG_LINEHEIGHT_MAP.get(self.translator.lang_out.lower(), 1.1) # 小语种默认1.1
 
         _x, _y = 0, 0
 
@@ -394,7 +395,7 @@ class TranslateConverter(PDFConverterEx):
             brk: bool = pstk[id].brk                    # 段落换行标记
             cstk: str = ""                              # 当前文字栈
             fcur: str = None                            # 当前字体 ID
-            line = 0                                    # 记录换行次数
+            lidx = 0                                    # 记录换行次数
             tx = x
             fcur_ = fcur
             ptr = 0
@@ -438,18 +439,18 @@ class TranslateConverter(PDFConverterEx):
                 ):
                     if cstk:
                         ops_vals.append({
-                            "type": "text",
+                            "type": OpType.TEXT,
                             "font": fcur,
                             "size": size,
                             "x": tx,
                             "dy": 0,
                             "rtxt": raw_string(fcur, cstk),
-                            "line": line
+                            "lidx": lidx
                         })
                         cstk = ""
                 if brk and x + adv > x1 + 0.1 * size:  # 到达右边界且原文段落存在换行
                     x = x0
-                    line += 1
+                    lidx += 1
                 if vy_regex:  # 插入公式
                     fix = 0
                     if fcur is not None:  # 段落内公式修正纵向偏移
@@ -457,13 +458,13 @@ class TranslateConverter(PDFConverterEx):
                     for vch in var[vid]:  # 排版公式字符
                         vc = chr(vch.cid)
                         ops_vals.append({
-                            "type": "text",
+                            "type": OpType.TEXT,
                             "font": self.fontid[vch.font],
                             "size": vch.size,
                             "x": x + vch.x0 - var[vid][0].x0,
                             "dy": fix + vch.y0 - var[vid][0].y0,
                             "rtxt": raw_string(self.fontid[vch.font], vc),
-                            "line": line
+                            "lidx": lidx
                         })
                         if log.isEnabledFor(logging.DEBUG):
                             lstk.append(LTLine(0.1, (_x, _y), (x + vch.x0 - var[vid][0].x0, fix + y + vch.y0 - var[vid][0].y0)))
@@ -471,13 +472,13 @@ class TranslateConverter(PDFConverterEx):
                     for l in varl[vid]:  # 排版公式线条
                         if l.linewidth < 5:  # hack 有的文档会用粗线条当图片背景
                             ops_vals.append({
-                                "type": "formula",
+                                "type": OpType.LINE,
                                 "x": l.pts[0][0] + x - var[vid][0].x0,
                                 "dy": l.pts[0][1] + fix - var[vid][0].y0,
                                 "linewidth": l.linewidth,
                                 "xlen": l.pts[1][0] - l.pts[0][0],
                                 "ylen": l.pts[1][1] - l.pts[0][1],
-                                "line": line
+                                "lidx": lidx
                             })
                 else:  # 插入文字缓冲区
                     if not cstk:  # 单行开头
@@ -497,26 +498,25 @@ class TranslateConverter(PDFConverterEx):
             # 处理结尾
             if cstk:
                 ops_vals.append({
-                    "type": "text",
+                    "type": OpType.TEXT,
                     "font": fcur,
                     "size": size,
                     "x": tx,
                     "dy": 0,
                     "rtxt": raw_string(fcur, cstk),
-                    "line": line
+                    "lidx": lidx
                 })
 
-            line_spacing = default_line_spacing
+            line_height = default_line_height
 
-            while (line + 1) * size * line_spacing > height and line_spacing >= 1:
-                line_spacing -= 0.05
+            while (lidx + 1) * size * line_height > height and line_height >= 1:
+                line_height -= 0.05
 
             for vals in ops_vals:
-                match vals["type"]:
-                    case "text":
-                        ops_list.append(gen_op_txt(vals["font"], vals["size"], vals["x"], vals["dy"] + y - vals["line"] * size * line_spacing, vals["rtxt"]))
-                    case "formula":
-                        ops_list.append(gen_op_line(vals["x"], vals["dy"] + y - vals["line"] * size * line_spacing, vals["xlen"], vals["ylen"], vals["linewidth"]))
+                if vals["type"] == OpType.TEXT:
+                    ops_list.append(gen_op_txt(vals["font"], vals["size"], vals["x"], vals["dy"] + y - vals["lidx"] * size * line_height, vals["rtxt"]))
+                elif vals["type"] == OpType.LINE:
+                    ops_list.append(gen_op_line(vals["x"], vals["dy"] + y - vals["lidx"] * size * line_height, vals["xlen"], vals["ylen"], vals["linewidth"]))
 
         for l in lstk:  # 排版全局线条
             if l.linewidth < 5:  # hack 有的文档会用粗线条当图片背景
@@ -524,3 +524,7 @@ class TranslateConverter(PDFConverterEx):
 
         ops = f"BT {''.join(ops_list)}ET "
         return ops
+
+class OpType(Enum):
+    TEXT = "text"
+    LINE = "line"