Byaidu 1 рік тому
батько
коміт
1b7f405970
3 змінені файли: 14 рядків додано, 14 рядків видалено
  1. + 1 - 1
      .github/workflows/python-build.yml
  2. + 1 - 1
      .pre-commit-config.yaml
  3. + 12 - 12
      pdf2zh/converter.py

+ 1 - 1
.github/workflows/python-build.yml

@@ -25,7 +25,7 @@ jobs:
     - name: Check code format
       run: |
         black --check --diff --color pdf2zh/*.py
-        flake8
+        flake8 --ignore E203,E261,E501,W503,E741
 
     - name: Build package
       run: python -m build

+ 1 - 1
.pre-commit-config.yaml

@@ -10,5 +10,5 @@ repos:
       language: python
     - id: flake8
       name: flake8
-      entry: flake8
+      entry: flake8 --ignore E203,E261,E501,W503,E741
       language: python

+ 12 - 12
pdf2zh/converter.py

@@ -511,7 +511,7 @@ class TextConverter(PDFConverter[AnyIO]):
                             pstk[-1][6] = True
                     else:                           # 根据当前字符构建一个新的段落
                         sstk.append("")
-                        pstk.append([child.y0,child.x0,child.x0,child.x0,child.size,child.font,False,])
+                        pstk.append([child.y0, child.x0, child.x0, child.x0, child.size, child.font, False])
                 if not cur_v:                                               # 文字入栈
                     if (                                                    # 根据当前字符修正段落属性
                         child.size > pstk[-1][4] / 0.79                     # 1. 当前字符显著比段落字体大
@@ -565,7 +565,7 @@ class TextConverter(PDFConverter[AnyIO]):
             varf.append(vfix)
         log.debug("\n==========[VSTACK]==========\n")
         for id, v in enumerate(var):  # 计算公式宽度
-            l = max([vch.x1 for vch in v]) - v[0].x0  # noqa: E741
+            l = max([vch.x1 for vch in v]) - v[0].x0
             log.debug(f'< {l:.1f} {v[0].x0:.1f} {v[0].y0:.1f} {v[0].cid} {v[0].fontname} {len(varl[id])} > $v{id}$ = {"".join([ch.get_text() for ch in v])}')
             vlen.append(l)
 
@@ -574,6 +574,7 @@ class TextConverter(PDFConverter[AnyIO]):
         log.debug("\n==========[SSTACK]==========\n")
         hash_key = cache.deterministic_hash("PDFMathTranslate")
         cache.create_cache(hash_key)
+
         @retry(wait=wait_fixed(1))
         def worker(s):  # 多线程翻译
             try:
@@ -604,6 +605,7 @@ class TextConverter(PDFConverter[AnyIO]):
                 return "".join(["%04x" % ord(c) for c in cstk])
             else:
                 return "".join(["%02x" % ord(c) for c in cstk])
+
         _x, _y = 0, 0
         for id, new in enumerate(news):
             tx = x = pstk[id][1]    # 段落初始横坐标
@@ -633,9 +635,7 @@ class TextConverter(PDFConverter[AnyIO]):
                         adv = vlen[vid]
                     except Exception:
                         continue  # 翻译器可能会自动补个越界的公式标记
-                    if len(var[vid]) == 1 and unicodedata.category(
-                        var[vid][0].get_text()[0]
-                    ) in ["Lm","Mn","Sk",]:  # 文字修饰符
+                    if len(var[vid]) == 1 and unicodedata.category(var[vid][0].get_text()[0]) in ["Lm", "Mn", "Sk"]:  # 文字修饰符
                         mod = True
                 else:  # 加载文字
                     ch = new[ptr]
@@ -667,7 +667,7 @@ class TextConverter(PDFConverter[AnyIO]):
                         cstk = ""
                 if lb and x + adv > rt + 0.1 * size:  # 到达右边界且原文段落存在换行
                     x = lt
-                    lang_space = {"zh-CN": 1.4,"zh-TW": 1.4,"ja": 1.1,"ko": 1.2,"en": 1.2}  # CJK
+                    lang_space = {"zh-CN": 1.4, "zh-TW": 1.4, "ja": 1.1, "ko": 1.2, "en": 1.2}  # CJK
                     y -= size * lang_space.get(self.translator.lang_out, 1.1)  # 小语种大多适配 1.1
                 if vy_regex:  # 插入公式
                     fix = 0
@@ -675,13 +675,13 @@ class TextConverter(PDFConverter[AnyIO]):
                         fix = varf[vid]
                     for vch in var[vid]:  # 排版公式字符
                         vc = chr(vch.cid)
-                        ops += f"/{self.fontid[vch.font]} {vch.size:f} Tf 1 0 0 1 {x + vch.x0 - var[vid][0].x0:f} {fix + y + vch.y0 - var[vid][0].y0:f} Tm [<{raw_string(self.fontid[vch.font], vc)}>] TJ "  # noqa: E501
+                        ops += f"/{self.fontid[vch.font]} {vch.size:f} Tf 1 0 0 1 {x + vch.x0 - var[vid][0].x0:f} {fix + y + vch.y0 - var[vid][0].y0:f} Tm [<{raw_string(self.fontid[vch.font], vc)}>] TJ "
                         if log.isEnabledFor(logging.DEBUG):
-                            lstk.append(LTLine(0.1,(_x, _y),(x + vch.x0 - var[vid][0].x0,fix + y + vch.y0 - var[vid][0].y0,)))
+                            lstk.append(LTLine(0.1, (_x, _y), (x + vch.x0 - var[vid][0].x0, fix + y + vch.y0 - var[vid][0].y0)))
                             _x, _y = x + vch.x0 - var[vid][0].x0, fix + y + vch.y0 - var[vid][0].y0
-                    for l in varl[vid]:  # 排版公式线条 # noqa: E741
+                    for l in varl[vid]:  # 排版公式线条
                         if l.linewidth < 5:  # hack 有的文档会用粗线条当图片背景
-                            ops += f"ET q 1 0 0 1 {l.pts[0][0] + x - var[vid][0].x0:f} {l.pts[0][1] + fix + y - var[vid][0].y0:f} cm [] 0 d 0 J {l.linewidth:f} w 0 0 m {l.pts[1][0] - l.pts[0][0]:f} {l.pts[1][1] - l.pts[0][1]:f} l S Q BT "  # noqa: E501
+                            ops += f"ET q 1 0 0 1 {l.pts[0][0] + x - var[vid][0].x0:f} {l.pts[0][1] + fix + y - var[vid][0].y0:f} cm [] 0 d 0 J {l.linewidth:f} w 0 0 m {l.pts[1][0] - l.pts[0][0]:f} {l.pts[1][1] - l.pts[0][1]:f} l S Q BT "
                 else:  # 插入文字缓冲区
                     if not cstk:  # 单行开头
                         tx = x
@@ -698,9 +698,9 @@ class TextConverter(PDFConverter[AnyIO]):
                 if log.isEnabledFor(logging.DEBUG):
                     lstk.append(LTLine(0.1, (_x, _y), (x, y)))
                     _x, _y = x, y
-        for l in lstk:  # 排版全局线条 # noqa: E741
+        for l in lstk:  # 排版全局线条
             if l.linewidth < 5:  # hack 有的文档会用粗线条当图片背景
-                ops += f"ET q 1 0 0 1 {l.pts[0][0]:f} {l.pts[0][1]:f} cm [] 0 d 0 J {l.linewidth:f} w 0 0 m {l.pts[1][0] - l.pts[0][0]:f} {l.pts[1][1] - l.pts[0][1]:f} l S Q BT "  # noqa: E501
+                ops += f"ET q 1 0 0 1 {l.pts[0][0]:f} {l.pts[0][1]:f} cm [] 0 d 0 J {l.linewidth:f} w 0 0 m {l.pts[1][0] - l.pts[0][0]:f} {l.pts[1][1] - l.pts[0][1]:f} l S Q BT "
         ops = f"BT {ops}ET "
         return ops