Browse Source

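fix fig ctm inverse: patch figure (form XObject) streams with the full inverse of the combined CTM instead of only negating its translation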

Byaidu 1 year ago
parent
commit
72f2c608e2
4 changed files with 9 additions and 22 deletions
  1. 1 1
      pdf2zh/__init__.py
  2. 0 1
      pdf2zh/high_level.py
  3. 0 17
      pdf2zh/pdfcompiler.py
  4. 8 3
      pdf2zh/pdfinterp.py

+ 1 - 1
pdf2zh/__init__.py

@@ -1,2 +1,2 @@
-__version__ = "1.5.0"
+__version__ = "1.5.1"
 __author__ = "Byaidu"

+ 0 - 1
pdf2zh/high_level.py

@@ -5,7 +5,6 @@ import sys
 from io import StringIO
 from typing import Any, BinaryIO, Container, Iterator, Optional, cast
 import tqdm
-from pdf2zh.pdfcompiler import pdf_compile
 
 from pdf2zh.converter import (
     HOCRConverter,

+ 0 - 17
pdf2zh/pdfcompiler.py

@@ -1,17 +0,0 @@
-def pdf_compile(file:str,objs:list,trailer):
-    with open(file,'wb') as f:
-        xrefs=[]
-        f.write('%PDF-1.7\n'.encode())
-        for obj in objs:
-            xrefs.append(f.tell())
-            f.write(obj)
-        startxref=f.tell()
-        f.write(f'xref\n0 {len(objs)+1}\n0000000000 65536 f\n'.encode())
-        for id,xref in enumerate(xrefs):
-            if objs[id]==b'':
-                f.write(f'{xref:0>10d} 00001 f\n'.encode())
-            else:
-                f.write(f'{xref:0>10d} 00000 n\n'.encode())
-        f.write(f"trailer<<\n/Root {trailer['Root'].objid} 0 R\n/Info {trailer['Info'].objid} 0 R\n/Size {trailer['Size']}>>\n".encode())
-        f.write(f'startxref\n{startxref}\n'.encode())
-        f.write('%%EOF\n'.encode())

+ 8 - 3
pdf2zh/pdfinterp.py

@@ -2,6 +2,7 @@ import logging
 import re
 from io import BytesIO
 from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
+import numpy as np
 
 from pdf2zh import settings
 from pdf2zh.casting import safe_float
@@ -961,16 +962,20 @@ class PDFPageInterpreter:
             else:
                 resources = self.resources.copy()
             self.device.begin_figure(xobjid, bbox, matrix)
+            ctm=mult_matrix(matrix, self.ctm)
             ops_base=interpreter.render_contents(
                 resources,
                 [xobj],
-                ctm=mult_matrix(matrix, self.ctm),
+                ctm=ctm,
             )
+            ctm_inv=np.linalg.inv(np.array(ctm[:4]).reshape(2,2))
             self.device.fontmap=interpreter.fontmap # hack
             try: # 有的时候 form 字体加不上这里会烂掉
                 ops_new=self.device.end_figure(xobjid)
-                xobjid=self.xobjmap[xobjid].objid
-                self.obj_patch[xobjid]=f'q {ops_base}Q 1 0 0 1 {-self.ctm[4]} {-self.ctm[5]} cm {ops_new}'
+                pos_inv=-np.mat(ctm[4:])*ctm_inv
+                a,b,c,d=ctm_inv.reshape(4).tolist()
+                e,f=pos_inv.tolist()[0]
+                self.obj_patch[self.xobjmap[xobjid].objid]=f'q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}'
             except:
                 pass
         elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj: