Răsfoiți Sursa

fix fig ctm inverse

Byaidu 1 an în urmă
părinte
commit
72f2c608e2
4 fișiere modificate, cu 9 adăugiri și 22 de ștergeri
  1. 1 1
      pdf2zh/__init__.py
  2. 0 1
      pdf2zh/high_level.py
  3. 0 17
      pdf2zh/pdfcompiler.py
  4. 8 3
      pdf2zh/pdfinterp.py

+ 1 - 1
pdf2zh/__init__.py

@@ -1,2 +1,2 @@
-__version__ = "1.5.0"
+__version__ = "1.5.1"
 __author__ = "Byaidu"
 __author__ = "Byaidu"

+ 0 - 1
pdf2zh/high_level.py

@@ -5,7 +5,6 @@ import sys
 from io import StringIO
 from io import StringIO
 from typing import Any, BinaryIO, Container, Iterator, Optional, cast
 from typing import Any, BinaryIO, Container, Iterator, Optional, cast
 import tqdm
 import tqdm
-from pdf2zh.pdfcompiler import pdf_compile
 
 
 from pdf2zh.converter import (
 from pdf2zh.converter import (
     HOCRConverter,
     HOCRConverter,

+ 0 - 17
pdf2zh/pdfcompiler.py

@@ -1,17 +0,0 @@
-def pdf_compile(file:str,objs:list,trailer):
-    with open(file,'wb') as f:
-        xrefs=[]
-        f.write('%PDF-1.7\n'.encode())
-        for obj in objs:
-            xrefs.append(f.tell())
-            f.write(obj)
-        startxref=f.tell()
-        f.write(f'xref\n0 {len(objs)+1}\n0000000000 65536 f\n'.encode())
-        for id,xref in enumerate(xrefs):
-            if objs[id]==b'':
-                f.write(f'{xref:0>10d} 00001 f\n'.encode())
-            else:
-                f.write(f'{xref:0>10d} 00000 n\n'.encode())
-        f.write(f"trailer<<\n/Root {trailer['Root'].objid} 0 R\n/Info {trailer['Info'].objid} 0 R\n/Size {trailer['Size']}>>\n".encode())
-        f.write(f'startxref\n{startxref}\n'.encode())
-        f.write('%%EOF\n'.encode())

+ 8 - 3
pdf2zh/pdfinterp.py

@@ -2,6 +2,7 @@ import logging
 import re
 import re
 from io import BytesIO
 from io import BytesIO
 from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
 from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
+import numpy as np
 
 
 from pdf2zh import settings
 from pdf2zh import settings
 from pdf2zh.casting import safe_float
 from pdf2zh.casting import safe_float
@@ -961,16 +962,20 @@ class PDFPageInterpreter:
             else:
             else:
                 resources = self.resources.copy()
                 resources = self.resources.copy()
             self.device.begin_figure(xobjid, bbox, matrix)
             self.device.begin_figure(xobjid, bbox, matrix)
+            ctm=mult_matrix(matrix, self.ctm)
             ops_base=interpreter.render_contents(
             ops_base=interpreter.render_contents(
                 resources,
                 resources,
                 [xobj],
                 [xobj],
-                ctm=mult_matrix(matrix, self.ctm),
+                ctm=ctm,
             )
             )
+            ctm_inv=np.linalg.inv(np.array(ctm[:4]).reshape(2,2))
             self.device.fontmap=interpreter.fontmap # hack
             self.device.fontmap=interpreter.fontmap # hack
             try: # 有的时候 form 字体加不上这里会烂掉
             try: # 有的时候 form 字体加不上这里会烂掉
                 ops_new=self.device.end_figure(xobjid)
                 ops_new=self.device.end_figure(xobjid)
-                xobjid=self.xobjmap[xobjid].objid
-                self.obj_patch[xobjid]=f'q {ops_base}Q 1 0 0 1 {-self.ctm[4]} {-self.ctm[5]} cm {ops_new}'
+                pos_inv=-np.mat(ctm[4:])*ctm_inv
+                a,b,c,d=ctm_inv.reshape(4).tolist()
+                e,f=pos_inv.tolist()[0]
+                self.obj_patch[self.xobjmap[xobjid].objid]=f'q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}'
             except:
             except:
                 pass
                 pass
         elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
         elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj: