1 year ago · 2604748fb7
--- a/pdf2zh/converter.py
+++ b/pdf2zh/converter.py
@@ -170,7 +170,7 @@ class TranslateConverter(PDFConverterEx):
 
				         # 全局
			
 
				         lstk: list[LTLine] = []         # 全局线条栈
			
 
				         xt: LTChar = None               # 上一个字符
			
 
				-        xt_cls: int = -1                # 上一个字符所属段落
			
 
				+        xt_cls: int = -1                # 上一个字符所属段落，保证无论第一个字符属于哪个类别都可以触发新段落
			
 
				         vmax: float = ltpage.width / 4  # 行内公式最大宽度
			
 
				         ops: str = ""                   # 渲染结果
			
 
				 
			
@@ -216,6 +216,10 @@ class TranslateConverter(PDFConverterEx):
 
				                 # 读取当前字符在 layout 中的类别
			
 
				                 cx, cy = np.clip(int(child.x0), 0, w - 1), np.clip(int(child.y0), 0, h - 1)
			
 
				                 cls = layout[cy, cx]
			
 
				+                # 锚定文档中 bullet 的位置
			
 
				+                if child.get_text() == "•":
			
 
				+                    cls = 0
			
 
				+                # 判定当前字符是否属于公式
			
 
				                 if (                                                                                        # 判定当前字符是否属于公式
			
 
				                     cls == 0                                                                                # 1. 类别为保留区域
			
 
				                     or (cls == xt_cls and len(sstk[-1].strip()) > 1 and child.size < pstk[-1].size * 0.79)  # 2. 角标字体，有 0.76 的角标和 0.799 的大写，这里用 0.79 取中，同时考虑首字母放大的情况