فهرست منبع

feat: recaptcha

Byaidu 1 سال پیش
والد
کامیت
a03f04984f
3 فایل‌های تغییر یافته به همراه 175 افزوده شده و 191 حذف شده
  1. 122 152
      pdf2zh/gui.py
  2. 36 32
      pdf2zh/high_level.py
  3. 17 7
      pdf2zh/pdf2zh.py

+ 122 - 152
pdf2zh/gui.py

@@ -1,13 +1,14 @@
 import os
-import re
-import subprocess
-import tempfile
+import shutil
 from pathlib import Path
 from pdf2zh import __version__
+from pdf2zh.pdf2zh import extract_text
 
 import gradio as gr
 import numpy as np
 import pymupdf
+import tqdm
+import requests
 
 # Map service names to pdf2zh service options
 service_map = {
@@ -29,6 +30,37 @@ lang_map = {
     "Spanish": "es",
     "Italian": "it",
 }
+page_map = {
+    "All": None,
+    "First": [0],
+    "First 5 pages": list(range(0,5)),
+}
+
+flag_demo=False
+if os.environ.get('PDF2ZH_DEMO'):
+    flag_demo=True
+    service_map = {
+        "Google": "google",
+    }
+    page_map = {
+        "First": [0],
+        "First 20 pages": list(range(0,20)),
+    }
+    client_key=os.environ.get('PDF2ZH_CLIENT_KEY')
+    server_key=os.environ.get('PDF2ZH_SERVER_KEY')
+
+
+def verify_recaptcha(response):
+    recaptcha_url = "https://www.google.com/recaptcha/api/siteverify"
+
+    print('reCAPTCHA',server_key,response)
+
+    data = {"secret": server_key, "response": response}
+    result = requests.post(recaptcha_url, data=data).json()
+
+    print('reCAPTCHA',result.get("success"))
+
+    return result.get("success")
 
 
 def pdf_preview(file):
@@ -42,146 +74,76 @@ def pdf_preview(file):
 def upload_file(file, service, progress=gr.Progress()):
     """Handle file upload, validation, and initial preview."""
     if not file or not os.path.exists(file):
-        return None, None, gr.update(visible=False)
+        return None, None, gr.update(visible=False), gr.update(visible=False)
 
-    progress(0.3, desc="Converting PDF for preview...")
     try:
         # Convert first page for preview
         preview_image = pdf_preview(file)
 
-        return file, preview_image, gr.update(visible=True)
+        return file, preview_image, gr.update(visible=True), gr.update(visible=True)
     except Exception as e:
         print(f"Error converting PDF: {e}")
-        return None, None, gr.update(visible=False)
+        return None, None, gr.update(visible=False), gr.update(visible=False)
 
 
 def translate(
-    file_path, service, model_id, lang, page_range, extra_args, progress=gr.Progress()
+    file_path, service, model_id, lang, page_range, recaptcha_response, progress=gr.Progress()
 ):
     """Translate PDF content using selected service."""
     if not file_path:
-        return (
-            None,
-            None,
-            None,
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-        )
+        raise gr.Error('No input')
 
-    progress(0, desc="Starting translation...")
-
-    # Create a temporary working directory using Gradio's file utilities
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Create safe paths using pathlib
-        temp_path = Path(temp_dir)
-        input_pdf = temp_path / "input.pdf"
-
-        # Copy input file to temp directory
-        progress(0.2, desc="Preparing files...")
-        with open(file_path, "rb") as src, open(input_pdf, "wb") as dst:
-            dst.write(src.read())
-
-        selected_service = service_map.get(service, "google")
-        lang_to = lang_map.get(lang, "zh")
-
-        # Execute translation in temp directory with real-time progress
-        progress(0.3, desc=f"Starting translation with {selected_service}...")
-
-        # Create output directory for translated files
-        output_dir = Path("gradio_files") / "outputs"
-        output_dir.mkdir(parents=True, exist_ok=True)
-        final_output = output_dir / f"translated_{os.path.basename(file_path)}"
-        final_output_dual = output_dir / f"dual_{os.path.basename(file_path)}"
-
-        # Prepare extra arguments
-        extra_args = extra_args.strip()
-        # Add page range arguments
-        if page_range == "All":
-            extra_args += ""
-        elif page_range == "First":
-            extra_args += " -p 1"
-        elif page_range == "First 5 pages":
-            extra_args += " -p 1-5"
-
-        # Execute translation command
-        if selected_service == "google":
-            lang_to = "zh-CN" if lang_to == "zh" else lang_to
-
-        if selected_service in ["ollama", "openai"]:
-            command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service}:{model_id} {extra_args}'
-        else:
-            command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service} {extra_args}'
-        print(f"Executing command: {command}")
-        print(f"Files in temp directory: {os.listdir(temp_path)}")
-
-        process = subprocess.Popen(
-            command,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            universal_newlines=True,
-        )
-
-        # Monitor progress from command output
-        while True:
-            output = process.stdout.readline()
-            if output == "" and process.poll() is not None:
-                break
-            if output:
-                print(f"Command output: {output.strip()}")
-                # Look for percentage in output
-                match = re.search(r"(\d+)%", output.strip())
-                if match:
-                    percent = int(match.group(1))
-                    # Map command progress (0-100%) to our progress range (30-80%)
-                    progress_val = 0.3 + (percent * 0.5 / 100)
-                    progress(progress_val, desc=f"Translating content: {percent}%")
-
-        # Get the return code
-        return_code = process.poll()
-        print(f"Command completed with return code: {return_code}")
-
-        # Check if translation was successful
-        translated_file = temp_path / "input-zh.pdf"  # <= Do not change filename
-        dual_file = temp_path / "input-dual.pdf"
-        print(f"Files after translation: {os.listdir(temp_path)}")
-
-        if not translated_file.exists() and not dual_file.exists():
-            print("Translation failed: No output files found")
-            return (
-                None,
-                None,
-                None,
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-            )
+    if flag_demo and not verify_recaptcha(recaptcha_response):
+        raise gr.Error('reCAPTCHA fail')
 
-        # Copy the translated files to permanent locations
-        progress(0.8, desc="Saving translated files...")
-
-        if translated_file.exists():
-            with open(translated_file, "rb") as src, open(final_output, "wb") as dst:
-                dst.write(src.read())
+    progress(0, desc="Starting translation...")
 
-        if dual_file.exists():
-            with open(dual_file, "rb") as src, open(final_output_dual, "wb") as dst:
-                dst.write(src.read())
+    output = Path("pdf2zh_files")
+    output.mkdir(parents=True, exist_ok=True)
+    filename = os.path.splitext(os.path.basename(file_path))[0]
+    file_en = output / f"{filename}.pdf"
+    file_zh = output / f"{filename}-zh.pdf"
+    file_dual = output / f"{filename}-dual.pdf"
+    shutil.copyfile(file_path, file_en)
+
+    selected_service = service_map.get(service, "google")
+    selected_page = page_map.get(page_range, [1])
+    lang_to = lang_map.get(lang, "zh")
+    if selected_service == "google":
+        lang_to = "zh-CN" if lang_to == "zh" else lang_to
+
+    print(f"Files before translation: {os.listdir(output)}")
+    def progress_bar(t:tqdm.tqdm):
+        progress(t.n/t.total, desc="Translating...")
+
+    param={
+            'files':[file_en],
+            'pages':selected_page,
+            'lang_in':'auto',
+            'lang_out':lang_to,
+            'service':f"{selected_service}:{model_id}",
+            'output':output,
+            'thread':4,
+            'callback':progress_bar,
+           }
+    print(param)
+    extract_text(**param)
+    print(f"Files after translation: {os.listdir(output)}")
+
+    if not file_zh.exists() or not file_dual.exists():
+        raise gr.Error('No output')
 
-        # Generate preview of translated PDF
-        progress(0.9, desc="Generating preview...")
-        try:
-            translated_preview = pdf_preview(str(final_output))
-        except Exception as e:
-            print(f"Error generating preview: {e}")
-            translated_preview = None
+    try:
+        translated_preview = pdf_preview(str(file_zh))
+    except Exception as e:
+        raise gr.Error('No preview')
 
     progress(1.0, desc="Translation complete!")
+
     return (
-        str(final_output),
+        str(file_zh),
         translated_preview,
-        str(final_output_dual),
+        str(file_dual),
         gr.update(visible=True),
         gr.update(visible=True),
         gr.update(visible=True),
@@ -239,12 +201,12 @@ with gr.Blocks(
         transition: background-color 0.2s ease-in;
     }
 
-.progress-bar-wrap {
-  border-radius: 8px !important;
-}
-.progress-bar {
-  border-radius: 8px !important;
-}
+    .progress-bar-wrap {
+    border-radius: 8px !important;
+    }
+    .progress-bar {
+    border-radius: 8px !important;
+    }
 
     # .input-file label {
     #     color: #165DFF !important;
@@ -259,12 +221,22 @@ with gr.Blocks(
     #     color: #165DFF !important;
     # }
     """,
+    head='''
+    <script src="https://www.google.com/recaptcha/api.js" async defer></script>
+    <script type="text/javascript">
+        var onVerify = function(token) {
+            el=document.getElementById('verify').getElementsByTagName('textarea')[0];
+            el.value=token;
+            el.dispatchEvent(new Event('input'));
+        };
+    </script>
+    ''' if flag_demo else None
 ) as demo:
-    gr.Markdown("# PDFMathTranslate")
+    gr.Markdown("# [PDFMathTranslate @ Github](https://github.com/Byaidu/PDFMathTranslate)")
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("## File")
+            gr.Markdown("## File | < 5 MB" if flag_demo else "## File")
             file_input = gr.File(
                 label="Document",
                 file_count="single",
@@ -279,12 +251,6 @@ with gr.Blocks(
                 choices=service_map.keys(),
                 value="Google",
             )
-            # lang_src = gr.Dropdown(
-            #     label="Source Language",
-            #     info="Which translation service to use. Some require keys",
-            #     choices=["Google", "DeepL", "DeepLX", "Ollama", "Azure"],
-            #     value="Google",
-            # )
             lang_to = gr.Dropdown(
                 label="Translate to",
                 info="Which language to translate to (optional)",
@@ -292,10 +258,10 @@ with gr.Blocks(
                 value="Chinese",
             )
             page_range = gr.Radio(
-                ["All", "First", "First 5 pages"],
+                choices=page_map.keys(),
                 label="Pages",
                 info="Translate the full document or just few pages (optional)",
-                value="All",
+                value=list(page_map.keys())[0],
             )
             model_id = gr.Textbox(
                 label="Model ID",
@@ -303,11 +269,6 @@ with gr.Blocks(
                 # value="gemma2",
                 visible=False,  # hide by default
             )
-            extra_args = gr.Textbox(
-                label="Advanced Arguments",
-                info="Extra arguments supported in commandline (optional)",
-                value="",
-            )
             envs_status = "<span class='env-success'>- Properly configured.</span><br>"
 
             def details_wrapper(text_markdown):
@@ -374,6 +335,11 @@ with gr.Blocks(
             output_file_dual = gr.File(
                 label="Download Translation (Dual)", visible=False
             )
+            recaptcha_response = gr.Textbox(label="reCAPTCHA Response", elem_id='verify', visible=False)
+            if flag_demo:
+                recaptcha_box=gr.HTML(f'<div class="g-recaptcha" data-sitekey="{client_key}" data-callback="onVerify"></div>', visible=False)
+            else:
+                recaptcha_box=gr.HTML()
             translate_btn = gr.Button("Translate", variant="primary", visible=False)
             tech_details_tog = gr.Markdown(
                 details_wrapper(envs_status),
@@ -389,12 +355,12 @@ with gr.Blocks(
     file_input.upload(
         upload_file,
         inputs=[file_input, service],
-        outputs=[file_input, preview, translate_btn],
+        outputs=[file_input, preview, translate_btn, recaptcha_box],
     )
 
     translate_btn.click(
         translate,
-        inputs=[file_input, service, model_id, lang_to, page_range, extra_args],
+        inputs=[file_input, service, model_id, lang_to, page_range, recaptcha_response],
         outputs=[
             output_file,
             preview,
@@ -407,15 +373,19 @@ with gr.Blocks(
 
 
 def setup_gui(share=False):
-    try:
-        demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=share)
-    except Exception:
-        print("Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software.")
+    import doclayout_yolo # cache
+    if flag_demo:
+        demo.launch(server_name="0.0.0.0", max_file_size='5mb', inbrowser=True)
+    else:
         try:
-            demo.launch(server_name="127.0.0.1", debug=True, inbrowser=True, share=share)
+            demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=share)
         except Exception:
-            print("Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software.")
-            demo.launch(debug=True, inbrowser=True, share=True)
+            print("Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software.")
+            try:
+                demo.launch(server_name="127.0.0.1", debug=True, inbrowser=True, share=share)
+            except Exception:
+                print("Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software.")
+                demo.launch(debug=True, inbrowser=True, share=True)
 
 # For auto-reloading while developing
 if __name__ == "__main__":

+ 36 - 32
pdf2zh/high_level.py

@@ -50,6 +50,7 @@ def extract_text_to_fp(
     lang_in: str = "",
     lang_out: str = "",
     service: str = "",
+    callback: object = None,
     **kwargs: Any,
 ) -> None:
     """Parses text from inf-file and writes to outfp file-like object.
@@ -156,43 +157,46 @@ def extract_text_to_fp(
         total_pages=len(pages)
     else:
         total_pages=page_count
-    for page in tqdm.tqdm(PDFPage.get_pages(
+    with tqdm.tqdm(PDFPage.get_pages(
         inf,
         pages,
         maxpages=maxpages,
         password=password,
         caching=not disable_caching,
-    ), total=total_pages, position=0):
-        pix = doc_en[page.pageno].get_pixmap()
-        image = np.fromstring(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)[:, :, ::-1]
-        page_layout=model.predict(
-            image,
-            imgsz=int(pix.height/32)*32,
-            device="cuda:0" if torch.cuda.is_available() else "cpu", # Auto-select GPU if available
-        )[0]
-        # kdtree 是不可能 kdtree 的,不如直接渲染成图片,用空间换时间
-        box=np.ones((pix.height, pix.width))
-        h,w=box.shape
-        vcls=['abandon','figure','table','isolate_formula','formula_caption']
-        for i,d in enumerate(page_layout.boxes):
-            if not page_layout.names[int(d.cls)] in vcls:
-                x0,y0,x1,y1=d.xyxy.squeeze()
-                x0,y0,x1,y1=np.clip(int(x0-1),0,w-1),np.clip(int(h-y1-1),0,h-1),np.clip(int(x1+1),0,w-1),np.clip(int(h-y0+1),0,h-1)
-                box[y0:y1,x0:x1]=i+2
-        for i,d in enumerate(page_layout.boxes):
-            if page_layout.names[int(d.cls)] in vcls:
-                x0,y0,x1,y1=d.xyxy.squeeze()
-                x0,y0,x1,y1=np.clip(int(x0-1),0,w-1),np.clip(int(h-y1-1),0,h-1),np.clip(int(x1+1),0,w-1),np.clip(int(h-y0+1),0,h-1)
-                box[y0:y1,x0:x1]=0
-        layout[page.pageno]=box
-        # print(page.number,page_layout)
-        page.rotate = (page.rotate + rotation) % 360
-        # 新建一个 xref 存放新指令流
-        page.page_xref = doc_en.get_new_xref() # hack
-        doc_en.update_object(page.page_xref, "<<>>")
-        doc_en.update_stream(page.page_xref,b'')
-        doc_en[page.pageno].set_contents(page.page_xref)
-        interpreter.process_page(page)
+    ), total=total_pages, position=0) as progress:
+        for page in progress:
+            if callback:
+                callback(progress)
+            pix = doc_en[page.pageno].get_pixmap()
+            image = np.fromstring(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)[:, :, ::-1]
+            page_layout=model.predict(
+                image,
+                imgsz=int(pix.height/32)*32,
+                device="cuda:0" if torch.cuda.is_available() else "cpu", # Auto-select GPU if available
+            )[0]
+            # kdtree 是不可能 kdtree 的,不如直接渲染成图片,用空间换时间
+            box=np.ones((pix.height, pix.width))
+            h,w=box.shape
+            vcls=['abandon','figure','table','isolate_formula','formula_caption']
+            for i,d in enumerate(page_layout.boxes):
+                if not page_layout.names[int(d.cls)] in vcls:
+                    x0,y0,x1,y1=d.xyxy.squeeze()
+                    x0,y0,x1,y1=np.clip(int(x0-1),0,w-1),np.clip(int(h-y1-1),0,h-1),np.clip(int(x1+1),0,w-1),np.clip(int(h-y0+1),0,h-1)
+                    box[y0:y1,x0:x1]=i+2
+            for i,d in enumerate(page_layout.boxes):
+                if page_layout.names[int(d.cls)] in vcls:
+                    x0,y0,x1,y1=d.xyxy.squeeze()
+                    x0,y0,x1,y1=np.clip(int(x0-1),0,w-1),np.clip(int(h-y1-1),0,h-1),np.clip(int(x1+1),0,w-1),np.clip(int(h-y0+1),0,h-1)
+                    box[y0:y1,x0:x1]=0
+            layout[page.pageno]=box
+            # print(page.number,page_layout)
+            page.rotate = (page.rotate + rotation) % 360
+            # 新建一个 xref 存放新指令流
+            page.page_xref = doc_en.get_new_xref() # hack
+            doc_en.update_object(page.page_xref, "<<>>")
+            doc_en.update_stream(page.page_xref,b'')
+            doc_en[page.pageno].set_contents(page.page_xref)
+            interpreter.process_page(page)
 
     device.close()
     return obj_patch

+ 17 - 7
pdf2zh/pdf2zh.py

@@ -13,6 +13,7 @@ from typing import TYPE_CHECKING, Any, Container, Iterable, List, Optional
 
 import pymupdf
 from huggingface_hub import hf_hub_download
+from pathlib import Path
 
 from pdf2zh import __version__
 from pdf2zh.pdfexceptions import PDFValueError
@@ -67,6 +68,8 @@ def extract_text(
     lang_in: str = "",
     lang_out: str = "",
     service: str = "",
+    callback: object = None,
+    output: str = "",
     **kwargs: Any,
 ) -> AnyIO:
     import doclayout_yolo
@@ -118,9 +121,9 @@ def extract_text(
                                 )
                 except:
                     pass
-        doc_en.save(f"{filename}-en.pdf")
+        doc_en.save(Path(output) / f"{filename}-en.pdf")
 
-        with open(f"{filename}-en.pdf", "rb") as fp:
+        with open(Path(output) / f"{filename}-en.pdf", "rb") as fp:
             obj_patch: dict = pdf2zh.high_level.extract_text_to_fp(fp, **locals())
 
         for obj_id, ops_new in obj_patch.items():
@@ -131,15 +134,15 @@ def extract_text(
             doc_en.update_stream(obj_id, ops_new.encode())
 
         doc_zh = doc_en
-        doc_dual = pymupdf.open(f"{filename}-en.pdf")
+        doc_dual = pymupdf.open(Path(output) / f"{filename}-en.pdf")
         doc_dual.insert_file(doc_zh)
         for id in range(page_count):
             doc_dual.move_page(page_count + id, id * 2 + 1)
-        doc_zh.save(f"{filename}-zh.pdf", deflate=1)
-        doc_dual.save(f"{filename}-dual.pdf", deflate=1)
+        doc_zh.save(Path(output) / f"{filename}-zh.pdf", deflate=1)
+        doc_dual.save(Path(output) / f"{filename}-dual.pdf", deflate=1)
         doc_zh.close()
         doc_dual.close()
-        os.remove(f"{filename}-en.pdf")
+        os.remove(Path(output) / f"{filename}-en.pdf")
 
     return
 
@@ -216,7 +219,14 @@ def create_parser() -> argparse.ArgumentParser:
         "-s",
         type=str,
         default="google",
-        help="The service to use for translating.",
+        help="The service to use for translation.",
+    )
+    parse_params.add_argument(
+        "--output",
+        "-o",
+        type=str,
+        default="",
+        help="Output directory for files.",
     )
     parse_params.add_argument(
         "--thread",