| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521 |
import asyncio
import cgi
import os
import shutil
import uuid
from asyncio import CancelledError
from email.message import Message
from pathlib import Path
from urllib.parse import unquote, urlparse

import gradio as gr
import requests
import tqdm
from gradio_pdf import PDF

from pdf2zh import __version__
from pdf2zh.high_level import translate
from pdf2zh.translator import (
    BaseTranslator,
    GoogleTranslator,
    BingTranslator,
    DeepLTranslator,
    DeepLXTranslator,
    OllamaTranslator,
    AzureOpenAITranslator,
    OpenAITranslator,
    ZhipuTranslator,
    SiliconTranslator,
    GeminiTranslator,
    AzureTranslator,
    TencentTranslator,
    DifyTranslator,
    AnythingLLMTranslator,
)
# Translation back-ends offered in the "Service" dropdown, keyed by the label
# shown to the user.
# NOTE(review): the values are the translator classes themselves (their
# ``envs`` and ``name`` attributes are read directly from them), although the
# annotation names ``BaseTranslator`` -- confirm the intended annotation.
service_map: dict[str, BaseTranslator] = {
    "Google": GoogleTranslator,
    "Bing": BingTranslator,
    "DeepL": DeepLTranslator,
    "DeepLX": DeepLXTranslator,
    "Ollama": OllamaTranslator,
    "AzureOpenAI": AzureOpenAITranslator,
    "OpenAI": OpenAITranslator,
    "Zhipu": ZhipuTranslator,
    "Silicon": SiliconTranslator,
    "Gemini": GeminiTranslator,
    "Azure": AzureTranslator,
    "Tencent": TencentTranslator,
    "Dify": DifyTranslator,
    "AnythingLLM": AnythingLLMTranslator,
}

# Language labels shown in the UI mapped to the codes passed to ``translate``.
lang_map = dict(
    Chinese="zh",
    English="en",
    French="fr",
    German="de",
    Japanese="ja",
    Korean="ko",
    Russian="ru",
    Spanish="es",
    Italian="it",
)

# Page-range labels mapped to the page indices passed to ``translate``
# (``None`` is used for the "All" choice).
page_map = {
    "All": None,
    "First": [0],
    "First 5 pages": list(range(5)),
}

# Demo mode is switched on by setting the PDF2ZH_DEMO environment variable to
# any non-empty value; it narrows the selectable services and page ranges.
flag_demo = bool(os.getenv("PDF2ZH_DEMO"))
if flag_demo:
    service_map = {
        "Google": GoogleTranslator,
    }
    page_map = {
        "First": [0],
        "First 20 pages": list(range(20)),
    }

# reCAPTCHA site key (embedded in the page) and secret key (sent to the
# verification endpoint). Both are only consumed on demo-mode code paths.
# NOTE(review): read unconditionally here; confirm whether the original kept
# these two assignments inside the demo-mode branch.
client_key = os.getenv("PDF2ZH_CLIENT_KEY")
server_key = os.getenv("PDF2ZH_SERVER_KEY")
def verify_recaptcha(response):
    """Check a reCAPTCHA token against Google's ``siteverify`` endpoint.

    Args:
        response: Token produced by the reCAPTCHA widget in the browser.

    Returns:
        The ``success`` field of the verification reply (``None`` when the
        reply carries no such field), or ``False`` when the request fails or
        the reply body is not valid JSON.
    """
    recaptcha_url = "https://www.google.com/recaptcha/api/siteverify"
    # The secret key and the user's token are deliberately NOT logged.
    data = {"secret": server_key, "response": response}
    try:
        # A timeout keeps a stalled request from blocking the handler forever.
        result = requests.post(recaptcha_url, data=data, timeout=10).json()
    except (requests.RequestException, ValueError) as exc:
        # Fail closed: an unreachable or garbled verifier counts as "not verified".
        print("reCAPTCHA", "verification request failed:", exc)
        return False
    print("reCAPTCHA", result.get("success"))
    return result.get("success")
def _download_filename(content_disposition, url):
    """Pick a bare, safe ``.pdf`` file name for a download (never a path)."""
    filename = None
    if content_disposition:
        try:  # filename from header
            header = Message()
            header["Content-Disposition"] = content_disposition
            filename = header.get_filename()
        except (TypeError, ValueError, LookupError):
            # Malformed header: fall through to the URL-based name.
            filename = None
    if not filename:  # filename from url
        # Use only the path component so "?query" / "#fragment" never end up
        # in the file name.
        filename = unquote(urlparse(url).path)
    # Both sources are controlled by the remote side: keep the last path
    # component only so the download cannot escape the target directory.
    filename = os.path.basename(filename.replace("\\", "/"))
    if filename in ("", ".", ".."):
        filename = "download"
    # translate_file looks for "<stem>.pdf" next to the download, so a name
    # without the extension (e.g. ".../pdf/2301.00001") must gain one.
    if not filename.lower().endswith(".pdf"):
        filename += ".pdf"
    return filename


def download_with_limit(url, save_path, size_limit):
    """Stream ``url`` into the directory ``save_path`` with a size cap.

    Args:
        url: Address of the file to download.
        save_path: Destination directory; must support the ``/`` operator
            (a ``pathlib.Path``).
        size_limit: Maximum number of bytes to accept; a falsy value
            disables the check.

    Returns:
        Path of the downloaded file inside ``save_path``.

    Raises:
        gr.Error: If the download exceeds ``size_limit``.
        requests.RequestException: If the request fails or the server
            answers with an error status.
    """
    chunk_size = 1024
    total_size = 0
    with requests.get(url, stream=True, timeout=10) as response:
        response.raise_for_status()
        filename = _download_filename(
            response.headers.get("Content-Disposition"), url
        )
        target = save_path / filename
        try:
            with open(target, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    total_size += len(chunk)
                    if size_limit and total_size > size_limit:
                        raise gr.Error("Exceeds file size limit")
                    file.write(chunk)
        except Exception:
            # Do not leave a truncated file behind when the download aborts.
            target.unlink(missing_ok=True)
            raise
    return target
def stop_translate_file(state):
    """Signal cancellation for the translation tracked in ``state``.

    Looks up ``state["session_id"]`` in ``cancellation_event_map`` and sets
    the stored event when one is registered. Does nothing when the session
    id is ``None`` or has no registered event.
    """
    current_session = state["session_id"]
    if current_session is not None and current_session in cancellation_event_map:
        cancellation_event_map[current_session].set()
def translate_file(
    file_type,
    file_input,
    link_input,
    service,
    lang_from,
    lang_to,
    page_range,
    recaptcha_response,
    state,
    progress=gr.Progress(),
    *envs,
):
    """Translate PDF content using selected service.

    Args:
        file_type: ``"File"`` to use ``file_input``; any other value uses
            ``link_input``.
        file_input: Path of the uploaded file; copied into the working
            directory ``pdf2zh_files``.
        link_input: URL downloaded into the working directory (capped at
            5 MiB in demo mode).
        service: Key of ``service_map`` selecting the translator.
        lang_from: Key of ``lang_map`` for the source language.
        lang_to: Key of ``lang_map`` for the target language.
        page_range: Key of ``page_map`` selecting the pages to translate.
        recaptcha_response: reCAPTCHA token; only verified in demo mode.
        state: Mutable session dict; its ``"session_id"`` entry is set to a
            fresh id so the cancel handler can find this run.
        progress: Gradio progress reporter.
        *envs: Values written to ``os.environ`` for the translator's
            environment variables, in the order of ``translator.envs``.

    Returns:
        A 6-tuple: mono PDF path (download), mono PDF path (preview), dual
        PDF path (download), and three ``gr.update(visible=True)`` values.

    Raises:
        gr.Error: On failed reCAPTCHA, missing input, cancellation, or when
            the expected output files were not produced.
    """
    session_id = uuid.uuid4()
    state["session_id"] = session_id
    cancellation_event_map[session_id] = asyncio.Event()

    try:
        if flag_demo and not verify_recaptcha(recaptcha_response):
            raise gr.Error("reCAPTCHA fail")

        progress(0, desc="Starting translation...")

        output = Path("pdf2zh_files")
        output.mkdir(parents=True, exist_ok=True)

        if file_type == "File":
            if not file_input:
                raise gr.Error("No input")
            file_path = shutil.copy(file_input, output)
        else:
            if not link_input:
                raise gr.Error("No input")
            file_path = download_with_limit(
                link_input,
                output,
                5 * 1024 * 1024 if flag_demo else None,
            )

        filename = os.path.splitext(os.path.basename(file_path))[0]
        file_raw = output / f"{filename}.pdf"
        file_mono = output / f"{filename}-mono.pdf"
        file_dual = output / f"{filename}-dual.pdf"

        translator = service_map[service]
        selected_page = page_map[page_range]
        lang_from = lang_map[lang_from]
        lang_to = lang_map[lang_to]

        # Hand the user-supplied settings to the translator through the
        # process environment.
        for i, (env_name, _default) in enumerate(translator.envs.items()):
            os.environ[env_name] = envs[i]

        print(f"Files before translation: {os.listdir(output)}")

        def progress_bar(t: tqdm.tqdm):
            # ``total`` may be 0 or None before the work size is known.
            if t.total:
                progress(t.n / t.total, desc="Translating...")

        param = {
            "files": [str(file_raw)],
            "pages": selected_page,
            "lang_in": lang_from,
            "lang_out": lang_to,
            "service": f"{translator.name}",
            "output": output,
            "thread": 4,
            "callback": progress_bar,
            "cancellation_event": cancellation_event_map[session_id],
        }
        print(param)
        try:
            translate(**param)
        except CancelledError:
            raise gr.Error("Translation cancelled")
        print(f"Files after translation: {os.listdir(output)}")

        if not file_mono.exists() or not file_dual.exists():
            raise gr.Error("No output")

        progress(1.0, desc="Translation complete!")

        return (
            str(file_mono),
            str(file_mono),
            str(file_dual),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        )
    finally:
        # Always drop the per-session event (success, failure or cancel) so
        # the module-level map does not grow with every request.
        cancellation_event_map.pop(session_id, None)
# Global setup

# Shades of the custom blue palette, from the lightest (c50) to the darkest
# (c950); the palette is used as the theme's primary hue.
_custom_blue_shades = {
    "c50": "#E8F3FF",
    "c100": "#BEDAFF",
    "c200": "#94BFFF",
    "c300": "#6AA1FF",
    "c400": "#4080FF",
    "c500": "#165DFF",  # Primary color
    "c600": "#0E42D2",
    "c700": "#0A2BA6",
    "c800": "#061D79",
    "c900": "#03114D",
    "c950": "#020B33",
}
custom_blue = gr.themes.Color(**_custom_blue_shades)

# Maps a translation session id to the event that requests its cancellation.
cancellation_event_map = {}
# Build the Gradio application: a settings column (input, service, languages,
# pages, outputs, buttons) next to a PDF preview column, followed by the event
# wiring. In demo mode the page additionally loads the reCAPTCHA script.
# NOTE(review): the Row/Column nesting below was reconstructed from a listing
# that had lost its indentation -- confirm the layout against the running UI.
with gr.Blocks(
    title="PDFMathTranslate - PDF Translation with preserved formats",
    theme=gr.themes.Default(
        primary_hue=custom_blue, spacing_size="md", radius_size="lg"
    ),
    css="""
.secondary-text {color: #999 !important;}
footer {visibility: hidden}
.env-warning {color: #dd5500 !important;}
.env-success {color: #559900 !important;}
/* Add dashed border to input-file class */
.input-file {
border: 1.2px dashed #165DFF !important;
border-radius: 6px !important;
}
.progress-bar-wrap {
border-radius: 8px !important;
}
.progress-bar {
border-radius: 8px !important;
}
""",
    # Demo mode only: load reCAPTCHA and define the callback that copies the
    # token into the hidden "verify" textbox.
    head=(
        """
<script src="https://www.google.com/recaptcha/api.js?render=explicit" async defer></script>
<script type="text/javascript">
var onVerify = function(token) {
el=document.getElementById('verify').getElementsByTagName('textarea')[0];
el.value=token;
el.dispatchEvent(new Event('input'));
};
</script>
"""
        if flag_demo
        else ""
    ),
) as demo:
    gr.Markdown(
        "# [PDFMathTranslate @ GitHub](https://github.com/Byaidu/PDFMathTranslate)"
    )
    with gr.Row():
        # Left column: input selection, options, outputs and buttons.
        with gr.Column(scale=1):
            gr.Markdown("## File | < 5 MB" if flag_demo else "## File")
            file_type = gr.Radio(
                choices=["File", "Link"],
                label="Type",
                value="File",
            )
            file_input = gr.File(
                label="File",
                file_count="single",
                file_types=[".pdf"],
                type="filepath",
                elem_classes=["input-file"],
            )
            # Hidden until the "Link" input type is chosen.
            link_input = gr.Textbox(
                label="Link",
                visible=False,
                interactive=True,
            )
            gr.Markdown("## Option")
            service = gr.Dropdown(
                label="Service",
                choices=service_map.keys(),
                value="Google",
            )
            # Three initially hidden textboxes for service-specific settings;
            # on_select_service reveals and labels the ones a service needs.
            envs = []
            for i in range(3):
                envs.append(
                    gr.Textbox(
                        visible=False,
                        interactive=True,
                    )
                )
            with gr.Row():
                lang_from = gr.Dropdown(
                    label="Translate from",
                    choices=lang_map.keys(),
                    value="English",
                )
                lang_to = gr.Dropdown(
                    label="Translate to",
                    choices=lang_map.keys(),
                    value="Chinese",
                )
            page_range = gr.Radio(
                choices=page_map.keys(),
                label="Pages",
                value=list(page_map.keys())[0],
            )

            def on_select_service(service, evt: gr.EventData):
                """Return updates for the three service-setting textboxes.

                All three are first hidden and cleared; then one textbox per
                entry of the selected translator's ``envs`` is shown, labelled
                with the variable name and pre-filled from the process
                environment, falling back to the value stored in ``envs``.
                """
                translator = service_map[service]
                _envs = []
                for i in range(3):
                    _envs.append(gr.update(visible=False, value=""))
                # NOTE(review): a translator with more than three ``envs``
                # entries would index past the end of ``_envs`` here.
                for i, env in enumerate(translator.envs.items()):
                    _envs[i] = gr.update(
                        visible=True, label=env[0], value=os.getenv(env[0], env[1])
                    )
                return _envs

            def on_select_filetype(file_type):
                """Show the file upload or the link textbox, matching ``file_type``."""
                return (
                    gr.update(visible=file_type == "File"),
                    gr.update(visible=file_type == "Link"),
                )

            # Output widgets stay hidden until a translation has finished.
            output_title = gr.Markdown("## Translated", visible=False)
            output_file_mono = gr.File(
                label="Download Translation (Mono)", visible=False
            )
            output_file_dual = gr.File(
                label="Download Translation (Dual)", visible=False
            )
            # Hidden textbox that receives the reCAPTCHA token from onVerify.
            recaptcha_response = gr.Textbox(
                label="reCAPTCHA Response", elem_id="verify", visible=False
            )
            recaptcha_box = gr.HTML('<div id="recaptcha-box"></div>')
            translate_btn = gr.Button("Translate", variant="primary")
            cancellation_btn = gr.Button("Cancel", variant="secondary")
            tech_details_tog = gr.Markdown(
                f"""
<summary>Technical details</summary>
- GitHub: <a href="https://github.com/Byaidu/PDFMathTranslate">Byaidu/PDFMathTranslate</a><br>
- GUI by: <a href="https://github.com/reycn">Rongxin</a><br>
- Version: {__version__}
""",
                elem_classes=["secondary-text"],
            )
            service.select(
                on_select_service,
                service,
                envs,
            )
            # Toggle the input widgets; in demo mode the attached script also
            # renders the reCAPTCHA widget (render errors are swallowed).
            file_type.select(
                on_select_filetype,
                file_type,
                [file_input, link_input],
                js=(
                    f"""
(a,b)=>{{
try{{
grecaptcha.render('recaptcha-box',{{
'sitekey':'{client_key}',
'callback':'onVerify'
}});
}}catch(error){{}}
return [a];
}}
"""
                    if flag_demo
                    else ""
                ),
            )
        # Right column: document preview.
        with gr.Column(scale=2):
            gr.Markdown("## Preview")
            preview = PDF(label="Document Preview", visible=True)
    # Event handlers
    # Show an uploaded file in the preview; in demo mode the attached script
    # also renders the reCAPTCHA widget.
    file_input.upload(
        lambda x: x,
        inputs=file_input,
        outputs=preview,
        js=(
            f"""
(a,b)=>{{
try{{
grecaptcha.render('recaptcha-box',{{
'sitekey':'{client_key}',
'callback':'onVerify'
}});
}}catch(error){{}}
return [a];
}}
"""
            if flag_demo
            else ""
        ),
    )
    # Per-session holder for the id of the running translation, shared by the
    # translate and cancel handlers.
    state = gr.State({"session_id": None})
    # The six outputs line up with translate_file's return tuple: mono file,
    # preview, dual file, then visibility updates for mono, dual and title.
    # In demo mode the reCAPTCHA widget is reset afterwards.
    translate_btn.click(
        translate_file,
        inputs=[
            file_type,
            file_input,
            link_input,
            service,
            lang_from,
            lang_to,
            page_range,
            recaptcha_response,
            state,
            *envs,
        ],
        outputs=[
            output_file_mono,
            preview,
            output_file_dual,
            output_file_mono,
            output_file_dual,
            output_title,
        ],
    ).then(lambda: None, js="()=>{grecaptcha.reset()}" if flag_demo else "")
    cancellation_btn.click(
        stop_translate_file,
        inputs=[state],
    )
def readuserandpasswd(file_path):
    """Load login credentials and an optional login-page message.

    Args:
        file_path: Sequence whose first item is the path of a text file with
            one comma-separated credential entry per line, and whose optional
            second item is the path of a file whose text is returned
            verbatim. Empty strings mean "not provided".

    Returns:
        ``(tuple_list, content)``: one tuple of comma-separated fields per
        non-blank line of the first file, and the full text of the second
        file. Each falls back to ``[]`` / ``""`` when its file is not
        provided or cannot be found.
    """
    tuple_list = []
    content = ""
    # Skip empty paths silently: the default ("", "") simply means that no
    # auth files were configured, which is not an error worth reporting.
    if len(file_path) == 2 and file_path[1]:
        try:
            with open(file_path[1], "r", encoding="utf-8") as file:
                content = file.read()
        except FileNotFoundError:
            print(f"Error: File '{file_path[1]}' not found.")
    if file_path and file_path[0]:
        try:
            with open(file_path[0], "r", encoding="utf-8") as file:
                tuple_list = [
                    tuple(line.strip().split(",")) for line in file if line.strip()
                ]
        except FileNotFoundError:
            print(f"Error: File '{file_path[0]}' not found.")
    return tuple_list, content
def setup_gui(share=False, authfile=("", "")):
    """Launch the Gradio app, falling back to other bind settings on failure.

    Args:
        share: Passed to ``demo.launch`` as ``share`` for the first two
            launch attempts.
        authfile: Pair ``(credentials file, login message file)`` handed to
            ``readuserandpasswd``; when it yields credentials, they are
            passed to ``demo.launch`` as ``auth`` / ``auth_message``.

    In demo mode the app is launched once on ``0.0.0.0`` with a 5 MB upload
    limit and without authentication. Otherwise the launch is attempted on
    ``0.0.0.0``, then on ``127.0.0.1``, and finally without an explicit host
    and with ``share=True``.
    """
    userlist, html = readuserandpasswd(authfile)
    if flag_demo:
        demo.launch(server_name="0.0.0.0", max_file_size="5mb", inbrowser=True)
        return

    launch_kwargs = {"debug": True, "inbrowser": True}
    if userlist:
        launch_kwargs.update(auth=userlist, auth_message=html)

    for host in ("0.0.0.0", "127.0.0.1"):
        try:
            demo.launch(server_name=host, share=share, **launch_kwargs)
            return
        except Exception:
            print(
                f"Error launching GUI using {host}.\nThis may be caused by global mode of proxy software."
            )
    # Last resort: let Gradio choose the host and force a public share link.
    demo.launch(share=True, **launch_kwargs)
# For auto-reloading while developing: running this module directly launches
# the GUI with the default settings.
if __name__ == "__main__":
    setup_gui()
|