gui.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. import os
  2. import re
  3. import subprocess
  4. import tempfile
  5. from pathlib import Path
  6. from pdf2zh import __version__
  7. import gradio as gr
  8. import numpy as np
  9. import pymupdf
  10. # Map service names to pdf2zh service options
  11. service_map = {
  12. "Google": "google",
  13. "DeepL": "deepl",
  14. "DeepLX": "deeplx",
  15. "Ollama": "ollama",
  16. "OpenAI": "openai",
  17. "Azure": "azure",
  18. }
  19. lang_map = {
  20. "Chinese": "zh",
  21. "English": "en",
  22. "French": "fr",
  23. "German": "de",
  24. "Japanese": "ja",
  25. "Korean": "ko",
  26. "Russian": "ru",
  27. "Spanish": "es",
  28. "Italian": "it",
  29. }
  30. def pdf_preview(file):
  31. doc = pymupdf.open(file)
  32. page = doc[0]
  33. pix = page.get_pixmap()
  34. image = np.frombuffer(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)
  35. return image
  36. def upload_file(file, service, progress=gr.Progress()):
  37. """Handle file upload, validation, and initial preview."""
  38. if not file or not os.path.exists(file):
  39. return None, None, gr.update(visible=False)
  40. progress(0.3, desc="Converting PDF for preview...")
  41. try:
  42. # Convert first page for preview
  43. preview_image = pdf_preview(file)
  44. return file, preview_image, gr.update(visible=True)
  45. except Exception as e:
  46. print(f"Error converting PDF: {e}")
  47. return None, None, gr.update(visible=False)
  48. def translate(
  49. file_path, service, model_id, lang, page_range, extra_args, progress=gr.Progress()
  50. ):
  51. """Translate PDF content using selected service."""
  52. if not file_path:
  53. return (
  54. None,
  55. None,
  56. None,
  57. gr.update(visible=False),
  58. gr.update(visible=False),
  59. gr.update(visible=False),
  60. )
  61. progress(0, desc="Starting translation...")
  62. # Create a temporary working directory using Gradio's file utilities
  63. with tempfile.TemporaryDirectory() as temp_dir:
  64. # Create safe paths using pathlib
  65. temp_path = Path(temp_dir)
  66. input_pdf = temp_path / "input.pdf"
  67. # Copy input file to temp directory
  68. progress(0.2, desc="Preparing files...")
  69. with open(file_path, "rb") as src, open(input_pdf, "wb") as dst:
  70. dst.write(src.read())
  71. selected_service = service_map.get(service, "google")
  72. lang_to = lang_map.get(lang, "zh")
  73. # Execute translation in temp directory with real-time progress
  74. progress(0.3, desc=f"Starting translation with {selected_service}...")
  75. # Create output directory for translated files
  76. output_dir = Path("gradio_files") / "outputs"
  77. output_dir.mkdir(parents=True, exist_ok=True)
  78. final_output = output_dir / f"translated_{os.path.basename(file_path)}"
  79. final_output_dual = output_dir / f"dual_{os.path.basename(file_path)}"
  80. # Prepare extra arguments
  81. extra_args = extra_args.strip()
  82. # Add page range arguments
  83. if page_range == "All":
  84. extra_args += ""
  85. elif page_range == "First":
  86. extra_args += " -p 1"
  87. elif page_range == "First 5 pages":
  88. extra_args += " -p 1-5"
  89. # Execute translation command
  90. if selected_service == "google":
  91. lang_to = "zh-CN" if lang_to == "zh" else lang_to
  92. if selected_service in ["ollama", "openai"]:
  93. command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service}:{model_id} {extra_args}'
  94. else:
  95. command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service} {extra_args}'
  96. print(f"Executing command: {command}")
  97. print(f"Files in temp directory: {os.listdir(temp_path)}")
  98. process = subprocess.Popen(
  99. command,
  100. shell=True,
  101. stdout=subprocess.PIPE,
  102. stderr=subprocess.STDOUT,
  103. universal_newlines=True,
  104. )
  105. # Monitor progress from command output
  106. while True:
  107. output = process.stdout.readline()
  108. if output == "" and process.poll() is not None:
  109. break
  110. if output:
  111. print(f"Command output: {output.strip()}")
  112. # Look for percentage in output
  113. match = re.search(r"(\d+)%", output.strip())
  114. if match:
  115. percent = int(match.group(1))
  116. # Map command progress (0-100%) to our progress range (30-80%)
  117. progress_val = 0.3 + (percent * 0.5 / 100)
  118. progress(progress_val, desc=f"Translating content: {percent}%")
  119. # Get the return code
  120. return_code = process.poll()
  121. print(f"Command completed with return code: {return_code}")
  122. # Check if translation was successful
  123. translated_file = temp_path / "input-zh.pdf" # <= Do not change filename
  124. dual_file = temp_path / "input-dual.pdf"
  125. print(f"Files after translation: {os.listdir(temp_path)}")
  126. if not translated_file.exists() and not dual_file.exists():
  127. print("Translation failed: No output files found")
  128. return (
  129. None,
  130. None,
  131. None,
  132. gr.update(visible=False),
  133. gr.update(visible=False),
  134. gr.update(visible=False),
  135. )
  136. # Copy the translated files to permanent locations
  137. progress(0.8, desc="Saving translated files...")
  138. if translated_file.exists():
  139. with open(translated_file, "rb") as src, open(final_output, "wb") as dst:
  140. dst.write(src.read())
  141. if dual_file.exists():
  142. with open(dual_file, "rb") as src, open(final_output_dual, "wb") as dst:
  143. dst.write(src.read())
  144. # Generate preview of translated PDF
  145. progress(0.9, desc="Generating preview...")
  146. try:
  147. translated_preview = pdf_preview(str(final_output))
  148. except Exception as e:
  149. print(f"Error generating preview: {e}")
  150. translated_preview = None
  151. progress(1.0, desc="Translation complete!")
  152. return (
  153. str(final_output),
  154. translated_preview,
  155. str(final_output_dual),
  156. gr.update(visible=True),
  157. gr.update(visible=True),
  158. gr.update(visible=True),
  159. )
  160. # Global setup
  161. custom_blue = gr.themes.Color(
  162. c50="#E8F3FF",
  163. c100="#BEDAFF",
  164. c200="#94BFFF",
  165. c300="#6AA1FF",
  166. c400="#4080FF",
  167. c500="#165DFF", # Primary color
  168. c600="#0E42D2",
  169. c700="#0A2BA6",
  170. c800="#061D79",
  171. c900="#03114D",
  172. c950="#020B33",
  173. )
  174. with gr.Blocks(
  175. title="PDFMathTranslate - PDF Translation with preserved formats",
  176. theme=gr.themes.Default(
  177. primary_hue=custom_blue, spacing_size="md", radius_size="lg"
  178. ),
  179. css="""
  180. .secondary-text {color: #999 !important;}
  181. footer {visibility: hidden}
  182. .env-warning {color: #dd5500 !important;}
  183. .env-success {color: #559900 !important;}
  184. @keyframes pulse-background {
  185. 0% { background-color: #FFFFFF; }
  186. 25% { background-color: #FFFFFF; }
  187. 50% { background-color: #E8F3FF; }
  188. 75% { background-color: #FFFFFF; }
  189. 100% { background-color: #FFFFFF; }
  190. }
  191. /* Add dashed border to input-file class */
  192. .input-file {
  193. border: 1.2px dashed #165DFF !important;
  194. border-radius: 6px !important;
  195. # background-color: #ffffff !important;
  196. animation: pulse-background 2s ease-in-out;
  197. transition: background-color 0.4s ease-out;
  198. }
  199. .input-file:hover {
  200. border: 1.2px dashed #165DFF !important;
  201. border-radius: 6px !important;
  202. color: #165DFF !important;
  203. background-color: #E8F3FF !important;
  204. transition: background-color 0.2s ease-in;
  205. }
  206. .progress-bar-wrap {
  207. border-radius: 8px !important;
  208. }
  209. .progress-bar {
  210. border-radius: 8px !important;
  211. }
  212. # .input-file label {
  213. # color: #165DFF !important;
  214. # border: 1.2px dashed #165DFF !important;
  215. # border-left: none !important;
  216. # border-top: none !important;
  217. # }
  218. # .input-file .wrap {
  219. # color: #165DFF !important;
  220. # }
  221. # .input-file .or {
  222. # color: #165DFF !important;
  223. # }
  224. """,
  225. ) as demo:
  226. gr.Markdown("# PDFMathTranslate")
  227. with gr.Row():
  228. with gr.Column(scale=1):
  229. gr.Markdown("## File")
  230. file_input = gr.File(
  231. label="Document",
  232. file_count="single",
  233. file_types=[".pdf"],
  234. type="filepath",
  235. elem_classes=["input-file"],
  236. )
  237. gr.Markdown("## Option")
  238. service = gr.Dropdown(
  239. label="Service",
  240. info="Which translation service to use. Some require keys",
  241. choices=service_map.keys(),
  242. value="Google",
  243. )
  244. # lang_src = gr.Dropdown(
  245. # label="Source Language",
  246. # info="Which translation service to use. Some require keys",
  247. # choices=["Google", "DeepL", "DeepLX", "Ollama", "Azure"],
  248. # value="Google",
  249. # )
  250. lang_to = gr.Dropdown(
  251. label="Translate to",
  252. info="Which language to translate to (optional)",
  253. choices=lang_map.keys(),
  254. value="Chinese",
  255. )
  256. page_range = gr.Radio(
  257. ["All", "First", "First 5 pages"],
  258. label="Pages",
  259. info="Translate the full document or just few pages (optional)",
  260. value="All",
  261. )
  262. model_id = gr.Textbox(
  263. label="Model ID",
  264. info="Please enter the identifier of the model you wish to use (e.g., gemma2). This identifier will be used to specify the particular model for translation.",
  265. # value="gemma2",
  266. visible=False, # hide by default
  267. )
  268. extra_args = gr.Textbox(
  269. label="Advanced Arguments",
  270. info="Extra arguments supported in commandline (optional)",
  271. value="",
  272. )
  273. envs_status = "<span class='env-success'>- Properly configured.</span><br>"
  274. def details_wrapper(text_markdown):
  275. text = f"""
  276. <details>
  277. <summary>Technical details</summary>
  278. {text_markdown}
  279. - GitHub: <a href="https://github.com/Byaidu/PDFMathTranslate">Byaidu/PDFMathTranslate</a><br>
  280. - GUI by: <a href="https://github.com/reycn">Rongxin</a><br>
  281. - Version: {__version__}
  282. </details>"""
  283. return text
  284. def env_var_checker(env_var_name: str) -> str:
  285. if (
  286. not os.environ.get(env_var_name)
  287. or os.environ.get(env_var_name) == ""
  288. ):
  289. envs_status = f"<span class='env-warning'>- Warning: environmental not found or error ({env_var_name}).</span><br>- Please make sure that the environment variables are properly configured (<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  290. else:
  291. value = str(os.environ.get(env_var_name))
  292. envs_status = (
  293. "<span class='env-success'>- Properly configured.</span><br>"
  294. )
  295. if len(value) < 13:
  296. envs_status += (
  297. f"- Env: <code>{os.environ.get(env_var_name)}</code><br>"
  298. )
  299. else:
  300. envs_status += f"- Env: <code>{value[:13]}***</code><br>"
  301. return details_wrapper(envs_status)
  302. def on_select_service(value, evt: gr.EventData):
  303. # hide model id by default
  304. model_visibility = gr.update(visible=False)
  305. # add a text description
  306. if value == "Google":
  307. envs_status = details_wrapper(
  308. "<span class='env-success'>- Properly configured.</span><br>"
  309. )
  310. elif value == "DeepL":
  311. envs_status = env_var_checker("DEEPL_AUTH_KEY")
  312. elif value == "DeepLX":
  313. envs_status = env_var_checker("DEEPLX_AUTH_KEY")
  314. elif value == "Azure":
  315. envs_status = env_var_checker("AZURE_APIKEY")
  316. elif value == "OpenAI":
  317. model_visibility = gr.update(
  318. visible=True, value="gpt-4o"
  319. ) # show model id when service is selected
  320. envs_status = env_var_checker("OPENAI_API_KEY")
  321. elif value == "Ollama":
  322. model_visibility = gr.update(
  323. visible=True, value="gemma2"
  324. ) # show model id when service is selected
  325. envs_status = env_var_checker("OLLAMA_HOST")
  326. else:
  327. envs_status = "<span class='env-warning'>- Warning: model not in the list.</span><br>- Please report via (<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  328. return envs_status, model_visibility
  329. output_title = gr.Markdown("## Translated", visible=False)
  330. output_file = gr.File(label="Download Translation", visible=False)
  331. output_file_dual = gr.File(
  332. label="Download Translation (Dual)", visible=False
  333. )
  334. translate_btn = gr.Button("Translate", variant="primary", visible=False)
  335. tech_details_tog = gr.Markdown(
  336. details_wrapper(envs_status),
  337. elem_classes=["secondary-text"],
  338. )
  339. service.select(on_select_service, service, [tech_details_tog, model_id])
  340. with gr.Column(scale=2):
  341. gr.Markdown("## Preview")
  342. preview = gr.Image(label="Document Preview", visible=True)
  343. # Event handlers
  344. file_input.upload(
  345. upload_file,
  346. inputs=[file_input, service],
  347. outputs=[file_input, preview, translate_btn],
  348. )
  349. translate_btn.click(
  350. translate,
  351. inputs=[file_input, service, model_id, lang_to, page_range, extra_args],
  352. outputs=[
  353. output_file,
  354. preview,
  355. output_file_dual,
  356. output_file,
  357. output_file_dual,
  358. output_title,
  359. ],
  360. )
  361. def setup_gui(share=False):
  362. try:
  363. demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=share)
  364. except Exception:
  365. print("Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software.")
  366. try:
  367. demo.launch(server_name="127.0.0.1", debug=True, inbrowser=True, share=share)
  368. except Exception:
  369. print("Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software.")
  370. demo.launch(debug=True, inbrowser=True, share=True)
  371. # For auto-reloading while developing
  372. if __name__ == "__main__":
  373. setup_gui()