gui.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. import os
  2. import re
  3. import subprocess
  4. import tempfile
  5. from pathlib import Path
  6. import gradio as gr
  7. import numpy as np
  8. import pymupdf
  9. # Map service names to pdf2zh service options
  10. service_map = {
  11. "Google": "google",
  12. "DeepL": "deepl",
  13. "DeepLX": "deeplx",
  14. "Ollama": "ollama",
  15. "OpenAI": "openai",
  16. "Azure": "azure",
  17. }
  18. lang_map = {
  19. "Chinese": "zh",
  20. "English": "en",
  21. "French": "fr",
  22. "German": "de",
  23. "Japanese": "ja",
  24. "Korean": "ko",
  25. "Russian": "ru",
  26. "Spanish": "es",
  27. }
  28. def pdf_preview(file):
  29. doc = pymupdf.open(file)
  30. page = doc[0]
  31. pix = page.get_pixmap()
  32. image = np.frombuffer(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)
  33. return image
  34. def upload_file(file, service, progress=gr.Progress()):
  35. """Handle file upload, validation, and initial preview."""
  36. if not file or not os.path.exists(file):
  37. return None, None, gr.update(visible=False)
  38. progress(0.3, desc="Converting PDF for preview...")
  39. try:
  40. # Convert first page for preview
  41. preview_image = pdf_preview(file)
  42. return file, preview_image, gr.update(visible=True)
  43. except Exception as e:
  44. print(f"Error converting PDF: {e}")
  45. return None, None, gr.update(visible=False)
  46. def translate(
  47. file_path, service, model_id, lang, page_range, extra_args, progress=gr.Progress()
  48. ):
  49. """Translate PDF content using selected service."""
  50. if not file_path:
  51. return (
  52. None,
  53. None,
  54. None,
  55. gr.update(visible=False),
  56. gr.update(visible=False),
  57. gr.update(visible=False),
  58. )
  59. progress(0, desc="Starting translation...")
  60. # Create a temporary working directory using Gradio's file utilities
  61. with tempfile.TemporaryDirectory() as temp_dir:
  62. # Create safe paths using pathlib
  63. temp_path = Path(temp_dir)
  64. input_pdf = temp_path / "input.pdf"
  65. # Copy input file to temp directory
  66. progress(0.2, desc="Preparing files...")
  67. with open(file_path, "rb") as src, open(input_pdf, "wb") as dst:
  68. dst.write(src.read())
  69. selected_service = service_map.get(service, "google")
  70. lang_to = lang_map.get(lang, "zh")
  71. # Execute translation in temp directory with real-time progress
  72. progress(0.3, desc=f"Starting translation with {selected_service}...")
  73. # Create output directory for translated files
  74. output_dir = Path("gradio_files") / "outputs"
  75. output_dir.mkdir(parents=True, exist_ok=True)
  76. final_output = output_dir / f"translated_{os.path.basename(file_path)}"
  77. final_output_dual = output_dir / f"dual_{os.path.basename(file_path)}"
  78. # Prepare extra arguments
  79. extra_args = extra_args.strip()
  80. # Add page range arguments
  81. if page_range == "All":
  82. extra_args += ""
  83. elif page_range == "First":
  84. extra_args += " -p 1"
  85. elif page_range == "First 5 pages":
  86. extra_args += " -p 1-5"
  87. # Execute translation command
  88. if selected_service == "google":
  89. lang_to = "zh-CN" if lang_to == "zh" else lang_to
  90. if selected_service in ["ollama", "openai"]:
  91. command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service}:{model_id} {extra_args}'
  92. else:
  93. command = f'cd "{temp_path}" && pdf2zh "{input_pdf}" -lo {lang_to} -s {selected_service} {extra_args}'
  94. print(f"Executing command: {command}")
  95. print(f"Files in temp directory: {os.listdir(temp_path)}")
  96. process = subprocess.Popen(
  97. command,
  98. shell=True,
  99. stdout=subprocess.PIPE,
  100. stderr=subprocess.STDOUT,
  101. universal_newlines=True,
  102. )
  103. # Monitor progress from command output
  104. while True:
  105. output = process.stdout.readline()
  106. if output == "" and process.poll() is not None:
  107. break
  108. if output:
  109. print(f"Command output: {output.strip()}")
  110. # Look for percentage in output
  111. match = re.search(r"(\d+)%", output.strip())
  112. if match:
  113. percent = int(match.group(1))
  114. # Map command progress (0-100%) to our progress range (30-80%)
  115. progress_val = 0.3 + (percent * 0.5 / 100)
  116. progress(progress_val, desc=f"Translating content: {percent}%")
  117. # Get the return code
  118. return_code = process.poll()
  119. print(f"Command completed with return code: {return_code}")
  120. # Check if translation was successful
  121. translated_file = temp_path / "input-zh.pdf" # <= Do not change filename
  122. dual_file = temp_path / "input-dual.pdf"
  123. print(f"Files after translation: {os.listdir(temp_path)}")
  124. if not translated_file.exists() and not dual_file.exists():
  125. print("Translation failed: No output files found")
  126. return (
  127. None,
  128. None,
  129. None,
  130. gr.update(visible=False),
  131. gr.update(visible=False),
  132. gr.update(visible=False),
  133. )
  134. # Copy the translated files to permanent locations
  135. progress(0.8, desc="Saving translated files...")
  136. if translated_file.exists():
  137. with open(translated_file, "rb") as src, open(final_output, "wb") as dst:
  138. dst.write(src.read())
  139. if dual_file.exists():
  140. with open(dual_file, "rb") as src, open(final_output_dual, "wb") as dst:
  141. dst.write(src.read())
  142. # Generate preview of translated PDF
  143. progress(0.9, desc="Generating preview...")
  144. try:
  145. translated_preview = pdf_preview(str(final_output))
  146. except Exception as e:
  147. print(f"Error generating preview: {e}")
  148. translated_preview = None
  149. progress(1.0, desc="Translation complete!")
  150. return (
  151. str(final_output),
  152. translated_preview,
  153. str(final_output_dual),
  154. gr.update(visible=True),
  155. gr.update(visible=True),
  156. gr.update(visible=True),
  157. )
  158. # Global setup
  159. custom_blue = gr.themes.Color(
  160. c50="#E8F3FF",
  161. c100="#BEDAFF",
  162. c200="#94BFFF",
  163. c300="#6AA1FF",
  164. c400="#4080FF",
  165. c500="#165DFF", # Primary color
  166. c600="#0E42D2",
  167. c700="#0A2BA6",
  168. c800="#061D79",
  169. c900="#03114D",
  170. c950="#020B33",
  171. )
  172. with gr.Blocks(
  173. title="PDFMathTranslate - PDF Translation with preserved formats",
  174. theme=gr.themes.Default(
  175. primary_hue=custom_blue, spacing_size="md", radius_size="lg"
  176. ),
  177. css="""
  178. .secondary-text {color: #999 !important;}
  179. footer {visibility: hidden}
  180. .env-warning {color: #dd5500 !important;}
  181. .env-success {color: #559900 !important;}
  182. @keyframes pulse-background {
  183. 0% { background-color: #FFFFFF; }
  184. 25% { background-color: #FFFFFF; }
  185. 50% { background-color: #E8F3FF; }
  186. 75% { background-color: #FFFFFF; }
  187. 100% { background-color: #FFFFFF; }
  188. }
  189. /* Add dashed border to input-file class */
  190. .input-file {
  191. border: 1.2px dashed #165DFF !important;
  192. border-radius: 6px !important;
  193. # background-color: #ffffff !important;
  194. animation: pulse-background 2s ease-in-out;
  195. transition: background-color 0.4s ease-out;
  196. }
  197. .input-file:hover {
  198. border: 1.2px dashed #165DFF !important;
  199. border-radius: 6px !important;
  200. color: #165DFF !important;
  201. background-color: #E8F3FF !important;
  202. transition: background-color 0.2s ease-in;
  203. }
  204. .progress-bar-wrap {
  205. border-radius: 8px !important;
  206. }
  207. .progress-bar {
  208. border-radius: 8px !important;
  209. }
  210. # .input-file label {
  211. # color: #165DFF !important;
  212. # border: 1.2px dashed #165DFF !important;
  213. # border-left: none !important;
  214. # border-top: none !important;
  215. # }
  216. # .input-file .wrap {
  217. # color: #165DFF !important;
  218. # }
  219. # .input-file .or {
  220. # color: #165DFF !important;
  221. # }
  222. """,
  223. ) as demo:
  224. gr.Markdown("# PDFMathTranslate")
  225. with gr.Row():
  226. with gr.Column(scale=1):
  227. gr.Markdown("## File")
  228. file_input = gr.File(
  229. label="Document",
  230. file_count="single",
  231. file_types=[".pdf"],
  232. type="filepath",
  233. elem_classes=["input-file"],
  234. )
  235. gr.Markdown("## Option")
  236. service = gr.Dropdown(
  237. label="Service",
  238. info="Which translation service to use. Some require keys",
  239. choices=service_map.keys(),
  240. value="Google",
  241. )
  242. # lang_src = gr.Dropdown(
  243. # label="Source Language",
  244. # info="Which translation service to use. Some require keys",
  245. # choices=["Google", "DeepL", "DeepLX", "Ollama", "Azure"],
  246. # value="Google",
  247. # )
  248. lang_to = gr.Dropdown(
  249. label="Translate to",
  250. info="Which language to translate to (optional)",
  251. choices=lang_map.keys(),
  252. value="Chinese",
  253. )
  254. page_range = gr.Radio(
  255. ["All", "First", "First 5 pages"],
  256. label="Pages",
  257. info="Translate the full document or just few pages (optional)",
  258. value="All",
  259. )
  260. model_id = gr.Textbox(
  261. label="Model ID",
  262. info="Please enter the identifier of the model you wish to use (e.g., gemma2). This identifier will be used to specify the particular model for translation.",
  263. # value="gemma2",
  264. visible=False, # hide by default
  265. )
  266. extra_args = gr.Textbox(
  267. label="Advanced Arguments",
  268. info="Extra arguments supported in commandline (optional)",
  269. value="",
  270. )
  271. envs_status = "<span class='env-success'>- Properly configured.</span><br>"
  272. def details_wrapper(text_markdown):
  273. text = f"""
  274. <details>
  275. <summary>Technical details</summary>
  276. {text_markdown}
  277. - GitHub: <a href="https://github.com/Byaidu/PDFMathTranslate">Byaidu/PDFMathTranslate</a><br>
  278. - GUI by: <a href="https://github.com/reycn">Rongxin</a>
  279. </details>"""
  280. return text
  281. def env_var_checker(env_var_name: str) -> str:
  282. if (
  283. not os.environ.get(env_var_name)
  284. or os.environ.get(env_var_name) == ""
  285. ):
  286. envs_status = f"<span class='env-warning'>- Warning: environmental not found or error ({env_var_name}).</span><br>- Please make sure that the environment variables are properly configured (<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  287. else:
  288. value = str(os.environ.get(env_var_name))
  289. envs_status = (
  290. "<span class='env-success'>- Properly configured.</span><br>"
  291. )
  292. if len(value) < 13:
  293. envs_status += (
  294. f"- Env: <code>{os.environ.get(env_var_name)}</code><br>"
  295. )
  296. else:
  297. envs_status += f"- Env: <code>{value[:13]}***</code><br>"
  298. return details_wrapper(envs_status)
  299. def on_select_service(value, evt: gr.EventData):
  300. # hide model id by default
  301. model_visibility = gr.update(visible=False)
  302. # add a text description
  303. if value == "Google":
  304. envs_status = details_wrapper(
  305. "<span class='env-success'>- Properly configured.</span><br>"
  306. )
  307. elif value == "DeepL":
  308. envs_status = env_var_checker("DEEPL_AUTH_KEY")
  309. elif value == "DeepLX":
  310. envs_status = env_var_checker("DEEPLX_AUTH_KEY")
  311. elif value == "Azure":
  312. envs_status = env_var_checker("AZURE_APIKEY")
  313. elif value == "OpenAI":
  314. model_visibility = gr.update(
  315. visible=True, value="gpt-4o"
  316. ) # show model id when service is selected
  317. envs_status = env_var_checker("OPENAI_API_KEY")
  318. elif value == "Ollama":
  319. model_visibility = gr.update(
  320. visible=True, value="gemma2"
  321. ) # show model id when service is selected
  322. envs_status = env_var_checker("OLLAMA_HOST")
  323. else:
  324. envs_status = "<span class='env-warning'>- Warning: model not in the list.</span><br>- Please report via (<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  325. return envs_status, model_visibility
  326. output_title = gr.Markdown("## Translated", visible=False)
  327. output_file = gr.File(label="Download Translation", visible=False)
  328. output_file_dual = gr.File(
  329. label="Download Translation (Dual)", visible=False
  330. )
  331. translate_btn = gr.Button("Translate", variant="primary", visible=False)
  332. tech_details_tog = gr.Markdown(
  333. details_wrapper(envs_status),
  334. elem_classes=["secondary-text"],
  335. )
  336. service.select(on_select_service, service, [tech_details_tog, model_id])
  337. with gr.Column(scale=2):
  338. gr.Markdown("## Preview")
  339. preview = gr.Image(label="Document Preview", visible=True)
  340. # Event handlers
  341. file_input.upload(
  342. upload_file,
  343. inputs=[file_input, service],
  344. outputs=[file_input, preview, translate_btn],
  345. )
  346. translate_btn.click(
  347. translate,
  348. inputs=[file_input, service, model_id, lang_to, page_range, extra_args],
  349. outputs=[
  350. output_file,
  351. preview,
  352. output_file_dual,
  353. output_file,
  354. output_file_dual,
  355. output_title,
  356. ],
  357. )
  358. def setup_gui():
  359. try:
  360. demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=False)
  361. except Exception:
  362. print(
  363. "Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software."
  364. )
  365. try:
  366. demo.launch(
  367. server_name="127.0.0.1", debug=True, inbrowser=True, share=False
  368. )
  369. except Exception:
  370. print(
  371. "Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software."
  372. )
  373. demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=True)
  374. # For auto-reloading while developing
  375. if __name__ == "__main__":
  376. setup_gui()