gui.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. import os
  2. import shutil
  3. from pathlib import Path
  4. from pdf2zh import __version__
  5. from pdf2zh.pdf2zh import extract_text
  6. import gradio as gr
  7. import numpy as np
  8. import pymupdf
  9. import tqdm
  10. import requests
  11. # Map service names to pdf2zh service options
  12. service_map = {
  13. "Google": "google",
  14. "DeepL": "deepl",
  15. "DeepLX": "deeplx",
  16. "Ollama": "ollama",
  17. "OpenAI": "openai",
  18. "Azure": "azure",
  19. }
  20. lang_map = {
  21. "Chinese": "zh",
  22. "English": "en",
  23. "French": "fr",
  24. "German": "de",
  25. "Japanese": "ja",
  26. "Korean": "ko",
  27. "Russian": "ru",
  28. "Spanish": "es",
  29. "Italian": "it",
  30. }
  31. page_map = {
  32. "All": None,
  33. "First": [0],
  34. "First 5 pages": list(range(0, 5)),
  35. }
  36. flag_demo = False
  37. if os.environ.get("PDF2ZH_DEMO"):
  38. flag_demo = True
  39. service_map = {
  40. "Google": "google",
  41. }
  42. page_map = {
  43. "First": [0],
  44. "First 20 pages": list(range(0, 20)),
  45. }
  46. client_key = os.environ.get("PDF2ZH_CLIENT_KEY")
  47. server_key = os.environ.get("PDF2ZH_SERVER_KEY")
  48. def verify_recaptcha(response):
  49. recaptcha_url = "https://www.google.com/recaptcha/api/siteverify"
  50. print("reCAPTCHA", server_key, response)
  51. data = {"secret": server_key, "response": response}
  52. result = requests.post(recaptcha_url, data=data).json()
  53. print("reCAPTCHA", result.get("success"))
  54. return result.get("success")
  55. def pdf_preview(file):
  56. doc = pymupdf.open(file)
  57. page = doc[0]
  58. pix = page.get_pixmap()
  59. image = np.frombuffer(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)
  60. return image
  61. def upload_file(file, service, progress=gr.Progress()):
  62. """Handle file upload, validation, and initial preview."""
  63. if not file or not os.path.exists(file):
  64. return None, None
  65. try:
  66. # Convert first page for preview
  67. preview_image = pdf_preview(file)
  68. return file, preview_image
  69. except Exception as e:
  70. print(f"Error converting PDF: {e}")
  71. return None, None
  72. def translate(
  73. file_path,
  74. service,
  75. model_id,
  76. lang,
  77. page_range,
  78. recaptcha_response,
  79. progress=gr.Progress(),
  80. ):
  81. """Translate PDF content using selected service."""
  82. if not file_path:
  83. raise gr.Error("No input")
  84. if flag_demo and not verify_recaptcha(recaptcha_response):
  85. raise gr.Error("reCAPTCHA fail")
  86. progress(0, desc="Starting translation...")
  87. output = Path("pdf2zh_files")
  88. output.mkdir(parents=True, exist_ok=True)
  89. filename = os.path.splitext(os.path.basename(file_path))[0]
  90. file_en = output / f"{filename}.pdf"
  91. file_zh = output / f"{filename}-zh.pdf"
  92. file_dual = output / f"{filename}-dual.pdf"
  93. shutil.copyfile(file_path, file_en)
  94. selected_service = service_map.get(service, "google")
  95. selected_page = page_map.get(page_range, [0])
  96. lang_to = lang_map.get(lang, "zh")
  97. if selected_service == "google":
  98. lang_to = "zh-CN" if lang_to == "zh" else lang_to
  99. print(f"Files before translation: {os.listdir(output)}")
  100. def progress_bar(t: tqdm.tqdm):
  101. progress(t.n / t.total, desc="Translating...")
  102. param = {
  103. "files": [file_en],
  104. "pages": selected_page,
  105. "lang_in": "auto",
  106. "lang_out": lang_to,
  107. "service": f"{selected_service}:{model_id}",
  108. "output": output,
  109. "thread": 4,
  110. "callback": progress_bar,
  111. }
  112. print(param)
  113. extract_text(**param)
  114. print(f"Files after translation: {os.listdir(output)}")
  115. if not file_zh.exists() or not file_dual.exists():
  116. raise gr.Error("No output")
  117. try:
  118. translated_preview = pdf_preview(str(file_zh))
  119. except Exception:
  120. raise gr.Error("No preview")
  121. progress(1.0, desc="Translation complete!")
  122. return (
  123. str(file_zh),
  124. translated_preview,
  125. str(file_dual),
  126. gr.update(visible=True),
  127. gr.update(visible=True),
  128. gr.update(visible=True),
  129. )
  130. # Global setup
  131. custom_blue = gr.themes.Color(
  132. c50="#E8F3FF",
  133. c100="#BEDAFF",
  134. c200="#94BFFF",
  135. c300="#6AA1FF",
  136. c400="#4080FF",
  137. c500="#165DFF", # Primary color
  138. c600="#0E42D2",
  139. c700="#0A2BA6",
  140. c800="#061D79",
  141. c900="#03114D",
  142. c950="#020B33",
  143. )
  144. with gr.Blocks(
  145. title="PDFMathTranslate - PDF Translation with preserved formats",
  146. theme=gr.themes.Default(
  147. primary_hue=custom_blue, spacing_size="md", radius_size="lg"
  148. ),
  149. css="""
  150. .secondary-text {color: #999 !important;}
  151. footer {visibility: hidden}
  152. .env-warning {color: #dd5500 !important;}
  153. .env-success {color: #559900 !important;}
  154. @keyframes pulse-background {
  155. 0% { background-color: #FFFFFF; }
  156. 25% { background-color: #FFFFFF; }
  157. 50% { background-color: #E8F3FF; }
  158. 75% { background-color: #FFFFFF; }
  159. 100% { background-color: #FFFFFF; }
  160. }
  161. /* Add dashed border to input-file class */
  162. .input-file {
  163. border: 1.2px dashed #165DFF !important;
  164. border-radius: 6px !important;
  165. # background-color: #ffffff !important;
  166. animation: pulse-background 2s ease-in-out;
  167. transition: background-color 0.4s ease-out;
  168. }
  169. .input-file:hover {
  170. border: 1.2px dashed #165DFF !important;
  171. border-radius: 6px !important;
  172. color: #165DFF !important;
  173. background-color: #E8F3FF !important;
  174. transition: background-color 0.2s ease-in;
  175. }
  176. .progress-bar-wrap {
  177. border-radius: 8px !important;
  178. }
  179. .progress-bar {
  180. border-radius: 8px !important;
  181. }
  182. # .input-file label {
  183. # color: #165DFF !important;
  184. # border: 1.2px dashed #165DFF !important;
  185. # border-left: none !important;
  186. # border-top: none !important;
  187. # }
  188. # .input-file .wrap {
  189. # color: #165DFF !important;
  190. # }
  191. # .input-file .or {
  192. # color: #165DFF !important;
  193. # }
  194. """,
  195. head=(
  196. """
  197. <script src="https://www.google.com/recaptcha/api.js?render=explicit" async defer></script>
  198. <script type="text/javascript">
  199. var onVerify = function(token) {
  200. el=document.getElementById('verify').getElementsByTagName('textarea')[0];
  201. el.value=token;
  202. el.dispatchEvent(new Event('input'));
  203. };
  204. </script>
  205. """
  206. if flag_demo
  207. else ""
  208. ),
  209. ) as demo:
  210. gr.Markdown(
  211. "# [PDFMathTranslate @ Github](https://github.com/Byaidu/PDFMathTranslate)"
  212. )
  213. with gr.Row():
  214. with gr.Column(scale=1):
  215. gr.Markdown("## File | < 5 MB" if flag_demo else "## File")
  216. file_input = gr.File(
  217. label="Document",
  218. file_count="single",
  219. file_types=[".pdf"],
  220. type="filepath",
  221. elem_classes=["input-file"],
  222. )
  223. gr.Markdown("## Option")
  224. service = gr.Dropdown(
  225. label="Service",
  226. info="Which translation service to use. Some require keys",
  227. choices=service_map.keys(),
  228. value="Google",
  229. )
  230. lang_to = gr.Dropdown(
  231. label="Translate to",
  232. info="Which language to translate to (optional)",
  233. choices=lang_map.keys(),
  234. value="Chinese",
  235. )
  236. page_range = gr.Radio(
  237. choices=page_map.keys(),
  238. label="Pages",
  239. info="Translate the full document or just few pages (optional)",
  240. value=list(page_map.keys())[0],
  241. )
  242. model_id = gr.Textbox(
  243. label="Model ID",
  244. info="Please enter the identifier of the model you wish to use (e.g., gemma2). "
  245. "This identifier will be used to specify the particular model for translation.",
  246. # value="gemma2",
  247. visible=False, # hide by default
  248. )
  249. envs_status = "<span class='env-success'>- Properly configured.</span><br>"
  250. def details_wrapper(text_markdown):
  251. text = f"""
  252. <details>
  253. <summary>Technical details</summary>
  254. {text_markdown}
  255. - GitHub: <a href="https://github.com/Byaidu/PDFMathTranslate">Byaidu/PDFMathTranslate</a><br>
  256. - GUI by: <a href="https://github.com/reycn">Rongxin</a><br>
  257. - Version: {__version__}
  258. </details>"""
  259. return text
  260. def env_var_checker(env_var_name: str) -> str:
  261. if (
  262. not os.environ.get(env_var_name)
  263. or os.environ.get(env_var_name) == ""
  264. ):
  265. envs_status = (
  266. f"<span class='env-warning'>- Warning: environmental not found or error ({env_var_name})."
  267. + "</span><br>- Please make sure that the environment variables are properly configured "
  268. + "(<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  269. )
  270. else:
  271. value = str(os.environ.get(env_var_name))
  272. envs_status = (
  273. "<span class='env-success'>- Properly configured.</span><br>"
  274. )
  275. if len(value) < 13:
  276. envs_status += (
  277. f"- Env: <code>{os.environ.get(env_var_name)}</code><br>"
  278. )
  279. else:
  280. envs_status += f"- Env: <code>{value[:13]}***</code><br>"
  281. return details_wrapper(envs_status)
  282. def on_select_service(value, evt: gr.EventData):
  283. # hide model id by default
  284. model_visibility = gr.update(visible=False)
  285. # add a text description
  286. if value == "Google":
  287. envs_status = details_wrapper(
  288. "<span class='env-success'>- Properly configured.</span><br>"
  289. )
  290. elif value == "DeepL":
  291. envs_status = env_var_checker("DEEPL_AUTH_KEY")
  292. elif value == "DeepLX":
  293. envs_status = env_var_checker("DEEPLX_AUTH_KEY")
  294. elif value == "Azure":
  295. envs_status = env_var_checker("AZURE_APIKEY")
  296. elif value == "OpenAI":
  297. model_visibility = gr.update(
  298. visible=True, value="gpt-4o"
  299. ) # show model id when service is selected
  300. envs_status = env_var_checker("OPENAI_API_KEY")
  301. elif value == "Ollama":
  302. model_visibility = gr.update(
  303. visible=True, value="gemma2"
  304. ) # show model id when service is selected
  305. envs_status = env_var_checker("OLLAMA_HOST")
  306. else:
  307. envs_status = (
  308. "<span class='env-warning'>- Warning: model not in the list."
  309. "</span><br>- Please report via "
  310. "(<a href='https://github.com/Byaidu/PDFMathTranslate'>guide</a>).<br>"
  311. )
  312. return envs_status, model_visibility
  313. output_title = gr.Markdown("## Translated", visible=False)
  314. output_file = gr.File(label="Download Translation", visible=False)
  315. output_file_dual = gr.File(
  316. label="Download Translation (Dual)", visible=False
  317. )
  318. recaptcha_response = gr.Textbox(
  319. label="reCAPTCHA Response", elem_id="verify", visible=False
  320. )
  321. recaptcha_box = gr.HTML(f'<div id="recaptcha-box"></div>')
  322. translate_btn = gr.Button("Translate", variant="primary")
  323. tech_details_tog = gr.Markdown(
  324. details_wrapper(envs_status),
  325. elem_classes=["secondary-text"],
  326. )
  327. service.select(on_select_service, service, [tech_details_tog, model_id])
  328. with gr.Column(scale=2):
  329. gr.Markdown("## Preview")
  330. preview = gr.Image(label="Document Preview", visible=True)
  331. # Event handlers
  332. file_input.upload(
  333. upload_file,
  334. inputs=[file_input, service],
  335. outputs=[file_input, preview],
  336. js=(
  337. f"""
  338. (a,b)=>{{
  339. try{{
  340. grecaptcha.render('recaptcha-box',{{
  341. 'sitekey':'{client_key}',
  342. 'callback':'onVerify'
  343. }});
  344. }}catch(error){{}}
  345. return [a];
  346. }}
  347. """
  348. if flag_demo
  349. else ""
  350. ),
  351. )
  352. translate_btn.click(
  353. translate,
  354. inputs=[file_input, service, model_id, lang_to, page_range, recaptcha_response],
  355. outputs=[
  356. output_file,
  357. preview,
  358. output_file_dual,
  359. output_file,
  360. output_file_dual,
  361. output_title,
  362. ],
  363. ).then(lambda: None, js="()=>{grecaptcha.reset()}" if flag_demo else "")
  364. def setup_gui(share=False):
  365. import doclayout_yolo # cache # noqa: F401
  366. if flag_demo:
  367. demo.launch(server_name="0.0.0.0", max_file_size="5mb", inbrowser=True)
  368. else:
  369. try:
  370. demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=share)
  371. except Exception:
  372. print(
  373. "Error launching GUI using 0.0.0.0.\nThis may be caused by global mode of proxy software."
  374. )
  375. try:
  376. demo.launch(
  377. server_name="127.0.0.1", debug=True, inbrowser=True, share=share
  378. )
  379. except Exception:
  380. print(
  381. "Error launching GUI using 127.0.0.1.\nThis may be caused by global mode of proxy software."
  382. )
  383. demo.launch(debug=True, inbrowser=True, share=True)
  384. # For auto-reloading while developing
  385. if __name__ == "__main__":
  386. setup_gui()