瀏覽代碼

Merge pull request #108 from Byaidu/dev-guide

Dev guide
Byaidu 1 年之前
父節點
當前提交
860f4bc59e
共有 4 個文件被更改,包括 399 次插入10 次删除
  1. 二進制
      docs/images/banner.nocolor.png
  2. 二進制
      docs/images/icon/setting-one.png
  3. 299 8
      pdf2zh/gui.py
  4. 100 2
      pdf2zh/translator.py

二進制
docs/images/banner.nocolor.png


二進制
docs/images/icon/setting-one.png


+ 299 - 8
pdf2zh/gui.py

@@ -237,12 +237,12 @@ with gr.Blocks(
         transition: background-color 0.2s ease-in;
         transition: background-color 0.2s ease-in;
     }
     }
 
 
-.progress-bar-wrap {
-  border-radius: 8px !important;
-}
-.progress-bar {
-  border-radius: 8px !important;
-}
+    .progress-bar-wrap {
+    border-radius: 8px !important;
+    }
+    .progress-bar {
+    border-radius: 8px !important;
+    }
 
 
     # .input-file label {
     # .input-file label {
     #     color: #165DFF !important;
     #     color: #165DFF !important;
@@ -257,7 +257,7 @@ with gr.Blocks(
     #     color: #165DFF !important;
     #     color: #165DFF !important;
     # }
     # }
     """,
     """,
-) as demo:
+) as demo1:
     gr.Markdown("# PDFMathTranslate")
     gr.Markdown("# PDFMathTranslate")
 
 
     with gr.Row():
     with gr.Row():
@@ -280,7 +280,7 @@ with gr.Blocks(
             # lang_src = gr.Dropdown(
             # lang_src = gr.Dropdown(
             #     label="Source Language",
             #     label="Source Language",
             #     info="Which translation service to use. Some require keys",
             #     info="Which translation service to use. Some require keys",
-            #     choices=["Google", "DeepL", "DeepLX", "Ollama", "Azure"],
+            #     choices=["Google", "DeepL", "DeepLX", "Azure", "OpenAI", "Ollama"],
             #     value="Google",
             #     value="Google",
             # )
             # )
             lang_to = gr.Dropdown(
             lang_to = gr.Dropdown(
@@ -403,6 +403,297 @@ with gr.Blocks(
     )
     )
 
 
 
 
+class EnvSync:
+    """Two-way synchronization between a variable and its system environment counterpart.
+
+    Reads always go back to ``os.environ`` and writes are stored there, so
+    changes made to the environment variable elsewhere in the process are
+    visible through :attr:`value`.
+
+    Args:
+        env_name: Name of the environment variable to mirror.
+        default_value: Value exported to the environment when the variable
+            does not exist yet at construction time.
+    """
+
+    def __init__(self, env_name: str, default_value: str = ""):
+        self._name = env_name
+        # setdefault exports the default only when the variable is missing
+        # and returns whichever value is in the environment afterwards.
+        self._value = os.environ.setdefault(env_name, default_value)
+
+    @property
+    def value(self) -> str:
+        """Get the current value, ensuring sync with system env.
+
+        A variable that has been removed from the environment since
+        construction reads as ``""`` so the result is always a string.
+        """
+        self._value = os.environ.get(self._name, "")
+        return self._value
+
+    @value.setter
+    def value(self, new_value: str):
+        """Set the value and sync with system env."""
+        self._value = new_value
+        os.environ[self._name] = new_value
+
+    def __str__(self) -> str:
+        return self.value
+
+    def __bool__(self) -> bool:
+        # Truthy exactly when the mirrored value is a non-empty string.
+        return bool(self.value)
+
+
+# Global setup
+# Configuration-guide page, bound to ``demo3``: a banner image, a heading and
+# a Yes/No choice about using Google Translate. No event handlers are attached
+# to the two buttons inside this block.
+with gr.Blocks(
+    title="PDFMathTranslate - PDF Translation with preserved formats",
+    theme=gr.themes.Default(
+        primary_hue=custom_blue, spacing_size="md", radius_size="lg"
+    ),
+    # NOTE(review): the "#"-prefixed line inside this CSS string is not a CSS
+    # comment (CSS only has /* ... */); it looks like a disabled rule — confirm
+    # that browsers drop it as intended.
+    css="""
+    # .secondary-text {color: #999 !important;}
+    footer {visibility: hidden}
+    .env-warning {color: #dd5500 !important;}
+    .env-success {color: #559900 !important;}
+    .logo {border: transparent;}
+    .logo label {display: none;}
+    .logo .top-panel {display: none;}
+    .title {text-align: center;}
+    .title h1 {color: #999999 !important;}
+    .question  {text-align: center;}
+    .question h2 {color: #165DFF !important;}
+    .info-text {text-align: center; margin-top: -5px;}
+    .info-text p {color: #aaaaaa !important;}
+    """,
+) as demo3:
+    # Four blank Markdown components come before the banner (presumably
+    # vertical spacing — confirm).
+    gr.Markdown(" ", elem_classes=["title"])
+    gr.Markdown(" ", elem_classes=["title"])
+    gr.Markdown(" ", elem_classes=["title"])
+    gr.Markdown(" ", elem_classes=["title"])
+    # Banner path is relative, so it depends on the process working directory.
+    gr.Image("./docs/images/banner.nocolor.png", elem_classes=["logo"], width=400)
+    gr.Markdown("# Configuration Guide<br/><br/>", elem_classes=["title"])
+    gr.Markdown("## Use Google Translate?<br/>", elem_classes=["question"])
+    # Button row: empty Markdown components sit at both ends of the row
+    # (presumably to center the two buttons — confirm).
+    with gr.Row():
+        gr.Markdown("")
+        use_google = gr.Button(
+            "Yes",
+            variant="primary",
+            elem_classes=["secondary-text"],
+        )
+        use_other = gr.Button(
+            "No",
+            variant="secondary",
+            elem_classes=["secondary-text"],
+        )
+        gr.Markdown("")
+    # Caption row: same four-component layout as the button row, with one
+    # caption in each of the two middle positions.
+    with gr.Row():
+        gr.Markdown("")
+        gr.Markdown(
+            "Google Translate",
+            elem_classes=["info-text"],
+        )
+        gr.Markdown(
+            "DeepL, OpenAI, and more",
+            elem_classes=["info-text"],
+        )
+        gr.Markdown("")
+    # gr.Markdown(
+    #     """<span class='env-success'>- Properly configured.</span><br>
+    #     - GitHub: <a href="https://github.com/Byaidu/PDFMathTranslate">Byaidu/PDFMathTranslate</a><br>
+    #     - GUI by: <a href="https://github.com/reycn">Rongxin</a>"""
+    # )
+
+# Main page, bound to ``demo``: banner, title, a single-PDF upload area, a
+# page-range selector and an "Advanced Options" button. None of the components
+# created in this block is assigned to a name or has an event handler attached.
+with gr.Blocks(
+    title="PDFMathTranslate - PDF Translation with preserved formats",
+    theme=gr.themes.Default(
+        primary_hue=custom_blue, spacing_size="md", radius_size="lg"
+    ),
+    # NOTE(review): the "#"-prefixed lines inside this CSS string are not CSS
+    # comments (CSS only has /* ... */); they look like disabled rules or
+    # declarations — confirm that browsers drop them as intended.
+    css="""
+    # .secondary-text {color: #999 !important;}
+    footer {visibility: hidden}
+    .env-warning {color: #dd5500 !important;}
+    .env-success {color: #559900 !important;}
+    .logo {border: transparent;
+    height: 10vh;}
+    .logo label {display: none;}
+    .logo .top-panel {display: none;}
+    .title {text-align: center;
+    height: 5vh;}
+    .title h1 {color: #999999 !important;}
+    .question  {text-align: center;}
+    .question h2 {color: #165DFF !important;}
+    .info-text {text-align: center; margin-top: -5px;}
+    .info-text p {color: #aaaaaa !important;}
+
+    @keyframes pulse-background {
+        0% { background-color: #FFFFFF; }
+        25% { background-color: #FFFFFF; }
+        50% { background-color: #E8F3FF; }
+        75% { background-color: #FFFFFF; }
+        100% { background-color: #FFFFFF; }
+    }
+    
+    /* Add dashed border to input-file class */
+    .input-file {
+        border: 1.2px dashed #165DFF !important;
+        border-radius: 6px !important;
+        # background-color: #ffffff !important;
+        animation: pulse-background 2s ease-in-out;
+        transition: background-color 0.4s ease-out;
+        width: 80vw;
+        height: 60vh;
+        margin: 0 auto;
+    }
+
+    .input-file:hover {
+        border: 1.2px dashed #165DFF !important;
+        border-radius: 6px !important;
+        color: #165DFF !important;
+        background-color: #E8F3FF !important;
+        transition: background-color 0.2s ease-in;
+        box-shadow: 4px 4px 20px rgba(22, 93, 255, 0.1);
+    }
+
+
+    .input-file label {
+        color: #165DFF !important;
+        border: 1.2px dashed #165DFF !important;
+        border-left: none !important;
+        border-top: none !important;
+    }
+    .input-file .top-panel {
+        color: #165DFF !important;
+        border: 1.2px dashed #165DFF !important;
+        border-right: none !important;
+        border-top: none !important;
+    }
+    .input-file .filename {
+        color: #165DFF !important;
+        background-color: #FFFFFF !important;
+    }
+    .input-file .download {
+        color: #165DFF !important;
+        background-color: #FFFFFF !important;
+    }
+    .input-file .wrap {
+        color: #165DFF !important;
+    }
+    .input-file .or {
+        color: #165DFF !important;
+    }
+
+    .progress-bar-wrap {
+    border-radius: 8px !important;
+    }
+    .progress-bar {
+    border-radius: 8px !important;
+    }
+
+    .options-row {
+        align-items: center;
+        display: flex;
+    }
+    .options-row .wrap  {
+        align-items: center;
+        justify-content: center;
+        flex-wrap: wrap;
+        gap: 1rem;}
+
+    .options-row .form label  {
+        color: #999;}
+    .options-row .form   {
+        border: none !important;
+        align-items: center !important;}
+    .options-row [data-testid="block-info"] {
+        display: none !important;}
+    .logo-row {
+        align-items: center;}
+    .title-row {
+        align-items: center;}
+    .details-row {
+        align-items: center;}
+    .hide-frame {
+        border: none !important;}
+    .hide-frame .top-panel {
+        display: none !important;}
+    .hide-frame label {
+        display: none !important;}
+    .options-icon {
+        height: 2em;
+        width: 2em;
+    }
+    .options-btn {
+        line-height: var(--line-md);
+        background-color: #FFFFFF;
+        border: 1.2px solid var(--checkbox-label-border-color) !important;
+        border-radius: 6px !important;
+        # color: var(--checkbox-label-border-color) !important;
+        color: #999;
+        transition: background-color 0.2s ease-in;
+    }
+    .options-btn:hover {
+        background-color: #fafafa;
+        # border: 1.2px solid  #fcfcfc !important;
+    }
+    """,
+) as demo:
+    # Banner path is relative, so it depends on the process working directory.
+    with gr.Row(elem_classes=["logo-row"]):
+        gr.Image("./docs/images/banner.nocolor.png", elem_classes=["logo"])
+    with gr.Row(elem_classes=["title-row"]):
+        gr.Markdown("# PDFMathTranslate", elem_classes=["title"])
+    # Upload area: one file only, restricted to the .pdf extension. The
+    # "input-file" class picks up the dashed-border styling defined above.
+    with gr.Row(elem_classes=["input-file-row"]):
+        gr.File(
+            label="Upload PDF",
+            file_count="single",
+            file_types=[".pdf"],
+            interactive=True,
+            elem_classes=["input-file", "secondary-text"],
+        )
+    # Options row: empty Markdown components sit at both ends of the row
+    # (presumably to center the page selector — confirm).
+    with gr.Row(elem_classes=["options-row"]):
+        gr.Markdown("")
+        # gr.Dropdown(
+        #     ["Google", "DeepL", "DeepLX", "Azure", "OpenAI", "Ollama"],
+        #     value="Google",
+        #     label="Translation Service",
+        #     interactive=True,
+        #     elem_classes=["secondary-text"],
+        # )
+        # gr.Dropdown(
+        #     [
+        #         "Chinese",
+        #         "English",
+        #         "French",
+        #         "German",
+        #         "Japanese",
+        #         "Korean",
+        #         "Russian",
+        #         "Spanish",
+        #     ],
+        #     value="Chinese",
+        #     label="To",
+        #     interactive=True,
+        #     elem_classes=["secondary-text"],
+        #     scale=2,
+        # )
+        # Page-range choice; "All Pages" is preselected.
+        gr.Radio(
+            ["All Pages", "First Page", "First 5 Pages"],
+            value="All Pages",
+            label="Pages",
+            interactive=True,
+            elem_classes=["secondary-text"],
+            scale=2,
+        )
+        gr.Markdown("")
+    # Second options row: two empty Markdown components on each side of the
+    # button (presumably to keep it narrow and centered — confirm).
+    with gr.Row(elem_classes=["options-row"]):
+        gr.Markdown("")
+        gr.Markdown("")
+        # gr.Image(
+        #     "./docs/images/icon/setting-one.png",
+        #     elem_classes=["hide-frame", "options-icon"],
+        #     scale=1,
+        # )
+        # gr.Markdown("Advanced Settings", elem_classes=["secondary-text"])
+        gr.Button(
+            "⚙️ Advanced Options",
+            variant="secondary",
+            elem_classes=["options-btn"],
+        )
+        gr.Markdown("")
+        gr.Markdown("")
+    # with gr.Row(elem_classes=["details-row"]):
+    # gr.Markdown("Technical details", elem_classes=["info-text"])
+
+
 def setup_gui():
 def setup_gui():
     try:
     try:
         demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=False)
         demo.launch(server_name="0.0.0.0", debug=True, inbrowser=True, share=False)

+ 100 - 2
pdf2zh/translator.py

@@ -11,6 +11,10 @@ import requests
 from azure.ai.translation.text import TextTranslationClient
 from azure.ai.translation.text import TextTranslationClient
 from azure.core.credentials import AzureKeyCredential
 from azure.core.credentials import AzureKeyCredential
 
 
+import hmac
+import hashlib
+import time
+from datetime import datetime,UTC
 
 
 class BaseTranslator:
 class BaseTranslator:
     def __init__(self, service, lang_out, lang_in, model):
     def __init__(self, service, lang_out, lang_in, model):
@@ -58,6 +62,96 @@ class GoogleTranslator(BaseTranslator):
             result = html.unescape(re_result[0])
             result = html.unescape(re_result[0])
         return result
         return result
 
 
+class TencentTranslator(BaseTranslator):
+    """Translator backed by the Tencent Cloud ``TextTranslate`` action.
+
+    Each request is signed with the TC3-HMAC-SHA256 scheme using the
+    credentials taken from the ``Tencent_SECRET_ID`` and
+    ``Tencent_SECRET_KEY`` environment variables.
+    """
+
+    def sign(self, key, msg):
+        """Return the raw HMAC-SHA256 digest of the string *msg* under the bytes *key*."""
+        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
+
+    def __init__(self, service, lang_out, lang_in, model):
+        """Resolve default languages and load the API credentials.
+
+        ``"auto"`` is mapped to ``"zh"`` for the output language and to
+        ``"en"`` for the input language.
+
+        Raises:
+            ValueError: if either credential environment variable is not set.
+        """
+        lang_out = "zh" if lang_out == "auto" else lang_out
+        lang_in = "en" if lang_in == "auto" else lang_in
+        super().__init__(service, lang_out, lang_in, model)
+        try:
+            # Index os.environ directly: os.getenv would return None for a
+            # missing variable and the failure would only surface later as a
+            # TypeError while building the signing key.
+            self.secret_id = os.environ["Tencent_SECRET_ID"]
+            self.secret_key = os.environ["Tencent_SECRET_KEY"]
+        except KeyError as e:
+            missing_var = e.args[0]
+            raise ValueError(
+                f"The environment variable '{missing_var}' is required but not set."
+            ) from e
+
+        self.session = requests.Session()
+        self.base_link = "tmt.tencentcloudapi.com"
+
+    def translate(self, text):
+        """Translate *text* and return the ``TargetText`` field of the response.
+
+        Only the first 5000 characters of *text* are sent.
+
+        Raises:
+            ValueError: on a non-200 HTTP status, or when the decoded
+                response has no ``Response.TargetText`` entry.
+        """
+        text = text[:5000]
+        data = {
+            "SourceText": text,
+            "Source": self.lang_in,
+            "Target": self.lang_out,
+            "ProjectId": 0,
+        }
+        # Serialize once: the signature covers a hash of these exact bytes,
+        # so the very same bytes are sent as the request body below.
+        payload = dumps(data).encode("utf-8")
+        host = self.base_link
+        algorithm = "TC3-HMAC-SHA256"
+        signed_headers = "content-type;host;x-tc-action"
+
+        # Step 1: canonical request (method, URI, query string, headers,
+        # signed header names, payload hash).
+        canonical_headers = (
+            "content-type:application/json; charset=utf-8\n"
+            f"host:{host}\n"
+            "x-tc-action:texttranslate\n"
+        )
+        canonical_request = "\n".join(
+            [
+                "POST",
+                "/",
+                "",
+                canonical_headers,
+                signed_headers,
+                hashlib.sha256(payload).hexdigest(),
+            ]
+        )
+
+        # Step 2: string to sign. The date in the credential scope is derived
+        # from the same timestamp that is sent in X-TC-Timestamp.
+        timestamp = int(time.time())
+        date = datetime.fromtimestamp(timestamp, UTC).strftime("%Y-%m-%d")
+        credential_scope = date + "/tmt/tc3_request"
+        string_to_sign = "\n".join(
+            [
+                algorithm,
+                str(timestamp),
+                credential_scope,
+                hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
+            ]
+        )
+
+        # Step 3: derive the signing key by chaining HMACs over date,
+        # service name and the fixed terminator, then sign.
+        secret_date = self.sign(("TC3" + self.secret_key).encode("utf-8"), date)
+        secret_service = self.sign(secret_date, "tmt")
+        secret_signing = self.sign(secret_service, "tc3_request")
+        signature = hmac.new(
+            secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
+        ).hexdigest()
+
+        # Step 4: assemble the Authorization header and send.
+        authorization = (
+            f"{algorithm} "
+            f"Credential={self.secret_id}/{credential_scope}, "
+            f"SignedHeaders={signed_headers}, "
+            f"Signature={signature}"
+        )
+        self.headers = {
+            "Authorization": authorization,
+            "Content-Type": "application/json; charset=utf-8",
+            "Host": host,
+            "X-TC-Action": "TextTranslate",
+            "X-TC-Region": "ap-beijing",
+            "X-TC-Timestamp": str(timestamp),
+            "X-TC-Version": "2018-03-21",
+        }
+
+        response = self.session.post(
+            "https://" + host,
+            data=payload,
+            headers=self.headers,
+        )
+        # 1. Status code test
+        if response.status_code != 200:
+            raise ValueError("HTTP error: " + str(response.status_code))
+        result = loads(response.text)
+        # 2. Result test
+        try:
+            return result["Response"]["TargetText"]
+        except KeyError as e:
+            raise ValueError("No valid key in Tencent's response") from e
 
 
 class DeepLXTranslator(BaseTranslator):
 class DeepLXTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
     def __init__(self, service, lang_out, lang_in, model):
@@ -78,7 +172,11 @@ class DeepLXTranslator(BaseTranslator):
             ) from e
             ) from e
 
 
         self.session = requests.Session()
         self.session = requests.Session()
-        self.base_link = f"{server_url}/{auth_key}/translate"
+        server_url=server_url.rstrip('/')
+        if auth_key:
+            self.base_link = f"{server_url}/{auth_key}/translate"
+        else:
+            self.base_link = f"{server_url}/translate"
         self.headers = {
         self.headers = {
             "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"
             "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"
         }
         }
@@ -112,7 +210,6 @@ class DeepLXTranslator(BaseTranslator):
             raise ValueError("Empty translation result")
             raise ValueError("Empty translation result")
         return result
         return result
 
 
-
 class DeepLTranslator(BaseTranslator):
 class DeepLTranslator(BaseTranslator):
     def __init__(self, service, lang_out, lang_in, model):
     def __init__(self, service, lang_out, lang_in, model):
         lang_out='ZH' if lang_out=='auto' else lang_out
         lang_out='ZH' if lang_out=='auto' else lang_out
@@ -218,3 +315,4 @@ class AzureTranslator(BaseTranslator):
 
 
         translated_text = response[0].translations[0].text
         translated_text = response[0].translations[0].text
         return translated_text
         return translated_text
+