Просмотр исходного кода

celery pandoc 改为 html convert

mrh 1 год назад
Родитель
Коммит
3ca36e53e6
2 изменённых файла: 7 добавлено и 5 удалено
  1. 2 2
      worker/celery/html_convert_tasks.py
  2. 5 3
      worker/html_convert/pandoc.py

+ 2 - 2
worker/celery/pandoc_tasks.py → worker/celery/html_convert_tasks.py

@@ -7,7 +7,7 @@ from worker.html_convert.models import HtmlConvertResult
 
 logger = get_logger('pandoc_tasks')
 
-@current_app.task(name='pandoc_worker.convert_single_result')
+@current_app.task(name='html_convert_tasks_worker.convert_single_result')
 def convert_single_result_task(result_id: int, font_name: str = "宋体", include_toc: bool = False, skip_existing: bool = True):
     """
     Celery task to convert a single SearchResultItem using Pandoc.
@@ -36,7 +36,7 @@ def convert_single_result_task(result_id: int, font_name: str = "宋体", includ
         logger.exception(f"Error during Pandoc conversion for SearchResultItem ID: {result_id}: {str(e)}")
         return {"result_id": result_id, "status": "failed"}
 
-@current_app.task(name='pandoc_worker.convert_all_results')
+@current_app.task(name='html_convert_tasks.convert_all_results')
 def convert_all_results_task(font_name: str = "宋体", include_toc: bool = False, skip_existing: bool = True):
     """
     Celery task to convert all SearchResultItems using Pandoc.

+ 5 - 3
worker/html_convert/pandoc.py

@@ -19,6 +19,7 @@ import subprocess
 from docx import Document
 from docx.oxml.ns import qn
 from docx.oxml import OxmlElement
+import tempfile
 logger = get_logger('pandoc')
 
 class PandocConverter:
@@ -34,6 +35,7 @@ class PandocConverter:
         """
         self.font_name = font_name
         self.include_toc = include_toc
+        self._temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory for reference docs
     
     def convert_md_to_docx(self, md_path: Path, output_path: Path) -> bool:
         """Convert markdown file to docx using pandoc with custom options"""
@@ -51,7 +53,7 @@ class PandocConverter:
                 PANDOC_EXE,
                 '-f', 'markdown+yaml_metadata_block',
                 '-t', 'docx',
-                '--reference-doc', self._get_reference_doc(md_path.parent),
+                '--reference-doc', self._get_reference_doc(),
                 '-o', str(output_path),
                 str(md_path)
             ]
@@ -150,9 +152,9 @@ class PandocConverter:
             
             return False
     
-    def _get_reference_doc(self, output_dir: Path) -> str:
+    def _get_reference_doc(self) -> str:
         """Get path to reference document with specified font"""
-        reference_doc = output_dir / f"{self.font_name.replace(' ', '_')}.docx"
+        reference_doc = Path(self._temp_dir.name) / f"{self.font_name.replace(' ', '_')}.docx"
         
         if not reference_doc.exists():
             self._create_reference_doc(reference_doc)