smucclaw · kharus · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025
diff --git a/natural4-server/natural4_server/hello.py b/natural4-server/natural4_server/hello.py
@@ -24,7 +24,7 @@
 import orjson
 from sanic import HTTPResponse, Request, Sanic, file, json
 
-from natural4_server.plugins.docgen.pandoc_md_to_outputs import pandoc_md_to_output, pandoc_outputs
+from natural4_server.plugins.docgen.pandoc_md_to_outputs import pandoc_docx, pandoc_md_to_output, pandoc_pdf
 from natural4_server.plugins.flowchart import get_flowchart_tasks
 from natural4_server.task import run_tasks
 
@@ -120,29 +120,6 @@ async def process_csv(request: Request) -> HTTPResponse:
 
     target_path = await save_csv(request, target_folder, time_now)
 
-    # Generate markdown files asynchronously in the background.
-    # uuiddir: anyio.Path = anyio.Path(uuid) / spreadsheet_id / sheet_id
-
-    # markdown_cmd: Sequence[str] = (
-    #   natural4_exe,
-    #   '--only', 'tomd', f'--workdir={natural4_dir}',
-    #   f'--uuiddir={uuiddir}',
-    #   f'{target_path}'
-    # )
-
-    # print(f'hello.py child: calling natural4-exe {natural4_exe} (slowly) for tomd', file=sys.stderr)
-    # print(f'hello.py child: {markdown_cmd}', file=sys.stderr)
-
-    # Coroutine which is awaited before pandoc is called to generate documents
-    # (ie word and pdf) from the markdown file.
-    # markdown_coro: Awaitable[asyncio.subprocess.Process] = (
-    #   asyncio.subprocess.create_subprocess_exec(
-    #     *markdown_cmd,
-    #     stdout = asyncio.subprocess.PIPE,
-    #     stderr = asyncio.subprocess.PIPE
-    #   )
-    # )
-
     # ---------------------------------------------
     # call natural4-exe, wait for it to complete.
     # ---------------------------------------------
@@ -194,16 +171,12 @@ async def process_csv(request: Request) -> HTTPResponse:
     # ---------------------------------------------
     timestamp, flowchart_tasks = await petri_post_process(target_folder)
 
-    # Slow tasks below.
-    # These are run in the background using app.add_background_task, which
-    # adds them to Sanic's event loop.
-
     # ---------------------------------------------
     # postprocessing:
     # Use pandoc to generate word and pdf docs from markdown.
     # ---------------------------------------------
-    app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_outputs[0])) #docx
-    app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_outputs[1])) #pdf
+    app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_docx))
+    app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_pdf))
     # Concurrently peform the following:
     # - Write natural4-exe's stdout to a file.
     # - Write natural4-exe's stderr to a file.

diff --git a/natural4-server/natural4_server/plugins/docgen/__init__.py b/natural4-server/natural4_server/plugins/docgen/__init__.py
@@ -1,17 +0,0 @@
-import os
-from collections.abc import AsyncGenerator
-
-import aiostream
-
-from natural4_server.task import Task
-
-try:
-    from .pandoc_md_to_outputs import get_pandoc_tasks
-except ImportError:
-
-    def get_pandoc_tasks(
-        # markdown_coro: Awaitable[asyncio.subprocess.Process],
-        uuid_ss_folder: str | os.PathLike,
-        timestamp: str | os.PathLike,
-    ) -> AsyncGenerator[Task | None, None]:
-        return aiostream.stream.empty()

diff --git a/natural4-server/natural4_server/plugins/docgen/pandoc_md_to_outputs.py b/natural4-server/natural4_server/plugins/docgen/pandoc_md_to_outputs.py
@@ -1,36 +1,32 @@
 import asyncio
 import os
 import sys
-from collections.abc import AsyncGenerator, Sequence
+from dataclasses import dataclass
+from typing import List
 
 import anyio
 import pypandoc
-import pyrsistent as pyrs
 
-from natural4_server.task import Task
 
+@dataclass
+class PandocOutput:  # settings for one pandoc export target, consumed by pandoc_md_to_output
+    file_extension: str  # suffix of the generated file; also the name of its output sub-folder
+    extra_args: List[str]  # extra command-line arguments spliced into the pandoc command
 
-class PandocOutput(pyrs.PRecord):
-    file_extension = pyrs.field(mandatory=True, type=str)
-    extra_args = pyrs.field(Sequence, initial=())
 
+pandoc_docx = PandocOutput(file_extension="docx", extra_args=["-f", "markdown+hard_line_breaks", "-s"])  # Word
 
-pandoc_outputs = [
-    PandocOutput(file_extension="docx", extra_args=("-f", "markdown+hard_line_breaks", "-s")),
-    PandocOutput(
-        file_extension="pdf",
-        extra_args=(
-            "--pdf-engine=xelatex",
-            "-V",
-            "CJKmainfont=Droid Sans Fallback",
-            "-f",
-            "markdown+hard_line_breaks",
-            "-s",
-        ),
-    ),
-]
-
-pandoc_path = pypandoc.get_pandoc_path()
+pandoc_pdf = PandocOutput(
+    file_extension="pdf",
+    extra_args=[
+        "--pdf-engine=xelatex",  # produce the PDF through XeLaTeX
+        "-V",  # the next argument sets a template variable
+        "CJKmainfont=Droid Sans Fallback",  # font used for CJK text
+        "-f",  # the next argument is the input format
+        "markdown+hard_line_breaks",  # Markdown where every newline is a hard line break
+        "-s",  # emit a standalone document
+    ],
+)
 
 
 async def pandoc_md_to_output(
@@ -43,15 +39,15 @@ async def pandoc_md_to_output(
 
     if await md_file.exists():
         match pandoc_output:
-            case {"file_extension": file_extension, "extra_args": extra_args}:
+            case PandocOutput(file_extension = file_extension, extra_args = extra_args):
                 outputpath: anyio.Path = uuid_ss_folder_path / file_extension
                 await outputpath.mkdir(parents=True, exist_ok=True)
 
                 timestamp_file: str = f"{timestamp}.{file_extension}"
                 outputfile: anyio.Path = outputpath / timestamp_file
 
                 pandoc_cmd = (
-                    pandoc_path,
+                    pypandoc.get_pandoc_path(),
                     "-o",
                     f"{outputfile}",
                     *extra_args,
@@ -83,17 +79,3 @@ async def pandoc_md_to_output(
                 await latest_file.symlink_to(timestamp_file)
             case _:
                 pass
-
-
-async def get_pandoc_tasks(
-    # markdown_coro: Awaitable[asyncio.subprocess.Process],
-    uuid_ss_folder: str | os.PathLike,
-    timestamp: str | os.PathLike,
-) -> AsyncGenerator[Task, None]:
-    # markdown_proc: asyncio.subprocess.Process = await markdown_coro
-    # await markdown_proc.wait()
-
-    print("Markdown output done.", file=sys.stderr)
-
-    for output in pandoc_outputs:
-        yield Task(func=pandoc_md_to_output, args=(uuid_ss_folder, timestamp, output))