Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up docgen code #258

Merged
merged 2 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 3 additions & 30 deletions natural4-server/natural4_server/hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import orjson
from sanic import HTTPResponse, Request, Sanic, file, json

from natural4_server.plugins.docgen.pandoc_md_to_outputs import pandoc_md_to_output, pandoc_outputs
from natural4_server.plugins.docgen.pandoc_md_to_outputs import pandoc_docx, pandoc_md_to_output, pandoc_pdf
from natural4_server.plugins.flowchart import get_flowchart_tasks
from natural4_server.task import run_tasks

Expand Down Expand Up @@ -120,29 +120,6 @@ async def process_csv(request: Request) -> HTTPResponse:

target_path = await save_csv(request, target_folder, time_now)

# Generate markdown files asynchronously in the background.
# uuiddir: anyio.Path = anyio.Path(uuid) / spreadsheet_id / sheet_id

# markdown_cmd: Sequence[str] = (
# natural4_exe,
# '--only', 'tomd', f'--workdir={natural4_dir}',
# f'--uuiddir={uuiddir}',
# f'{target_path}'
# )

# print(f'hello.py child: calling natural4-exe {natural4_exe} (slowly) for tomd', file=sys.stderr)
# print(f'hello.py child: {markdown_cmd}', file=sys.stderr)

# Coroutine which is awaited before pandoc is called to generate documents
# (ie word and pdf) from the markdown file.
# markdown_coro: Awaitable[asyncio.subprocess.Process] = (
# asyncio.subprocess.create_subprocess_exec(
# *markdown_cmd,
# stdout = asyncio.subprocess.PIPE,
# stderr = asyncio.subprocess.PIPE
# )
# )

# ---------------------------------------------
# call natural4-exe, wait for it to complete.
# ---------------------------------------------
Expand Down Expand Up @@ -194,16 +171,12 @@ async def process_csv(request: Request) -> HTTPResponse:
# ---------------------------------------------
timestamp, flowchart_tasks = await petri_post_process(target_folder)

# Slow tasks below.
# These are run in the background using app.add_background_task, which
# adds them to Sanic's event loop.

# ---------------------------------------------
# postprocessing:
# Use pandoc to generate word and pdf docs from markdown.
# ---------------------------------------------
app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_outputs[0])) #docx
app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_outputs[1])) #pdf
app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_docx))
app.add_task(pandoc_md_to_output(target_folder, timestamp, pandoc_pdf))
# Concurrently perform the following:
# - Write natural4-exe's stdout to a file.
# - Write natural4-exe's stderr to a file.
Expand Down
17 changes: 0 additions & 17 deletions natural4-server/natural4_server/plugins/docgen/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +0,0 @@
import os
from collections.abc import AsyncGenerator

import aiostream

from natural4_server.task import Task

try:
from .pandoc_md_to_outputs import get_pandoc_tasks
except ImportError:

def get_pandoc_tasks(
# markdown_coro: Awaitable[asyncio.subprocess.Process],
uuid_ss_folder: str | os.PathLike,
timestamp: str | os.PathLike,
) -> AsyncGenerator[Task | None, None]:
return aiostream.stream.empty()
Original file line number Diff line number Diff line change
@@ -1,36 +1,32 @@
import asyncio
import os
import sys
from collections.abc import AsyncGenerator, Sequence
from dataclasses import dataclass
from typing import List

import anyio
import pypandoc
import pyrsistent as pyrs

from natural4_server.task import Task

@dataclass
class PandocOutput:
file_extension: str
extra_args: List[str]

class PandocOutput(pyrs.PRecord):
file_extension = pyrs.field(mandatory=True, type=str)
extra_args = pyrs.field(Sequence, initial=())

pandoc_docx = PandocOutput(file_extension="docx", extra_args=["-f", "markdown+hard_line_breaks", "-s"])

pandoc_outputs = [
PandocOutput(file_extension="docx", extra_args=("-f", "markdown+hard_line_breaks", "-s")),
PandocOutput(
file_extension="pdf",
extra_args=(
"--pdf-engine=xelatex",
"-V",
"CJKmainfont=Droid Sans Fallback",
"-f",
"markdown+hard_line_breaks",
"-s",
),
),
]

pandoc_path = pypandoc.get_pandoc_path()
pandoc_pdf = PandocOutput(
file_extension="pdf",
extra_args=[
"--pdf-engine=xelatex",
"-V",
"CJKmainfont=Droid Sans Fallback",
"-f",
"markdown+hard_line_breaks",
"-s",
],
)


async def pandoc_md_to_output(
Expand All @@ -43,15 +39,15 @@ async def pandoc_md_to_output(

if await md_file.exists():
match pandoc_output:
case {"file_extension": file_extension, "extra_args": extra_args}:
case PandocOutput(file_extension = file_extension, extra_args = extra_args):
outputpath: anyio.Path = uuid_ss_folder_path / file_extension
await outputpath.mkdir(parents=True, exist_ok=True)

timestamp_file: str = f"{timestamp}.{file_extension}"
outputfile: anyio.Path = outputpath / timestamp_file

pandoc_cmd = (
pandoc_path,
pypandoc.get_pandoc_path(),
"-o",
f"{outputfile}",
*extra_args,
Expand Down Expand Up @@ -83,17 +79,3 @@ async def pandoc_md_to_output(
await latest_file.symlink_to(timestamp_file)
case _:
pass


async def get_pandoc_tasks(
# markdown_coro: Awaitable[asyncio.subprocess.Process],
uuid_ss_folder: str | os.PathLike,
timestamp: str | os.PathLike,
) -> AsyncGenerator[Task, None]:
# markdown_proc: asyncio.subprocess.Process = await markdown_coro
# await markdown_proc.wait()

print("Markdown output done.", file=sys.stderr)

for output in pandoc_outputs:
yield Task(func=pandoc_md_to_output, args=(uuid_ss_folder, timestamp, output))
Loading