Skip to content

Commit

Permalink
pareditor: add document import via Pandoc (#3697)
Browse files Browse the repository at this point in the history
* Add editor tab for Pandoc import

* Add function to upload document file and import file content to editor

* Add variable to filter files allowed for Pandoc conversion

* Add file extension to mimetype map for Pandoc conversion

* Add route for importing document content from uploaded file

* Remove PDF from supported formats

* Update browser test images

* Update browser test images

* Add mimetypes for OpenDocument format files

* pandoc import: add error handling, clean up

* Add automatic uploads for images contained in docx and odt files [skip ci]

* Add db entries for automatically uploaded images

* Add image list to end of document only if imported document had images

* Add instructions on the document import tab

* Move image/file upload and document import under a common Upload main tab, localize document import instructions

* Update localizations

* Update test screenshots

---------

Co-authored-by: dezhidki <[email protected]>
  • Loading branch information
saviit and dezhidki authored Oct 4, 2024
1 parent 9990177 commit d5ded20
Show file tree
Hide file tree
Showing 10 changed files with 332 additions and 14 deletions.
99 changes: 98 additions & 1 deletion timApp/document/editing/routes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
"""Routes for editing a document."""
import re
import os
import secrets
import tempfile
import zipfile
from dataclasses import field

from flask import Blueprint, render_template
Expand All @@ -19,6 +23,8 @@
verify_seeanswers_access,
has_edit_access,
verify_route_access,
AccessDenied,
verify_logged_in,
)
from timApp.auth.get_user_rights_for_item import get_user_rights_for_item
from timApp.auth.sessioninfo import (
Expand Down Expand Up @@ -48,6 +54,7 @@
from timApp.document.version import Version
from timApp.document.viewcontext import ViewRoute, ViewContext, default_view_ctx
from timApp.document.yamlblock import YamlBlock
from timApp.item.block import BlockType
from timApp.item.validation import validate_uploaded_document_content
from timApp.markdown.markdownconverter import md_to_html
from timApp.notification.notification import NotificationType
Expand All @@ -60,7 +67,6 @@
# from timApp.timdb.dbaccess import get_timdb
from timApp.timdb.exceptions import TimDbException
from timApp.timdb.sqa import db, run_sql
from timApp.upload.uploadedfile import UploadedFile
from timApp.util.flask.requesthelper import (
verify_json_params,
use_model,
Expand All @@ -71,6 +77,16 @@
from timApp.util.utils import get_error_html
from tim_common.marshmallow_dataclass import dataclass

from timApp.upload.upload import ALLOWED_PANDOC_EXTENSIONS, save_file_and_grant_access
from timApp.upload.uploadedfile import (
is_script_safe_mimetype,
ALLOWED_DOC_IMPORT_EXT_MIMETYPES,
UploadedFile,
)
from pypandoc import convert_file
from timApp.util.utils import temp_folder_path


edit_page = Blueprint("edit_page", __name__, url_prefix="") # TODO: Better URL prefix.


Expand Down Expand Up @@ -1122,3 +1138,84 @@ def set_drawio_base(args: DrawIODataModel):
plug.values["data"] = data
save_plugin(plug, max_attr_width=float("inf"))
return ok_response()


@dataclass
class ImportDocumentModel:
    """Request model for the ``/importDocFile`` route."""

    # ID of the document the import is made for; the route looks the
    # document up with this ID and verifies edit access to it.
    doc_id: int


@dataclass
class ImportedImageFile:
    """Filename-only stand-in for an uploaded file object.

    Images extracted from an imported document archive have no upload
    object of their own, so the import route wraps the bare file name in
    this class and passes it as the ``file`` argument of
    ``save_file_and_grant_access``.
    """

    # Base name of the image inside the archive (directory part removed).
    filename: str


@edit_page.post("/importDocFile")
@use_model(ImportDocumentModel)
def import_document_from_file(m: ImportDocumentModel) -> Response:
    """Convert an uploaded document file to Markdown with Pandoc.

    The uploaded file is written to a temporary file, converted with
    Pandoc and the resulting Markdown is returned to the client. For
    ``.docx`` and ``.odt`` files the embedded images are additionally
    extracted from the archive, saved as image uploads for the document
    and listed as Markdown image references at the end of the content.

    :param m: Request model holding the ID of the target document.
    :return: JSON response of the form ``{"file": <markdown content>}``.
    :raises RouteException: If no file was sent, the file extension or
        mimetype is not allowed for import, the mimetype does not match
        the extension, or Pandoc fails to convert the file.
    """
    # NOTE(review): these presumably abort the request when the check
    # fails -- confirm against timApp.auth.accesshelper.
    verify_logged_in()
    d = DocEntry.find_by_id(m.doc_id)
    verify_edit_access(d)

    file = request.files.get("file")
    # The filename may be missing as well; without it the type of the
    # file cannot be determined.
    if file is None or not file.filename:
        raise RouteException("Missing file")

    filetype = file.filename.split(".")[-1]
    expected_mimetype = ALLOWED_DOC_IMPORT_EXT_MIMETYPES.get(filetype)

    if expected_mimetype is None or not is_script_safe_mimetype(file.mimetype):
        raise RouteException("Unsupported file.")

    # Basic sanity check
    if file.mimetype != expected_mimetype:
        raise RouteException("Invalid file: file type does not match mimetype.")

    # Save the file to disk temporarily, so we can give it to Pandoc
    tmp_dir = temp_folder_path.as_posix()
    fd, name = tempfile.mkstemp(suffix=f".{filetype}", dir=tmp_dir)
    # mkstemp returns an already open OS-level descriptor. The file is
    # only accessed by path below, so close the descriptor right away
    # instead of leaking it.
    os.close(fd)

    try:
        file.save(name)

        # Convert file with Pandoc and return the content
        try:
            content = convert_file(name, format=filetype, to="md", sandbox=True)
        except RuntimeError as e:
            raise RouteException(f"Could not convert file. {e}") from e
        data = {"file": content}

        # If the document contains embedded image files, extract those,
        # upload them and add references to them to the end of the document
        # content. Currently only supported for .docx and .odt files
        if filetype in ("docx", "odt"):
            img_pat = re.compile(
                r"(word/media/|Pictures/).+\.(png|jpg|jpeg|gif|bmp|tif|tiff|tga)"
            )
            uploaded_images = []
            with zipfile.ZipFile(name, "r") as zf:
                img_list = [
                    img_name for img_name in zf.namelist() if img_pat.match(img_name)
                ]
                for img in img_list:
                    imagefile = ImportedImageFile(filename=img.split("/")[-1])

                    img_upload = save_file_and_grant_access(
                        d,
                        content=zf.read(img),
                        file=imagefile,
                        block_type=BlockType.from_str("image"),
                    )
                    db.session.commit()
                    uploaded_images.append(img_upload)

            if uploaded_images:
                image_refs = "".join(
                    f"\n![{u.filename}](/images/{u.id}/{u.filename})"
                    for u in uploaded_images
                )
                data["file"] += (
                    "\n----------------------------------------\n"
                    "Images contained in the document file:\n"
                    f"{image_refs}"
                )
    finally:
        # Delete the temporary file even when the conversion or the image
        # extraction fails, so that failed imports do not pile up on disk.
        os.remove(name)

    return json_response(data)
40 changes: 40 additions & 0 deletions timApp/i18n/messages.fi.xlf
Original file line number Diff line number Diff line change
Expand Up @@ -9056,6 +9056,46 @@
<context context-type="linenumber">752</context>
</context-group>
</trans-unit>
<trans-unit id="2781878696015708257" datatype="html">
<source>Document import instructions</source>
<target state="translated">Asiakirjojen tuontiohjeet</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">222</context>
</context-group>
</trans-unit>
<trans-unit id="2931024279077252601" datatype="html">
<source>You can use this tab to import document files directly to editable text. Click on 'Browse...' to select a document to import.</source>
<target state="translated">Voit käyttää tämän välilehden toimintoa tuodaksesi dokumenttitiedoston sisällön editoitavaksi tekstiksi. Klikkaa 'Browse...' -painiketta valitaksesi tuotavan tiedoston.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">223</context>
</context-group>
</trans-unit>
<trans-unit id="1063006754352868679" datatype="html">
<source>Currently supported document formats: Microsoft Word (.docx), OpenOffice/LibreOffice Writer (.odt), Markdown (.md), TeX/LaTeX document (.tex), raw text (.txt).</source>
<target state="translated">Tuetut dokumenttiformaatit: Microsoft Word (.docx), OpenOffice/LibreOffice Writer (.odt), Markdown (.md), TeX/LaTeX-dokumentti (.tex), raakateksti (.txt).</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">224</context>
</context-group>
</trans-unit>
<trans-unit id="6834985917377711936" datatype="html">
<source>Document styles are not imported. You may need to correct formatting and/or styles manually.</source>
<target state="translated">Asiakirjan tyylejä ei tuoda. Voit joutua korjaamaan muotoiluja ja/tai tyylejä käsin.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">225</context>
</context-group>
</trans-unit>
<trans-unit id="3187018401189234278" datatype="html">
<source>Images embedded in the document are automatically uploaded. You will find the appropriate image references at the end of the imported content.</source>
<target state="translated">Asiakirjaan upotetut kuvat ladataan automaattisesti. Löydät asianmukaiset kuvaviittaukset tuodun sisällön lopusta.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">226</context>
</context-group>
</trans-unit>
<trans-unit id="8875063726542017107" datatype="html">
<source>Could not finish uploading. Please check your internet connection and try again.</source>
<target state="translated">Latausta ei voitu viimeistellä. Tarkista Internet-yhteys ja kokeile ladata tiedosto uudelleen.</target>
Expand Down
40 changes: 40 additions & 0 deletions timApp/i18n/messages.sv.xlf
Original file line number Diff line number Diff line change
Expand Up @@ -8958,6 +8958,46 @@
<context context-type="linenumber">752</context>
</context-group>
</trans-unit>
<trans-unit id="2781878696015708257" datatype="html">
<source>Document import instructions</source>
<target state="translated">Instruktioner för import av dokument</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">222</context>
</context-group>
</trans-unit>
<trans-unit id="2931024279077252601" datatype="html">
<source>You can use this tab to import document files directly to editable text. Click on 'Browse...' to select a document to import.</source>
<target state="translated">Du kan använda den här fliken för att importera dokumentfiler direkt till redigerbar text. Klicka på "Bläddra..." för att välja ett dokument att importera.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">223</context>
</context-group>
</trans-unit>
<trans-unit id="1063006754352868679" datatype="html">
<source>Currently supported document formats: Microsoft Word (.docx), OpenOffice/LibreOffice Writer (.odt), Markdown (.md), TeX/LaTeX document (.tex), raw text (.txt).</source>
<target state="translated">Dokumentformat som stöds för närvarande: Microsoft Word (.docx), OpenOffice/LibreOffice Writer (.odt), Markdown (.md), TeX/LaTeX-dokument (.tex), råtext (.txt).</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">224</context>
</context-group>
</trans-unit>
<trans-unit id="6834985917377711936" datatype="html">
<source>Document styles are not imported. You may need to correct formatting and/or styles manually.</source>
<target state="translated">Dokumentstilar importeras inte. Du kan behöva korrigera formateringen och/eller stilarna manuellt.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">225</context>
</context-group>
</trans-unit>
<trans-unit id="3187018401189234278" datatype="html">
<source>Images embedded in the document are automatically uploaded. You will find the appropriate image references at the end of the imported content.</source>
<target state="translated">Bilder som är inbäddade i dokumentet laddas upp automatiskt. Du hittar lämpliga bildreferenser i slutet av det importerade innehållet.</target>
<context-group purpose="location">
<context context-type="sourcefile">static/scripts/tim/editor/pareditor.ts</context>
<context context-type="linenumber">226</context>
</context-group>
</trans-unit>
<trans-unit id="8875063726542017107" datatype="html">
<source>Could not finish uploading. Please check your internet connection and try again.</source>
<target state="new">Could not finish uploading. Please check your internet connection and try again.</target>
Expand Down
2 changes: 2 additions & 0 deletions timApp/static/scripts/tim/document/editing/editing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,9 @@ This will delete the whole ${
(If you only want to remove selected text, use backspace.)`,
localSaveTag: options.localSaveTag ?? "par",
showDelete: options.showDelete,
showUpload: true,
showImageUpload: true,
showDocumentImport: true,
showPlugins: true,
cursorPosition: cursorPos,
showSettings:
Expand Down
2 changes: 2 additions & 0 deletions timApp/static/scripts/tim/document/notes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ export class NotesHandler {
caption,
localSaveTag: "note",
showDelete: !!options.noteData,
showUpload: true,
showImageUpload: true,
showDocumentImport: false,
showSettings: false,
tags: [],
showPlugins: false,
Expand Down
80 changes: 79 additions & 1 deletion timApp/static/scripts/tim/editor/pareditor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ export interface IEditorParams {
showDelete: boolean;
showPlugins: boolean;
showSettings: boolean;
showUpload: boolean;
showImageUpload: boolean;
showDocumentImport: boolean;
touchDevice: boolean;
tags: ITag[];
choices?: IChoice[];
Expand Down Expand Up @@ -212,6 +214,14 @@ export interface ISpellWordInfo {
suggestions: string[];
}

/**
 * Localized help texts for the document import feature.
 *
 * Every field is initialized from a `$localize` tagged template, so the
 * message texts below are runtime strings rather than comments.
 * NOTE(review): presumably rendered by the editor template on the document
 * import tab -- confirm against the template.
 */
class DocumentImportHelp {
// Heading of the instructions.
heading: string = $localize`Document import instructions`;
// Short description of what the import does and how to start it.
shortHelp: string = $localize`You can use this tab to import document files directly to editable text. Click on 'Browse...' to select a document to import.`;
// List of the file formats that can be imported.
formats: string = $localize`Currently supported document formats: Microsoft Word (.docx), OpenOffice/LibreOffice Writer (.odt), Markdown (.md), TeX/LaTeX document (.tex), raw text (.txt).`;
// Notice that styles are not carried over by the import.
styles: string = $localize`Document styles are not imported. You may need to correct formatting and/or styles manually.`;
// Notice about how embedded images are handled.
images: string = $localize`Images embedded in the document are automatically uploaded. You will find the appropriate image references at the end of the imported content.`;
}

export class PareditorController extends DialogController<
{params: IEditorParams},
IEditorResult
Expand Down Expand Up @@ -282,6 +292,7 @@ export class PareditorController extends DialogController<
private currentSymbol: FormulaEvent = {
text: "",
};
docImportHelp: DocumentImportHelp;

constructor(protected element: JQLite, protected scope: IScope) {
super(element, scope);
Expand Down Expand Up @@ -1056,6 +1067,10 @@ ${backTicks}
name: "Upload",
entries: [],
},
{
name: "Import document",
entries: [],
},
];

$(document).on(
Expand All @@ -1071,6 +1086,7 @@ ${backTicks}
this.outofdate = false;
this.parCount = 0;
this.touchDevice = false;
this.docImportHelp = new DocumentImportHelp();
}

getEditor() {
Expand All @@ -1082,14 +1098,24 @@ ${backTicks}
// it has special content that cannot be placed under "extra".

return this.tabs.filter(
(tab) => (!tab.show || tab.show()) && tab.name !== "Upload"
(tab) =>
(!tab.show || tab.show()) &&
tab.name !== "Upload" &&
tab.name !== "Import document"
);
}

/**
 * Looks up the tab named "upload_main_tab" with findTab.
 *
 * NOTE(review): no tab with this name is visible in this excerpt -- confirm
 * that one is defined, otherwise the lookup finds nothing.
 */
getUploadMainTab() {
return this.findTab("upload_main_tab");
}
/**
 * Looks up the tab named "upload" with findTab.
 */
getUploadTab() {
return this.findTab("upload");
}

/**
 * Looks up the tab named "pandoc" with findTab.
 *
 * NOTE(review): the document import tab visible in this excerpt is named
 * "Import document", not "pandoc" -- confirm that a tab named "pandoc"
 * exists, otherwise the lookup finds nothing.
 */
getPandocTab() {
return this.findTab("pandoc");
}

findTab(name: string) {
return this.tabs.find(
(tab) => tab.name.toLowerCase() === name.toLowerCase()
Expand Down Expand Up @@ -2257,6 +2283,58 @@ ${backTicks}
}
}

/**
 * Uploads the selected document file to the `/importDocFile` route and
 * appends the converted content returned by the server to the editor text.
 *
 * Upload progress and a possible error message are stored on `this.file`.
 * Images embedded in the imported document are uploaded by the server,
 * which adds links to them to the end of the converted content.
 *
 * @param file The document file selected for import.
 */
async onFileSelectForPandoc(file: File) {
    const editor = this.editor!;
    await this.focusEditor();
    this.file = file;
    if (!file) {
        return;
    }

    this.file.progress = 0;
    this.file.error = undefined;
    const upload = $upload.upload<{file: string}>({
        data: {
            doc_id: this.getExtraData().docId.toString(),
            file,
        },
        method: "POST",
        url: "/importDocFile",
    });
    upload.progress((evt) => {
        if (this.file) {
            this.file.progress = Math.min(
                100,
                Math.floor((100.0 * evt.loaded) / evt.total)
            );
        }
    });

    const result = await to(upload);

    if (!result.ok) {
        const response = result.result;
        if (this.file) {
            this.file.error = response.data.error;
        }
        return;
    }

    const convertedDoc = result.result.data.file;
    $timeout(() => {
        // For now, just append the converted file content to the end of
        // the current editor text.
        // The end position is read at insertion time, because the text may
        // have changed while the upload was in progress. The position is
        // the full text length: one less would place the cursor before the
        // last character and be negative for an empty editor.
        const end = editor.getEditorText().length;
        editor.setPosition([end, end]);
        editor.insertTemplate(`\n${convertedDoc}\n`);
    });
}

async putTemplate(data: string) {
await this.focusEditor();
data = await replaceTemplateValues(data);
Expand Down
2 changes: 2 additions & 0 deletions timApp/static/scripts/tim/editor/pareditorOpen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ export async function openEditorSimple(
choices: undefined,
localSaveTag: localSaveTag,
showDelete: false,
showUpload: true,
showImageUpload: true,
showDocumentImport: true,
showPlugins: false,
showSettings: false,
tags: [],
Expand Down
Loading

0 comments on commit d5ded20

Please sign in to comment.