Skip to content

Commit

Permalink
Added removal of tmp docx files from file system
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidMockler committed Jul 23, 2024
1 parent bb5a4e0 commit bf80f31
Showing 1 changed file with 17 additions and 21 deletions.
38 changes: 17 additions & 21 deletions api/src/routes/digitalReader.route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,51 @@ const makeEndpoints = require('../utils/makeEndpoints');
const nodePandoc = require('node-pandoc-promise');

const express = require('express');
var multer = require('multer');
var path = require('path')
const multer = require('multer');
const path = require('path');

const fs = require('node:fs');

// Disk storage settings for multer: where uploaded files are written and
// how the stored file is named.
const multerStorage = multer.diskStorage({
  // Every upload is written to the same temporary uploads directory.
  destination(req, file, cb) {
    cb(null, './public/temp/uploads/');
  },
  // Stored name is "upload_<millisecond timestamp><original extension>".
  filename(req, file, cb) {
    const extension = path.extname(file.originalname);
    cb(null, `upload_${Date.now()}${extension}`);
  },
});

var upload = multer({
// Multer instance configured to store uploads using multerStorage.
const upload = multer({
storage: multerStorage
});

// Express router on which the digital reader endpoints in this file are registered.
const digitalReaderRoute = express.Router();

//digitalReaderRoute.route('/docx2html').post(require('../endpoint/digitalReader/docx2html'))

//TODO : Refactor all this into a service (?), return the converted html file and return it to be used in a separate segmentation call.
digitalReaderRoute.route('/docx2html').post(upload.single("docx"), async function (req, res) {

//console.log(req.file);
//TODO : Factor some of this functionality out into services and individual endpoint files (?)


/**
 * POST /docx2html
 *
 * Expects a multipart upload in the "docx" field. The uploaded file is
 * converted to a standalone HTML5 document with pandoc and the resulting
 * HTML is sent back as the JSON response body.
 *
 * Responses:
 *  - 200: the converted HTML (as returned by nodePandoc)
 *  - 400: no file was supplied in the "docx" field
 *  - 500: the pandoc conversion failed
 *
 * The temporary uploaded file is removed from disk once the conversion has
 * been attempted, whether it succeeded or failed.
 */
digitalReaderRoute.route('/docx2html').post(upload.single("docx"), async function (req, res) {

  // req.file is undefined when the request carries no "docx" file part;
  // reject early instead of throwing on req.file.path.
  if (!req.file) {
    return res.status(400).json({ error: 'No docx file was uploaded' });
  }

  const pathToFile = req.file.path;

  const pandocArgs = ['-f', 'docx', '-t', 'html5', '--standalone', '--embed-resources', '--wrap=none', '--no-highlight'];

  try {
    const htmlOutput = await nodePandoc(pathToFile, pandocArgs);
    res.json(htmlOutput);
  } catch (err) {
    // Without this catch a rejected conversion inside the async handler
    // would leave the request without any response.
    console.error('Problem converting docx file to html');
    console.error(err);
    res.status(500).json({ error: 'Failed to convert docx file to html' });
  } finally {
    // Cleanup is best-effort and runs on both success and failure so that
    // temporary uploads do not accumulate on disk.
    fs.unlink(pathToFile, function (err) {
      if (err) {
        console.error('Problem removing docx file from file system');
        console.error(err);
      } else {
        console.log('Successfully removed tmp docx file from file system');
      }
    });
  }
})

digitalReaderRoute.route('/segment-html').post(/*upload.single("docx"), */async function (req, res) {
Expand Down

0 comments on commit bf80f31

Please sign in to comment.