Skip to content

Commit

Permalink
Added sentence counter
Browse files: browse the repository at this point in the history
  • Loading branch information
valearna committed Dec 14, 2023
1 parent ac929f4 commit 90e9c8b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
10 changes: 7 additions & 3 deletions src/backend/api/endpoints/curator_dashboard.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import re
from collections import Counter

import requests
import numpy as np
Expand Down Expand Up @@ -179,10 +180,13 @@ def get_text_from_pdfs(self, paper_id):
sentences = [sentence for sentence in sentences if len(sentence) > 20 and len(sentence.split(" ")) > 2]
paper.abstract = paper.abstract if paper.abstract else ""
paper.title = paper.title if paper.title else ""
counter = Counter(sentences)
sentences = sorted(list(set(sentences)))
counter_list = [counter[sentence] for sentence in sentences]
res = requests.post(f"{os.environ['SENTENCE_CLASSIFICATION_API']}/api/sentence_classification/"
f"classify_sentences",
{"sentences": sentences})
return fulltext, sentences, json.dumps(res.json()["classes"])
return fulltext, sentences, counter_list, json.dumps(res.json()["classes"])

def on_post(self, req, resp, req_type):
with self.db:
Expand Down Expand Up @@ -282,10 +286,10 @@ def on_post(self, req, resp, req_type):
resp.body = '{{"afp_comments": {}}}'.format(comments)
resp.status = falcon.HTTP_200
elif req_type == "converted_text":
fulltext, sentences, classes = self.get_text_from_pdfs(paper_id)
fulltext, sentences, counters, classes = self.get_text_from_pdfs(paper_id)
sentences = ["\"" + sentence + "\"" for sentence in sentences]
resp.body = (f'{{"fulltext": "{fulltext}", "sentences": [{", ".join(sentences)}],'
f' "classes": {classes}}}')
f' "counters": {counters}, "classes": {classes}}}')
resp.status = falcon.HTTP_200
else:
raise falcon.HTTPError(falcon.HTTP_NOT_FOUND)
Expand Down
4 changes: 2 additions & 2 deletions src/frontend/curator_dashboard/src/lib/file.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ export const downloadCSVSpreadsheet = async (paperID) => {
}

/**
 * Build a tab-separated sentence-classification report and hand it to
 * `downloadFile`.
 *
 * One row is written per entry of `data.sentences`: the quoted sentence, the
 * value at the same index in `data.counters`, and the three classifier flags
 * for `dataType`, each coerced to a boolean.
 *
 * @param paperID  identifier appended to the downloaded file name
 * @param data     object with index-aligned `sentences` and `counters` arrays
 *                 and `classes[dataType]` holding the `all_info`, `curatable`
 *                 and `language` arrays
 * @param dataType key into `data.classes`; also part of the file name
 */
export const downloadSentenceClassificationCSV = async (paperID, data, dataType) => {
    const header = "SENTENCE\tCOUNTER\tHAS_ALL_INFO_FOR_CURATION\tIS_CURATABLE\tCONTAINS_LANGUAGE\n";
    const rows = data.sentences.map((sentence, idx) => {
        const flags = data.classes[dataType];
        // Double embedded quotes so the quoted sentence stays a single field.
        const quoted = "\"" + String(sentence).replace(/"/g, "\"\"") + "\"";
        // The trailing tab before the newline is kept to match the existing row format.
        return quoted + "\t" + data.counters[idx] + "\t" + Boolean(flags['all_info'][idx]) + "\t" +
            Boolean(flags['curatable'][idx]) + "\t" + Boolean(flags['language'][idx]) + "\t\n";
    });
    const formContent = header + rows.join("");
    downloadFile(formContent, "Sentence_level_classification_" + dataType + "_" + paperID, "text/plain", "csv");
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ const SentenceClassification = () => {
<br/>
{queryRes.data.data.fulltext ?
<Table>
{queryRes.data.data.sentences.filter((sent, idx) =>
queryRes.data.data.classes[dataType][classifierType][idx] === resultType).map(sent =>
<th>Sentence</th><th>Counter</th>
{queryRes.data.data.sentences.map((sent, idx) =>
queryRes.data.data.classes[dataType][classifierType][idx] === resultType ?
<tr>
<td>{sent}</td>
</tr>)
<td>{sent}</td><td>{queryRes.data.data.counters[idx]}</td>
</tr> : null )
}
</Table>
: null
Expand Down

0 comments on commit 90e9c8b

Please sign in to comment.