-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathannotate_fsummaries.py
174 lines (167 loc) · 8.9 KB
/
annotate_fsummaries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import streamlit as st
import streamlit_nested_layout
from text_highlighter import text_highlighter
import glob
import json
import nltk
import csv
import random
# Query-string parameters of the current request. They select which of the
# three pages (download, guidelines, annotation) is rendered below.
query = st.query_params

# Story ids grouped by the evaluation form shown to the annotator:
#   format_1 -> per-line consistency questions (uses the "fsummary" text)
#   format_2 -> choose between a line and its alternate (uses the "summary" text)
#   format_3 -> highlight ambiguous spans (uses the "fsummary" text)
format_1 = [1, 28, 18, 12, 22]
format_2 = [4, 7, 3, 6, 15, 23]
format_3 = [14, 21, 10, 30, 24]
to_download = ""  # NOTE(review): never read anywhere in this file


def clicked():
    """Button callback: record in the session state that Submit was pressed."""
    st.session_state.clicked = True


def _is_safe_path_component(name):
    """Return True when *name* can be used as a single file or directory name.

    The value comes from the URL, so anything that could escape the
    annotation folder (empty string, "." / "..", path separators, NUL) is
    rejected.
    """
    if not name or name in (".", ".."):
        return False
    forbidden = {"/", "\\", "\x00", os.sep}
    if os.altsep:
        forbidden.add(os.altsep)
    return not any(token in name for token in forbidden)


def _render_download_page():
    """List every stored annotation file and offer them as one JSON download."""
    annotations = []
    for output_name in glob.glob(pathname="data/annotations/*/*"):
        st.write(output_name)
        try:
            with open(output_name, "r", encoding="utf-8") as file:
                # The submit handler writes one JSON document on the first line.
                annotations.append(json.loads(file.readline()))
        except (OSError, ValueError):
            # Best effort: flag the unreadable/invalid file and keep going.
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            st.write("Failed")
    st.download_button(
        label="Download all annotations",
        data=json.dumps(annotations, indent=2),
        file_name="annotations.json",
    )


def _render_guideline_page():
    """Display the annotation guidelines stored in the markdown file."""
    guideline_name = "fsummary_guildline.md"
    with open(guideline_name, "r", encoding="utf-8") as f:
        guideline = f.read()
    st.markdown(guideline)


def _render_peek(summary_text, actual_subj, is_subj, themes):
    """Show every summary line colour-coded with its stored labels.

    A line is printed in red when ``is_subj[i] == 1`` and in green otherwise.
    When ``actual_subj[i][0]`` differs from ``is_subj[i]`` the line is
    reported as swapped (presumably between its objective and subjective
    variants -- confirm against the data files).
    """
    for i, line in enumerate(summary_text):
        unchanged = actual_subj[i][0] == is_subj[i]
        if is_subj[i] == 1:
            if unchanged:
                st.markdown(f":red[Theme {themes[i]}: {line}]")
            else:
                st.markdown(f":red[Objective swapped to Theme {themes[i]} Subjective, {line}]")
        else:
            if unchanged:
                st.markdown(f":green[{line}]")
            else:
                st.markdown(f":green[Subjective Theme {actual_subj[i][1]} swapped to Objective: {line}]")


def _render_consistency_form(summary_text):
    """Render the format_1 form (per-line consistency) and return the answers.

    Returns a dict with ``consistent_<i>`` for every line and
    ``explanation_<i>`` for the lines answered "No".
    """
    selected = {}
    st.markdown("### Summary")
    st.markdown(" ".join(summary_text))
    st.markdown("### Summary Evaluation")
    st.markdown("For each line in the summary, evaluate if it is consistent with the story.")
    st.markdown("Along with the selected input, you can provide an explanation as to why you selected a particular answer, *if you mark the line as inconsistent to the story*. When evaluating, remember that the events and details described in a consistent summary should not misrepresent details from the story or include details that are unsupported by the story.")
    st.markdown("#### Answers")
    binary_choice_list = ["Yes", "No", "N/A, just commentary"]
    for i, line in enumerate(summary_text):
        st.markdown(f"Line {i+1}: {line}")
        # String keys are stable and cannot collide, unlike hash(str) + i.
        selected[f"consistent_{i}"] = st.radio(
            "Is this line in the summary consistent with the story?",
            key=f"consistent_{i}",
            options=binary_choice_list,
            index=None,
        )
        if selected[f"consistent_{i}"] == "No":
            selected[f"explanation_{i}"] = st.text_area("Provide an explanation for your selection.", key=f"explanation_{i}")
    return selected


def _render_alternate_form(summary_id, summary_text, actual_subj, rng):
    """Render the format_2 form (line vs. alternate) and return the answers.

    Only lines whose ``actual_subj`` entry is ``[1, theme]`` with
    ``theme != 4`` get a question. For those, the summary line and the
    "objective" sentence from ``storysumm_claim_level.json`` are shown in an
    order drawn from *rng*; ``alternate_<i>`` records which of the two was
    shown as the alternate.
    """
    with open("storysumm_claim_level.json", "r", encoding="utf-8") as f:
        claims = json.load(f)
    selected = {}
    st.markdown("### Summary")
    st.markdown(" ".join(summary_text))
    st.markdown("### Summary Evaluation")
    st.markdown("For some lines in the summary, choose between two potential candidates.")
    binary_choice_list = ["Yes", "No"]
    for i, (line, subj) in enumerate(zip(summary_text, actual_subj)):
        if subj[0] == 1 and subj[1] != 4:
            obj_line = claims[f"{summary_id}_{i}"]["sentences"]["objective"]
            if rng.randint(0, 1) == 0:
                st.markdown(f"Line {i+1}: {line}")
                st.markdown(f"Alternate Line {i+1}: {obj_line}")
                selected[f"alternate_{i}"] = "objective"
            else:
                st.markdown(f"Line {i+1}: {obj_line}")
                st.markdown(f"Alternate Line {i+1}: {line}")
                selected[f"alternate_{i}"] = "subjective"
            # NOTE(review): the question is assumed to be asked only for the
            # lines that have an alternate -- confirm the intended nesting.
            selected[f"consistent_{i}"] = st.radio(
                "Would you swap the line with its alternate?",
                key=f"consistent_{i}",
                options=binary_choice_list,
                index=None,
            )
            selected[f"explanation_{i}"] = st.text_area("Provide an explanation for your selection.", key=f"explanation_{i}")
    return selected


def _render_ambiguity_form(summary_text):
    """Render the format_3 form (highlight ambiguous spans) and return the answers.

    Returns a dict with ``annotation_<i>`` for every line and
    ``explanation_<i>`` for the lines that have at least one highlight.
    """
    selected = {}
    st.markdown("### Summary")
    st.markdown(" ".join(summary_text))
    st.markdown("### Summary Evaluation")
    st.markdown("For each line in the summary, highlight any portion that makes the line ambiguous with respect to the story. You can do so by clicking and dragging the cursor on the text.")
    st.markdown("Along with the selected input, you can provide an explanation as to why you highlighted this portion as ambiguous.")
    st.markdown("#### Answers")
    for i, line in enumerate(summary_text):
        selected[f"annotation_{i}"] = text_highlighter(
            text=line,
            labels=[("ambiguous", "red")],
            annotations=[],
        )
        if selected[f"annotation_{i}"]:
            selected[f"explanation_{i}"] = st.text_area("Provide an explanation for your selection.", key=f"explanation_{i}")
    return selected


def _render_annotation_page(username, summary_id, peek):
    """Render the two-column annotation page for one (user, summary) pair.

    The story is shown on the left. The right column shows either the
    colour-coded labels (``peek == '1'``) or the evaluation form matching the
    story's format, followed by a Submit button that writes the answers to
    ``data/annotations/<username>/<summary_id>.jsonl``.
    "Wrong URL." is shown when the summary id is unknown, the story has no
    assigned format, or the URL values are not usable as path components.
    """
    st.set_page_config(layout="wide")
    nltk.download('punkt')
    if 'clicked' not in st.session_state:
        st.session_state.clicked = False
    col1, col2 = st.columns(2)

    # One JSON document keyed by summary id. The fields read below are
    # 'story-id', 'story', 'fsummary', 'summary', 'subj', 'fsummary_subj'
    # and 'fsummary_themes'.
    with open("fsummaries.json", "r", encoding="utf-8") as f:
        source_articles = json.load(f)

    # Both values come from the URL and end up in a filesystem path, so
    # reject anything unknown or able to escape the annotation folder.
    if (
        summary_id not in source_articles
        or not _is_safe_path_component(username)
        or not _is_safe_path_component(summary_id)
    ):
        st.markdown("Wrong URL.")
        return

    record = source_articles[summary_id]
    story_id = record['story-id']
    if story_id not in format_1 + format_2 + format_3:
        st.markdown("Wrong URL.")
        return

    # Markdown needs a blank line between paragraphs.
    article_text = record['story'].replace('\n', '\n\n')
    if story_id in format_1 or story_id in format_3:
        summary_text = record['fsummary']
    else:
        summary_text = record['summary']
    actual_subj = record['subj']
    is_subj = record['fsummary_subj']
    themes = record['fsummary_themes']

    with col1.container(height=700):
        with st.container():
            st.markdown("### Story")
            st.markdown(article_text)
            st.markdown("---")

    with col2.container(height=700):
        with st.container():
            outfolder = f"data/annotations/{username}"
            os.makedirs(outfolder, exist_ok=True)
            output_name = os.path.join(outfolder, f"{summary_id}.jsonl")

            if peek == '1':
                _render_peek(summary_text, actual_subj, is_subj, themes)
            else:
                # NOTE(review): the submit form is assumed to belong to the
                # non-peek branch only -- confirm the intended nesting.
                if story_id in format_1:
                    selected = _render_consistency_form(summary_text)
                elif story_id in format_2:
                    # A private generator seeded with the story id keeps the
                    # line/alternate order identical on every rerun without
                    # reseeding the process-wide `random` state.
                    rng = random.Random(story_id)
                    selected = _render_alternate_form(summary_id, summary_text, actual_subj, rng)
                else:
                    selected = _render_ambiguity_form(summary_text)

                annotation = {
                    "id": summary_id,
                    "username": username,
                    "story": article_text,
                    "summary": summary_text,
                    "annotation": selected,
                }
                # Persist the answers when Submit is pressed; a resubmission
                # overwrites the previous file for this user and summary.
                if st.button("Submit", on_click=clicked):
                    with open(output_name, "w", encoding="utf-8") as f:
                        f.write(json.dumps(annotation) + "\n")
                    st.success("Annotation submitted successfully!")


# Page dispatch:
#   ?download                      -> export all stored annotations
#   ?username=...&summaryid=...    -> annotation page (add &peek=1 for labels)
#   anything else                  -> annotation guidelines
if "download" in query:
    _render_download_page()
elif "username" not in query or "summaryid" not in query:
    _render_guideline_page()
else:
    _render_annotation_page(
        username=query["username"],
        summary_id=query["summaryid"],
        peek=query["peek"] if "peek" in query else "0",
    )