-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_baseline.py
71 lines (52 loc) · 2.54 KB
/
simple_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import csv
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
import spacy
import pytextrank
def clean_text(text):
    """Split *text* into sentences and reduce each to its content words.

    Every sentence is lowercased and word-tokenized; tokens that are not
    purely alphabetic, or that appear in NLTK's English stopword list, are
    discarded.

    Returns:
        A list with one entry per sentence, each entry being the list of
        tokens kept for that sentence (an entry may be empty).
    """
    # Tokenize every sentence first, then load the stopword list.
    tokenized_sentences = []
    for raw_sentence in sent_tokenize(text):
        tokenized_sentences.append(word_tokenize(raw_sentence.lower()))

    english_stopwords = set(stopwords.words('english'))

    def is_content_word(token):
        return token.isalpha() and token not in english_stopwords

    result = []
    for tokens in tokenized_sentences:
        result.append(list(filter(is_content_word, tokens)))
    return result
# Lazily-built spaCy pipeline shared by every text_rank() call; loading the
# model and registering the component is far too slow to repeat per document.
_TEXTRANK_NLP = None


def _get_textrank_nlp():
    """Return the spaCy pipeline with the "textrank" component, building it once."""
    global _TEXTRANK_NLP
    if _TEXTRANK_NLP is None:
        nlp = spacy.load("en_core_web_sm")
        nlp.add_pipe("textrank", config={"stopwords": {"word": ["NOUN"]}})
        _TEXTRANK_NLP = nlp
    return _TEXTRANK_NLP


def text_rank(input, k):
    """Build an extractive summary of *input* using the "textrank" component.

    Args:
        input: The text to summarize. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)
        k: Maximum number of sentences requested from the summarizer
            (passed as ``limit_sentences``).

    Returns:
        The selected sentences concatenated into one string, each preceded
        by a single space; an empty string when no sentence is produced.

    Side effects:
        Prints the textrank object and its ``elapsed_time`` to stdout.
    """
    doc = _get_textrank_nlp()(input)
    tr = doc._.textrank
    print(tr)
    print(tr.elapsed_time)
    # Each sentence keeps the leading space the output format has always had.
    return "".join(
        f" {sent}" for sent in tr.summary(limit_phrases=10, limit_sentences=k)
    )
# Sample news passage (Covid-19 figures for India) held in a module-level
# constant. No code visible in this file reads it; presumably kept for manual
# experiments with the functions above — confirm before removing.
text = '''India recorded its lowest daily Covid-19 cases in over four months on Tuesday as it
registered 30,093 fresh cases of the coronavirus disease, the Union ministry of health and
family welfare data showed. The last time India's Covid-19 tally was below 30,000-mark was on
March 16 when the country saw 28,903 fresh cases.
The country also saw 374 deaths due to Covid-19 in the last 24 hours, taking the death toll to 414,482. This is also the lowest death count India has seen after over three months. India witnessed deaths below 400 on March 30 when 354 fatalities were recorded.
Active cases of Covid-19 in the last 24 hours dipped sharply by 15,535, bringing the current infections in the country down to 406,130, the health ministry data showed. These account for 1.35% of the total infections reported in the country.
At least 45,254 people recovered from the infectious disease in the last 24 hours, taking India's recovery rate to 97.32%.'''
def run(file_path='kindle_reviews.csv',
        output_file_path='evaluation_output.txt',
        lines_to_read=5):
    """Summarize the first reviews of a CSV file and write them to a text file.

    For each of the first ``lines_to_read`` data rows, the review text is
    written to the output file followed by its one-sentence ``text_rank``
    summary.

    Args:
        file_path: CSV file with a header row containing a ``reviewText``
            column.
        output_file_path: Destination text file; overwritten if it exists.
        lines_to_read: Number of data rows to process.

    Raises:
        KeyError: If the CSV has no ``reviewText`` column.
        OSError: If either file cannot be opened.
    """
    # Local import keeps this block self-contained.
    from itertools import islice

    # newline='' is what the csv module requires so that quoted fields with
    # embedded newlines are parsed correctly; the encoding is pinned so the
    # result does not depend on the platform's locale.
    with open(file_path, 'r', newline='', encoding='utf-8') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            # islice yields exactly `lines_to_read` rows; the previous
            # `line_num > lines_to_read` check let one extra row through.
            for row in islice(csv_reader, lines_to_read):
                review_text = row['reviewText']
                output_file.write("\n\n" + review_text)
                output_file.write("\n" + text_rank(review_text, 1))
# Script entry point. There is no `if __name__ == "__main__":` guard, so this
# call also executes when the module is imported.
run()