-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_terrorist
executable file
·47 lines (38 loc) · 1.22 KB
/
find_terrorist
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /usr/bin/env python
import argparse
import json
import nltk
import sys
def main(args):
    """Print every sentence from the source file that mentions a suspect term.

    The file named by ``-s/--source`` is parsed as JSON; it is assumed to
    hold an array of text strings (TODO confirm against the data producer).
    Each entry is split into sentences with ``nltk.sent_tokenize`` and the
    sentences are de-duplicated after stripping surrounding whitespace.

    Suspect terms are read, one per line, from a file named ``suspect`` in
    the current working directory.  Blank lines are ignored and matching is
    a case-insensitive substring test.  Every matching sentence is printed
    exactly once, followed by a ``-----`` separator line.

    Args:
        args: Command-line arguments, excluding the program name.
    """
    parser = argparse.ArgumentParser(
        description="Frame someone as a terrorist.",
    )
    # TODO: Make it get data from stdin if this is not supplied
    parser.add_argument(
        '-s', '--source',
        help='Get data from this file.',
        dest='source',
        required=True,
    )
    options = parser.parse_args(args)

    with open(options.source) as fh:
        data = json.load(fh)

    # Map each distinct sentence to the positions of the entries containing
    # it.  Only the keys are consumed below; the positions are kept for
    # parity with the original data structure.
    all_sentences = {}
    for pos, text in enumerate(data):
        for sentence in nltk.sent_tokenize(text):
            all_sentences.setdefault(sentence.strip(), []).append(pos)

    # Get the list of suspect terms
    with open('suspect') as fh:
        # Lowercase the terms because sentences are lowercased before the
        # comparison; drop blank lines because the empty string is a
        # substring of every sentence and would flag everything.
        normalized = (line.strip().lower() for line in fh)
        suspect_terms = [term for term in normalized if term]

    for sentence in all_sentences:
        lowered = sentence.lower()
        # any() stops at the first hit, so a sentence containing several
        # suspect terms is still reported only once.
        if any(term in lowered for term in suspect_terms):
            print(sentence)
            print('-----')
# Script entry point: forward the command-line arguments (without the
# program name) to main() when this file is executed directly.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)