-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgrep.py
26 lines (21 loc) · 1.06 KB
/
grep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from bs4 import BeautifulSoup
from os import listdir
# print """
# For fact paragraphs, type HN-Facts.
# For holding paragraphs, type HN-Held.
# For judgement paragraphs, type Judg-1.
# """
# searchTerm = raw_input("Which paragraphs do you want to search for? ")
# Keywords to look for. A paragraph is kept when it contains at least one of
# them; matching is a case-sensitive substring test, so e.g. "fine" also
# matches inside "defined".
searchTerm = ["misappropriated", "appeal", "dedication", "contribution", "compassion", "first-time", "deterrence", "public institution", "substantial", "premeditated", "fine"]

# Directory that holds the case HTML files; the results file is written into
# the same directory. The trailing slash is required because the paths below
# are built by plain string concatenation.
CASE_DIR = "/Users/jobchong/git/scrapelawnet/"

# Binary mode: paragraphs are written as UTF-8 encoded bytes, which a
# text-mode file object rejects on Python 3 (and which is a no-op difference
# on Python 2 / POSIX).
with open(CASE_DIR + "Output.txt", "wb") as f:
    for filename in listdir(CASE_DIR):
        if not filename.endswith(".html"):
            continue
        # Pass raw bytes to BeautifulSoup so it works out the document
        # encoding itself rather than relying on the locale default.
        with open(CASE_DIR + filename, "rb") as currentCase:
            soup = BeautifulSoup(currentCase, "html.parser")
        for para in soup.find_all("p"):
            parastrip = para.get_text()
            # any() stops at the first hit, so a paragraph that contains
            # several search terms is written once, not once per term.
            if any(term in parastrip for term in searchTerm):
                f.write(parastrip.encode("utf-8"))
                f.write(b"\n")