-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeneral_extractor.py
40 lines (30 loc) · 1.71 KB
/
general_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from os import listdir
import string
import re
# Input directory with the case .txt files and output directory for the
# extracted data files.  Both end in "/" because file names are appended to
# them by plain string concatenation.
casefolder, datafolder = (
    "/Users/wuguowei/Google Drive/Year 4 Sem 2/Artificial Intelligence/scrapelawnet/casesTXT/",
    "/Users/wuguowei/Google Drive/Year 4 Sem 2/Artificial Intelligence/scrapelawnet/data/",
)

# --- State for the level-of-court extraction ---
# List of headers (starts empty).
headerlist = list()
# Running row counter; written as the first column of each output row.
n = 0
# Extract the level of court and the coram from every case file and write one
# "index,court,coram" row per case to data_court.txt.

# Leading field labels.  The optional whitespace covers both label spellings
# this script looks for ("Tribunal/Court:" / "Tribunal/Court : ", and the
# same two forms for "Coram").
_COURT_LABEL = re.compile(r"^\s*Tribunal/Court\s*:\s*")
_CORAM_LABEL = re.compile(r"^\s*Coram\s*:\s*")

# Court name -> one-letter code written to the data file.  Any other court
# name is recorded as "NA".
_COURT_CODES = {
    "High Court": "H",
    "District Court": "D",
    "Magistrates Court": "M",
}


def _strip_label(label, line):
    """Return ``line`` without its leading label and surrounding whitespace.

    A real prefix match is used instead of ``str.strip(chars)``: ``strip``
    treats its argument as a *set of characters*, so it can also remove
    characters from the start or end of the value itself.

    Args:
        label: compiled pattern matching the label at the start of the line.
        line: raw line from the case file (may end with a newline).

    Returns:
        The field value, stripped of whitespace.  If the label is absent the
        whole line (stripped) is returned.
    """
    return label.sub("", line, count=1).strip()


with open(datafolder + "data_court.txt", "w") as f:  # output data file
    for filename in listdir(casefolder):
        # Only process .txt case files; ignore entries starting with ".DS".
        if not filename.endswith(".txt") or filename.startswith(".DS"):
            continue

        with open(casefolder + filename) as currentCase:
            lines = currentCase.readlines()

        # The court is read from the 5th line and the coram from the 6th.
        # A file too short to contain them still gets a row ("NA" court,
        # empty coram) instead of aborting the whole run with IndexError.
        court_line = lines[4] if len(lines) > 4 else ""
        coram_line = lines[5] if len(lines) > 5 else ""

        # COURT RETRIEVE
        courtdata = _strip_label(_COURT_LABEL, court_line)
        courtresult = _COURT_CODES.get(courtdata, "NA")

        # CORAM RETRIEVE -- the trailing newline is stripped so that each
        # case produces exactly one output line (no blank line after it).
        coramdata = _strip_label(_CORAM_LABEL, coram_line)

        # Further header fields can be appended to the row here.
        f.write(str(n) + "," + courtresult + "," + coramdata + "\n")
        print(filename)  # progress output; valid on Python 2 and 3
        n += 1