stat_summary.py
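"""Print size statistics for each stage of the dataset pipeline.

A minimal summary script: it loads the CSV files produced by the
c_[dataset].py collectors, by generateStanceCSV.py, and by the stance
classifier, and prints their shapes. It assumes the eatiht/ and src/
directories sit next to this script.
"""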
import os
import sys

import pandas as pd

# Set paths relative to the directory containing this script.
cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
datasets_path = cwd + "/eatiht/datasets/"
stance_path = cwd + "/eatiht/stance_class/"
path = cwd + "/src/fnc-1/deep_learning_model/"  # FNC-1 model directory (not referenced below)
# Initial datasets (one CSV per source, each with its list of source links),
# generated by the c_[dataset].py scripts.
emergent = pd.read_csv(datasets_path+"emergent.csv", sep="\t")
factcheck = pd.read_csv(datasets_path+"factcheck.csv", sep="\t")
politifact = pd.read_csv(datasets_path+"politifact.csv", sep="\t")
snopes = pd.read_csv(datasets_path+"snopes.csv", sep="\t")
tof = pd.read_csv(datasets_path+"tof.csv", sep="\t")
print("Initial datasets sizes, [dataset].csv files, generated by c_[dataset].py:\n")
print("Emergent: ",emergent.shape)
print("Factcheck: ",factcheck.shape)
print("Politifact: ",politifact.shape)
print("Snopes: ",snopes.shape)
print("Tof: ",tof.shape)
#print("\nChecking for null entries:")
#print(emergent.isnull().any())
#print(factcheck.isnull().any())
#print(politifact.isnull().any())
#print(snopes.isnull().any())
# Files for stance classification: generateStanceCSV.py retrieves the body of each
# link in the list and writes pred_bodies.csv and pred_stances.csv.
db = pd.read_csv(stance_path+"pred_bodies.csv", sep="\t")
df = pd.read_csv(stance_path+"pred_stances.csv", sep="\t")
print("\nAfter retrieving the body for each origin candidate, generateStanceCSV.py writes them to pred_bodies.csv and pred_stances.csv: ")
print(len(df))
print(len(db))
#print("\nChecking for null entries:")
#print(df.isnull().any())
#print(db.isnull().any())
#post stance classification
stance = pd.read_csv(cwd+"/src/results/result_stance.csv", sep=',')
print("\nResults size:")
print(stance.shape)
print("\nNot null results size:")
print(stance[stance.Agree.notnull()].shape)
print(stance['Body ID'].unique().shape)
print("\nUnique results size:")
print(stance['Headline'].unique().shape)
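# Note (interpretation): comparing the unique Body ID and headline counts against the
# total result size gives a rough idea of how much duplication remains in the
# stance-classification output.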
# Final documents: the [dataset]_content.csv files.
e = pd.read_csv(datasets_path+"emergent_content.csv", sep="\t")
f = pd.read_csv(datasets_path+"factcheck_content.csv", sep="\t")
p = pd.read_csv(datasets_path+"politifact_content.csv", sep="\t")
s = pd.read_csv(datasets_path+"snopes_content.csv", sep="\t")
t = pd.read_csv(datasets_path+"tof_content.csv", sep="\t")
tofe = pd.read_csv(datasets_path+"tof_e_content.csv", sep="\t")
print("\nSize of [dataset]_content.csv: ")
print("Emergent: ",e.shape)
print("Factcheck: ",f.shape)
print("Politifact: ",p.shape)
print("Snopes: ",s.shape)
print("Tof: ",t.shape)
print("Tof Examples: ",tofe.shape)