-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathComparison.py
119 lines (97 loc) · 3.91 KB
/
Comparison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import csv
import pandas as pd
def createDisconnectedOutputFile(file_name2):
df = pd.read_csv(file_name2)
df.drop(columns=['CallCompletedTimeStamp','ExactBillingDurationInSeconds',
'RoundedBillingDurationInMinutes']
, axis=1, inplace=True)
for index,row in df.iterrows():
if row[1] == "No_Answer":
df = df.drop([index], axis=0)
elif row[1] == "Answering_Machine":
df = df.drop([index], axis=0)
elif row[1] == "Disconnected5408":
df = df.drop([index], axis=0)
else:
None
df.drop(columns=['Response'], axis=1, inplace=True)
df.drop_duplicates(keep= 'first', ignore_index=True, inplace = True)
df.to_csv("DISCONNECTED_PHONE_NUMBERS.csv", index = False)
def csv_toDictionary(file_name):
#create empty dictionary for storing
dictA = {}
with open(file_name) as inputfile:
reader = csv.reader(inputfile)
#skip Header
next(reader)
for row in reader:
#Assign the phoneNumber to the key and HouseHoldID as value pair (phoneNumber:HouseHoldID)
dictA[row[1]] = row[0]
return dictA
def createSuccessfulVoicemailFile(file_name2, dictA, filename):
if filename.endswith(".csv"):
None
else:
filename = filename + ".csv"
#delete unnecessary columns
df = pd.read_csv(file_name2)
df.drop(
columns = ['ExactBillingDurationInSeconds','RoundedBillingDurationInMinutes'], axis=1, inplace=True)
#delete rows with NaN
df = df.dropna()
#delete No_Answer records and Diconnected Records
for index,row in df.iterrows():
if row[2] == "No_Answer":
df = df.drop([index], axis=0)
elif row[2] == "Disconnected5404":
df = df.drop([index], axis=0)
elif row[2] == "Disconnected5408":
df = df.drop([index], axis=0)
else:
None
df.drop(
columns = ['Response'], axis=1, inplace=True)
#Reformat timeStamp column
df['CallCompletedTimeStamp'] = pd.to_datetime(df['CallCompletedTimeStamp'])
#Sort by Most Recent Timestamp
df = df.sort_values(by='CallCompletedTimeStamp' , axis = 0, ascending= False, ignore_index= True)
#keep most recent timeStamp ,delete the non recent timestamps
df = df.drop_duplicates(subset = ['PhoneNumberDialed'], keep='first')
#find timestamp for phonenumbers with prefix
for index, row in df.iterrows():
#check if the number is in the list
phonenumber = str(row[1])[1:]
dialedList = df['PhoneNumberDialed'].tolist()
dialedMap = map(str, dialedList)
if phonenumber in dialedMap:
#find index of matching phonenumber
for i, r in df.iterrows():
if str(r[1]) == phonenumber:
#update master record with max timestamp
df.iloc[i,0] = max(df.iloc[i,0],df.iloc[index,0])
#drop nonmaster record
df = df.drop(index, axis=0)
#delete records that have the same number with 1 in front
#for index, row in df.iterrows():
# #check if the number is in the list
# phonenumber = str(row[1])[1:]
# dialedList = df['PhoneNumberDialed'].tolist()
# dialedMap = map(str, dialedList)
# if phonenumber in dialedMap:
# df = df.drop(index, axis=0)
#get the corresponding householdID from dictionary
householdID = []
for index, row in df.iterrows():
keyexist = dictA.get(str(row[1]), False)
if keyexist:
#add to householdID list
householdID.append(dictA.get(str(row[1])))
else:
#else not a key
None
#append householdID list to csv
df.insert(0, 'HouseholdID', householdID)
#delete PhoneNumberDialed and Response columns
df = df.drop(['PhoneNumberDialed'], axis = 1)
#convert into a csv
df.to_csv(filename, index=False)