-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathModel - Voice.R
81 lines (58 loc) · 4.38 KB
/
Model - Voice.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
######################################################################
# Project: DSAT Prediction #
# Author : Ramya Ananth #
# Code : model for the voice data #
######################################################################
#Headers#
##Comments
#####Code for model on the voice data#####
#Set the library where the input data is available. The code exports the final data to the same location#
setwd("C:\\Users\\ramya.ananth\\workspace\\text_mining\\DemoModelScoring")
#Start:Importing the Data#
EntireData<-as.data.frame(read.csv("Analytical_data_version2B.csv",header=T,na.strings=c("" , "NA" , "-"),stringsAsFactors = TRUE))
#End:Importing the Data#
#Start:Subsetting the Data#
##The data is subsetted to the records where the current model is applicable to.
Voice<-EntireData[which(EntireData$C_RNT_TotalTouchPointChat_D_INT_C ==0 & EntireData$C_RNT_TotalTouchPointEmail_D_INT_C ==0 ),]
#End:Subsetting the Data#
#Start: Data Preparation#
##A new dataframe is created which has only the variables required by the model.
##It helps create a concise output which could be used to study the model or do QC.
##Creating a table. Adding Contact ID, Reference ID and relevant variables for the model
Voice2 <- as.data.frame(Voice$B_RNT_ContactId_A_Category_N)
Voice2$B_RNT_ContactId_A_Category_N <- Voice$`Voice$B_RNT_ContactId_A_Category_N`
Voice2$`Voice$B_RNT_ContactId_A_Category_N` <- NULL
Voice2$B_RNT_ContactId_A_Category_N <- Voice$B_RNT_ContactId_A_Category_N
Voice2$B_RNT_ReferenceId_A_Category_N <- Voice$B_RNT_ReferenceId_A_Category_N
Voice2$B_TXT_CurrentEscalationFlag_A_Category_O <- as.factor(Voice$B_TXT_CurrentEscalationFlag_A_Category_O)
##Bucketing and creating dummies
Voice2$B_ACOUS_MaxHoldTime_A_Sec_C_High <- as.factor(ifelse(Voice$B_ACOUS_MaxHoldTime_A_Sec_C>=350,"1","0"))
Voice2$B_ACOUS_MaxHoldTime_A_Sec_C_Medium <- as.factor(ifelse(Voice$B_ACOUS_MaxHoldTime_A_Sec_C>=150 & Voice$B_ACOUS_MaxHoldTime_A_Sec_C < 350,"1","0"))
Voice2$B_RNT_Process_D_Category_N_refund.return.repair <- as.factor(ifelse(toupper(Voice$B_RNT_Process_D_Category_N) %in% c("REFUND/RETURN" ,"REPAIR"),"1","0"))
Voice2$B_RNT_Parts_D_Category_N_ScreenNApp <- as.factor(ifelse(toupper(Voice$B_RNT_Parts_D_Category_N) %in% c("SCREEN ISSUE" ,"APPLICATION & ACCOUNT ISSUE"),"1","0"))
Voice2$B_TXT_PctPositiveSentence_D_INT_C_High <- as.factor(ifelse(Voice$B_TXT_PctPositiveSentence_D_INT_C>= 0.05, "1","0"))
Voice2$C_TXT_PctNegativeSentenceCategory_D_O_M <- as.factor(ifelse(Voice$C_TXT_PctNegativeSentenceCategory_D_O == "M",1,0))
Voice2$C_TXT_PctNegativeSentenceCategory_D_O_L <- as.factor(ifelse(Voice$C_TXT_PctNegativeSentenceCategory_D_O == "L",1,0))
##Creating flag for topic variables and imputing for missing on the same
Voice2$B_TXT_VoiceTopicFour_Flag <- as.factor(ifelse(Voice$B_TXT_VoiceTopicFour_A_Count_C>0,1,0))
Voice2$B_TXT_VoiceTopicFour_Flag[is.na(Voice2$B_TXT_VoiceTopicFour_Flag) ]<- 0
Voice2$B_TXT_AgentTopicEight_flag <- as.factor(ifelse(Voice$B_TXT_AgentTopicEight_A_Count_C>0,1,0))
Voice2$B_TXT_AgentTopicEight_flag[is.na(Voice2$B_TXT_AgentTopicEight_flag)] <- 0
Voice2$B_TXT_VoiceTopicEight_Flag <- as.factor(ifelse(Voice$B_TXT_VoiceTopicEight_A_Count_C>0,1,0))
Voice2$B_TXT_VoiceTopicEight_Flag[is.na(Voice2$B_TXT_VoiceTopicEight_Flag) ]<- 0
##Creating a combination variable of Escalation Flag and Voice Topic 4, using a logical AND
Voice2$EscalationFlagWithTopic4 <- as.factor(as.numeric(Voice$B_TXT_CurrentEscalationFlag_A_Category_O)==1 & as.numeric(Voice2$B_TXT_VoiceTopicFour_Flag == 1))
#Start: Reading Model Object#
##Change location of the model object here
M_New <- readRDS("C:\\Users\\ramya.ananth\\workspace\\text_mining\\ModelObjects\\VoiceV2.rds")
#End: Reading Model Object#
#Start: Prediction/ Scoring of the dataset using the model#
##The probability cut-offs can be changed here.
Voice2$Predicted <- as.factor(ifelse(predict(M_New,Voice2,type = 'response') >0.3, "Yes" ,"No"))
#End: Prediction/ Scoring of the dataset using the model#
#End: Prediction/ Scoring of the dataset using the model#
#Start: Exporting the final dataset#
##Full address can be given, if you want a different location.
write.csv(Voice2,"VoiceScored.csv" , row.names = FALSE)
#End: Exporting the final dataset#
#####End of Code#####