diff --git a/R_Assignment.R b/R_Assignment.R new file mode 100644 index 0000000..61b43d9 --- /dev/null +++ b/R_Assignment.R @@ -0,0 +1,153 @@ +library(readr) +hpddata <- read_csv("E:/DIH/R Assignment/Assignment - 22-03-2017/hospitaldata.csv") +# for rename column first should be install deplyr package +library("dplyr") +library(tidyr) +#Please remove the dots in the names, so it may become easier for you to work through it. +#Q.1 +names(hpddata) <- gsub("\\.", "", names(hpddata)) +View(hpddata) +# Data Cleaning +hpddata<- hpddata%>% + mutate(Age=ifelse(Age=="-", NA,Age)) + +hpddata<-hpddata%>% + mutate(Age = ifelse(grepl("M", Age), parse_number(Age)/12, ifelse(grepl("-", Age), "",Age))) + +hpddata<- hpddata%>% + mutate(Sex=replace(Sex,Sex=='f','F')) +hpddata<- hpddata%>% + mutate(Sex=replace(Sex,Sex=="-", NA)) +hpddata<- hpddata%>% + mutate(AmountBalance=replace(AmountBalance,AmountBalance=="-", NA)) +hpddata<-mutate(hpddata, Time=format(strptime(hpddata$Time, "%I:%M %p"), format="%H:%M")) +#Q.2 + +Maximum_days<- + hpddata%>% + separate(Date,c("Day"))%>% + group_by(Day)%>% + summarize(count_no =n())%>% + filter(count_no==max(count_no)) + +View(Maximum_days) + + #Q.3 + + Avg_age_Patients<-hpddata%>% + filter(!is.na(Age))%>% + summarize(cal_avg = mean(parse_number(Age))) + View(Avg_age_Patients) + + # Q.4 + + children_visits<- hpddata%>% + filter(parse_number(Age)>0 , parse_number(Age)<=12)%>% + summarize(total_children= n()) + View(children_visits) + + # Q.5 + + gender_type<-hpddata%>% + filter(!is.na(Sex))%>% + group_by(Sex,Procedure)%>% + summarize(total_gender= n())%>% + filter(total_gender==max(total_gender)) + View(gender_type) + + # Q.6 + + high_paid_con_Var<- hpddata%>% + filter(ConsultingDoctor!='Nursing Staff')%>% + filter(!is.na(AmountReceived))%>% + group_by(ConsultingDoctor)%>% + summarize(high_paid_con = sum(AmountReceived))%>% + filter(high_paid_con == max(high_paid_con)) + View(high_paid_con_Var) + + # Q.7 + + high_paid_prc_type_Var <-hpddata%>% + filter(!is.na(AmountReceived))%>% + group_by(Procedure)%>% + summarize(high_paid_prc_type = sum(AmountReceived))%>% + filter(high_paid_prc_type==max(high_paid_prc_type)) + + View(high_paid_prc_type_Var) + + + #Q.8 + + max_freq_time<-hpddata%>% + filter(!is.na(Time),Time!='-')%>% + group_by(Time)%>% + summarize(Total_row_count=n())%>% + filter(Total_row_count==max(Total_row_count)) + View(max_freq_time) + + #Q.9 + slot_hour<-hpddata%>% + filter(!is.na(Time),Time!='-')%>% + separate(Time,c("slot_hour"))%>% + mutate(time_bracket = ifelse(parse_number(slot_hour)>=6&parse_number(slot_hour)<12,'Morning', + ifelse(parse_number(slot_hour)>=12&parse_number(slot_hour)<16,'Afternoon', + ifelse(parse_number(slot_hour)>=16&parse_number(slot_hour)<20,'Evening', + ifelse(parse_number(slot_hour)>=20&parse_number(slot_hour)<=24,'Night','Night'))))) + View(slot_hour) + + #Q.10 + + repeated_patients<-hpddata%>% + group_by(id)%>% + summarize(repeated_count =n())%>% + filter(repeated_count>1)%>% + mutate(total_repeated=1)%>% + group_by(total_repeated)%>% + summarize(sum(total_repeated)) + View(repeated_patients) + #Q.11 + repeated_patients<-hpddata%>% + group_by(id)%>% + summarize(repeated_count =n())%>% + filter(repeated_count>1)%>% + select(id) + #Q.12 + repeated_patients<-hpddata%>% + group_by(id,Procedure)%>% + summarize(repeated_count =n())%>% + filter(repeated_count>1)%>% + group_by(id,Procedure) + View(repeated_patients) + #Q.13 + gender_median_age <-hpddata%>% + filter(!is.na(Age),Age!='-')%>% + group_by(Sex)%>% + mutate(Age=parse_number(Age))%>% + summarize(median_age=median(parse_number(Age))) + View(gender_median_age) + + + # Q.14 + total_amount_balance<-hpddata%>% + filter(!is.na(AmountBalance))%>% + summarize(Total_balance_amount=sum(parse_number(AmountBalance))) + View(total_amount_balance) + #Q.15 + total_cost_Consultation<-hpddata%>% + filter(Procedure=="Consultation",!is.na(AmountReceived))%>% + summarize(Total_cost=sum(parse_number(AmountReceived))) + View(total_cost_Consultation) + #Q.17 + age_group_data<-hpddata%>% + filter(!is.na(Age))%>% + mutate(Age_group = ifelse(parse_number(Age)<=12,'Children','Elder') )%>% + group_by(Age_group)%>% + summarize(no_visits = n())%>% + filter(no_visits==max(no_visits)) + View(age_group_data) + #Q.18 + total_xray_scalling <-hpddata%>% + filter(Procedure=='X Ray'|Procedure=='Scalling')%>% + summarize(Total_amount= sum(AmountReceived)) + View(total_xray_scalling) + \ No newline at end of file diff --git a/R_Assignment.html b/R_Assignment.html new file mode 100644 index 0000000..4912425 --- /dev/null +++ b/R_Assignment.html @@ -0,0 +1,337 @@ + + + + + + + + + + + + + + +R_Assignment.R + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +
library(readr)
+hpddata <- read_csv("E:/DIH/R Assignment/Assignment - 22-03-2017/hospitaldata.csv")
+
## Parsed with column specification:
+## cols(
+##   Date = col_character(),
+##   id = col_integer(),
+##   Time = col_character(),
+##   Age = col_character(),
+##   Sex = col_character(),
+##   Consulting..Doctor = col_character(),
+##   Specialty = col_character(),
+##   Procedure = col_character(),
+##   Total..Charges = col_character(),
+##   Amount..Received. = col_integer(),
+##   Amount..Balance = col_character(),
+##   Amount.Received.By = col_character(),
+##   Amount.in.Hospital = col_integer(),
+##   Receptionist..Name = col_character(),
+##   Next.Apt = col_character()
+## )
+
# for rename column first should be install deplyr package
+library("dplyr")
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
library(tidyr)
+#Please remove the dots in the names, so it may become easier for you to work through it.
+#Q.1
+names(hpddata) <- gsub("\\.", "", names(hpddata))
+View(hpddata)
+# Data Cleaning
+hpddata<-  hpddata%>%
+  mutate(Age=ifelse(Age=="-", NA,Age))
+
+hpddata<-hpddata%>%
+  mutate(Age = ifelse(grepl("M", Age), parse_number(Age)/12, ifelse(grepl("-", Age), "",Age)))
+
+hpddata<-  hpddata%>%
+  mutate(Sex=replace(Sex,Sex=='f','F'))
+hpddata<-  hpddata%>%
+  mutate(Sex=replace(Sex,Sex=="-", NA))
+hpddata<-  hpddata%>%
+  mutate(AmountBalance=replace(AmountBalance,AmountBalance=="-", NA))
+hpddata<-mutate(hpddata, Time=format(strptime(hpddata$Time, "%I:%M %p"), format="%H:%M"))
+#Q.2
+
+Maximum_days<-
+  hpddata%>%
+  separate(Date,c("Day"))%>%
+  group_by(Day)%>%
+  summarize(count_no =n())%>%
+  filter(count_no==max(count_no))
+
## Warning: Too many values at 222 locations: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+## 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...
+
View(Maximum_days)
+
+  #Q.3
+  
+  Avg_age_Patients<-hpddata%>%
+    filter(!is.na(Age))%>% 
+    summarize(cal_avg = mean(parse_number(Age)))
+  View(Avg_age_Patients)
+  
+  # Q.4
+  
+   children_visits<- hpddata%>%
+  filter(parse_number(Age)>0 , parse_number(Age)<=12)%>%
+    summarize(total_children= n())
+   View(children_visits)
+  
+  # Q.5
+  
+  gender_type<-hpddata%>%
+    filter(!is.na(Sex))%>%
+    group_by(Sex,Procedure)%>%
+    summarize(total_gender= n())%>%
+    filter(total_gender==max(total_gender))
+  View(gender_type)
+  
+  # Q.6
+  
+  high_paid_con_Var<- hpddata%>%
+    filter(ConsultingDoctor!='Nursing Staff')%>%
+    filter(!is.na(AmountReceived))%>%
+    group_by(ConsultingDoctor)%>%
+    summarize(high_paid_con = sum(AmountReceived))%>%
+    filter(high_paid_con == max(high_paid_con))
+  View(high_paid_con_Var)
+  
+  # Q.7
+  
+  high_paid_prc_type_Var <-hpddata%>%
+    filter(!is.na(AmountReceived))%>%
+    group_by(Procedure)%>%
+    summarize(high_paid_prc_type = sum(AmountReceived))%>%
+    filter(high_paid_prc_type==max(high_paid_prc_type))
+  
+    View(high_paid_prc_type_Var)
+
+    
+  #Q.8
+    
+    max_freq_time<-hpddata%>%
+      filter(!is.na(Time),Time!='-')%>%
+      group_by(Time)%>%
+      summarize(Total_row_count=n())%>%
+      filter(Total_row_count==max(Total_row_count))
+    View(max_freq_time)
+    
+    #Q.9
+  slot_hour<-hpddata%>%
+    filter(!is.na(Time),Time!='-')%>%
+    separate(Time,c("slot_hour"))%>%
+    mutate(time_bracket = ifelse(parse_number(slot_hour)>=6&parse_number(slot_hour)<12,'Morning',
+           ifelse(parse_number(slot_hour)>=12&parse_number(slot_hour)<16,'Afternoon',
+                  ifelse(parse_number(slot_hour)>=16&parse_number(slot_hour)<20,'Evening',
+                         ifelse(parse_number(slot_hour)>=20&parse_number(slot_hour)<=24,'Night','Night')))))
+
## Warning: Too many values at 193 locations: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+## 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...
+
  View(slot_hour)  
+
+   #Q.10
+  
+  repeated_patients<-hpddata%>%
+    group_by(id)%>%
+    summarize(repeated_count =n())%>%
+    filter(repeated_count>1)%>%
+    mutate(total_repeated=1)%>%
+    group_by(total_repeated)%>% 
+    summarize(sum(total_repeated))
+  View(repeated_patients)
+  #Q.11
+  repeated_patients<-hpddata%>%
+    group_by(id)%>%
+    summarize(repeated_count =n())%>%
+    filter(repeated_count>1)%>%
+    select(id)
+  #Q.12
+  repeated_patients<-hpddata%>%
+    group_by(id,Procedure)%>%
+    summarize(repeated_count =n())%>%
+    filter(repeated_count>1)%>%
+    group_by(id,Procedure)
+  View(repeated_patients)
+  #Q.13
+  gender_median_age <-hpddata%>%
+    filter(!is.na(Age),Age!='-')%>%
+    group_by(Sex)%>%
+    mutate(Age=parse_number(Age))%>%
+    summarize(median_age=median(parse_number(Age)))
+  View(gender_median_age)
+  
+      
+  #    Q.14
+  total_amount_balance<-hpddata%>%
+    filter(!is.na(AmountBalance))%>%
+    summarize(Total_balance_amount=sum(parse_number(AmountBalance)))
+  View(total_amount_balance)
+  #Q.15
+  total_cost_Consultation<-hpddata%>%
+    filter(Procedure=="Consultation",!is.na(AmountReceived))%>%
+    summarize(Total_cost=sum(parse_number(AmountReceived)))      
+  View(total_cost_Consultation)
+  #Q.17
+  age_group_data<-hpddata%>%
+    filter(!is.na(Age))%>%
+    mutate(Age_group = ifelse(parse_number(Age)<=12,'Children','Elder') )%>%
+    group_by(Age_group)%>%
+    summarize(no_visits = n())%>%
+    filter(no_visits==max(no_visits))
+  View(age_group_data)
+  #Q.18
+  total_xray_scalling <-hpddata%>%
+    filter(Procedure=='X Ray'|Procedure=='Scalling')%>%
+    summarize(Total_amount= sum(AmountReceived))
+  View(total_xray_scalling)
+ + + + +
+ + + + + + + +