From 14b241b65a5a68032d77193efa6d3d2ab433852c Mon Sep 17 00:00:00 2001 From: ShayanIshaq Date: Mon, 27 Mar 2017 16:49:46 +0500 Subject: [PATCH 1/2] Add files via upload --- Shayan_Ishaq_karachi_R_Assigment2.R | 142 ++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 Shayan_Ishaq_karachi_R_Assigment2.R diff --git a/Shayan_Ishaq_karachi_R_Assigment2.R b/Shayan_Ishaq_karachi_R_Assigment2.R new file mode 100644 index 0000000..d8b2dbc --- /dev/null +++ b/Shayan_Ishaq_karachi_R_Assigment2.R @@ -0,0 +1,142 @@ +library(dplyr) +library(readr) +library(tidyr) + + + +#Q1 + +Datasethosp <- read.csv("E:\\DIH\\hospitaldata.csv", stringsAsFactors = F, strip.white = T) +names(Datasethosp) <- gsub("\\.", "", names(Datasethosp)) +df <- tbl_df(Datasethosp) +names(Datasethosp) +View(df) + +df$Age <-as.numeric(df$Age) +df[is.na(df$Age),"Age"]<-0 +df$TotalCharges <-as.numeric(df$TotalCharges) +df[is.na(df$TotalCharges),"TotalCharges"]<-0 + +#Q2 + +class(df$Date) +df$Date <- as.Date(strptime(df$Date, "%a, %B %d, %Y")) +weekdays(df$Date[which(table(df$Date) == max(table(df$Date)))]) + + +#Q3 + +v_age <- as.numeric(df$Age) +v_age[which(is.na(as.numeric(as.character(v_age))))]<-0 +v_age +mean(v_age) + +#Q4 + +X <- select(df, Age) +top_counts <- filter(Datasethosp, X<= 12) + + + +#Q5 +df%>% + count(Sex, Procedure) %>% + slice(which.max(n)) + + + # Qs : 6 + x <- + df %>% + filter(ConsultingDoctor !='Nursing Staff' ,!is.na(AmountReceived)) %>% + group_by(ConsultingDoctor) %>% + summarize(tot_con_doc_wise_amt = sum(AmountReceived)) %>% + filter(tot_con_doc_wise_amt==max(tot_con_doc_wise_amt)) + + +# Qs : 7 +x <- df %>% + group_by(Procedure) %>% + summarize(tot_pro_wise_amt = sum(AmountReceived)) %>% + filter(!is.na(tot_pro_wise_amt)) %>% + filter(tot_pro_wise_amt == max(tot_pro_wise_amt)) + + +# Qs : 8 +x <- df %>% + filter(!is.na(Time), Time != '-') %>% + group_by(Time) %>% + summarize(time_wise_cnt = n()) %>% + filter(Time != '') %>% + filter(time_wise_cnt == 
max(time_wise_cnt)) + + +#Qs : 9 + +# Qs : 10 +x <- df %>% + group_by(id)%>% + summarize(pat_wise_cnt = n()) %>% + filter(pat_wise_cnt > 1) %>% + summarize(tot_rep_vis = n()) + + +# Qs : 11 +x <- df %>% + group_by(id)%>% + summarize(pat_wise_cnt = n()) %>% + filter(pat_wise_cnt > 1) %>% + arrange(desc(pat_wise_cnt)) + + +# Qs : 12 +x <- df %>% + group_by(id, Procedure)%>% + summarize(pat_wise_cnt = n()) %>% + filter(pat_wise_cnt > 1) %>% + # summarize(tot_rep_vis = n()) + arrange(id) + + +# Qs : 13 +x <- df %>% + filter(!is.na(Sex), Sex!='-', !is.na(Age), Age!='-') %>% + group_by(Sex) %>% + summarize(M_mean=mean(parse_number(Age))) + +# Qs : 14 +x <- df$AmountBalance +x <- as.numeric(parse_number(x)) +x <- as.numeric(x) +x[which(is.na(as.numeric(as.character(x))))]<-0 +x = sum(x) + +x <- df %>% + filter(!is.na(AmountBalance), AmountBalance!= '-') %>% + summarize(M_mean=sum(parse_number(AmountBalance))) + + +# Qs : 15 +x <- df %>% + filter( Procedure == 'Consultation', !is.na(AmountReceived), AmountReceived!= '-') %>% + group_by(Procedure) %>% + summarize(tot_pro_wise_amt = sum(AmountReceived)) + +# Qs : 16 +cor(df$Age,df$TotalCharges) + +# Qs : 17 +x <- df %>% + filter(!is.na(Age), Age!='-') %>% + group_by(Age) %>% + summarize(Age_wise_cnt=n()) %>% + filter(Age!='') %>% + filter(Age_wise_cnt == max(Age_wise_cnt)) + + +# Qs : 18 +x <- df %>% + filter( Procedure == 'X Ray'|Procedure == 'Scalling', !is.na(AmountReceived), AmountReceived!= '-') %>% + group_by(Procedure) %>% + summarize(proc_wise_tot = sum(AmountReceived)) + + From 6847242e0fdd48dd3f79c97c516efd025c395194 Mon Sep 17 00:00:00 2001 From: ShayanIshaq Date: Mon, 27 Mar 2017 16:58:07 +0500 Subject: [PATCH 2/2] Add files via upload --- Shayan_Ishaq_karachi_R_Assigment2.html | 338 +++++++++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 Shayan_Ishaq_karachi_R_Assigment2.html diff --git a/Shayan_Ishaq_karachi_R_Assigment2.html b/Shayan_Ishaq_karachi_R_Assigment2.html new file mode 100644 
index 0000000..38e5229 --- /dev/null +++ b/Shayan_Ishaq_karachi_R_Assigment2.html @@ -0,0 +1,338 @@ + + + + + + + + + + + + + + +Shayan_Ishaq_karachi_R_Assigment2.R + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +
library(dplyr)
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
library(readr)
+library(tidyr)
+
+
+
+#Q1
+# Load the hospital visits CSV, strip the dots read.csv() leaves in the
+# column names, and coerce Age / TotalCharges to numeric.
+
+# NOTE(review): absolute, machine-specific path -- adjust before running
+# on another machine.
+Datasethosp <- read.csv("E:\\DIH\\hospitaldata.csv", stringsAsFactors = FALSE, strip.white = TRUE)
+names(Datasethosp) <- gsub("\\.", "", names(Datasethosp))
+df <- tbl_df(Datasethosp)
+names(Datasethosp)
+
##  [1] "Date"             "id"               "Time"            
+##  [4] "Age"              "Sex"              "ConsultingDoctor"
+##  [7] "Specialty"        "Procedure"        "TotalCharges"    
+## [10] "AmountReceived"   "AmountBalance"    "AmountReceivedBy"
+## [13] "AmountinHospital" "ReceptionistName" "NextApt"
+
# View() needs an interactive session; skip it when the script is run in
+# batch mode so the rest of the analysis still executes.
+if (interactive()) View(df)
+
+# Entries that are not numbers become NA (hence the coercion warning) and
+# are then overwritten with 0. Datasethosp keeps the raw character values.
+df$Age <- as.numeric(df$Age)
+
## Warning: NAs introduced by coercion
+
df[is.na(df$Age), "Age"] <- 0
+df$TotalCharges <- as.numeric(df$TotalCharges)
+
## Warning: NAs introduced by coercion
+
df[is.na(df$TotalCharges), "TotalCharges"] <- 0
+
+#Q2
+# Parse the visit date and report the weekday of the calendar date(s)
+# with the most visits.
+
+class(df$Date)
+
## [1] "character"
+
df$Date <- as.Date(strptime(df$Date, "%a, %B %d, %Y"))
+# table() has one entry per unique date, so its positions are not row
+# numbers of df. Recover the busiest date(s) from the table's own names
+# rather than indexing df$Date with those positions.
+visits_per_date <- table(df$Date)
+busiest_dates <- as.Date(names(visits_per_date)[visits_per_date == max(visits_per_date)])
+weekdays(busiest_dates)
+
## [1] "Monday"
+
#Q3
+# Average patient age.
+# df$Age had its missing values overwritten with 0 in Q1, so averaging it
+# directly pulls the mean down. The raw column in Datasethosp (same row
+# order as df) is used to tell which ages were actually recorded.
+
+v_age <- as.numeric(df$Age)
+v_age[is.na(v_age)] <- 0
+v_age
+
##   [1] 40 26 30 40 27 40 43 28  2 40 32 28 76 75 36 42 23 48 25 50 60  0  0
+##  [24] 26 27  0 57  0 39  6 26 60 26  5 40  9  0 30 40 29 39 50 50 50 60 29
+##  [47] 48 45 42 29 26 34 50 45 30 40 40 29 27 23 60 52 21 32 58  0  0  0 26
+##  [70] 40 29 33 30 28 10 26 19 53 30 10  0 55 26 30  0  0  0  0  0  9  0 28
+##  [93] 47 49 19 31  7 26  8 28 17 54 35 45 30 30 23 27  6  2 30 32 25 80 70
+## [116] 20 23 60 50 30 13 30 13 13 39 28 28 17 27 57  6 18 25 50  0 19 39  6
+## [139] 40 39 20  0 14  0  0 24  3 23 29 55 30  0 30  3  0 13 10 30 45  3 38
+## [162] 20  2  3  0 23 26 35 22 65 45 21 30 64 40 52 55 54 30 53  0 38 30 17
+## [185]  3 17  0  0 30 17 26  0 45 17 22 30 17 38 17 34 28 78 56 53 21 17  0
+## [208] 78  3 76  9 32  0  0 24  3 76  0 39 30 76 45
+
# Only the coercion warning already seen in Q1 is silenced here.
+age_recorded <- !is.na(suppressWarnings(as.numeric(Datasethosp$Age)))
+mean(v_age[age_recorded])
+
## [1] 28.31081
+
#Q4
+# Visits by patients aged 12 or younger.
+# The filter condition must be a plain logical vector (not a comparison on
+# a one-column tibble), and rows whose age was never recorded must not pass
+# as "<= 12" just because Q1 turned them into 0. The raw Age column of
+# Datasethosp is therefore parsed here and used as the condition.
+
+X <- select(df, Age)
+age_num <- suppressWarnings(as.numeric(Datasethosp$Age))
+top_counts <- filter(Datasethosp, !is.na(age_num), age_num <= 12)
+# Number of matching visits.
+nrow(top_counts)
+
+
+
+#Q5
+# Tally visits per Sex/Procedure pair and keep the row with the largest
+# tally. In the rendered run count() returned a result grouped by Sex, so
+# one row per Sex value was printed.
+slice(count(df, Sex, Procedure), which.max(n))
+
## Source: local data frame [5 x 3]
+## Groups: Sex [5]
+## 
+##     Sex    Procedure     n
+##   <chr>        <chr> <int>
+## 1           Pharmacy    10
+## 2     - Consultation     1
+## 3     f Consultation     1
+## 4     F Consultation    45
+## 5     M Consultation    37
+
  # Qs : 6
+# Consulting doctor (rows for 'Nursing Staff' excluded) with the highest
+# total AmountReceived; rows with a missing amount are dropped first.
+x <- df %>%
+  filter(!is.na(AmountReceived) & ConsultingDoctor != 'Nursing Staff') %>%
+  group_by(ConsultingDoctor) %>%
+  summarise(tot_con_doc_wise_amt = sum(AmountReceived)) %>%
+  filter(tot_con_doc_wise_amt == max(tot_con_doc_wise_amt))
+
+
+# Qs : 7
+# Procedure with the highest total AmountReceived.
+# Rows with a missing amount are dropped before summing (the same rule
+# Qs 6 uses). Summing first and discarding NA totals would remove every
+# procedure that has even one missing payment from the ranking.
+x <- df %>%
+  filter(!is.na(AmountReceived)) %>%
+  group_by(Procedure) %>%
+  summarize(tot_pro_wise_amt = sum(AmountReceived)) %>%
+  filter(tot_pro_wise_amt == max(tot_pro_wise_amt))
+
+
+# Qs : 8
+# Time value(s) that occur on the most rows, ignoring rows whose Time is
+# NA, '-' or empty.
+x <- df %>%
+  filter(!is.na(Time) & Time != '-' & Time != '') %>%
+  group_by(Time) %>%
+  summarise(time_wise_cnt = n()) %>%
+  filter(time_wise_cnt == max(time_wise_cnt))
+
+
+#Qs : 9
+
+# Qs : 10
+# Number of distinct patient ids that appear on more than one row of df.
+x <- df %>%
+  group_by(id) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2L) %>%
+  summarise(tot_rep_vis = n())
+
+
+# Qs : 11
+# Patient ids with more than one row in df, most frequent first.
+x <- df %>%
+  group_by(id) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2L) %>%
+  arrange(desc(pat_wise_cnt))
+
+
+# Qs : 12
+# id/Procedure pairs that occur more than once, ordered by id.
+x <- df %>%
+  group_by(id, Procedure) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2L) %>%
+  arrange(id)
+
+
+# Qs : 13
+# Mean age per sex.
+# - Sex is upper-cased so that "f" and "F" form one group, and blank or
+#   '-' entries are dropped.
+# - Age is already numeric at this point (converted in Q1), so it is
+#   averaged directly instead of being filtered and parsed as a string.
+# - Ages that were missing in the raw file (stored as 0 since Q1) are
+#   excluded using the untouched Datasethosp column, which has the same
+#   row order as df.
+x <- df %>%
+  mutate(
+    Sex = toupper(Sex),
+    age_recorded = !is.na(suppressWarnings(as.numeric(Datasethosp$Age)))
+  ) %>%
+  filter(!is.na(Sex), !Sex %in% c('', '-'), age_recorded) %>%
+  group_by(Sex) %>%
+  summarize(M_mean = mean(Age))
+
+# Qs : 14
+# Total of AmountBalance, computed two ways. The first result is a plain
+# number and is immediately overwritten by the second, a one-row summary.
+# '-' entries do not parse as numbers (see the warning below); the first
+# variant counts them as 0, the second filters them out beforehand.
+x <- as.numeric(parse_number(df$AmountBalance))
+
## Warning: 211 parsing failures.
+## row col expected actual
+##   1  -- a number      -
+##   2  -- a number      -
+##   3  -- a number      -
+##   4  -- a number      -
+##   5  -- a number      -
+## ... ... ........ ......
+## See problems(...) for more details.
+
x[is.na(x)] <- 0
+x <- sum(x)
+
+x <- df %>%
+  filter(!is.na(AmountBalance) & AmountBalance != '-') %>%
+  summarise(M_mean = sum(parse_number(AmountBalance)))
+
+
+# Qs : 15
+# Total AmountReceived over rows whose Procedure is 'Consultation'.
+x <- df %>%
+  filter(Procedure == 'Consultation' & !is.na(AmountReceived) & AmountReceived != '-') %>%
+  group_by(Procedure) %>%
+  summarise(tot_pro_wise_amt = sum(AmountReceived))
+
+# Qs : 16
+# Correlation between age and total charges.
+# df$Age and df$TotalCharges had their missing values replaced by 0 in Q1,
+# which distorts the coefficient. The raw columns are re-parsed here and
+# only rows where both values are present are used.
+cor(
+  suppressWarnings(as.numeric(Datasethosp$Age)),
+  suppressWarnings(as.numeric(Datasethosp$TotalCharges)),
+  use = "complete.obs"
+)
+
## [1] 0.07017853
+
# Qs : 17
+# Most frequent patient age.
+# Missing ages were stored as 0 in Q1, and 0 is the most common value in
+# the Age column, so counting df$Age as-is reports the placeholder rather
+# than a real age. The string tests against '-' and '' cannot match a
+# numeric column; rows are instead kept only when the raw Datasethosp
+# value (same row order as df) parses as a number.
+x <- df %>%
+  filter(!is.na(suppressWarnings(as.numeric(Datasethosp$Age)))) %>%
+  group_by(Age) %>%
+  summarize(Age_wise_cnt = n()) %>%
+  filter(Age_wise_cnt == max(Age_wise_cnt))
+
+
+# Qs : 18
+# Total AmountReceived for the 'X Ray' and 'Scalling' procedures, one row
+# per procedure.
+x <- df %>%
+  filter(Procedure %in% c('X Ray', 'Scalling') & !is.na(AmountReceived) & AmountReceived != '-') %>%
+  group_by(Procedure) %>%
+  summarise(proc_wise_tot = sum(AmountReceived))
+ + + + +
+ + + + + + + +