hw5 submission

pradeep17j · Jun 8, 2019 · b9ea7ae · b9ea7ae
1 parent 4775906
commit b9ea7ae
Show file tree

Hide file tree

Showing 8 changed files with 66,913 additions and 0 deletions.
diff --git a/HW5/HW5.pdf b/HW5/HW5.pdf
diff --git a/HW5/most_popular_girls.txt b/HW5/most_popular_girls.txt
@@ -0,0 +1,11 @@
+"","Name","Sex","Count.x","Count.y","Total"
+"9980","Harper","F",10733,10283,21016
+"8273","Emily","F",10926,11766,22692
+"277","Abigail","F",11699,12371,24070
+"5493","Charlotte","F",13030,11381,24411
+"18247","Mia","F",14366,14871,29237
+"10682","Isabella","F",14722,15574,30296
+"3252","Ava","F",16237,16340,32577
+"23273","Sophia","F",16070,17381,33451
+"19886","Olivia","F",19246,19638,38884
+"8290","Emma","F",19414,20415,39829
diff --git a/HW5/only_girls.txt b/HW5/only_girls.txt
@@ -0,0 +1,7 @@
+"","Name","Sex","Count.x","Count.y","Total"
+"18247","Mia","F",14366,14871,29237
+"10682","Isabella","F",14722,15574,30296
+"3252","Ava","F",16237,16340,32577
+"23273","Sophia","F",16070,17381,33451
+"19886","Olivia","F",19246,19638,38884
+"8290","Emma","F",19414,20415,39829
diff --git a/HW5/pradeep_hw5.R b/HW5/pradeep_hw5.R
@@ -0,0 +1,56 @@
+# Load the 2016 name counts (semicolon-delimited) and label the three columns.
+df <- read.table('yob2016.txt', sep=";")
+colnames(df) <- c('Name','Sex','Count')
+summary(df)
+# str() prints a compact per-column overview (dimensions, types, first values).
+str(df)
+# Locate the row(s) whose Name ends in "yyy" and build y2016 without them.
+# Selecting the complement with setdiff() keeps every row when nothing matches;
+# a negative index built from an empty match vector would select zero rows.
+row_num <- grep('yyy$', df$Name)
+y2016 <- df[setdiff(seq_len(nrow(df)), row_num),]
+
+# Load the 2015 counts (comma-delimited) and label the three columns.
+y2015 <- setNames(read.table('yob2015.txt', sep=","), c('Name','Sex','Count'))
+tail(y2015, n=10)
+
+# Observation: each of the last 10 names listed for 2015
+# was given to exactly 5 children.
+
+
+# Join the two years on Name and Sex. merge() keeps only pairs present in both
+# inputs; the shared Count column comes back as Count.x (2016) and Count.y (2015).
+final <- merge(y2016, y2015, by=c('Name','Sex'))
+
+
+# Remove any rows with NA.
+# complete.cases() works row-wise, and the filtered frame is assigned back to
+# `final` so the removal actually takes effect.
+na_count <- sum(is.na(final))
+if (na_count > 0){
+  final <- final[complete.cases(final),]
+}
+
+# Combined count over both years for each Name/Sex pair, appended as `Total`.
+Total <- with(final, Count.x + Count.y)
+
+final <- cbind(final, Total = Total)
+
+# Ascending sort on Total, so the largest totals land in the last rows.
+final <- final[order(final[['Total']]),]
+
+tail(final)
+
+# Keep the female names; because of the sort, the last 10 rows hold the
+# 10 highest totals.
+girls <- final[final[['Sex']] == 'F',]
+
+most_popular <- tail(girls, n=10)
+
+write.csv(most_popular, file='most_popular_girls.txt')
+# Build new_df with one row per (Name, Sex) pair found in `final`.
+# NOTE(review): new_df is not used again in the visible script -- confirm it is still needed.
+new_df <- data.frame()
+for (name in unique(final$Name)){
+  tmp <- final[final$Name == name,]  # all rows of `final` carrying this name
+  for (sex in unique(tmp$Sex)){
+    # print(sex)  (debug output, disabled)
+    tmp1 <- tmp[tmp$Sex == sex,]  # rows for this name/sex pair
+    count_sum <- sum(tmp1$Count)  # NOTE(review): `final` has Count.x and Count.y but no Count column (see merge above); `$Count` looks like an ambiguous partial match, which would give NULL and a sum of 0 -- confirm the intended column
+    tmp_df <- tmp1[1,]  # first row of the pair serves as the output row
+    tmp_df[,3] <- count_sum  # overwrite the third column with the sum
+    new_df <- rbind(new_df, tmp_df)  # appends one row per pair; new_df grows on every iteration
+    # print(new_df)  (debug output, disabled)
+    # Sys.sleep(3)  (debug pause, disabled)
+
+  }
+}
diff --git a/HW5/pradeep_hw5.Rmd b/HW5/pradeep_hw5.Rmd
@@ -0,0 +1,114 @@
+---
+title: "pradeep_hw5"
+author: "pradeepkumar"
+date: "6/8/2019"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)  # default for all chunks: show the chunk's source code in the knitted output
+```
+
+## R Markdown
+
+This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
+
+When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
+
+# Question 1
+```{r ,echo=TRUE}
+
+library('knitr')  # provides kable(), used in later chunks to render tables
+
+```
+### Part a
+### Reading the file into data frame and assign col names
+```{r ,echo=TRUE}
+# Read the semicolon-delimited 2016 file and label its three columns.
+df <- setNames(read.table('yob2016.txt', sep=";"), c('Name','Sex','Count'))
+```
+### Part b
+### Print Summary and Dimension 
+```{r ,echo=TRUE}
+summary(df)  # per-column summary of the 2016 data
+dim(df)  # number of rows and columns
+```
+### Part c
+### Find the row with name ending with yyy
+```{r ,echo=TRUE}
+row_num <- grep('yyy$', df$Name)  # indices of rows whose Name ends in "yyy"
+print(df[row_num,])  # show the matching row(s)
+```
+### Part d
+### Remove the row with name ending with yyy
+```{r ,echo=TRUE}
+# Keep every row except those listed in row_num. Selecting the complement with
+# setdiff() keeps all rows when row_num is empty; a negative index built from
+# an empty vector would select zero rows.
+y2016 <- df[setdiff(seq_len(nrow(df)), row_num),]
+```
+
+# Question 2
+### Part a
+### Read the 2015 data
+```{r ,echo=TRUE}
+# Read the comma-delimited 2015 file and label its three columns.
+y2015 <- setNames(read.table('yob2015.txt', sep=","), c('Name','Sex','Count'))
+```
+
+### Part b
+#### It is surprising to see that each of these last 10 names
+#### was given to exactly 5 kids in Year 2015
+
+```{r ,echo=TRUE}
+kable(tail(y2015,10))  # last 10 rows of the 2015 data, rendered as a table
+```
+
+### Part c
+### Merge the data from 2015 and 2016
+```{r ,echo=TRUE}
+final <- merge(y2016, y2015, by=c('Name','Sex'))  # keeps Name/Sex pairs present in both years; counts become Count.x (2016) and Count.y (2015)
+
+```
+
+### Remove any rows with NA
+```{r ,echo=TRUE}
+# Count NA cells, then drop every row that contains one.
+# complete.cases() works row-wise, and the filtered frame is assigned back to
+# `final` so the removal actually takes effect.
+na_count <- sum(is.na(final))
+if (na_count > 0){
+  final <- final[complete.cases(final),]
+}
+```
+
+# Question 3
+### Part a
+```{r ,echo=TRUE}
+# Total = 2016 count (Count.x) plus 2015 count (Count.y) for each Name/Sex pair.
+Total <- with(final, Count.x + Count.y)
+
+final <- cbind(final, Total = Total)
+```
+
+### Part b
+```{r ,echo=TRUE}
+# Ascending sort on Total, so the largest totals land in the last rows.
+final <- final[order(final[['Total']]),]
+
+kable(tail(final, n=10))
+```
+
+### Part c
+### Get Only Girl Names
+```{r ,echo=TRUE}
+# Keep the female names; `final` is sorted ascending by Total, so the last
+# 10 rows hold the 10 highest totals.
+girls <- final[final[['Sex']] == 'F',]
+
+most_popular <- tail(girls, n=10)
+kable(most_popular)
+```
+
+### Part d
+### Write the 10 most popular names to csv
+```{r ,echo=TRUE}
+write.csv(most_popular, file='most_popular_girls.txt')  # row names are written too, as the first (unnamed) column
+
+```
+
+
+
+
+
diff --git a/HW5/pradeep_hw5.html b/HW5/pradeep_hw5.html