-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kumaraiah Pradeepkumar
committed
Jun 8, 2019
1 parent
4775906
commit b9ea7ae
Showing
8 changed files
with
66,913 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
"","Name","Sex","Count.x","Count.y","Total" | ||
"9980","Harper","F",10733,10283,21016 | ||
"8273","Emily","F",10926,11766,22692 | ||
"277","Abigail","F",11699,12371,24070 | ||
"5493","Charlotte","F",13030,11381,24411 | ||
"18247","Mia","F",14366,14871,29237 | ||
"10682","Isabella","F",14722,15574,30296 | ||
"3252","Ava","F",16237,16340,32577 | ||
"23273","Sophia","F",16070,17381,33451 | ||
"19886","Olivia","F",19246,19638,38884 | ||
"8290","Emma","F",19414,20415,39829 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"","Name","Sex","Count.x","Count.y","Total" | ||
"18247","Mia","F",14366,14871,29237 | ||
"10682","Isabella","F",14722,15574,30296 | ||
"3252","Ava","F",16237,16340,32577 | ||
"23273","Sophia","F",16070,17381,33451 | ||
"19886","Olivia","F",19246,19638,38884 | ||
"8290","Emma","F",19414,20415,39829 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Load the 2016 baby-name counts (fields separated by ";") and name the
# three columns by hand.
df <- read.table("yob2016.txt", sep = ";")
colnames(df) <- c("Name", "Sex", "Count")

# Inspect the data: per-column summary plus a compact structure listing.
# (`structure(df)` with no extra arguments returns `df` unchanged, so it only
# printed the data frame itself.)
summary(df)
str(df)

# Drop names ending in "yyy". A logical mask is used instead of a negative
# index because `df[-integer(0), ]` selects zero rows when nothing matches.
ends_in_yyy <- grepl("yyy$", df$Name)
row_num <- which(ends_in_yyy)
y2016 <- df[!ends_in_yyy, ]

# Load the 2015 counts (fields separated by ",") and give the columns the
# same names as the 2016 data so the two sets can be merged on Name and Sex.
y2015 <- read.table("yob2015.txt", sep = ",")
colnames(y2015) <- c("Name", "Sex", "Count")
tail(y2015, 10)

# It is surprising to see that the count of kids who got each of these
# last 10 names is exactly 5 in 2015.

# merge() defaults to an inner join, so only Name/Sex pairs present in both
# years are kept; the two Count columns become Count.x (from y2016) and
# Count.y (from y2015).
final <- merge(y2016, y2015, by = c("Name", "Sex"))

# Remove any rows with NA.
# complete.cases() flags the rows that contain no NA. The earlier approach,
# final[-which(is.na(final)), ], used element positions of the logical matrix
# as if they were row numbers and never assigned the filtered result, so
# `final` was left unchanged.
na_count <- sum(is.na(final))
final <- final[complete.cases(final), ]

# Combined count over both years for every Name/Sex pair. The column is
# assigned directly so that re-running this section overwrites `Total`
# instead of appending a second column with the same name, as cbind() would.
Total <- final$Count.x + final$Count.y
final$Total <- Total

# Sort ascending by Total, so the most popular names are the last rows.
final <- final[order(final$Total), ]

tail(final)

# Keep only the girls' rows. `final` is sorted ascending by Total above, so
# the last 10 rows are the 10 most popular girls' names.
girls <- final[final$Sex == "F", ]
most_popular <- tail(girls, 10)

# Written in CSV format despite the .txt extension; write.csv() keeps the
# row names as an unnamed first column.
write.csv(most_popular, file = "most_popular_girls.txt")

# Collapse `final` to one row per Name/Sex pair, storing the group's summed
# count in column 3.
#
# The nested loop this replaces summed `tmp1$Count`, but after the merge
# `final` has Count.x and Count.y and no `Count` column; `$` finds the
# partial match ambiguous and returns NULL, so every sum came out as 0.
# The loop also grew `new_df` with rbind() on each iteration, which is
# quadratic in the number of names.
# NOTE(review): column 3 is the column the loop overwrote, so it is the one
# summed here -- confirm that this is the intended count.
group_key <- paste(final$Name, final$Sex, sep = "\t")
is_first <- !duplicated(group_key)
group_sum <- rowsum(final[[3]], group_key, reorder = FALSE)

# Keep the first row of each group and fill in its sum, then order the rows
# by each name's first appearance, which is the order the loop visited them.
new_df <- final[is_first, ]
new_df[[3]] <- unname(group_sum[group_key[is_first], 1])
new_df <- new_df[order(match(new_df$Name, unique(final$Name))), ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
---
title: "pradeep_hw5"
author: "pradeepkumar"
date: "6/8/2019"
output: html_document
---

```{r setup, include=FALSE}
# Echo the source of every chunk in the knitted output by default.
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. R code chunks are embedded as shown in the sections below.

# Question 1
```{r load-knitr, echo=TRUE}
# knitr provides kable(), used in later chunks to render tables.
library(knitr)
```
### Part a
### Read the file into a data frame and assign column names
```{r read-2016, echo=TRUE}
df <- read.table("yob2016.txt", sep = ";")
colnames(df) <- c("Name", "Sex", "Count")
```
### Part b
### Print summary and dimensions
```{r summarise-2016, echo=TRUE}
summary(df)
dim(df)
```
### Part c
### Find the row with a name ending in yyy
```{r find-yyy, echo=TRUE}
row_num <- grep("yyy$", df$Name)
print(df[row_num, ])
```
### Part d
### Remove the row with a name ending in yyy
```{r drop-yyy, echo=TRUE}
# Filter with a logical mask: a negative index built from an empty match
# (`df[-integer(0), ]`) would select zero rows instead of keeping them all.
y2016 <- df[!grepl("yyy$", df$Name), ]
```

# Question 2
### Part a
### Read the 2015 data
```{r read-2015, echo=TRUE}
y2015 <- read.table("yob2015.txt", sep = ",")
colnames(y2015) <- c("Name", "Sex", "Count")
```

### Part b
#### It is surprising to see that the count of kids who got each of these last 10 names is exactly 5 in 2015.

```{r tail-2015, echo=TRUE}
kable(tail(y2015, 10))
```

### Part c
### Merge the data from 2015 and 2016
```{r merge-years, echo=TRUE}
# Inner join on Name and Sex; the counts become Count.x (2016) and
# Count.y (2015).
final <- merge(y2016, y2015, by = c("Name", "Sex"))
```

### Remove any rows with NA
```{r drop-na, echo=TRUE}
# complete.cases() flags rows without any NA. The earlier code indexed rows
# with element positions from which(is.na(final)) and never assigned the
# result, so `final` was left unchanged.
na_count <- sum(is.na(final))
final <- final[complete.cases(final), ]
```

# Question 3
### Part a
```{r add-total, echo=TRUE}
# Assign the column directly so that re-running the chunk overwrites `Total`
# instead of appending a duplicate column, as cbind() would.
Total <- final$Count.x + final$Count.y
final$Total <- Total
```

### Part b
```{r sort-by-total, echo=TRUE}
# Ascending sort, so the most popular names are the last rows.
final <- final[order(final$Total), ]
kable(tail(final, 10))
```

### Part c
### Get only girl names
```{r girls-top10, echo=TRUE}
girls <- final[final$Sex == "F", ]
most_popular <- tail(girls, 10)
kable(most_popular)
```

### Part d
### Write the 10 most popular names to csv
```{r write-top10, echo=TRUE}
write.csv(most_popular, file = "most_popular_girls.txt")
```
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.