diff --git a/vijieaswar/intror/Challenges.R b/vijieaswar/intror/Challenges.R
index 1a82ceb..70fce6f 100644
--- a/vijieaswar/intror/Challenges.R
+++ b/vijieaswar/intror/Challenges.R
@@ -75,3 +75,8 @@ lapply(df, range, na.rm=T)
sapply(df, range, na.rm=T)
#the sapply displays the range min and max value in 2 rows
+# mapply() was not covered in class; open its help page instead of calling it bare
+?mapply
+
+Question8 <- tapply(df$hp, df$gear, max, na.rm = TRUE)  # max hp per gear group, ignoring NAs
+Question8
diff --git a/vijieaswar/intror/Practice.R b/vijieaswar/intror/Practice.R
new file mode 100644
index 0000000..d79561d
--- /dev/null
+++ b/vijieaswar/intror/Practice.R
@@ -0,0 +1,66 @@
+
+cat <- "cat"
+alpha <- 1  # assignment needs `<-`; `alpha < 1` would be a comparison
+int <- 3L
+#for integer, you have to have the L at the end, else it will be treated as a numeric
+
+#vectors are a sequence of objects that all have the same class. R will convert some of them if you have a word along with other numerics
+
+# A list is a special type of vector that can hold objects of different classes.
+
+# Matrices are vectors with a dimension attribute.
+
+x <- matrix(1:6, nrow = 2, ncol = 3)  # 2 x 3 matrix built from the values 1 to 6
+
+dim(x)
+attributes(x)
+
+x <- 1:10
+dim(x) <- c(2,5)
+# we added a dimension to create a matrix
+# the above does the same as the command below
+y <- matrix(1:10, nrow=2, ncol=5)
+y
+#you can also create a matrix with cbind
+
+x <- 1:3
+y <- 10:12
+#binding by column
+x1 <- cbind(x,y)
+x1
+#binding by row
+x2 <- rbind(x,y)
+x2
+
+#matrices all have to be the same class - can't mix character and numeric. a data frame can have mixed classes
+#Factors is another class- can label
+
+# unclass(x) is more descriptive - it shows the object without its class
+
+num <- c(1, 1, 2, 2, 3)
+fact <- factor(num, levels = c(1, 2, 3), labels = c("yes", "no", "maybe"))
+#the data holds integers and you are giving it an attribute by giving it labels and saying what each integer means
+
+
+# data frame: each column has to hold the same type of data
+
+(x <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE)))  # outer parentheses print the result
+
+# attributes are like metadata:
+# adding information to your data without adding more values to the data itself (e.g. its class)
+
+# data frames have row.names
+
+# you could give specific attributes such as names etc
+
+x <-1:3
+names(x)
+#there wont be any names
+
+names(x) <- c("alpha", "beta","gamma")
+#the above does not give a data frame
+names(x)
+str(x)
+#it is not a factor but it is assigning names- it is making a label
+# names() gives a label specific to each element (e.g. each person), but factor labels replace every value equal to a level with that label's text
+
diff --git a/vijieaswar/plotting/Classwork.R b/vijieaswar/plotting/Classwork.R
index e312a93..d6ed2b3 100644
--- a/vijieaswar/plotting/Classwork.R
+++ b/vijieaswar/plotting/Classwork.R
@@ -102,7 +102,6 @@ library(reshape2)
#cor returns a matric, not a df
#creating a heatmap
-
mtcars %>%
select(c(1,3,4,5,6,7)) %>%
cor() %>%
diff --git a/vijieaswar/rmarkdown/Challenge.Rmd b/vijieaswar/rmarkdown/Challenge.Rmd
new file mode 100644
index 0000000..ea206ca
--- /dev/null
+++ b/vijieaswar/rmarkdown/Challenge.Rmd
@@ -0,0 +1,75 @@
+---
+title: "Challenge"
+author: "Viji"
+date: "November 23, 2015"
+output:
+ html_document:
+ fig_caption: true
+ number_section: true
+ toc: true
+---
+
+
+
+```{r, echo = FALSE}
+library(dplyr)
+library(tidyr)
+library(pander)
+library(captioner)
+library(ggplot2)
+library(rmarkdown)
+
+knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
+# no setwd(): a hard-coded home-directory path fails on any other machine, and this document reads no files
+```
+
+```{r}
+ds <- as.data.frame(state.x77) %>%
+ add_rownames() %>%
+ tbl_df()
+
+str(state.x77)
+
+names(ds)
+
+
+```
+
+# Brief Description
+The dataset consists of demographic information from `r dim(ds)[1]` states for the following `r dim(ds[-1])[2]` variables: `r names(ds[-1])`. The mean population across all states is `r round( mean(ds$Population),2)` and the standard deviation is `r round(sd(ds$Population),2)`
+
+```{r}
+
+figNums <- captioner(prefix = 'Figure')
+state_cap <- figNums('stateLitIncome', 'Income varies across illiteracy')
+
+```
+
+# Plots
+```{r, echo=FALSE, fig.cap=state_cap, dpi=150}
+ # Scatterplot of Income against Illiteracy; labs() takes `title`, not `plot.title`
+ ds %>%
+ ggplot(aes(x = Illiteracy, y = Income)) +
+ geom_point() +
+ labs(title = "Income across illiteracy", x = "Illiteracy", y = "Income")
+
+```
+
+# Tables
+```{r}
+
+tabNums <- captioner(prefix = 'Table')
+
+ds %>%
+ gather(Variable, Value, -rowname) %>%
+ group_by(Variable) %>%
+ summarise(Means = mean(Value) %>% round(2),
+ SD = sd(Value) %>% round(2)) %>%
+ pander(caption = tabNums('Demodata', 'Demographic data of 50 states'))
+```
+
+```{r, eval=FALSE}
+render('Challenge.Rmd', c('word_document', 'html_document'))
+```
+
+
diff --git a/vijieaswar/rmarkdown/Challenge.docx b/vijieaswar/rmarkdown/Challenge.docx
new file mode 100644
index 0000000..6432f8f
Binary files /dev/null and b/vijieaswar/rmarkdown/Challenge.docx differ
diff --git a/vijieaswar/rmarkdown/Challenge.html b/vijieaswar/rmarkdown/Challenge.html
new file mode 100644
index 0000000..e16205a
--- /dev/null
+++ b/vijieaswar/rmarkdown/Challenge.html
@@ -0,0 +1,143 @@
+
+
+
+
+
The dataset consists of demographic information from 50 states for the following 8 variables: Population, Income, Illiteracy, Life Exp, Murder, HS Grad, Frost, Area. The mean population across all states is 4246.42 and the standard deviation is 4464.49
+
+
+
2 Plots
+
+
Figure 1: Income varies across illiteracy
+
+
+
+
3 Tables
+
##
+## ------------------------
+## Variable Means SD
+## ---------- ------- -----
+## Population 4246 4464
+##
+## Income 4436 614.5
+##
+## Illiteracy 1.17 0.61
+##
+## Life Exp 70.88 1.34
+##
+## Murder 7.38 3.69
+##
+## HS Grad 53.11 8.08
+##
+## Frost 104.5 51.98
+##
+## Area 70736 85327
+## ------------------------
+##
+## Table: Table 1: Demographic data of 50 states
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/vijieaswar/rmarkdown/Practice.Rmd b/vijieaswar/rmarkdown/Practice.Rmd
new file mode 100644
index 0000000..c71cebe
--- /dev/null
+++ b/vijieaswar/rmarkdown/Practice.Rmd
@@ -0,0 +1,134 @@
+---
+title: "Practice"
+author: "Viji"
+date: "November 23, 2015"
+output:
+ html_document:
+ fig_caption: true
+ toc: true
+---
+```{r}
+#in the YAML, add the following
+#bibliography: give the location of the bib file
+#for a particular style
+#csl: give the location of the bib file citation style language. can be found in github
+
+```
+
+This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
+
+When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
+
+# Intro
+
+## Citing a study
+```{r}
+
+
+
+#how to enter citations here from papers: need to export the bib key
+#@some bibkey
+#[] will make the round brackets around the citation.
+```
+
+```{r}
+
+
+#* makes the text in italics
+#** on either side will bold words
+#``` is a code block
+
+#` puts things inline (inline code)
+# > indicates that it is going to be a quote
+# @ with bib key will extract refs
+
+#YAML header is for R to know what to do. YAML has to be closed in ---
+
+#pander, kable creates tables
+#ggplot qplot creates figure
+
+#control+shift+K will knit html
+```
+
+```{r, echo=FALSE}
+library(pander)
+library(captioner)
+
+#install.packages('captioner', type = 'source') if you get an error message while installing
+
+knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
+#Can use the line above to apply these options to every chunk
+
+#table option 1
+knitr::kable(summary(cars))
+
+
+#table option 2
+tabNums <- captioner(prefix = 'Table')
+figNums <- captioner(prefix = 'Figure')
+cars_cap <- figNums('carsFig', 'This is a caption for the figure')
+
+
+pander(lm(cars), caption = tabNums('tab1', 'This is the caption for table 1')) #first column as Y, and second column as X. if you have many columns, the following columns are X2, X3 etc.
+
+
+```
+
+Reference to **`r tabNums('tab1', display = 'cite')`**.
+
+You can also embed plots, for example:
+
+```{r, echo=FALSE, fig.cap=cars_cap}
+#have to write fig.cap = cars_cap in the chunk header because knitr is finicky
+
+plot(cars)
+```
+
+```{r}
+#Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
+
+#`r will take the following stuff within ` as a command. if you dont write r after ` it will take it as text
+#compare `r mean(cars$dist)` with `mean(cars$dist)`
+```
+
+# Tables
+```{r}
+library(dplyr)
+library(tidyr)
+
+cars %>%
+ gather(Measure, Value) %>%
+ group_by(Measure) %>%
+ summarise(means =mean(Value) %>% round(2)) %>%
+ pander()
+
+```
+
+# Figures
+## Scatterplot
+
+```{r, message = FALSE, fig.height=10, fig.width=5, dpi=150, dev='png'}
+library(ggplot2)
+qplot(dist,speed, data = cars, geom= 'point')
+
+```
+
+This is a list
+
+- There are `r nrow(cars)` rows in `cars`
+- There are `r dim(cars)[2]` variables in `cars`
+
+you would add ` ` around the cars to differentiate it from regular text
+
+This is a quote:
+
+> to be or not to be
+
+```{r}
+#command+option+i is a new R chunk
+#command+alt+c runs the R chunk
+
+
+```
+
+# References
\ No newline at end of file
diff --git a/vijieaswar/rmarkdown/Practice.docx b/vijieaswar/rmarkdown/Practice.docx
new file mode 100644
index 0000000..37f0261
Binary files /dev/null and b/vijieaswar/rmarkdown/Practice.docx differ
diff --git a/vijieaswar/rmarkdown/Practice.html b/vijieaswar/rmarkdown/Practice.html
new file mode 100644
index 0000000..598a164
--- /dev/null
+++ b/vijieaswar/rmarkdown/Practice.html
@@ -0,0 +1,274 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Practice
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#in the YAML, add the following
+#bibliography: give the location of the bib file
+#for a particular style
+#csl: give the location of the bib file citation style language. can be found in github
+
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
+
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
+
+
Intro
+
+
Citing a study
+
#how to enter here from papers. need to export the bib key
+#@some bibkey
+#[] will make the round bracketts around the citation.
+
#* makes the text in italics
+#** on either side will bold words
+#``` is a code block
+
+#` having things in line
+# > indicates that it is goin to be quote
+# @ with bib key will extract refs
+
+#YAML header is for R to know what to do. YAML has o be closed in ---
+
+#pander, kable creates tables
+#ggplot qplot creates figure
+
+#conrol+shift+K will knit html
+
+
+
+
+
speed
+
dist
+
+
+
+
+
+
Min. : 4.0
+
Min. : 2.00
+
+
+
+
1st Qu.:12.0
+
1st Qu.: 26.00
+
+
+
+
Median :15.0
+
Median : 36.00
+
+
+
+
Mean :15.4
+
Mean : 42.98
+
+
+
+
3rd Qu.:19.0
+
3rd Qu.: 56.00
+
+
+
+
Max. :25.0
+
Max. :120.00
+
+
+
+
+
Table 1: This is the caption for table 1
+
+
+
+
+
+
+
+
+
+
Â
+
Estimate
+
Std. Error
+
t value
+
Pr(>|t|)
+
+
+
+
+
dist
+
0.1656
+
0.01749
+
9.464
+
1.49e-12
+
+
+
(Intercept)
+
8.284
+
0.8744
+
9.474
+
1.441e-12
+
+
+
+
REference to Table 1.
+
You can also embed plots, for example:
+
+
Figure 1: This is a caption for the figure
+
+
+
+
+
Tables
+
##
+## Attaching package: 'dplyr'
+##
+## The following object is masked from 'package:stats':
+##
+## filter
+##
+## The following objects are masked from 'package:base':
+##
+## intersect, setdiff, setequal, union
+
+
+
+
+
+
+
+
Measure
+
means
+
+
+
+
+
speed
+
15.4
+
+
+
dist
+
42.98
+
+
+
+
+
+
Figures
+
+
Scatterplot
+
+
+
+
This is a list
+
+
There is 2 rows in cars
+
There are 2 variables in cars
+
+
you would add ` ` around the cars to differentiate it from regular text
+
This is a quote:
+
+
to be or not to be
+
+
+
+
+
References
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/vijieaswar/wrangling/ChallengesVE.R b/vijieaswar/wrangling/ChallengesVE.R
new file mode 100644
index 0000000..a1aa7a0
--- /dev/null
+++ b/vijieaswar/wrangling/ChallengesVE.R
@@ -0,0 +1,3 @@
+
+#Date: 9th Nov 2015
+
diff --git a/vijieaswar/wrangling/Practice.R b/vijieaswar/wrangling/Practice.R
new file mode 100644
index 0000000..3295822
--- /dev/null
+++ b/vijieaswar/wrangling/Practice.R
@@ -0,0 +1,153 @@
+
+# One-time setup; run manually, otherwise the packages are reinstalled on every run of this script:
+# install.packages(c('dplyr', 'tidyr'))
+
+library(dplyr)
+library(tidyr)
+
+head(swiss)
+tail(swiss)
+
+summary(swiss)
+str(swiss)
+names(swiss)
+
+#command+shift+m gives %>%. pipe build on a previous function
+head(swiss)
+swiss %>% head
+swiss %>% head() # it will use the first avaiable place (dont want this for regression)
+swiss %>% head(.) # the . specifies where the data has to go to
+
+swiss %>% lm(Education ~ Infant.Mortality, data = .) %>%
+  summary %>%
+  coef
+
+  # this avoids the need for putting things in brackets
+  # the . places the swiss data in the right place (the data argument)
+
+
+#using dplyr
+
+
+ds <- swiss %>%
+ add_rownames() %>%
+ tbl_df()
+
+#rowname was only an attribute
+#addg table dataframe property to it- the printing is prettier
+ds
+swiss
+
+
+ds %>%
+  select(Education, Catholic)
+
+  #if you end up wanting to select by the same name or partial name
+
+ds %>%
+ select(contains('Edu'),
+ matches('Cath'),
+ starts_with('F')
+ )
+
+#matches('^F'),
+#matches('n$'),
+#matches('.'),
+#matches('*'),
+#matches('C.*l')
+
+#in reg exp, . is a wildcard for exactly one character (any character), while * matches any length (it repeats the preceding item, so .* matches everything).
+#contains and starts_with can be seen as subsets of matches. matches is very powerful because it uses regular expressions.
+# learn regular expressions
+ #regular expression: ^ means starts with, $ means ends with, so matches() can substitute for starts_with. reg exp is useful when you want to do find and replace.
+
+#filter
+dim(ds)
+ ds %>%
+ filter(Catholic < 50 | Fertility > 40) %>%
+ str()
+
+ dplyr::add_rownames(swiss)
+ #pkg::fun() is a way of calling a function from a package without attaching it with library()
+
+ ds %>%
+ filter(Education == 10 &
+ Infant.Mortality >5)
+
+ ds %>%
+ filter(rowname != 'Rolle') %>%
+ str()
+
+ #mutate
+ ds %>%
+ mutate(testing = 'yes',
+ Educated =ifelse(
+ Education >20, 'Yes', 'No'
+ )) %>%
+ select(testing, Educated)
+
+ #it is better to do this instead of assigning into objects each time so that you don't overpopulate the workspace, and it is
+ #easy when you want to change something. then you assign the final result if you need it
+
+
+ #tidyr
+ ds %>%
+ select(-rowname) %>%
+ gather(Variable, Value) %>% #variable is the factor level/variable and the value is the actual value. you coudl kee this line the same- call it the same
+ group_by(Variable) %>%
+ summarise(mean = mean(Value),
+ sd = sd(Value),
+ median = median(Value))
+
+ #if you want only a few variables, you filter first
+ #the group_by adds an attribute, so anything that occurs after group_by will be done by the the grouping
+
+ ds %>%
+ select(-rowname) %>%
+ mutate(Fertile = ifelse(Fertility > 70, 'Yes', 'No')) %>%
+ gather(Variable, Value, -Fertile) %>% #variable is the factor level/variable and the value is the actual value. you coudl kee this line the same- call it the same
+ group_by(Fertile, Variable) %>%
+ summarise(meanSD = paste0(mean(Value) %>% round(2),
+ ' (',
+ sd(Value) %>% round(2),
+ ')')) %>%
+ spread(Fertile, meanSD)
+
+ #paste returns a character
+ #spread changes the output table to a wide format. otherwise Fertile will be in long format
+
+ #arrange
+ #sorting by
+ ds %>%
+ arrange(Education, Agriculture) %>%
+ select(Education, Agriculture)
+
+
+ ds %>%
+ arrange(Education, desc(Agriculture)) %>%
+ select(Education, Agriculture)
+
+
+ ds %>%
+ select(County = rowname) %>%
+ mutate(County =gsub('e$', "", County) %>%
+ gsub('^C','HAHAHA', .))
+ #gsub means global substitute. the . says that's where the piped value needs to go, so R has to know where to put it first
+ # that's why it is mentioned in the first row. An alternative is below. You are already telling R which dataset
+ #you are working with.
+
+ ds %>%
+ select(County = rowname) %>%
+ mutate(County = County %>%
+ gsub('e$', "", .) %>%
+ gsub('^C','HAHAHA', .))
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file