diff --git a/DataWork/Code/1-intro-to-R-solutions.R b/DataWork/Code/1-intro-to-R-solutions.R
new file mode 100644
index 0000000..2f7d7b7
--- /dev/null
+++ b/DataWork/Code/1-intro-to-R-solutions.R
@@ -0,0 +1,44 @@
+## R for Stata Users
+## March 2023
+## Exercise solutions
+## Session: Introduction to R
+
+## Exercise 1 ====
+whr <- read.csv("/path/to/data/file")
+# Placeholder path: during the actual session this import was executed through
+# point-and-click, so replace it with the location of your whr_panel.csv
+
+## Exercise 2 ====
+# Subset data (the result is not stored, so whr itself is unchanged)
+subset(whr, year == 2016)
+# Check first 6 observations of whr
+head(whr)
+
+## Exercise 3 ====
+# Subset data and store result in a new df
+whr2016 <- subset(whr, year == 2016)
+# Display head of new df
+head(whr2016)
+# Display head of original df
+head(whr)
+
+## Exercise 4 ====
+# Create vector of strings
+str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
+# Create string "scalar"
+str_scalar <- "can be an option to"
+# Concatenation
+paste(str_vec[1], str_scalar, str_vec[5])
+
+## Exercise 5 ====
+# Create boolean vector: TRUE where GDP per capita is below its mean
+inc_below_avg <- whr$economy_gdp_per_capita < mean(whr$economy_gdp_per_capita)
+# See head of vector
+head(inc_below_avg)
+
+## Exercise 6 ====
+# Create new column (vector) of zeros
+whr$rank_low <- 0
+# Subset obs with income below average
+# and replace values of rank_low with 1 for those obs
+whr$rank_low[inc_below_avg] <- 1
\ No newline at end of file
diff --git a/DataWork/Code/2-intro-to-R-programming-solutions.R b/DataWork/Code/2-intro-to-R-programming-solutions.R
new file mode 100644
index 0000000..634cfa9
--- /dev/null
+++ b/DataWork/Code/2-intro-to-R-programming-solutions.R
@@ -0,0 +1,53 @@
+## R for Stata Users
+## March 2023
+## Exercise solutions
+## Session: Introduction to R programming
+
+## Exercise 1 ====
+# (no coding needed for exercise)
+
+## Exercise 2 ====
+# (no coding needed for exercise)
+
+## Exercise 3 ====
+library(here)
+whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
+# note that the path is built with here(), so this will only work
+# if exercise 2 was executed correctly
+
+## Exercise 4 ====
+# install.packages("dplyr") # uncomment installation if needed
+# install.packages("purrr") # uncomment installation if needed
+library(dplyr) # provides %>% and select(), used in exercise 8
+library(purrr) # provides map(), used in exercises 6 and 8
+
+## Exercise 5 ====
+# Create dataframe: 50,000 columns, each holding 400 random draws from 1:100
+df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
+# Create empty vector
+col_means_loop <- c()
+# Loop over the columns and append each mean (will take a few seconds)
+for (column in df){
+ col_means_loop <- append(col_means_loop, mean(column))
+}
+
+## Exercise 6 ====
+col_means_map <- map(df, mean)
+# this will only work if you defined df in exercise 5 and loaded purrr in exercise 4
+
+## Exercise 7 ====
+zscore <- function(x) { # standardize x: subtract its mean, divide by its sd
+  center <- mean(x, na.rm = TRUE) # NAs are ignored when computing the moments
+  spread <- sd(x, na.rm = TRUE)
+  deviations <- x - center
+  deviations / spread # result has the same length as x
+}
+
+## Exercise 8 ====
+z_scores <- whr %>%
+ select(health_life_expectancy, freedom) %>%
+ map(zscore)
+whr$hle_st <- z_scores[[1]]
+whr$freedom_st <- z_scores[[2]]
+# this will only run if you created the function
+# zscore() in exercise 7
diff --git a/DataWork/Code/main.R b/DataWork/Code/main.R
new file mode 100644
index 0000000..5efcd70
--- /dev/null
+++ b/DataWork/Code/main.R
@@ -0,0 +1,90 @@
+# ------------------------------------------------------------------------------ #
+# #
+# DIME #
+# Introduction to R for Stata users #
+# MAIN SCRIPT #
+# #
+# ------------------------------------------------------------------------------ #
+
+# PURPOSE: Set-up configurations and run scripts
+
+# NOTES: Version 2
+
+# WRITTEN BY: Luiza Cardoso de Andrade, Leonardo Viotti
+
+# Last modified in Mar 2023
+
+# PART 1: Select sections to run ----------------------------------------------
+# Set a switch to 1 to source that lab's script in PART 4; other values skip it
+Lab2 <- 0
+Lab3 <- 0
+Lab4 <- 0
+Lab5 <- 0
+Lab6 <- 0
+
+# PART 2: Load packages -----------------------------------------------------
+
+packages <- c("readstata13", "foreign",
+              "doBy", "broom", "dplyr",
+              "stargazer",
+              "ggplot2", "plotly", "ggrepel",
+              "RColorBrewer", "wesanderson",
+              "sp", "rgdal", "rgeos", "raster", "velox",
+              "ggmap", "rasterVis", "leaflet",
+              "htmlwidgets", "geosphere")
+
+# Install the listed packages that are missing. Installed package names are the
+# row names of installed.packages(); testing against the whole matrix would
+# also match other cells, such as a dependency field holding the same name.
+missing_packages <- setdiff(packages, rownames(installed.packages()))
+if (length(missing_packages) > 0) {
+  install.packages(missing_packages, dependencies = TRUE)
+}
+
+# Load all packages -- this is equivalent to using library(package) for each
+# package listed before
+invisible(lapply(packages, library, character.only = TRUE))
+
+# PART 3: Set folder paths ----------------------------------------------------
+
+#-------------#
+# Root folder #
+#-------------#
+# Add your username and folder path here (for Windows computers)
+# To find out what your username is, type Sys.getenv("USERNAME")
+if (Sys.getenv("USERNAME") == "luiza") {
+  projectFolder <- "C:/Users/luiza/Documents/GitHub/dime-r-training"
+}
+
+# If you're using Mac, just add your folder path, without the if statement
+
+# Stop early with a clear message if no root folder was set above
+if (!exists("projectFolder")) stop("Set projectFolder in PART 3", call. = FALSE)
+
+#--------------------#
+# Project subfolders #
+#--------------------#
+
+rawData <- file.path(projectFolder, "Data", "Raw")
+finalData <- file.path(projectFolder, "Data", "Final")
+Code <- file.path(projectFolder, "Codes") # NOTE(review): "Codes" vs "Code" -- confirm
+Output <- file.path(projectFolder, "Output")
+
+
+# PART 4: Run selected sections -----------------------------------------------
+# Source labs whose switch is 1. NOTE(review): no ".R" in file names -- confirm
+if (Lab2 == 1) {
+ source(file.path(Code, "Lab 2 - Coding for Reproducible Research"))
+}
+if (Lab3 == 1) {
+ source(file.path(Code, "Lab 3 - Data Processing"))
+}
+if (Lab4 == 1) {
+ source(file.path(Code, "Lab 4 - Descriptive Analysis"))
+}
+if (Lab5 == 1) {
+ source(file.path(Code, "Lab 5 - Data Visualization"))
+}
+if (Lab6 == 1) {
+ source(file.path(Code, "Lab 6 - Spatial Data"))
+}
\ No newline at end of file
diff --git a/Presentations/01-intro-to-R.Rmd b/Presentations/01-intro-to-R.Rmd
index 5824351..984fc3e 100644
--- a/Presentations/01-intro-to-R.Rmd
+++ b/Presentations/01-intro-to-R.Rmd
@@ -1,8 +1,8 @@
---
title: "Session 1 - Introduction to R"
subtitle: "R for Stata Users"
-author: "Luiza Andrade, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti"
-date: "The World Bank | [WB Github](https://github.com/worldbank) May 2022"
+author: "Luiza Andrade, Marc-Andrea Fiorina, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti"
+date: "The World Bank | [WB Github](https://github.com/worldbank) March 2023"
output:
xaringan::moon_reader:
css: ["libs/remark-css/default.css", "libs/remark-css/metropolis.css", "libs/remark-css/metropolis-fonts.css"]
@@ -65,7 +65,7 @@ xaringanExtra::use_logo(
```
```{r echo = FALSE, include = FALSE, eval = TRUE}
-whr <- read_csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
+whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
```
# Table of contents
@@ -99,9 +99,9 @@ We're glad you're joining us today!
## Format
-- Every session has two TAs. For this session, our TAs are __Luiza Cardoso De Andrade__ and __Rony Rodriguez-Ramirez__
+- Every session has two TAs. For this session, our TAs are __Luiza Cardoso De Andrade__ and __Marc-Andrea Fiorina__
-- The TAs will help you troubleshooting __particular issues__ which make you unable to follow along the presentation. Send them a message over the chat whenever you need help
+- The TAs will help you troubleshoot __particular issues__ that prevent you from following along with the presentation. Send a message over the chat whenever you need help
---
@@ -118,7 +118,7 @@ We're glad you're joining us today!
- The materials of each session will be shared in the OSF page of the course by the end of each session: https://osf.io/86g3b/
-- The recordings will be shared in a WB internal link
+- The recordings will be shared each day after the session
---
@@ -165,7 +165,7 @@ knitr::include_graphics("img/Interface.png")
# Getting started - RStudio interface
```{r echo = FALSE, out.width = "70%"}
-knitr::include_graphics("img/scritpt1.png")
+knitr::include_graphics("img/script1.png")
```
---
@@ -173,14 +173,14 @@ knitr::include_graphics("img/scritpt1.png")
# Getting started - RStudio interface
```{r echo = FALSE, out.width = "70%"}
-knitr::include_graphics("img/scritpt2.png")
+knitr::include_graphics("img/script2.png")
```
---
# Getting started - RStudio interface
```{r echo = FALSE, out.width = "70%"}
-knitr::include_graphics("img/scritpt3.png")
+knitr::include_graphics("img/script3.png")
```
---
@@ -188,24 +188,25 @@ knitr::include_graphics("img/scritpt3.png")
# Getting started - RStudio interface
```{r echo = FALSE, out.width = "70%"}
-knitr::include_graphics("img/scritpt4.png")
+knitr::include_graphics("img/script4.png")
```
---
# Getting started - Importing data
-Let's start by loading the data set we'll be using:
+Let's start by loading the data we'll be using:
## Exercise 1: Import data manually (`r fa("clock")` 3 min)
-1. Go to the OSF page of the course (https://osf.io/86g3b/) and download the file located in `R for Stata Users - 2022 May` > `Data` > `whr_panel.csv`
+1. Go to the OSF page of the course (https://osf.io/86g3b/) and download the file located in `R for Stata Users - 2023 March` > `Data` > `whr_panel.csv`
2. In RStudio, go to `File` > `Import Dataset` > `From Text (base)` and open the `whr_panel.csv` file.
+ Depending on your Rstudio version, it might be `File` > `Import Dataset` > `From CSV`
-3. Assign the name `whr` to the dataset on the import window.
+3. Assign the name `whr` to the data on the import window.
+ + If you solved the exercise correctly, you'll see that RStudio opens a tab with a viewer of the dataframe
---
@@ -258,11 +259,11 @@ name: data-in-r
## In R:
-__R__ works in a completely different way:
+Datasets are called __dataframes__. R works with them in a different way:
-* You can load __as many datasets as you wish__ or your computer's memory allows
+* You can load __as many dataframes as you wish__ or your computer's memory allows
-* Operations will have lasting effects __only if you store them__
+* Operations will have lasting effects __only if you store their results__
---
@@ -270,7 +271,7 @@ __R__ works in a completely different way:
## In R:
-* Everything that exists in R's memory -- variables, datasets, functions -- __is an object__
+* Everything that exists in R's memory -variables, dataframes, functions- __is an object__
* You could think of an object like a chunk of data with some properties that has a name by which you call it
@@ -294,7 +295,7 @@ View(whr) # <--- Note that the first letter is uppercase
```
-```{r echo = FALSE, out.width = "45%"}
+```{r echo = FALSE, out.width = "50%"}
knitr::include_graphics("img/View.png")
```
@@ -305,7 +306,7 @@ knitr::include_graphics("img/View.png")
Alternatively we can print the first 6 obs. with `head()`:
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -316,7 +317,7 @@ Now, let's try some simple manipulations. First, assume we're only interested in
## Exercise 2: Subset the data (`r fa("clock")` 1 min)
-- Subset the data set, keeping only observations where variable `year` equals `2016`.
+- Subset the dataframe, keeping only observations where variable `year` equals `2016`.
```{r, eval=FALSE}
# To do that we'll use the subset() function
@@ -341,7 +342,7 @@ head(whr)
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -369,7 +370,7 @@ x <- 42
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -388,7 +389,7 @@ knitr::include_graphics("img/x_42.png")
## Exercise 3: Create an object (`r fa("clock")` 1 min)
-Create a new dataset, called `whr2016`, that is a subset of the `whr` data set containing only data from the year 2016.
+Create a new dataframe, called `whr2016`, that is a subset of the `whr` dataframe containing only data from the year 2016.
```{r, include = T, results = "hide"}
# Using the same function but now assigning it to an object
@@ -397,30 +398,16 @@ whr2016 <- subset(whr, year == 2016)
# Display the 6 first obs. of the new data
head(whr2016)
-# Notice that we still have the original data set intact
-head(whr)
-
-```
-
----
-
-# Data in R
-
-```{r, include = T, results = "hide"}
-whr2016 <- subset(whr, year == 2016)
-head(whr2016)
+# Notice that we still have the original dataframe intact
head(whr)
-```
-```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
```
---
# Data in R
-You can also see that your environment panel now has two `Data` objects:
+You can also see that your environment panel now has two data objects:
```{r echo = FALSE, out.width = "60%"}
knitr::include_graphics("img/environment_2vars_2021.png")
@@ -511,7 +498,7 @@ knitr::include_graphics("img/subset_arguments.png")
* Usually the first argument is the object you want to use the function on, e.g. `subset(whr, ...)`
-* Functions usually return values that you can store in an object, print or use directly as an argument of another function.
+* Functions usually return values that you can store in an object, print or use directly as an argument of another function. __They rarely modify an object in-place__
We will explore these ideas in depth in a later session.
@@ -542,7 +529,7 @@ This will give you the foundation to explore your data and construct analytical
* An object is like a global or local in Stata, it's __something you can refer to later__ in your code to get a value
-* But while you can only put a number or a string in a global, __you can put anything into an object__: scalars, strings, datasets, vectors, plots, functions, etc.
+* But while you can only put a number or a string in a global, __you can put anything into an object__: scalars, strings, dataframes, vectors, plots, functions
* Objects also have attributes that can be used to manipulate them
@@ -556,7 +543,7 @@ Here are the object classes we will cover in this first session:
* __Vectors:__ an uni-dimensional object that __stores a sequence of values of the same class__
-* __Data frames:__ a combination of different vectors of the same length (the same as your dataset in Stata)
+* __Dataframes:__ a combination of different vectors of the same length (the same as a dataset in Stata)
* __Lists:__ a multidimensional object that can store several objects __of different classes and dimensions__
@@ -588,7 +575,7 @@ v2 <- 1:5 # Alternative way to create an evenly spaced vector
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -602,54 +589,18 @@ v2[1:3] # Prints from the 1st to the 3rd element
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
-# R objects - Vectors
-
-## Vectors
+# R objects - Dataframes
-To R, each of the columns of the object `whr` is a vector.
+## Dataframes
-### Calling a vector from a `data.frame` column:
-
-We use the `$` character (operator) to extract vectors (variables) by their names in a `data.frame`
-
-For example:
+The `whr` and `whr2016` objects are both dataframes. You can also construct a new dataframe from scratch by __combining vectors with the same number of elements__ with the command `data.frame()`.
-```{r}
-# Create a vector with the values of the "year" variable
-year_vector <- whr$year
-
-# See the 3 first elements of the year column
-whr$year[1:3]
-
-```
-
----
-
-# R objects - Vectors
-
-```{r, eval=F}
-year_vector <- whr$year # creates a vector with the values of the "year" variable
-whr$year[1:3] # see the 3 first elements of the year column
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
-```
-
----
-
-# R objects - Data frames
-
-## Data frames
-
-The `whr` and `whr2016` objects are both data frames. You can also construct a new data frame from scratch by __combining vectors with the same number of elements__.
-
-#### Now, type the following code to create a new data frame
+#### Now, type the following code to create a new dataframe
```{r}
# Dataframe created by biding vectors
df1 <- data.frame(v1,v2)
@@ -658,7 +609,7 @@ df1
---
-# R objects - Data frames
+# R objects - Dataframes
```{r, eval=F}
df1 <- data.frame(v1,v2) #creates a df by binding to existing vectors
@@ -666,16 +617,16 @@ df1
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
-# R objects - Data frames
+# R objects - Dataframes
-## Data frames
+## Dataframes
-Since a data frame has two dimensions, you can use indices for both. The first index indicates the row selection and the second indicates the column.
+Since a dataframe has two dimensions, you can use indices for both. The first index indicates the row selection and the second indicates the column.
### Numeric indexing
```{r, eval = F}
@@ -692,7 +643,7 @@ whr[45,1]
---
-# R objects - Data frames
+# R objects - Dataframes
```{r, eval = F}
whr[,1] # The first column of whr
@@ -701,14 +652,14 @@ whr[45,1] # Or the 45th element of the first column
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
-# R objects - Data frames
+# R objects - Dataframes
-## Data frames
+## Dataframes
Alternatively, you can use the column names for indexing, which is the same as using the `$` sign.
@@ -716,12 +667,11 @@ Alternatively, you can use the column names for indexing, which is the same as u
```{r}
# The 22th element of the country column
whr[22,"country"] # The same as whr$country[22]
-
```
---
-# R objects - Data frames
+# R objects - Dataframes
```{r, eval=F}
# The 22th element of the country column
@@ -729,7 +679,38 @@ whr[22,"country"] # The same as whr$country[22]
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
+```
+
+---
+
+# R objects - Dataframes
+
+## Vectors in dataframes
+
+To R, each of the columns of the object `whr` is a vector.
+
+### Calling a vector from a dataframe:
+
+We use the `$` character to extract vectors (variables) by their names in a dataframe
+
+For example:
+
+```{r}
+# Create a vector with the values of the "year" variable
+year_vector <- whr$year
+```
+
+---
+
+# R objects - Dataframes
+
+```{r, eval=F}
+year_vector <- whr$year # creates a vector with the values of the "year" variable
+```
+
+```{r echo=FALSE}
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -740,9 +721,9 @@ knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
Lists are more complex objects that can contain many objects of __different classes and dimensions__.
-The outputs of many functions, a regression for example, are similar to lists.
+The outputs of many functions, a regression for example, are similar to lists (more on this in a later session).
-It would be beyond the scope of this introduction to go deep into them, but here's a quick example:
+Here's a quick example:
### Combine several objects of different types in a list
```{r, include = T, results = "hide"}
@@ -761,7 +742,7 @@ print(lst) # checking the content of lst
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -780,13 +761,13 @@ You can subset lists using single brackets (`[]`) or double brackets (`[[]]`)
# R objects - Lists
```{r, eval=F}
+lst <- list(v1, df1, 45)
lst[[3]] # returns 45
lst[3] # returns a list of one element (45)
-lst[c(1,2)] # returns a list with the first two elements of "lst"
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -815,45 +796,18 @@ As in Stata, there are two different ways to store numbers. They are different b
# Basic types of data - Strings
-Now we'll use string data to practice some basic object manipulations in R.
-
-### Exercise 4: Create a vector of strings (`r fa("clock")` 2 min)
-Create a string vector containing the names of commonly used statistical software:
-```{r, include = T, results = "hide"}
-
-# Creating string vector
-str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
-```
-
-Now print them to check them out.
-
----
-
-# Basic types of data - Strings
-
-```{r, eval=F}
-# Creating string vector
-str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
-```
-
----
-
-# Basic types of data - Strings
+### Exercise 4: Concatenate strings (`r fa("clock")` 3 min)
-### Exercise 5: Concatenate strings (`r fa("clock")` 3 min)
+1. Create the following vector of strings: `str_vec <- c("R", "Python", "SAS", "Excel", "Stata")`
- 1. Create a scalar (a vector of one element) containing the phrase "can be an option to" and call it `str_scalar`. Your code will be similar to this: `str_scalar <- "can be an option to"`
+1. Create a scalar (a vector of one element) containing the phrase "can be an option to" and call it `str_scalar`. Your code will be similar to this: `str_scalar <- "can be an option to"`
- 2. Use the function `paste()` with 3 arguments separated by commas:
+1. Use the function `paste()` with 3 arguments separated by commas:
+ The first argument as the 1st element of `str_vec`.
+ The second argument as the `str_scalar`.
+ The third argument as the 5th element of `str_vec`.
- 3. If you're not sure where to start, type:
+1. If you're not sure where to start, type:
```{r, eval=FALSE}
help(paste)
```
@@ -863,12 +817,13 @@ help(paste)
# Basic types of data - Strings
```{r, eval=F}
+str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
str_scalar <- "can be an option to" # creating str_scalar
paste(str_vec[1], str_scalar, str_vec[5]) # using paste()
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -898,20 +853,6 @@ Booleans are __logical binary variables__, accepting either `TRUE` or `FALSE` as
# Advanced types of data
-## Factors
-
-We'll learn more about factors in a later session, since they are important for the kind of analysis we usually do. For now, here are two important things to keep in mind when using them.
-
-Unlike Stata, in R:
-
-1. __You use the labels to refer to factors__
-
-2. __You cannot choose the underlying values__
-
----
-
-# Advanced types of data
-
## Booleans
Boolean data is the result of logical conditions. It can take two possible values: `TRUE` or `FALSE`.
@@ -941,7 +882,7 @@ boolean_false <- FALSE
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -950,7 +891,7 @@ knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
## Booleans
-### Exercise 6 (`r fa("clock")` 3 min)
+### Exercise 5 (`r fa("clock")` 3 min)
Create a boolean vector with the condition of annual income below average:
@@ -972,7 +913,7 @@ head(inc_below_avg) # See the 6 first elements of the vector
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -1008,7 +949,7 @@ boolean2 <- c(FALSE, TRUE, TRUE, TRUE, TRUE) # And this to select every elemen
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -1017,9 +958,9 @@ knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
## Booleans
-Now let's use the boolean vector `inc_below_avg` to add a dummy variable in the `whr` data set for the same condition.
+Now let's use the boolean vector `inc_below_avg` to add a dummy variable in the `whr` dataframe for the same condition.
-### Exercise 7 (`r fa("clock")` 3 min)
+### Exercise 6 (`r fa("clock")` 3 min)
* Create a column in `whr` containing zeros and call it `rank_low`. You can do this by typing:
@@ -1046,7 +987,7 @@ whr$rank_low[inc_below_avg] <- 1
```
```{r echo=FALSE}
-knitr::include_app("https://rrmaximiliano.shinyapps.io/learnr-app/")
+knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
```
---
@@ -1138,26 +1079,26 @@ Some possible disadvantages of Stata:
Here are some advantages of R:
-* R is a free and open source software!
+* R is a free and open source software, a huge advantage for open science
-* It allows you to have several datasets open simultaneously.
+* It allows you to have several dataframes open simultaneously
+ No need to use `keep`, `preserve`, `restore`
-* It can run complex Geographic Information System (GIS) analyses.
+* It can run complex Geographic Information System (GIS) analyses
-* You can use it for web scrapping.
+* You can use it for web scraping and APIs
-* You can run machine learning algorithms with it.
+* You can easily run machine learning algorithms with it
-* You can create complex Markdown documents. This presentation, for example, is entirely done in R.
+* You can create complex Markdown documents. This presentation, for example, is entirely done in R
-* You can create interactive dashboards and online applications with the Shiny package.
+* You can create interactive dashboards and online applications with the Shiny package
---
# Appendix - Syntax
-R's syntax is a bit heavier than Stata's:
+R's syntax is heavier than Stata's:
* Parentheses to separate function names from its arguments.
* Commas to separate arguments.
@@ -1322,7 +1263,7 @@ round(num)
Help in R works very much like in Stata: the help files usually start with a brief description of the function, explain its syntax and arguments and list a few examples. There are two ways to access help files:
-## Exercise 8: Use help
+## Exercise 7: Use help
```{r, eval=FALSE}
# You can use the help() function
diff --git a/Presentations/01-intro-to-R.html b/Presentations/01-intro-to-R.html
index 5de57a4..a2b927d 100644
--- a/Presentations/01-intro-to-R.html
+++ b/Presentations/01-intro-to-R.html
@@ -3,7 +3,7 @@
Session 1 - Introduction to R
-
+
@@ -31,10 +31,10 @@
## R for Stata Users
]
.author[
-### Luiza Andrade, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti
+### Luiza Andrade, Marc-Andrea Fiorina, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti
]
.date[
-### The World Bank | WB Github May 2022
+### The World Bank | WB Github March 2023
]
---
@@ -84,9 +84,9 @@
## Format
-- Every session has two TAs. For this session, our TAs are __Luiza Cardoso De Andrade__ and __Rony Rodriguez-Ramirez__
+- Every session has two TAs. For this session, our TAs are __Luiza Cardoso De Andrade__ and __Marc-Andrea Fiorina__
-- The TAs will help you troubleshooting __particular issues__ which make you unable to follow along the presentation. Send them a message over the chat whenever you need help
+- The TAs will help you troubleshoot __particular issues__ that prevent you from following along with the presentation. Send a message over the chat whenever you need help
---
@@ -103,7 +103,7 @@
- The materials of each session will be shared in the OSF page of the course by the end of each session: https://osf.io/86g3b/
-- The recordings will be shared in a WB internal link
+- The recordings will be shared each day after the session
---
@@ -147,40 +147,41 @@
# Getting started - RStudio interface
-<img src="img/scritpt1.png" width="70%" style="display: block; margin: auto;" />
+<img src="img/script1.png" width="70%" style="display: block; margin: auto;" />
---
# Getting started - RStudio interface
-<img src="img/scritpt2.png" width="70%" style="display: block; margin: auto;" />
+<img src="img/script2.png" width="70%" style="display: block; margin: auto;" />
---
# Getting started - RStudio interface
-<img src="img/scritpt3.png" width="70%" style="display: block; margin: auto;" />
+<img src="img/script3.png" width="70%" style="display: block; margin: auto;" />
---
# Getting started - RStudio interface
-<img src="img/scritpt4.png" width="70%" style="display: block; margin: auto;" />
+<img src="img/script4.png" width="70%" style="display: block; margin: auto;" />
---
# Getting started - Importing data
-Let's start by loading the data set we'll be using:
+Let's start by loading the data we'll be using:
-## Exercise 1: Import data manually <font size="5">( 3 min)</font>
+## Exercise 1: Import data manually <font size="5">( 3 min)</font>
-1. Go to the OSF page of the course (https://osf.io/86g3b/) and download the file located in `R for Stata Users - 2022 May` > `Data` > `whr_panel.csv`
+1. Go to the OSF page of the course (https://osf.io/86g3b/) and download the file located in `R for Stata Users - 2023 March` > `Data` > `whr_panel.csv`
2. In RStudio, go to `File` > `Import Dataset` > `From Text (base)` and open the `whr_panel.csv` file.
+ Depending on your Rstudio version, it might be `File` > `Import Dataset` > `From CSV`
-3. Assign the name `whr` to the dataset on the import window.
+3. Assign the name `whr` to the data on the import window.
+ + If you solved the exercise correctly, you'll see that RStudio opens a tab with a viewer of the dataframe
---
@@ -227,11 +228,11 @@
## In R:
-__R__ works in a completely different way:
+Datasets are called __dataframes__. R works with them in a different way:
-* You can load __as many datasets as you wish__ or your computer's memory allows
+* You can load __as many dataframes as you wish__ or your computer's memory allows
-* Operations will have lasting effects __only if you store them__
+* Operations will have lasting effects __only if you store their results__
---
@@ -239,7 +240,7 @@
## In R:
-* Everything that exists in R's memory -- variables, datasets, functions -- __is an object__
+* Everything that exists in R's memory -variables, dataframes, functions- __is an object__
* You could think of an object like a chunk of data with some properties that has a name by which you call it
@@ -262,7 +263,7 @@
View(whr) # <--- Note that the first letter is uppercase
```
-<img src="img/View.png" width="45%" style="display: block; margin: auto;" />
+<img src="img/View.png" width="50%" style="display: block; margin: auto;" />
---
@@ -270,7 +271,7 @@
Alternatively we can print the first 6 obs. with `head()`:
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -278,9 +279,9 @@
Now, let's try some simple manipulations. First, assume we're only interested in data of the year 2016.
-## Exercise 2: Subset the data <font size="5">( 1 min)</font>
+## Exercise 2: Subset the data <font size="5">( 1 min)</font>
-- Subset the data set, keeping only observations where variable `year` equals `2016`.
+- Subset the dataframe, keeping only observations where variable `year` equals `2016`.
```r
@@ -307,7 +308,7 @@
head(whr)
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -335,7 +336,7 @@
x <- 42
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -349,9 +350,9 @@
# Data in R
-## Exercise 3: Create an object <font size="5">( 1 min)</font>
+## Exercise 3: Create an object <font size="5">( 1 min)</font>
-Create a new dataset, called `whr2016`, that is a subset of the `whr` data set containing only data from the year 2016.
+Create a new dataframe, called `whr2016`, that is a subset of the `whr` dataframe containing only data from the year 2016.
```r
@@ -361,7 +362,7 @@
# Display the 6 first obs. of the new data
head(whr2016)
-# Notice that we still have the original data set intact
+# Notice that we still have the original dataframe intact
head(whr)
```
@@ -369,20 +370,7 @@
# Data in R
-
-```r
-whr2016 <- subset(whr, year == 2016)
-head(whr2016)
-head(whr)
-```
-
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
-
----
-
-# Data in R
-
-You can also see that your environment panel now has two `Data` objects:
+You can also see that your environment panel now has two data objects:
<img src="img/environment_2vars_2021.png" width="60%" style="display: block; margin: auto;" />
---
@@ -468,7 +456,7 @@
* Usually the first argument is the object you want to use the function on, e.g. `subset(whr, ...)`
-* Functions usually return values that you can store in an object, print or use directly as an argument of another function.
+* Functions usually return values that you can store in an object, print or use directly as an argument of another function. __They rarely modify an object in-place__
We will explore these ideas in depth in a later session.
@@ -499,7 +487,7 @@
* An object is like a global or local in Stata, it's __something you can refer to later__ in your code to get a value
-* But while you can only put a number or a string in a global, __you can put anything into an object__: scalars, strings, datasets, vectors, plots, functions, etc.
+* But while you can only put a number or a string in a global, __you can put anything into an object__: scalars, strings, dataframes, vectors, plots, functions
* Objects also have attributes that can be used to manipulate them
@@ -513,7 +501,7 @@
* __Vectors:__ an uni-dimensional object that __stores a sequence of values of the same class__
-* __Data frames:__ a combination of different vectors of the same length (the same as your dataset in Stata)
+* __Dataframes:__ a combination of different vectors of the same length (the same as a dataset in Stata)
* __Lists:__ a multidimensional object that can store several objects __of different classes and dimensions__
@@ -545,7 +533,7 @@
v2 <- 1:5 # Alternative way to create an evenly spaced vector
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -558,56 +546,17 @@
v2[1:3] # Prints from the 1st to the 3rd element
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
-# R objects - Vectors
-
-## Vectors
+# R objects - Dataframes
-To R, each of the columns of the object `whr` is a vector.
+## Dataframes
-### Calling a vector from a `data.frame` column:
-
-We use the `$` character (operator) to extract vectors (variables) by their names in a `data.frame`
-
-For example:
+The `whr` and `whr2016` objects are both dataframes. You can also construct a new dataframe from scratch by __combining vectors with the same number of elements__ using the function `data.frame()`.
-
-```r
-# Create a vector with the values of the "year" variable
-year_vector <- whr$year
-
-# See the 3 first elements of the year column
-whr$year[1:3]
-```
-
-```
-## [1] 2015 2015 2015
-```
-
----
-
-# R objects - Vectors
-
-
-```r
-year_vector <- whr$year # creates a vector with the values of the "year" variable
-whr$year[1:3] # see the 3 first elements of the year column
-```
-
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
-
----
-
-# R objects - Data frames
-
-## Data frames
-
-The `whr` and `whr2016` objects are both data frames. You can also construct a new data frame from scratch by __combining vectors with the same number of elements__.
-
-#### Now, type the following code to create a new data frame
+#### Now, type the following code to create a new dataframe
```r
# Dataframe created by biding vectors
@@ -626,7 +575,7 @@
---
-# R objects - Data frames
+# R objects - Dataframes
```r
@@ -634,15 +583,15 @@
df1
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
-# R objects - Data frames
+# R objects - Dataframes
-## Data frames
+## Dataframes
-Since a data frame has two dimensions, you can use indices for both. The first index indicates the row selection and the second indicates the column.
+Since a dataframe has two dimensions, you can use indices for both. The first index indicates the row selection and the second indicates the column.
### Numeric indexing
@@ -659,7 +608,7 @@
---
-# R objects - Data frames
+# R objects - Dataframes
```r
@@ -668,13 +617,13 @@
whr[45,1] # Or the 45th element of the first column
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
-# R objects - Data frames
+# R objects - Dataframes
-## Data frames
+## Dataframes
Alternatively, you can use the column names for indexing, which is the same as using the `$` sign.
@@ -686,15 +635,12 @@
```
```
-## # A tibble: 1 × 1
-## country
-## <chr>
-## 1 Oman
+## [1] "Oman"
```
---
-# R objects - Data frames
+# R objects - Dataframes
```r
@@ -702,7 +648,38 @@
whr[22,"country"] # The same as whr$country[22]
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+
+---
+
+# R objects - Dataframes
+
+## Vectors in dataframes
+
+To R, each of the columns of the object `whr` is a vector.
+
+### Calling a vector from a dataframe:
+
+We use the `$` character to extract vectors (variables) by their names in a dataframe
+
+For example:
+
+
+```r
+# Create a vector with the values of the "year" variable
+year_vector <- whr$year
+```
+
+---
+
+# R objects - Dataframes
+
+
+```r
+year_vector <- whr$year # creates a vector with the values of the "year" variable
+```
+
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -712,9 +689,9 @@
Lists are more complex objects that can contain many objects of __different classes and dimensions__.
-The outputs of many functions, a regression for example, are similar to lists.
+The outputs of many functions, a regression for example, are similar to lists (more on this in a later session).
-It would be beyond the scope of this introduction to go deep into them, but here's a quick example:
+Here's a quick example:
### Combine several objects of different types in a list
@@ -734,7 +711,7 @@
print(lst) # checking the content of lst
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -753,12 +730,12 @@
```r
+lst <- list(v1, df1, 45)
lst[[3]] # returns 45
lst[3] # returns a list of one element (45)
-lst[c(1,2)] # returns a list with the first two elements of "lst"
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
class: inverse, center, middle
@@ -786,44 +763,18 @@
# Basic types of data - Strings
-Now we'll use string data to practice some basic object manipulations in R.
-
-### Exercise 4: Create a vector of strings <font size="5">( 2 min)</font>
-Create a string vector containing the names of commonly used statistical software:
-
-```r
-# Creating string vector
-str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
-```
-
-Now print them to check them out.
-
----
-
-# Basic types of data - Strings
-
+### Exercise 4: Concatenate strings <font size="5">( 3 min)</font>
-```r
-# Creating string vector
-str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
-```
-
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
-
----
-
-# Basic types of data - Strings
+1. Create the following vector of strings: `str_vec <- c("R", "Python", "SAS", "Excel", "Stata")`
-### Exercise 5: Concatenate strings <font size="5">( 3 min)</font>
+1. Create a scalar (a vector of one element) containing the phrase "can be an option to" and call it `str_scalar`. Your code will be similar to this: `str_scalar <- "can be an option to"`
- 1. Create a scalar (a vector of one element) containing the phrase "can be an option to" and call it `str_scalar`. Your code will be similar to this: `str_scalar <- "can be an option to"`
-
- 2. Use the function `paste()` with 3 arguments separated by commas:
+1. Use the function `paste()` with 3 arguments separated by commas:
+ The first argument as the 1st element of `str_vec`.
+ The second argument as the `str_scalar`.
+ The third argument as the 5th element of `str_vec`.
- 3. If you're not sure where to start, type:
+1. If you're not sure where to start, type:
```r
help(paste)
@@ -835,11 +786,12 @@
```r
+str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
str_scalar <- "can be an option to" # creating str_scalar
paste(str_vec[1], str_scalar, str_vec[5]) # using paste()
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -868,20 +820,6 @@
# Advanced types of data
-## Factors
-
-We'll learn more about factors in a later session, since they are important for the kind of analysis we usually do. For now, here are two important things to keep in mind when using them.
-
-Unlike Stata, in R:
-
-1. __You use the labels to refer to factors__
-
-2. __You cannot choose the underlying values__
-
----
-
-# Advanced types of data
-
## Booleans
Boolean data is the result of logical conditions. It can take two possible values: `TRUE` or `FALSE`.
@@ -924,7 +862,7 @@
boolean_false <- FALSE
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -932,7 +870,7 @@
## Booleans
-### Exercise 6 <font size="5">( 3 min)</font>
+### Exercise 5 <font size="5">( 3 min)</font>
Create a boolean vector with the condition of annual income below average:
@@ -959,7 +897,7 @@
head(inc_below_avg) # See the 6 first elements of the vector
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -1011,7 +949,7 @@
boolean2 <- c(FALSE, TRUE, TRUE, TRUE, TRUE) # And this to select every element but the first
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -1019,9 +957,9 @@
## Booleans
-Now let's use the boolean vector `inc_below_avg` to add a dummy variable in the `whr` data set for the same condition.
+Now let's use the boolean vector `inc_below_avg` to add a dummy variable in the `whr` dataframe for the same condition.
-### Exercise 7 <font size="5">( 3 min)</font>
+### Exercise 6 <font size="5">( 3 min)</font>
* Create a column in `whr` containing zeros and call it `rank_low`. You can do this by typing:
@@ -1050,7 +988,7 @@
# this ^ turns its values to 1, for the observations with a TRUE value in inc_below_avg
```
-<iframe src="https://rrmaximiliano.shinyapps.io/learnr-app/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
+<iframe src="https://luizaandrade.shinyapps.io/learnr/?showcase=0" width="100%" height="400px" data-external="1"></iframe>
---
@@ -1141,26 +1079,26 @@
Here are some advantages of R:
-* R is a free and open source software!
+* R is free and open source software, a huge advantage for open science
-* It allows you to have several datasets open simultaneously.
+* It allows you to have several dataframes open simultaneously
+ No need to use `keep`, `preserve`, `restore`
-* It can run complex Geographic Information System (GIS) analyses.
+* It can run complex Geographic Information System (GIS) analyses
-* You can use it for web scrapping.
+* You can use it for web scraping and APIs
-* You can run machine learning algorithms with it.
+* You can easily run machine learning algorithms with it
-* You can create complex Markdown documents. This presentation, for example, is entirely done in R.
+* You can create complex Markdown documents. This presentation, for example, is entirely done in R
-* You can create interactive dashboards and online applications with the Shiny package.
+* You can create interactive dashboards and online applications with the Shiny package
---
# Appendix - Syntax
-R's syntax is a bit heavier than Stata's:
+R's syntax is heavier than Stata's:
* Parentheses to separate function names from its arguments.
* Commas to separate arguments.
@@ -1359,7 +1297,7 @@
Help in R works very much like in Stata: the help files usually start with a brief description of the function, explain its syntax and arguments and list a few examples. There are two ways to access help files:
-## Exercise 8: Use help
+## Exercise 7: Use help
```r
diff --git a/Presentations/01-intro-to-R.pdf b/Presentations/01-intro-to-R.pdf
index 083ff98..c12e085 100644
Binary files a/Presentations/01-intro-to-R.pdf and b/Presentations/01-intro-to-R.pdf differ
diff --git a/Presentations/02-intro-to-R-programming.Rmd b/Presentations/02-intro-to-R-programming.Rmd
index 40800a1..b6ad390 100644
--- a/Presentations/02-intro-to-R-programming.Rmd
+++ b/Presentations/02-intro-to-R-programming.Rmd
@@ -1,8 +1,8 @@
---
title: "Session 2: Introduction to R Programming"
subtitle: "R for Stata Users"
-author: "Luiza Andrade, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti"
-date: "The World Bank | [WB Github](https://github.com/worldbank) May 2022"
+author: "Luiza Andrade, Marc-Andrea Fiorina, Rob Marty, Rony Rodriguez-Ramirez, Luis Eduardo San Martin, Leonardo Viotti"
+date: "The World Bank | [WB Github](https://github.com/worldbank) March 2023"
output:
xaringan::moon_reader:
css: ["libs/remark-css/default.css",
@@ -74,16 +74,13 @@ whr <- read_csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
# Table of contents
1. [Introduction](#introduction)
-2. [Initial settings](#initial-settings)
-3. [File paths](#file-paths)
-4. [Exploring a dataset](#exploring-a-dataset)
-5. [Creating a document outline in RStudio](#creating-a-document-outline-in-rstudio)
-6. [Using packages](#using-packages)
-7. [Functions inception](#functions-inception)
-8. [Mapping and iterations](#mapping-and-iterations)
-9. [Custom functions](#custom-functions)
-10. [Indentation](#indentation)
-11. [Appendix](#appendix)
+1. [Initial settings](#initial-settings)
+1. [File paths](#file-paths)
+1. [Using packages](#using-packages)
+1. [Functions inception](#functions-inception)
+1. [Mapping and iterations](#mapping-and-iterations)
+1. [Custom functions](#custom-functions)
+1. [Appendix](#appendix)
---
@@ -133,7 +130,7 @@ name: initial-settings
* Notice two things:
- 1. Your environment is *probably* empty (it's OK if it's not)
+ 1. Your environment is *probably* empty (it's okay if it's not)
```{r echo = FALSE, out.width = "60%"}
knitr::include_graphics("img/empty_environment.png")
@@ -181,18 +178,18 @@ knitr::include_graphics("img/stataheader.jpg")
# Initial settings
+Have you ever seen these lines of code before?
+
+```{r echo = FALSE, out.width = "40%"}
+knitr::include_graphics("img/stataheader.jpg")
+```
+
* We __don't need to set the memory or the maximum number of variables__ in R
* The equivalent of `set more off` is the default
* The equivalent of `clear all` is not a default setting, but we'll change that in exercise 1
-* In any case, remember that you can see all the objects in your computer's memory at any point in the `Environment` panel
-
-```{r echo = FALSE, out.width = "65%"}
-knitr::include_graphics("img/environment_2vars_2021.png")
-```
-
---
# Initial settings
@@ -263,55 +260,69 @@ setwd("your/path")
* Instead, you should use RStudio projects and the `here` library
-> __Important:__ We won't get into the specifics of directory organization here, but we'll assume that all the files you use for a specific project (data, scripts, and outputs) reside in the same project directory. We'll call this the __working directory__.
-
* RStudio projects let you "bind" your project files to a root directory, regardless of the path to it
* This is crucial because it allows smooth interoperability between different computers where the exact path to the project root directory differs
* Additionally, each RStudio project you work on keeps their own history of commands!
+__Important:__ We won't get into the specifics of directory organization here, but we'll assume that all the files you use for a specific project (data, scripts, and outputs) reside in the same project directory. We'll call this the __working directory__.
+
---
# RStudio projects
.exercise[
-### Exercise 2 `r fa("keyboard")` (`r fa("clock")` 1 min)
+### Exercise 2 `r fa("keyboard")` (`r fa("clock")` 3 min)
+
+1. Create a folder named `dime-r-training-mar2023` in your preferred location in your computer
+
+1. Go to https://osf.io/86g3b/ and download the file in: `R for Stata Users - 2023 March` > `Data` > `DataWork.zip`
+
+1. Unzip `DataWork.zip` in the folder `dime-r-training-mar2023`
1. On RStudio, select `File` > `New Project...`
-2. Select `New Directory` > `New Project`
+2. Select `Existing Directory`
-3. Assign the name: `dime-r-training-project` to the project
+3. Browse to the location of `dime-r-training-mar2023` and select `Create Project`
]
---
# RStudio projects
-```{r echo = FALSE, out.width = "80%"}
+```{r echo = FALSE, out.width = "60%"}
knitr::include_graphics("img/dime-r-training-project.png")
```
---
+# RStudio projects
+
+```{r echo = FALSE, out.width = "60%"}
+knitr::include_graphics("img/dime-r-training-project-dir.png")
+```
+
+---
+
# The `here` library
* `here` locates files relative to your project root
* It uses the root project directory to build paths to files easily
-* Similar to RStudio projects, it allows for interoperability between different computers where the absolute path to the same file is not the same
+* It allows for interoperability between different computers where the absolute path to the same file is not the same
---
# Usage of `here`
-- Install and load the `here` library:
+- Load `here`
```{r, eval=FALSE}
-install.packages("here")
+install.packages("here") # install first if you don't have it
library(here)
```
@@ -337,13 +348,7 @@ df <- read.csv(path)
### Exercise 3 `r fa("keyboard")` (`r fa("clock")` 3 min)
-1. Go to the [OSF page of the course](https://osf.io/86g3b/) and download the file in: `R for Stata Users - 2022 May` > `Data` > `DataWork.zip`
-
-2. Unzip the file in your RStudio project root folder. This is the folder where the file `dime-r-training-project.Rproj` sits
-
-3. On RStudio, go to `File` > `New File` > `R Script` and save this new empty script in `DataWork` > `Code` as `exercises-session2.R`
-
-5. Now let's test if that worked. Load the `here` library and read the `csv` file `DataWork/DataSets/Final/whr_panel.csv` using `here()`
+1. Load `here` and read the `.csv` file in `DataWork/DataSets/Final/whr_panel.csv` using `here()`
+ Use the function `read.csv()` to load the file. The argument for `read.csv()` is the result of `here()`
+ Remember to assign the dataframe you're reading to an object. You can call it `whr` as we did yesterday
@@ -363,7 +368,7 @@ whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
# RStudio projects and `here`
-If you did the exercise correctly, you should see the `whr` data frame listed in the Environment panel
+If you did the exercise correctly, you should see the `whr` dataframe listed in the Environment panel
```{r echo = FALSE, out.width = "80%"}
knitr::include_graphics("img/environment_2021.png")
@@ -371,151 +376,6 @@ knitr::include_graphics("img/environment_2021.png")
---
-class: inverse, center, middle
-name: creating-a-document-outline-in-rstudio
-
-# Creating a document outline in RStudio
-
-
-
----
-
-# Document outline
-
-* RStudio allows you to __create an interactive index__ for your scripts
-
-* To add a section to your code, create a commented line with the title of your section and add at least 4 trailing dashes (`----`), pound signs (`####`) or equal signs (`====`) after it
-
----
-
-# Document outline
-
-.exercise[
-
-### Exercise 4 `r fa("keyboard")` (`r fa("clock")` 1 min)
-
-1. In your script, add a header before the line where you used `library(here)` with the text: `# Part 1: Loading libraries----`
-
-2. Before `read.csv(...)`, add the following header: `Part 2: Loading data----`
-
- + Remember: you create a section header by adding at least 4 trailing dashes (`-`), pound (`#`) or equal (`=`) signs in a comment line
-
-3. Note that once you create a section header, an arrow appears right next to the row number. Click on the arrows to see what happens.
-]
-
----
-
-# Document outline
-
-* The outline can be accessed by clicking on the button on the top right corner of the script window. You can use it to jump from one section to another
-
-* You can also use the keyboard shortcuts `Alt + L` (`Cmd + Option + L` on Mac) and `Alt + Shift + L` to collapse and expand sections
-
-```{r echo = FALSE, out.width = "65%"}
-knitr::include_graphics("img/document-outline.png")
-```
-
----
-
-class: inverse, center, middle
-name: exploring-a-dataset
-
-# Exploring a dataset
-
-
-
----
-
-# Exploring a dataset
-
-Some useful functions:
-
-* **`View()`:** opens the data set
-
-* **`class()`:** reports object type or type of data stored
-
-* **`dim()`:** reports the size of each one of an object's dimension
-
-* **`names()`:** returns the variable names of a data set
-
-* **`str()`:** general information about the structure of an R object
-
-* **`summary()`:** summary information about the variables in a data frame
-
-* **`head()`:** shows the first few observations in the dataset
-
-* **`tail()`:** shows the last few observations in the dataset
-
----
-
-# Exploring a dataset
-
-```{r, eval = F}
-# View the data set (notice the uppercase "V")
-View(whr)
-```
-
-This is the same as clicking on the object name in the environment panel. It opens a spreadsheet-style data viewer of a dataframe.
-
-```{r echo = FALSE, out.width = "80%"}
-knitr::include_graphics("img/whr_viewer.png")
-```
-
----
-
-# Exploring a dataset
-
-```{r, eval = F}
-# Object type and dimensions
-class(whr)
-dim(whr)
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
-```
-
----
-
-# Exploring a dataset
-
-```{r, eval = F}
-# Object structure
-str(whr)
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
-```
-
----
-
-# Exploring a dataset
-
-```{r, eval = F}
-# Summarize a dataframe
-summary(whr)
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
-```
-
----
-
-# Exploring a dataset
-
-```{r, eval = F}
-# Printing the first rows of a dataframe
-head(whr)
-```
-
-```{r echo=FALSE}
-knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
-```
-
----
-
class: inverse, center, middle
name: using-packages
@@ -527,21 +387,21 @@ name: using-packages
# Packages
-* Since there is a lot of people developing for R, it can have many different functionalities.
+* Since there are a lot of people developing for R, it can have many different functionalities
-* To make it simpler, these functionalities are bundled into packages.
+* To make it simpler, these functionalities are bundled into packages
-* A package is just __a unit of shareable code__.
+* A package is just __a unit of shareable code__
---
# Packages
-* It may contain new functions, but also more complex functionalities, such as a Graphic User Interface (GUI) or settings for parallel processing (similar to Stata MP).
+* Packages may contain new functions, but also more complex functionalities, such as a Graphic User Interface (GUI) or settings for parallel processing (similar to Stata MP)
-* They can be shared through R's official repository - CRAN (18,000+ packages reviewed and tested).
+* They are usually shared through R's official repository - CRAN (19,000+ packages reviewed and tested)
-* There are many other online sources such as GitHub, but it's important to be careful, as these probably haven't gone through a review process as rigorous as those in CRAN.
+* There are many other online sources such as GitHub, but it's important to be careful, as these probably haven't gone through a review process as rigorous as those in CRAN
---
@@ -551,7 +411,7 @@ name: using-packages
```{r, eval = F}
# Installing a package
-install.packages("tidyverse",
+install.packages("dplyr",
dependencies = TRUE)
# the dependencies argument also installs all other packages
# that it may depend upon to run
@@ -565,10 +425,10 @@ install.packages("tidyverse",
.exercise[
-### Exercise 5 `r fa("keyboard")` (`r fa("clock")` 1 min)
+### Exercise 4 `r fa("keyboard")` (`r fa("clock")` 1 min)
-1. Load the `tidyverse` meta package in part 1 of your script using `library(tidyverse)`
-2. Run your script
+1. Load the packages `dplyr` and `purrr` in part 1 of your script using `library(dplyr)` and `library(purrr)`
+1. Run your script
]
@@ -579,17 +439,17 @@ install.packages("tidyverse",
What if this happens?
```{r echo = FALSE, out.width = "70%"}
-knitr::include_graphics("img/warning_2021.png")
+knitr::include_graphics("img/Warning.png")
```
---
# Warnings vs errors
-R has two types of error messages, `warnings` and actual `errors`:
+R has two types of error messages, warnings and actual errors:
- * `Errors` - break your code, usually preventing it from running.
- * `Warnings` - usually mean that nothing went wrong yet, but you should be careful.
+ * **Errors** - break your code, usually preventing it from running
+ * **Warnings** - your code kept running, but R wants you to be aware of something that might be a problem later
RStudio's default is to print warning messages, but not to stop the code at the lines where they occur. You can configure R to stop at warnings if you want.
@@ -653,7 +513,7 @@ knitr::include_graphics("img/statalog.png")
* Metaprogramming is a __very powerful technique__, as you will soon see
-* It's __also a common source of error__, as you can only use one function inside the other if the output of the inner function is the same as the input of the outer function
+* It's __also a common source of error__, as you can only use one function inside the other if the output of the inner function can be taken as the input of the outer function
* It can also get quite tricky to follow what a line of code with multiple functions inceptions is doing
@@ -716,9 +576,13 @@ name: mapping-and-iterations
}
```
+---
+
+# Map
+
* R, however, has a set of functions that allows users to loop through an object __in a more efficient way__, without using explicit loops
-* In this training we'll introduce `map()`. It is a function part of the tidyverse meta package
+* In this training we'll introduce `map()`. It is a function from `purrr`, a package that contains tools for functional programming
* Also, in case you have not noticed yet: __R is vectorized!__ this means that many operations are applied element-wise by default so you don't have to code loops to apply them to each element of a vector or dataframe
@@ -726,15 +590,15 @@ name: mapping-and-iterations
# Map
-* To use `map()`, you need to either load the package `purrr` eithre by itself or alongside other `tidyverse` packages
+* To use `map()`, you need to load the package `purrr`
* The basic syntax of `map()` is:
.command[
-**`map(X, function, ...)`:** applies `function` to each of the elements of `X`. If `X` is a data frame then `function` is applied column-wise while if it's a vector or a list it is applied item-wise. The output of `map()` is always a list with the results.
+**`map(X, function, ...)`:** applies `function` to each of the elements of `X`. If `X` is a dataframe then `function` is applied column-wise while if it's a vector or a list it is applied item-wise. The output of `map()` is always a list with the results.
- + **X:** a data frame, matrix or vector the function will be applied to
+ + **X:** a dataframe, matrix or vector the function will be applied to
+ **function:** the name of the function you want to apply to each of the elements of `X`
]
@@ -768,6 +632,10 @@ knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
+ You can collect the answers by asking each one of them individually -- __this is looping__
+ Otherwise, you can ask them to raise their hands and collect all answers at once -- __this is `map()`__
+---
+
+# Map vs looping
+
* The output of a loop is the regular output of the operation you're repeating, times the number of iterations you did
* The output of `map()` will be always a list
@@ -783,12 +651,12 @@ knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
.exercise[
-### Exercise 6: Looping over a dataframe (`r fa("clock")` 3 min)
+### Exercise 5: Looping over a dataframe (`r fa("clock")` 3 min)
-- Create a toy dataframe of 70,000 columns and 400 observations using this code
+- Create a toy dataframe of 50,000 columns and 400 observations using this code
```{r, eval = FALSE}
-df <- data.frame(replicate(70000, sample(1:100, 400, replace=TRUE)))
+df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
```
- Create an empty vector named `col_means_loop` where you will store column means with this code: `col_means_loop <- c()`
@@ -814,12 +682,12 @@ for (column in df) {
The solution is this:
```{r, eval=FALSE}
-df <- data.frame(replicate(70000, sample(1:100, 400, replace=TRUE)))
+df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
col_means_loop <- c()
-for (col in df){
- col_means_loop <- append(col_means_loop, mean(col))
+for (column in df){
+ col_means_loop <- append(col_means_loop, mean(column))
}
```
@@ -829,16 +697,17 @@ for (col in df){
.exercise[
-### Exercise 7: Now use `map()` `r fa("keyboard")` (`r fa("clock")` 1 min)
+### Exercise 6: Now use `map()` `r fa("keyboard")` (`r fa("clock")` 1 min)
-- Use `map()` to produce a list with the means of the columns of `df`
-- Store the result in a list named `col_means_map`
+1. Use `map()` to produce a list with the means of the columns of `df`
+1. Store the result in a list named `col_means_map`
-> Hint:
- + Remember the syntax of `map()`: `map(X, function_name)`
-
]
+Hints:
+ + Remember the syntax of `map()`: `map(X, function_name)`
+ + The function name inside `map()` shouldn't have parentheses next to it (i.e.: `mean` instead of `mean()`)
+
---
# Map vs looping
@@ -846,7 +715,10 @@ for (col in df){
Compare the syntax of the solutions of both exercises:
```{r, eval=FALSE}
-# Looping exercise
+# Dataframe creation
+df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
+
+# Loop exercise
col_means_loop <- c()
for (col in df){
@@ -863,7 +735,7 @@ Do you remember which one ran faster?
# Map vs looping
-Last but not least, remember we said that loops produce side effects?
+Also, remember we said that loops produce side effects?
```{r echo = FALSE, out.width = "50%"}
knitr::include_graphics("img/loop-side-effects.png")
@@ -921,7 +793,7 @@ knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
.exercise[
-### Exercise 8 `r fa("keyboard")` (`r fa("clock")` 2 min)
+### Exercise 7 `r fa("keyboard")` (`r fa("clock")` 2 min)
Create a function named `zscore` that standardizes the values of a vector.
@@ -969,20 +841,19 @@ knitr::include_app("https://luizaandrade.shinyapps.io/learnr/")
.exercise[
-### Exercise 9 `r fa("keyboard")`
+### Exercise 8 `r fa("keyboard")`
1. Subselect the columns `health_life_expectancy` and `freedom` in `whr`
- + Use tidyverse's `select()` for this, as in: `whr %>% select(freedom, happiness_score)`
+ + Use dplyr's `select()` for this, as in: `whr %>% select(freedom, happiness_score)`
2. Use `map()` combined with the `zscore` function to get the z-score of these two columns and assign the resulting list to an object named `z_scores`
3. Use list indexing on `z_scores` to generate two new columns in `whr` with the standardized values of `health_life_expectancy` and `freedom`
-> **Hints:**
-* Use the pipes (`%>%`) successively
-* Remember that we don't use parenthesis next to the function name we're using `map()` with
-* Remember that we use double brackets instead of single brackets to index the actual elements of a list
+ **Hints:**
+* Don't use parentheses next to the function name we're using `map()` with
+* Use double brackets (`[[ ]]`) or the symbol `$`, rather than single brackets, to index the elements of a list
]
@@ -1023,11 +894,11 @@ name: appendix
* `.RData` stores the objects in your environment only if you save your workspace, and loads them again in the next RStudio session
-* Both files are relative to the working directory where your RStudio session started
+* Both files are stored in the working directory where your RStudio session started
---
-# Appendix - Using packages
+# Appendix - More on packages
Once a package is loaded, you can use its features and functions. Here's a list of some useful packages:
@@ -1037,21 +908,13 @@ Once a package is loaded, you can use its features and functions. Here's a list
* `stargazer` - awesome latex regression and summary statistics tables
* `foreign` - reads `.dta` and other formats from inferior statistical software
* `zoo` - time series and panel data manipulation useful functions
-* `data.table` - some functions to deal with huge data sets
+* `data.table` - some functions to deal with huge dataframes
* `sp` and `rgeos` - spatial analysis
* `multiwayvcov` and `sandwich` - clustered and robust standard errors
* `RODBC`, `RMySQL`, `RPostgreSQL`, `RSQLite` - For relational databases and using SQL in R.
---
-# Appendix - Resources
-
-* A discussion of folder structure and data managament can be found here: https://dimewiki.worldbank.org/wiki/DataWork_Folder
-
-* For a broader discussion of data management, go to https://dimewiki.worldbank.org/wiki/Data_Management
-
----
-
# Appendix - Git
Git is a version-control system for tracking changes in code and other text files. It is a great resource to include in your work flow.
@@ -1071,41 +934,11 @@ https://r4ds.had.co.nz/workflow-projects.html
---
-# Appendix - Commenting
-
-* To comment a line, write `#` as its first character
-
-```{r, eval=FALSE}
-# This is a comment
-print("But this part is not")
-```
-
-* You can also add `#` halfway through a line to comment whatever comes after it
-
-```{r, eval=FALSE}
-print("This part is not a comment") # And this is a comment
-```
-
-* In Stata, you can use `/*` and `*/` to comment in the middle of a line's code. That is not possible in R: everything that comes after `#` will always be a comment
-
-* To comment a selection of lines, press `Ctrl` + `Shift` + `C`
-
----
-
-# Appendix - Assignment 1
-
-.exercise[
-
-### Exercise `r fa("keyboard")`
-
-1. In your script panel, select all the lines of your script
+# Appendix - More on folder management
-2. Use the keyboard shortcut to comment these lines.
- + Shortcut: `Ctrl` + `Shift` + `C`
-
-3. Use the keyboard shortcut to comment these lines again. What happened?
+* A discussion of folder structure and data management can be found here: https://dimewiki.worldbank.org/wiki/DataWork_Folder
-]
+* For a broader discussion of data management, go to https://dimewiki.worldbank.org/wiki/Data_Management
---
@@ -1162,7 +995,7 @@ for (col in colnames(whr)) {
# Appendix - Apply
-* Apart from tidyverse's `map()`, base R also has a set of functions that allows users to apply a function to a number of objects without using explicit loops
+* Apart from purrr's `map()`, base R also has a set of functions that allows users to apply a function to a number of objects without using explicit loops
* They're called `apply` and there are many of them, with different use cases
@@ -1182,10 +1015,10 @@ sapply(X, FUN, ...)
* Its main arguments are:
- + **X:** a data frame, matrix or vector the function will be applied to
+ + **X:** a dataframe, matrix or vector the function will be applied to
+ **FUN:** the function you want to apply
-* `sapply()` applies the function (`FUN`) to all the elements of `X`. If `X` is a data frame then the function is applied column-wise, while if it's a vector or a list it is applied item-wise
+* `sapply()` applies the function (`FUN`) to all the elements of `X`. If `X` is a dataframe then the function is applied column-wise, while if it's a vector or a list it is applied item-wise
* The output of `sapply()` is usually a vector with the results, but it can be a matrix if the results have more than one dimension
@@ -1228,7 +1061,7 @@ apply(X, MARGIN, FUN, ...)
* Arguments:
- + **X:** a data frame (or matrix) the function will be applied to
+ + **X:** a dataframe (or matrix) the function will be applied to
+ **MARGIN:** 1 to apply the function to all rows or 2 to apply the function to all columns
+ **FUN:** the function you want to apply
@@ -1246,7 +1079,7 @@ apply(matrix, 2, mean) # column means
---
-# Appendix - Assignment 2
+# Appendix - Assignment 1
### Exercise: Get the row max
@@ -1273,6 +1106,64 @@ whr %>%
---
+# Appendix - Commenting
+
+* To comment a line, write `#` as its first character
+
+```{r, eval=FALSE}
+# This is a comment
+print("But this part is not")
+```
+
+* You can also add `#` halfway through a line to comment whatever comes after it
+
+```{r, eval=FALSE}
+print("This part is not a comment") # And this is a comment
+```
+
+* In Stata, you can use `/*` and `*/` to comment in the middle of a line's code. That is not possible in R: everything that comes after `#` will always be a comment
+
+* To comment a selection of lines, press `Ctrl` + `Shift` + `C`
+
+---
+
+# Appendix - Assignment 2
+
+.exercise[
+
+### Exercise `r fa("keyboard")`
+
+1. In your script panel, select all the lines of your script
+
+2. Use the keyboard shortcut to comment these lines.
+ + Shortcut: `Ctrl` + `Shift` + `C`
+
+3. Use the keyboard shortcut to comment these lines again. What happened?
+
+]
+
+---
+
+# Appendix - Document outline
+
+* RStudio allows you to __create an interactive index__ for your scripts
+
+* To add a section to your code, create a commented line with the title of your section and add at least 4 trailing dashes (`----`), pound signs (`####`) or equal signs (`====`) after it
+
+---
+
+# Appendix - Document outline
+
+* The outline can be accessed by clicking on the button on the top right corner of the script window. You can use it to jump from one section to another
+
+* You can also use the keyboard shortcuts `Alt + L` (`Cmd + Option + L` on Mac) and `Alt + Shift + L` to collapse and expand sections
+
+```{r echo = FALSE, out.width = "65%"}
+knitr::include_graphics("img/document-outline.png")
+```
+
+---
+
# Appendix - Indentation
```{r, eval = F}
@@ -1376,6 +1267,28 @@ map(c(1.2,
round)
```
+---
+
+# Appendix - Exploring a dataframe
+
+Some useful functions:
+
+* **`View()`:** opens a visualization of the dataframe
+
+* **`class()`:** reports object type or type of data stored
+
+* **`dim()`:** reports the size of each of an object's dimensions
+
+* **`names()`:** returns the variable names of a dataframe
+
+* **`str()`:** general information about the structure of an R object
+
+* **`summary()`:** summary information about the variables in a dataframe
+
+* **`head()`:** shows the first few observations in the dataframe
+
+* **`tail()`:** shows the last few observations in the dataframe
+
---
exclude: true
diff --git a/Presentations/02-intro-to-R-programming.html b/Presentations/02-intro-to-R-programming.html
index 65402f9..4e11324 100644
--- a/Presentations/02-intro-to-R-programming.html
+++ b/Presentations/02-intro-to-R-programming.html
@@ -3,7 +3,7 @@
Session 2: Introduction to R Programming
-
+
@@ -25,10 +25,18 @@