Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mar 2023 intro #104

Merged
merged 7 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions DataWork/Code/1-intro-to-R-solutions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## R for Stata Users
## March 2023
## Exercise solutions
## Session: Introduction to R

## Exercise 1 ====
# Read the data file into a data frame named whr.
# "/path/to/data/file" is a placeholder: replace it with the actual
# location of the CSV file on your computer before running this line.
whr <- read.csv("/path/to/data/file")
# note that this was executed through point-and-click
# during the actual session

## Exercise 2 ====
# Subset data: keep only the rows where year equals 2016.
# The result is printed to the console but not stored, so whr is unchanged
subset(whr, year == 2016)
# Check first 6 observations of whr
head(whr)

## Exercise 3 ====
# Subset data and store result in a new df
whr2016 <- subset(whr, year == 2016)
# Display head of new df
head(whr2016)
# Display head of original df (it is not modified by the subsetting above)
head(whr)

## Exercise 4 ====
# Character vector holding the names of five tools
str_vec <- c(
  "R",
  "Python",
  "SAS",
  "Excel",
  "Stata"
)
# Length-one character vector (the closest thing R has to a string "scalar")
str_scalar <- "can be an option to"
# Join the first element, the scalar and the fifth element,
# separated by single spaces
paste(str_vec[1], str_scalar, str_vec[5], sep = " ")

## Exercise 5 ====
# Create boolean vector: TRUE for the rows of whr whose value of
# economy_gdp_per_capita is below the mean of that column
# NOTE(review): mean() is called without na.rm = TRUE, so a single missing
# value in the column would turn every comparison into NA -- confirm the
# column has no missing values
inc_below_avg <- whr$economy_gdp_per_capita < mean(whr$economy_gdp_per_capita)
# See head of vector
head(inc_below_avg)

## Exercise 6 ====
# Create new column (vector) of zeros
whr$rank_low <- 0
# Subset obs with income below average
# and replace values of rank_low with 1 for those obs
# (inc_below_avg is the logical vector created in exercise 5)
whr$rank_low[inc_below_avg] <- 1
53 changes: 53 additions & 0 deletions DataWork/Code/2-intro-to-R-programming-solutions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
## R for Stata Users
## March 2023
## Exercise solutions
## Session: Introduction to R programming

## Exercise 1 ====
# (no coding needed for exercise)

## Exercise 2 ====
# (no coding needed for exercise)

## Exercise 3 ====
# Load the here package, build the path to whr_panel.csv from its folder
# names with here(), and read the CSV file into a data frame named whr
library(here)
whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
# note that this will only work if exercise 2
# was executed correctly

## Exercise 4 ====
# Load the packages used in the remaining exercises:
# dplyr (select() in exercise 8) and purrr (map() in exercises 6 and 8)
#install.packages("dplyr") # uncomment installation if needed
#install.packages("purrr") # uncomment installation if needed
library(dplyr)
library(purrr)

## Exercise 5 ====
# Create dataframe
# replicate() draws 50000 samples of 400 values from 1:100 (with replacement)
# and data.frame() turns each sample into one column.
# No seed is set, so the values differ on every run
df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
# Create empty vector
col_means_loop <- c()
# Loop and append means to vector (will take a few seconds)
# Iterating over a data frame visits one column at a time.
# NOTE(review): growing a vector with append() copies it on every iteration;
# outside of this exercise, preallocate the result or use map()/vapply()
for (column in df){
col_means_loop <- append(col_means_loop, mean(column))
}

## Exercise 6 ====
# Apply mean() to every column of df with map().
# map() returns a list with one element per column
col_means_map <- map(df, mean)
# this will only work if you defined df in exercise 5

## Exercise 7 ====
# Standardize a vector: subtract its mean and divide by its standard
# deviation. Missing values are ignored when computing the mean and the
# standard deviation, and stay NA in the result.
#   x: numeric vector
#   returns: numeric vector of z-scores, the same length as x
zscore <- function(x) {
  x_center <- mean(x, na.rm = TRUE)
  x_spread <- sd(x, na.rm = TRUE)
  (x - x_center) / x_spread
}

## Exercise 8 ====
# Keep the two columns to standardize and apply zscore() to each of them;
# the result has one element per selected column, in the order given to select()
z_scores <- whr %>%
select(health_life_expectancy, freedom) %>%
map(zscore)
# Store the standardized columns back in whr
whr$hle_st <- z_scores[[1]]
whr$freedom_st <- z_scores[[2]]
# this will only run if you created the function
# zscore() in exercise 7
90 changes: 90 additions & 0 deletions DataWork/Code/main.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# ------------------------------------------------------------------------------ #
# #
# DIME #
# Introduction to R for Stata users #
# MAIN SCRIPT #
# #
# ------------------------------------------------------------------------------ #

# PURPOSE: Set-up configurations and run scripts

# NOTES: Version 2

# WRITTEN BY: Luiza Cardoso de Andrade, Leonardo Viotti

# Last modified in Mar 2023

# PART 1: Select sections to run ----------------------------------------------

# One switch per lab: set a switch to 1 to have PART 4 source that lab's
# script, or leave it at 0 to skip it
Lab2 <- 0
Lab3 <- 0
Lab4 <- 0
Lab5 <- 0
Lab6 <- 0

# PART 2: Load packages -----------------------------------------------------

# Packages used across the labs
# NOTE(review): confirm that every package listed here is still available on
# CRAN (in particular rgdal, rgeos and velox) -- install.packages() fails for
# packages that have been archived
packages <- c("readstata13", "foreign",
              "doBy", "broom", "dplyr",
              "stargazer",
              "ggplot2", "plotly", "ggrepel",
              "RColorBrewer", "wesanderson",
              "sp", "rgdal", "rgeos", "raster", "velox",
              "ggmap", "rasterVis", "leaflet",
              "htmlwidgets", "geosphere")

# Install the listed packages that are not installed yet.
# Package names are the row names of installed.packages(); matching against
# the whole matrix would also compare the names with versions, paths and
# dependency fields. local() keeps the helper variable out of the workspace.
local({
  missing_packages <- setdiff(packages, rownames(installed.packages()))
  if (length(missing_packages) > 0) {
    install.packages(missing_packages, dependencies = TRUE)
  }
})

# Load all packages -- this is equivalent to using library(package) for each
# package listed before
invisible(lapply(packages, library, character.only = TRUE))

# PART 3: Set folder paths ---------------------------------------------------

#-------------#
# Root folder #
#-------------#

# Add your username and folder path here (for Windows computers)
# To find out what your username is, type Sys.getenv("USERNAME")
if (Sys.getenv("USERNAME") == "luiza") {

  projectFolder <- "C:/Users/luiza/Documents/GitHub/dime-r-training"

}

# If you're using Mac, just add your folder path, without the if statement

# Stop with a clear message when no root folder was set above, instead of
# failing further down with "object 'projectFolder' not found"
if (!exists("projectFolder")) {
  stop("projectFolder is not defined: add your username and folder path ",
       "in PART 3 of the main script.", call. = FALSE)
}

#--------------------#
# Project subfolders #
#--------------------#

# All subfolder paths are built from the root folder
# NOTE(review): confirm these folder names match the repository layout
rawData   <- file.path(projectFolder, "Data", "Raw")
finalData <- file.path(projectFolder, "Data", "Final")
Code      <- file.path(projectFolder, "Codes")
Output    <- file.path(projectFolder, "Output")


# PART 4: Run selected sections -----------------------------------------------

# Source the script of every lab whose switch was set to 1 in PART 1.
# Each script is looked up inside the folder stored in Code (see PART 3).
# NOTE(review): the file names passed to source() have no ".R" extension --
# confirm they match the names of the scripts in that folder
if (Lab2 == 1) {
source(file.path(Code, "Lab 2 - Coding for Reproducible Research"))
}
if (Lab3 == 1) {
source(file.path(Code, "Lab 3 - Data Processing"))
}
if (Lab4 == 1) {
source(file.path(Code, "Lab 4 - Descriptive Analysis"))
}
if (Lab5 == 1) {
source(file.path(Code, "Lab 5 - Data Visualization"))
}
if (Lab6 == 1) {
source(file.path(Code, "Lab 6 - Spatial Data"))
}
Loading