Skip to content

Latest commit



379 lines (312 loc) · 15 KB

File metadata and controls

379 lines (312 loc) · 15 KB


# read the data into memory
# NOTE(review): the file argument is an empty string, so no data source is
# named here -- the original path/URL appears to have been lost. Restore it
# before running this line.
lrn14 <- read.table("", sep = "\t", header = TRUE)

# Look at the dimensions of the data
dim(lrn14)

# Look at the structure of the data
str(lrn14) # lrn14 is available

# divide each number in a vector
c(1, 2, 3, 4, 5) / 2

# print the "Attitude" column vector of the lrn14 data
lrn14$Attitude

# divide each number in the column vector
lrn14$Attitude / 10

# create column "attitude" by scaling the column "Attitude"
lrn14$attitude <- lrn14$Attitude / 10

# lrn14 is available

# Access the dplyr library
library(dplyr)

# questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# select the columns related to deep learning and create column 'deep' by
# averaging; all_of() raises an error when a question column is absent,
# whereas one_of() would only warn and average over fewer items
deep_columns <- select(lrn14, all_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# select the columns related to surface learning and create column 'surf' by
# averaging
surface_columns <- select(lrn14, all_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# select the columns related to strategic learning and create column 'stra'
# by averaging
strategic_columns <- select(lrn14, all_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

# lrn14 is available

# Access the dplyr library
library(dplyr)

# questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# select the columns related to deep learning and create column 'deep' by
# averaging; all_of() raises an error when a question column is absent,
# whereas one_of() would only warn and average over fewer items
deep_columns <- select(lrn14, all_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# select the columns related to surface learning and create column 'surf' by
# averaging
surface_columns <- select(lrn14, all_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# select the columns related to strategic learning and create column 'stra'
# by averaging
strategic_columns <- select(lrn14, all_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

# lrn14 is available

# Access the dplyr library
library(dplyr)

# questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# select the columns related to deep learning and create column 'deep' by
# averaging; all_of() raises an error when a question column is absent,
# whereas one_of() would only warn and average over fewer items
deep_columns <- select(lrn14, all_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# select the columns related to surface learning and create column 'surf' by
# averaging
surface_columns <- select(lrn14, all_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# select the columns related to strategic learning and create column 'stra'
# by averaging
strategic_columns <- select(lrn14, all_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

---
title: "chapter4.Rmd"
author: "sanaa kadi"
date: "19 11 2020"
output: html_document
---

# set the knitr chunk option `echo` to TRUE through opts_chunk$set()
knitr::opts_chunk$set(echo = TRUE)

# access the MASS package and load the data
# The data set is loaded straight from MASS without attaching the package, so
# MASS::select cannot mask dplyr::select, which other parts of this file use.
data("Boston", package = "MASS")

# explore the dataset
str(Boston)
summary(Boston)

# plot matrix of the variables
pairs(Boston)


R Markdown

# access the MASS package and load the data
# The data set is loaded straight from MASS without attaching the package, so
# MASS::select cannot mask dplyr::select, which other parts of this file use.
data("Boston", package = "MASS")

# explore the dataset
# (lines starting with "#>" are the console output recorded in the document)
str(Boston)
#> 'data.frame': 506 obs. of 14 variables:
#>  $ crim   : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
#>  $ zn     : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
#>  $ indus  : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
#>  $ chas   : int 0 0 0 0 0 0 0 0 0 0 ...
#>  $ nox    : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
#>  $ rm     : num 6.58 6.42 7.18 7 7.15 ...
#>  $ age    : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
#>  $ dis    : num 4.09 4.97 4.97 6.06 6.06 ...
#>  $ rad    : int 1 2 2 3 3 3 5 5 5 5 ...
#>  $ tax    : num 296 242 242 222 222 222 311 311 311 311 ...
#>  $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
#>  $ black  : num 397 397 393 395 397 ...
#>  $ lstat  : num 4.98 9.14 4.03 2.94 5.33 ...
#>  $ medv   : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
summary(Boston)
#> crim zn indus chas
#> Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
#> 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
#> Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
#> Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
#> 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
#> Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
#> nox rm age dis
#> Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
#> 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
#> Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
#> Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
#> 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
#> Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
#> rad tax ptratio black
#> Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
#> 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
#> Median : 5.000 Median :330.0 Median :19.05 Median :391.44
#> Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
#> 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
#> Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
#> lstat medv
#> Min. : 1.73 Min. : 5.00
#> 1st Qu.: 6.95 1st Qu.:17.02
#> Median :11.36 Median :21.20
#> Mean :12.65 Mean :22.53
#> 3rd Qu.:16.95 3rd Qu.:25.00
#> Max. :37.97 Max. :50.00

plot matrix of the variables

pairs(Boston)

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:


variable matrix of the variables


# access the MASS package and load the data
# The data set is loaded straight from MASS without attaching the package, so
# MASS::select cannot mask dplyr::select, which other parts of this file use.
data("Boston", package = "MASS")

# explore the dataset
# (lines starting with "#>" are the console output recorded in the document)
str(Boston)
#> 'data.frame': 506 obs. of 14 variables:
#>  $ crim   : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
#>  $ zn     : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
#>  $ indus  : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
#>  $ chas   : int 0 0 0 0 0 0 0 0 0 0 ...
#>  $ nox    : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
#>  $ rm     : num 6.58 6.42 7.18 7 7.15 ...
#>  $ age    : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
#>  $ dis    : num 4.09 4.97 4.97 6.06 6.06 ...
#>  $ rad    : int 1 2 2 3 3 3 5 5 5 5 ...
#>  $ tax    : num 296 242 242 222 222 222 311 311 311 311 ...
#>  $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
#>  $ black  : num 397 397 393 395 397 ...
#>  $ lstat  : num 4.98 9.14 4.03 2.94 5.33 ...
#>  $ medv   : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
summary(Boston)
#> crim zn indus chas
#> Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
#> 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
#> Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
#> Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
#> 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
#> Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
#> nox rm age dis
#> Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
#> 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
#> Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
#> Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
#> 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
#> Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
#> rad tax ptratio black
#> Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
#> 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
#> Median : 5.000 Median :330.0 Median :19.05 Median :391.44
#> Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
#> 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
#> Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
#> lstat medv
#> Min. : 1.73 Min. : 5.00
#> 1st Qu.: 6.95 1st Qu.:17.02
#> Median :11.36 Median :21.20
#> Mean :12.65 Mean :22.53
#> 3rd Qu.:16.95 3rd Qu.:25.00
#> Max. :37.97 Max. :50.00

plot matrix of the variables

pairs(Boston)

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
  pdf_document: default
  html_document: default
# continue to be amazed

*I finally realized that the editing and committing happen on GitHub, not in RStudio. I was desperate even though I was trying to understand; I hope this will begin to be simple now.*

- I installed R, then RStudio and finally Git, I created an account and tried to update my profile. 
- I learned how to execute some data camp stuff.
- I could say that this project is designed for people who would like to share their projects with the public so that others can use them too, or for teamwork, so that a team can share and edit a specific project together.


Let's learn more this week...

# human with modified GNI and dplyr are available

# columns to keep
keep <- c(
  "Country", "Edu2.FM", "Labo.FM", "Life.Exp", "Edu.Exp",
  "GNI", "Mat.Mor", "Ado.Birth", "Parli.F"
)

# select the 'keep' columns; all_of() raises an error when one of them is
# absent, whereas one_of() would only warn
human <- select(human, all_of(keep))

# print out a completeness indicator of the 'human' data
complete.cases(human)

# print out the data along with a completeness indicator as the last column
data.frame(human[-1], comp = complete.cases(human))

# filter out all rows with NA values
human_ <- filter(human, complete.cases(human))

# human without NA is available
# NOTE(review): the statements below read `human`, not the filtered `human_`
# created above -- presumably `human` was supplied NA-free by the exercise
# environment at this point; confirm before running this as one script.

# look at the last 10 observations
tail(human, 10)

# last index we want to keep
last <- nrow(human) - 7

# choose everything until the last 7 observations
# seq_len() selects no rows when `last` is 0, where 1:last would be c(1, 0)
# and still pick the first row
human_ <- human[seq_len(last), ]

# add countries as rownames
# NOTE(review): row names are set on `human`, while the trimmed rows live in
# `human_` -- confirm which object is meant to carry them.
rownames(human) <- human$Country

  pdf_document: default
  html_document: default
# continue to be amazed

*I finally realized that the editing and committing happen on GitHub, not in RStudio. I was desperate even though I was trying to understand; I hope this will begin to be simple now.*

- I installed R, then RStudio and finally Git, I created an account and tried to update my profile. 
- I learned how to execute some data camp stuff.
- I could say that this project is designed for people who would like to share their projects with the public so that others can use them too, or for teamwork, so that a team can share and edit a specific project together.


Let's learn more this week...

# human with modified GNI and dplyr are available

# columns to keep
keep <- c(
  "Country", "Edu2.FM", "Labo.FM", "Life.Exp", "Edu.Exp",
  "GNI", "Mat.Mor", "Ado.Birth", "Parli.F"
)

# select the 'keep' columns; all_of() raises an error when one of them is
# absent, whereas one_of() would only warn
human <- select(human, all_of(keep))

# print out a completeness indicator of the 'human' data
complete.cases(human)

# print out the data along with a completeness indicator as the last column
data.frame(human[-1], comp = complete.cases(human))

# filter out all rows with NA values
human_ <- filter(human, complete.cases(human))

# human without NA is available
# NOTE(review): the statements below read `human`, not the filtered `human_`
# created above -- presumably `human` was supplied NA-free by the exercise
# environment at this point; confirm before running this as one script.

# look at the last 10 observations
tail(human, 10)

# last index we want to keep
last <- nrow(human) - 7

# choose everything until the last 7 observations
# seq_len() selects no rows when `last` is 0, where 1:last would be c(1, 0)
# and still pick the first row
human_ <- human[seq_len(last), ]

# add countries as rownames
# NOTE(review): row names are set on `human`, while the trimmed rows live in
# `human_` -- confirm which object is meant to carry them.
rownames(human) <- human$Country

# modified human, dplyr and the corrplot functions are available

# remove the Country variable
human_ <- select(human, -Country)

# Access GGally
library(GGally)

# visualize the 'human_' variables
ggpairs(human_)

# compute the correlation matrix and visualize it with corrplot
# (this statement had been split in two by the interleaved GNI exercise below)
cor(human_) %>% corrplot()

# tidyr package and human are available
# access the stringr package
library(stringr)

# look at the structure of the GNI column in 'human'
str(human$GNI)

# remove the commas from GNI and print out a numeric version of it
# `replacement` is spelled out in full rather than relying on partial matching
# of `replace`. Lines starting with "#>" are the console output recorded in
# the document for the character result.
gni_text <- str_replace(human$GNI, pattern = ",", replacement = "")
gni_text
#>   [1] "64992"  "42261"  "56431"  "44025"  "45435"  "43919"  "39568"  "52947"
#>   [9] "42155"  "32689"  "76628"  "53959"  "79851"  "45636"  "39267"  "35182"
#>  [17] "33890"  "30676"  "58711"  "36927"  "41187"  "38056"  "43869"  "38695"
#>  [25] "27852"  "32045"  "33030"  "26660"  "24524"  "25214"  "72570"  "28633"
#>  [33] "123124" "43978"  "25845"  "23177"  "24500"  "27930"  "52821"  "22050"
#>  [41] "60868"  "21290"  "25757"  "22916"  "38599"  "22281"  "19409"  "83961"
#>  [49] "14558"  "16676"  "22352"  "34858"  "18108"  "19283"  "21336"  "20867"
#>  [57] "12488"  "20070"  "15596"  "13496"  "18192"  "22762"  "17470"  "23300"
#>  [65] "26090"  "12190"  "7301"   "16509"  "13413"  "15440"  "16159"  "18677"
#>  [73] "9779"   "16056"  "15175"  "7164"   "20805"  "16428"  "10939"  "11365"
#>  [81] "11780"  "8178"   "13054"  "11015"  "9943"   "8124"   "9638"   "10605"
#>  [89] "9765"   "12547"  "7493"   "10729"  "13323"  "9994"   "14911"  "10404"
#>  [97] "12040"  "9937"   "7415"   "5069"   "7614"   "11883"  "15617"  "12328"
#> [105] "5327"   "16646"  "5223"   "10512"  "13066"  "16367"  "9788"   "7643"
#> [113] "4699"   "5567"   "7915"   "7349"   "12122"  "5092"   "5760"   "3044"
#> [121] "14003"  "6094"   "3432"   "6522"   "4457"   "6850"   "9418"   "6929"
#> [129] "2517"   "5497"   "3938"   "7176"   "5363"   "2728"   "2803"   "6012"
#> [137] "2434"   "21056"  "3734"   "3852"   "4680"   "3191"   "2949"   "2918"
#> [145] "2762"   "2311"   "4866"   "4608"   "6822"   "5542"   "2411"   "5341"
#> [153] "2803"   "1328"   "1615"   "3560"   "1540"   "2463"   "1456"   "3519"
#> [161] "3306"   "1228"   "1669"   "1458"   "1613"   "1767"   "3809"   "3276"
#> [169] "2332"   "2188"   "1885"   "3171"   "747"    "1428"   "1507"   "680"
#> [177] "805"    "1362"   "1583"   "1123"   "1780"   "1096"   "1591"   "758"
#> [185] "2085"   "1130"   "581"    "908"    "15722"  "11449"  "12791"  "14242"
#> [193] "5605"   "3363"   "14301"

# the numeric version announced by the comment above
as.numeric(gni_text)