Skip to content

sanaakadi/my-IODSproject

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 

Repository files navigation

my-IODSproject

# Read the data into memory (tab-separated text with a header row)
lrn14 <- read.table(
  "http://www.helsinki.fi/~kvehkala/JYTmooc/JYTOPKYS3-data.txt",
  sep = "\t",
  header = TRUE
)

# Look at the dimensions of the data
dim(lrn14)

# Look at the structure of the data
str(lrn14) # lrn14 is available

# Division is vectorized: each number in the vector is divided by 2
c(1, 2, 3, 4, 5) / 2

# Print the "Attitude" column vector of the lrn14 data
lrn14$Attitude

# Divide each number in the column vector by 10 (printed, not stored)
lrn14$Attitude / 10

# Create column "attitude" by scaling the column "Attitude"
lrn14$attitude <- lrn14$Attitude / 10

# lrn14 is available
# NOTE(review): this section appears three times in this file; re-running it
# is harmless because each assignment overwrites the same objects.

# Access the dplyr library
library(dplyr)

# Questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# Select the columns related to deep learning and create column 'deep'
# as the row-wise mean of those columns
deep_columns <- select(lrn14, one_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# Select the columns related to surface learning and create column 'surf'
# as the row-wise mean of those columns
surface_columns <- select(lrn14, one_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# Select the columns related to strategic learning and create column 'stra'
# as the row-wise mean of those columns
strategic_columns <- select(lrn14, one_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

# lrn14 is available
# NOTE(review): this section appears three times in this file; re-running it
# is harmless because each assignment overwrites the same objects.

# Access the dplyr library
library(dplyr)

# Questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# Select the columns related to deep learning and create column 'deep'
# as the row-wise mean of those columns
deep_columns <- select(lrn14, one_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# Select the columns related to surface learning and create column 'surf'
# as the row-wise mean of those columns
surface_columns <- select(lrn14, one_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# Select the columns related to strategic learning and create column 'stra'
# as the row-wise mean of those columns
strategic_columns <- select(lrn14, one_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

# lrn14 is available
# NOTE(review): this section appears three times in this file; re-running it
# is harmless because each assignment overwrites the same objects.

# Access the dplyr library
library(dplyr)

# Questions related to deep, surface and strategic learning
deep_questions <- c(
  "D03", "D11", "D19", "D27", "D07", "D14",
  "D22", "D30", "D06", "D15", "D23", "D31"
)
surface_questions <- c(
  "SU02", "SU10", "SU18", "SU26", "SU05", "SU13",
  "SU21", "SU29", "SU08", "SU16", "SU24", "SU32"
)
strategic_questions <- c(
  "ST01", "ST09", "ST17", "ST25", "ST04", "ST12", "ST20", "ST28"
)

# Select the columns related to deep learning and create column 'deep'
# as the row-wise mean of those columns
deep_columns <- select(lrn14, one_of(deep_questions))
lrn14$deep <- rowMeans(deep_columns)

# Select the columns related to surface learning and create column 'surf'
# as the row-wise mean of those columns
surface_columns <- select(lrn14, one_of(surface_questions))
lrn14$surf <- rowMeans(surface_columns)

# Select the columns related to strategic learning and create column 'stra'
# as the row-wise mean of those columns
strategic_columns <- select(lrn14, one_of(strategic_questions))
lrn14$stra <- rowMeans(strategic_columns)

---
title: "chapter4.Rmd"
author: "sanaa kadi"
date: "19 11 2020"
output: html_document
---

# Echo the R source of every chunk in the knitted document
knitr::opts_chunk$set(echo = TRUE)

# access the MASS package
library(MASS)

# load the data
data("Boston")

# explore the dataset: structure first, then per-column summary statistics
str(Boston)
summary(Boston)

# plot matrix of the variables
pairs(Boston)

R Markdown

# access the MASS package
library(MASS)

# load the data
data("Boston")

# explore the dataset
# (lines starting with "#>" are console output pasted into this file)
str(Boston)
#> 'data.frame': 506 obs. of 14 variables:
#> $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
#> $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
#> $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
#> $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
#> $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
#> $ rm : num 6.58 6.42 7.18 7 7.15 ...
#> $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
#> $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
#> $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
#> $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
#> $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
#> $ black : num 397 397 393 395 397 ...
#> $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
#> $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...

summary(Boston)
#> crim zn indus chas
#> Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
#> 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
#> Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
#> Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
#> 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
#> Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
#> nox rm age dis
#> Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
#> 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
#> Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
#> Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
#> 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
#> Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
#> rad tax ptratio black
#> Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
#> 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
#> Median : 5.000 Median :330.0 Median :19.05 Median :391.44
#> Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
#> 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
#> Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
#> lstat medv
#> Min. : 1.73 Min. : 5.00
#> 1st Qu.: 6.95 1st Qu.:17.02
#> Median :11.36 Median :21.20
#> Mean :12.65 Mean :22.53
#> 3rd Qu.:16.95 3rd Qu.:25.00
#> Max. :37.97 Max. :50.00

plot matrix of the variables

pairs(Boston)

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Summary statistics of the built-in 'cars' dataset
summary(cars)

# plot matrix of the variables
# R is case-sensitive: the dataset is loaded as `Boston` (via data("Boston")),
# so the lowercase name `boston` would fail with "object not found".
pairs(Boston)

# access the MASS package
library(MASS)

# load the data
data("Boston")

# explore the dataset
# (lines starting with "#>" are console output pasted into this file)
str(Boston)
#> 'data.frame': 506 obs. of 14 variables:
#> $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
#> $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
#> $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
#> $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
#> $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
#> $ rm : num 6.58 6.42 7.18 7 7.15 ...
#> $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
#> $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
#> $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
#> $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
#> $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
#> $ black : num 397 397 393 395 397 ...
#> $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
#> $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...

summary(Boston)
#> crim zn indus chas
#> Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
#> 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
#> Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
#> Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
#> 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
#> Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
#> nox rm age dis
#> Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
#> 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
#> Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
#> Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
#> 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
#> Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
#> rad tax ptratio black
#> Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
#> 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
#> Median : 5.000 Median :330.0 Median :19.05 Median :391.44
#> Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
#> 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
#> Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
#> lstat medv
#> Min. : 1.73 Min. : 5.00
#> 1st Qu.: 6.95 1st Qu.:17.02
#> Median :11.36 Median :21.20
#> Mean :12.65 Mean :22.53
#> 3rd Qu.:16.95 3rd Qu.:25.00
#> Max. :37.97 Max. :50.00

plot matrix of the variables

pairs(Boston)

You can also embed plots, for example:


Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
---
output:
  pdf_document: default
  html_document: default
---
# continue to be amazed

*I finally realized that the editing and committing happens on GitHub, not in RStudio. I was getting desperate even though I was trying to understand; I hope this will start to become simpler now.*

- I installed R, then RStudio, and finally Git; I created an account and tried to update my profile.
- I learned how to run some DataCamp exercises.
- I would say that this project is designed for people who want to share their projects publicly so that others can use them too, or for teams who want to share and edit a specific project together.

```{r}
date()
```

Let's learn more this week...

# human with modified GNI and dplyr are available

# columns to keep
keep <- c(
  "Country", "Edu2.FM", "Labo.FM", "Life.Exp", "Edu.Exp",
  "GNI", "Mat.Mor", "Ado.Birth", "Parli.F"
)

# select the 'keep' columns
# Namespace-qualified because MASS, which this file attaches after dplyr,
# also exports a select() that would otherwise mask dplyr's.
human <- dplyr::select(human, dplyr::one_of(keep))

# print out a completeness indicator of the 'human' data
complete.cases(human)

# print out the data along with a completeness indicator as the last column
data.frame(human[-1], comp = complete.cases(human))

# filter out all rows with NA values
human_ <- dplyr::filter(human, complete.cases(human))

# human without NA is available

# look at the last 10 observations
tail(human, 10)

# last index we want to keep
last <- nrow(human) - 7

# choose everything until the last 7 observations
# seq_len() yields an empty index when last is 0, whereas 1:last would give
# c(1, 0) and silently keep the first row.
# NOTE(review): this overwrites the NA-filtered human_ assigned above with rows
# taken from human, which has not been filtered here — confirm this is intended.
human_ <- human[seq_len(last), ]

# add countries as rownames
# NOTE(review): the row names are set on human, not on human_ — confirm.
rownames(human) <- human$Country

---
output:
  pdf_document: default
  html_document: default
---
# continue to be amazed

*I finally realized that the editing and committing happens on GitHub, not in RStudio. I was getting desperate even though I was trying to understand; I hope this will start to become simpler now.*

- I installed R, then RStudio, and finally Git; I created an account and tried to update my profile.
- I learned how to run some DataCamp exercises.
- I would say that this project is designed for people who want to share their projects publicly so that others can use them too, or for teams who want to share and edit a specific project together.

```{r}
date()
```

Let's learn more this week...

# human with modified GNI and dplyr are available

# columns to keep
keep <- c(
  "Country", "Edu2.FM", "Labo.FM", "Life.Exp", "Edu.Exp",
  "GNI", "Mat.Mor", "Ado.Birth", "Parli.F"
)

# select the 'keep' columns
# Namespace-qualified because MASS, which this file attaches after dplyr,
# also exports a select() that would otherwise mask dplyr's.
human <- dplyr::select(human, dplyr::one_of(keep))

# print out a completeness indicator of the 'human' data
complete.cases(human)

# print out the data along with a completeness indicator as the last column
data.frame(human[-1], comp = complete.cases(human))

# filter out all rows with NA values
human_ <- dplyr::filter(human, complete.cases(human))

# human without NA is available

# look at the last 10 observations
tail(human, 10)

# last index we want to keep
last <- nrow(human) - 7

# choose everything until the last 7 observations
# seq_len() yields an empty index when last is 0, whereas 1:last would give
# c(1, 0) and silently keep the first row.
# NOTE(review): this overwrites the NA-filtered human_ assigned above with rows
# taken from human, which has not been filtered here — confirm this is intended.
human_ <- human[seq_len(last), ]

# add countries as rownames
# NOTE(review): the row names are set on human, not on human_ — confirm.
rownames(human) <- human$Country


# modified human, dplyr and the corrplot functions are available

# remove the Country variable
# Namespace-qualified because MASS, which this file attaches after dplyr,
# also exports a select() that would otherwise mask dplyr's.
human_ <- dplyr::select(human, -Country)

# Access GGally
library(GGally)

# visualize the 'human_' variables
ggpairs(human_)

# compute the correlation matrix and visualize it with corrplot
# (this statement was split in two by the pasted snippet below; reassembled)
cor(human_) %>% corrplot()

# tidyr package and human are available
# access the stringr package
library(stringr)

# look at the structure of the GNI column in 'human'
# remove the commas from GNI and print out a numeric version of it
# NOTE(review): the pasted output below is quoted, i.e. still character; wrap
# the call in as.numeric() if numbers are what is wanted.
# `replacement` is spelled out rather than relying on partial matching of `replace`.
str_replace(human$GNI, pattern = ",", replacement = "")
#>   [1] "64992"  "42261"  "56431"  "44025"  "45435"  "43919"  "39568"  "52947" 
#>   [9] "42155"  "32689"  "76628"  "53959"  "79851"  "45636"  "39267"  "35182" 
#>  [17] "33890"  "30676"  "58711"  "36927"  "41187"  "38056"  "43869"  "38695" 
#>  [25] "27852"  "32045"  "33030"  "26660"  "24524"  "25214"  "72570"  "28633" 
#>  [33] "123124" "43978"  "25845"  "23177"  "24500"  "27930"  "52821"  "22050" 
#>  [41] "60868"  "21290"  "25757"  "22916"  "38599"  "22281"  "19409"  "83961" 
#>  [49] "14558"  "16676"  "22352"  "34858"  "18108"  "19283"  "21336"  "20867" 
#>  [57] "12488"  "20070"  "15596"  "13496"  "18192"  "22762"  "17470"  "23300" 
#>  [65] "26090"  "12190"  "7301"   "16509"  "13413"  "15440"  "16159"  "18677" 
#>  [73] "9779"   "16056"  "15175"  "7164"   "20805"  "16428"  "10939"  "11365" 
#>  [81] "11780"  "8178"   "13054"  "11015"  "9943"   "8124"   "9638"   "10605" 
#>  [89] "9765"   "12547"  "7493"   "10729"  "13323"  "9994"   "14911"  "10404" 
#>  [97] "12040"  "9937"   "7415"   "5069"   "7614"   "11883"  "15617"  "12328" 
#> [105] "5327"   "16646"  "5223"   "10512"  "13066"  "16367"  "9788"   "7643"  
#> [113] "4699"   "5567"   "7915"   "7349"   "12122"  "5092"   "5760"   "3044"  
#> [121] "14003"  "6094"   "3432"   "6522"   "4457"   "6850"   "9418"   "6929"  
#> [129] "2517"   "5497"   "3938"   "7176"   "5363"   "2728"   "2803"   "6012"  
#> [137] "2434"   "21056"  "3734"   "3852"   "4680"   "3191"   "2949"   "2918"  
#> [145] "2762"   "2311"   "4866"   "4608"   "6822"   "5542"   "2411"   "5341"  
#> [153] "2803"   "1328"   "1615"   "3560"   "1540"   "2463"   "1456"   "3519"  
#> [161] "3306"   "1228"   "1669"   "1458"   "1613"   "1767"   "3809"   "3276"  
#> [169] "2332"   "2188"   "1885"   "3171"   "747"    "1428"   "1507"   "680"   
#> [177] "805"    "1362"   "1583"   "1123"   "1780"   "1096"   "1591"   "758"   
#> [185] "2085"   "1130"   "581"    "908"    "15722"  "11449"  "12791"  "14242" 
#> [193] "5605"   "3363"   "14301"

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published