From 808817313cb2dee2eae1deeb0195bea573e44c7b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 6 Jan 2025 23:35:50 +0000 Subject: [PATCH] differences for PR #908 --- 04-data-structures-part1.md | 59 ++++++++++++++++++------------------- data/feline-data.csv | 2 +- md5sum.txt | 2 +- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/04-data-structures-part1.md b/04-data-structures-part1.md index 3847908a7..d4c755069 100644 --- a/04-data-structures-part1.md +++ b/04-data-structures-part1.md @@ -237,30 +237,29 @@ No matter how complicated our analyses become, all data in R is interpreted as one of these basic data types. This strictness has some really important consequences. -A user has added details of another cat. This information is in the file -`data/feline-data_v2.csv`. +A user has added details of another cat. We can add an additional row to our cats `data.frame` using `rbind`. ``` r -file.show("data/feline-data_v2.csv") +additional_cat <- data.frame(coat = "tabby", weight = "2.3 or 2.4", likes_catnip = 1) +cats2 <- rbind(cats, additional_cat) +cats2 ``` - -``` r -coat,weight,likes_catnip -calico,2.1,1 -black,5.0,0 -tabby,3.2,1 -tabby,2.3 or 2.4,1 +``` output + coat weight likes_catnip +1 calico 2.1 1 +2 black 5 0 +3 tabby 3.2 1 +4 tabby 2.3 or 2.4 1 ``` -Load the new cats data like before, and check what type of data we find in the -`weight` column: +Let's check what type of data we find in the +`weight` column of our new object: ``` r -cats <- read.csv(file="data/feline-data_v2.csv") -typeof(cats$weight) +typeof(cats2$weight) ``` ``` output @@ -272,11 +271,11 @@ we did on them before, we run into trouble: ``` r -cats$weight + 2 +cats2$weight + 2 ``` ``` error -Error in cats$weight + 2: non-numeric argument to binary operator +Error in cats2$weight + 2: non-numeric argument to binary operator ``` What happened? @@ -292,14 +291,14 @@ is written by the `str()` function: ``` r -str(cats) +str(cats2) ``` ``` output 'data.frame': 4 obs. 
of 3 variables: $ coat : chr "calico" "black" "tabby" "tabby" $ weight : chr "2.1" "5" "3.2" "2.3 or 2.4" - $ likes_string: int 1 0 1 1 + $ likes_catnip: num 1 0 1 1 ``` *Data frames* are composed of rows and columns, where each column has the @@ -555,8 +554,7 @@ Create a new script in RStudio and copy and paste the following code. Then move on to the tasks below, which help you to fill in the gaps (\_\_\_\_\_\_). ``` -# Read data -cats <- read.csv("data/feline-data_v2.csv") +# Using the object `cats2`: # 1. Print the data _____ @@ -568,15 +566,15 @@ _____(cats) # The correct data type is: ____________. # 4. Correct the 4th weight data point with the mean of the two given values -cats$weight[4] <- 2.35 +cats2$weight[4] <- 2.35 # print the data again to see the effect cats # 5. Convert the weight to the right data type -cats$weight <- ______________(cats$weight) +cats2$weight <- ______________(cats2$weight) # Calculate the mean to test yourself -mean(cats$weight) +mean(cats2$weight) # If you see the correct mean value (and not NA), you did the exercise # correctly! @@ -586,7 +584,7 @@ mean(cats$weight) #### 1\. Print the data -Execute the first statement (`read.csv(...)`). Then print the data to the +Print the data to the console ::::::::::::::: solution @@ -601,8 +599,8 @@ Show the content of any variable by typing its name. Two correct solutions: ``` -cats -print(cats) +cats2 +print(cats2) ``` ::::::::::::::::::::::::: @@ -611,7 +609,7 @@ print(cats) The data type of your data is as important as the data itself. Use a function we saw earlier to print out the data types of all columns of the -`cats` table. +`cats2` `data.frame`. ::::::::::::::: solution @@ -628,7 +626,7 @@ here. > ### Solution to Challenge 1.2 > > ``` -> str(cats) +> str(cats2) > ``` #### 3\. Which data type do we need? @@ -636,7 +634,6 @@ here. The shown data type is not the right one for this data (weight of a cat). Which data type do we need? 
-- Why did the `read.csv()` function not choose the correct data type? - Fill in the gap in the comment with the correct data type for cat weight! ::::::::::::::: solution @@ -715,8 +712,8 @@ auto-complete function: Type "`as.`" and then press the TAB key. > There are two functions that are synonymous for historic reasons: > > ``` -> cats$weight <- as.double(cats$weight) -> cats$weight <- as.numeric(cats$weight) +> cats2$weight <- as.double(cats2$weight) +> cats2$weight <- as.numeric(cats2$weight) > ``` :::::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/data/feline-data.csv b/data/feline-data.csv index df1620802..e11def894 100644 --- a/data/feline-data.csv +++ b/data/feline-data.csv @@ -1,4 +1,4 @@ -"coat","weight","likes_string" +"coat","weight","likes_catnip" "calico",2.1,1 "black",5,0 "tabby",3.2,1 diff --git a/md5sum.txt b/md5sum.txt index 31b3b0fd5..3d922676f 100644 --- a/md5sum.txt +++ b/md5sum.txt @@ -6,7 +6,7 @@ "episodes/01-rstudio-intro.Rmd" "04f6b758558750cef962768d78dd63b0" "site/built/01-rstudio-intro.md" "2024-12-03" "episodes/02-project-intro.Rmd" "cd60cc3116d4f6be92f03f5cc51bcc3b" "site/built/02-project-intro.md" "2024-12-03" "episodes/03-seeking-help.Rmd" "d24c310b8f36930e70379458f3c93461" "site/built/03-seeking-help.md" "2024-12-03" -"episodes/04-data-structures-part1.Rmd" "afc6c3ced3677ab088457152f8d84b54" "site/built/04-data-structures-part1.md" "2024-12-03" +"episodes/04-data-structures-part1.Rmd" "7e23998065053be6bd77bdeb403fe0d5" "site/built/04-data-structures-part1.md" "2025-01-06" "episodes/05-data-structures-part2.Rmd" "95c5dd30b8288090ce89ecbf2d3072bd" "site/built/05-data-structures-part2.md" "2024-12-03" "episodes/06-data-subsetting.Rmd" "5d4ce8731ab37ddea81874d63ae1ce86" "site/built/06-data-subsetting.md" "2024-12-03" "episodes/07-control-flow.Rmd" "6a8691c8668737e4202f49b52aeb8ac6" "site/built/07-control-flow.md" "2024-12-03"