### handout-script.R
### (forked from kbroman/datacarpentry_R_2017-01-10)
### Creating objects in R
### Vectors and data types
## ## We’ve seen that atomic vectors can be of type character, numeric, integer, and
## ## logical. But what happens if we try to mix these types in a single
## ## vector?
##
## ## What will happen in each of these examples? (hint: use `class()` to
## ## check the data type of your object)
## num_char <- c(1, 2, 3, 'a')
##
## num_logical <- c(1, 2, 3, TRUE)
##
## char_logical <- c('a', 'b', 'c', TRUE)
##
## tricky <- c(1, 2, 3, '4')
##
## ## Why do you think it happens?
##
## ## Can you draw a diagram that represents the hierarchy of the data
## ## types?
# * Can you figure out why `"four" > "five"` returns `TRUE`?
### Presentation of the survey data
## download.file("https://ndownloader.figshare.com/files/2292169",
## "data/portal_data_joined.csv")
## Challenge
## Based on the output of `str(surveys)`, can you answer the following questions?
## * What is the class of the object `surveys`?
## * How many rows and how many columns are in this object?
## * How many species have been recorded during these surveys?
### Factors
## A factor stores categorical data as integer codes plus a set of labels
## (the levels). Unless told otherwise, factor() sorts the levels.
sex <- factor(c("male", "female", "female", "male"))
food <- factor(c("low", "high", "medium", "high", "low", "medium", "high"))
levels(food)

## Put the levels in a meaningful order. The factor is still unordered at
## this point, so summaries such as min() are not defined for it.
food <- factor(food, levels = c("low", "medium", "high"))
levels(food)
## min() signals an error on an unordered factor. try() shows the error
## message without halting the rest of the script when it is run with
## source() or Rscript.
try(min(food)) ## doesn't work

## Declaring the factor as ordered makes min() meaningful.
food <- factor(food, levels = c("low", "medium", "high"), ordered = TRUE)
levels(food)
min(food) ## works!

## Converting a factor whose labels are numbers back to numeric values.
f <- factor(c(1, 5, 10, 2))
as.numeric(f) ## wrong! returns the integer codes, and there is no warning...
as.numeric(as.character(f)) ## works...
as.numeric(levels(f))[f] ## The recommended way.
## Challenge
##
## The code below draws a bar plot of the number of observations
## per treatment.
##
## * In which order are the treatments listed?
##
## * How can you recreate this plot with "control" listed
## last instead of first?
## Treatment label recorded for each of the nine observations.
exprmt <- factor(
  c(
    "treat1", "treat2", "treat1",
    "treat3", "treat1", "control",
    "treat1", "treat2", "treat3"
  )
)
## Count the observations per treatment, then draw those counts as a bar plot.
table(exprmt)
barplot(table(exprmt))
## The data.frame class
## Compare the output of the next two examples: look at how the text columns
## (`animal` and `feel`) are stored when they are read as `factor` (this
## example) and when they are read as `character` (the example that follows).
## `stringsAsFactors = TRUE` is spelled out because the default changed to
## FALSE in R 4.0.0; without it both examples would give identical output.
example_data <- data.frame(
  animal = c("dog", "cat", "sea cucumber", "sea urchin"),
  feel = c("furry", "furry", "squishy", "spiny"),
  weight = c(45, 8, 1.1, 0.8),
  stringsAsFactors = TRUE
)
str(example_data)
## Same data again, this time with `stringsAsFactors = FALSE` so that the text
## columns are kept as plain character vectors.
example_data <- data.frame(
  animal = c("dog", "cat", "sea cucumber", "sea urchin"),
  feel = c("furry", "furry", "squishy", "spiny"),
  weight = c(45, 8, 1.1, 0.8),
  stringsAsFactors = FALSE
)
str(example_data)
## ## Challenge
## ## There are a few mistakes in this hand-crafted `data.frame`;
## ## can you spot and fix them? Don't hesitate to experiment!
## author_book <- data.frame(author_first=c("Charles", "Ernst", "Theodosius"),
## author_last=c(Darwin, Mayr, Dobzhansky),
## year=c(1942, 1970))
## ## Challenge:
## ## Can you predict the class for each of the columns in the following
## ## example?
## ## Check your guesses using `str(country_climate)`:
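## ## * Are they what you expected? Why? Why not?
## ## * What would have been different if we had added `stringsAsFactors = FALSE`
## ## to this call? (Since R 4.0.0 this is already the default; try
## ## `stringsAsFactors = TRUE` to see the other behavior.)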
## ## * What would you need to change to ensure that each column had the
## ## accurate data type?
## country_climate <- data.frame(country=c("Canada", "Panama", "South Africa", "Australia"),
## climate=c("cold", "hot", "temperate", "hot/temperate"),
## temperature=c(10, 30, 18, "15"),
## northern_hemisphere=c(TRUE, TRUE, FALSE, "FALSE"),
## has_kangaroo=c(FALSE, FALSE, FALSE, 1))
## Indexing, Sequences, and Subsetting