forked from rdpeng/RepData_PeerAssessment1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPA1_template.RMD
113 lines (98 loc) · 3.56 KB
/
PA1_template.RMD
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Course project 1
## 1. Let's clean the data set first
```{r}
# Read the data with a path relative to the working directory instead of
# switching to a machine-specific folder with setwd(), so the report can be
# knitted on any computer that has the data file next to the document.
# If only a zipped archive is present, extract it first (the archive is
# presumed to contain activity.csv -- confirm against the repository).
if (!file.exists("activity.csv") && file.exists("activity.zip")) {
  unzip("activity.zip")
}
# na.strings = '' is kept deliberately: missing step counts stay in the data as
# the literal text 'NA', which is what the later chunks compare against when
# they select and count the missing rows.
activity <- read.csv('activity.csv', na.strings = '')
activity$date <- as.Date(as.character(activity$date)) ## change the format of date
head(activity, 5)
```
## 2. Total number of steps taken per day
#### 1. Calculate the total number of steps taken per day
```{r}
library(dplyr)
# Keep only the rows with a recorded step count, make `steps` numeric and turn
# `date` into a factor so it can act as the grouping key.
activity1 <- activity %>%
  filter(steps != "NA") %>%
  mutate(steps = as.numeric(steps), date = as.factor(date))
# Total steps for each date that still has rows after the filter.
steps_per_day <- with(activity1, tapply(steps, date, sum))
print(steps_per_day)
```
#### 2. Make a histogram of the total number of steps per day
```{r}
# Distribution of the daily step totals.
hist(
  steps_per_day,
  main = "Histogram of steps per day",
  xlab = expression("Steps")
)
```
#### 3. Calculate the mean and median values of steps per day
```{r}
# Mean of the daily totals: stored for reuse, then printed for the report.
meanvalue <- mean(steps_per_day)
print(meanvalue)
# Median of the daily totals, handled the same way.
medianvalue <- median(steps_per_day)
print(medianvalue)
```
## 3. Daily activity pattern
#### 1. Time series plot
```{r}
# Average number of steps for each interval across all rows of activity1.
# tapply() returns one value per distinct interval, named by the interval and
# ordered by its sorted value.
steps_per_interval <- tapply(activity1$steps, activity1$interval, mean)
# Take the x coordinates from the names of the averages rather than from
# unique(activity1$interval): unique() keeps first-appearance order, which only
# matches tapply()'s sorted order when the rows happen to be sorted already.
plot(
  as.numeric(names(steps_per_interval)), steps_per_interval,
  xlab = 'Interval',
  ylab = 'Steps',
  main = 'Time series plot',
  type = 'l'
)
```
#### 2. Find the maximum number of steps
```{r}
# Mark every interval whose average equals the overall maximum, then show the
# matching entry; its name is the interval identifier.
is_peak <- steps_per_interval == max(steps_per_interval)
location <- which(is_peak)
steps_per_interval[location]
```
## 4. Imputing missing values
#### 1. Calculate and report the total number of missing values in the dataset
```{r}
# A step count is treated as missing when it is a real NA or the literal text
# 'NA' (the form left behind by importing with na.strings = ''). %in% never
# returns NA, so the total stays defined whichever form the column holds.
NAS <- is.na(activity$steps) | activity$steps %in% 'NA'
sum(NAS)
```
#### 2&3. Fill in all the missing values (using the mean of each 5-minute interval)
```{r}
# Rows whose step count is missing (a real NA or the literal text 'NA').
actna <- filter(activity, is.na(steps) | steps == 'NA')
# Replace each missing count with the average of that row's own interval,
# looked up by interval name. The previous approach column-bound a 288-row
# table of means onto the missing rows and relied on recycling, which is only
# correct when the missing rows are whole days listed in interval order.
actfilled <- actna
interval_pos <- match(actfilled$interval, names(steps_per_interval))
actfilled$steps <- as.numeric(steps_per_interval[interval_pos])
actfilled$interval <- as.integer(actfilled$interval)
# Recombine: imputed rows first, followed by the rows that had recorded values.
activityModeifed <- rbind(actfilled, activity1)
head(activityModeifed)
```
#### 4. Histogram of steps taken per day, with mean and median values
```{r}
# Daily totals computed from the data set with imputed values, followed by
# their distribution and summary statistics.
steps_per_day_Modified <- with(activityModeifed, tapply(steps, date, sum))
hist(
  steps_per_day_Modified,
  xlab = "steps",
  main = "Modified histogram"
)
mean(steps_per_day_Modified)
median(steps_per_day_Modified)
```
## 5. Differences in activity patterns between weekdays and weekends
#### 1. Separate weekdays and weekends
```{r}
# Classify each date by its day-of-week number instead of its English name.
# as.POSIXlt()$wday is 0 for Sunday through 6 for Saturday whatever the session
# locale is, so the global Sys.setlocale() call is no longer needed. "English"
# is a Windows-style locale name; where it is not accepted, weekdays() returns
# names in the local language and the comparison against 'Saturday'/'Sunday'
# would label every row 'Weekday'.
is_weekend <- as.POSIXlt(activityModeifed$date)$wday %in% c(0L, 6L)
# Assign the label as a column (rather than cbind) so re-running the chunk
# does not append a second DATE column.
activityModeifed$DATE <- ifelse(is_weekend, 'Weekend', 'Weekday')
actweekday <- filter(activityModeifed, DATE == 'Weekday')
actweekend <- filter(activityModeifed, DATE == 'Weekend')
activityModeifed <- arrange(activityModeifed, date)
head(activityModeifed)
```
#### 2. Average steps per interval on weekdays versus weekends
```{r}
# Average steps per interval, computed separately for weekend and weekday rows.
steps_per_interval_weekend <- tapply(actweekend$steps, actweekend$interval, mean)
steps_per_interval_weekday <- tapply(actweekday$steps, actweekday$interval, mean)
# Two panels stacked vertically, weekend on top.
par(mfcol = c(2,1))
# Each panel takes its x values from the names of its own series, so x and y
# always have the same length and order. Previously x came from unique() of
# the combined data frame, which matched only if both subsets contained every
# interval and the combined rows were already in interval order.
plot(as.numeric(names(steps_per_interval_weekend)), steps_per_interval_weekend,
     type = 'l', col = 'blue', main = 'weekend',
     xlab = 'Interval', ylab = 'Steps')
plot(as.numeric(names(steps_per_interval_weekday)), steps_per_interval_weekday,
     type = 'l', col = 'blue', main = 'weekday',
     xlab = 'Interval', ylab = 'Steps')