-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
102 lines (85 loc) · 3.68 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Load the activity lookup table stored in "<filename>.txt".
#
# filename: path of the file WITHOUT its ".txt" extension (appended here).
# The file is parsed as whitespace-separated columns and text columns are
# kept as character (as.is = TRUE) instead of being converted to factors.
# Returns the parsed data frame with its columns renamed to
# "activityID" and "activityName" (assumes the file has exactly two columns).
read_activity_label_func <- function(filename) {
  labelTable <- read.table(paste0(filename, ".txt"), sep = "", as.is = TRUE)
  setNames(labelTable, c("activityID", "activityName"))
}
# Load the feature (measurement column) lookup table from "<filename>.txt".
#
# filename: path of the file WITHOUT its ".txt" extension (appended here).
# The file is parsed as whitespace-separated columns using read.table's
# default string handling.
# Returns the parsed data frame with its columns renamed to
# "featureID" and "featureName" (assumes the file has exactly two columns).
read_features_label_func <- function(filename) {
  featureTable <- read.table(paste0(filename, ".txt"), sep = "")
  setNames(featureTable, c("featureID", "featureName"))
}
# Load the per-row subject identifiers from "<filename>.txt".
#
# filename: path of the file WITHOUT its ".txt" extension (appended here).
# The file is parsed as whitespace-separated columns.
# Returns the parsed data frame with its column renamed to "Subject"
# (assumes the file has a single column).
read_subject_order_func <- function(filename) {
  subjectTable <- read.table(paste0(filename, ".txt"), sep = "")
  setNames(subjectTable, "Subject")
}
# Load the per-row activity codes from "<filename>.txt".
#
# filename: path of the file WITHOUT its ".txt" extension (appended here).
# The file is parsed as whitespace-separated columns.
# Returns the parsed data frame with its column renamed to
# "ActivityDescription" (assumes the file has a single column).
read_y_order_func <- function(filename) {
  activityTable <- read.table(paste0(filename, ".txt"), sep = "")
  setNames(activityTable, "ActivityDescription")
}
# Load the measurement matrix from "<filename>.txt".
#
# filename: path of the file WITHOUT its ".txt" extension (appended here).
# The file is parsed as whitespace-separated columns.
# Returns the parsed data frame unchanged, i.e. with read.table's default
# column names (V1, V2, ...).
read_X_order_func <- function(filename) {
  measurementFile <- paste0(filename, ".txt")
  read.table(measurementFile, sep = "")
}
# Positions of the mean and std columns within the 561-value measurement
# vector read from X_*.txt. Written as contiguous ranges for readability and
# converted back to a double vector, matching the original literal list.
validMeanStdData <- as.numeric(c(
  1:6, 41:46, 81:86, 121:126, 161:166,
  201:202, 214:215, 227:228, 240:241, 253:254,
  266:271, 345:350, 424:429,
  503:504, 516:517, 529:530, 542:543
))
# Read one partition of the data set (e.g. "train" or "test") into a single
# data frame.
#
# Reads subject_<dataSetName>.txt, y_<dataSetName>.txt and
# X_<dataSetName>.txt from dataSetDir, labels the X columns with the feature
# names, keeps only the columns listed in validMeanStdData, and binds the
# three tables column-wise.
#
# dataSetDir:      directory containing the three files.
# dataSetName:     suffix used in the file names.
# featuresLabelDS: data frame with a "featureName" column holding one name
#                  per column of the X file, in column order.
# activityLabelDS: not used by this function; kept so existing callers that
#                  pass it keep working.
#
# Returns a data frame whose columns are those of the subject table, the y
# table and the selected X columns, in that order. Note that data.frame()
# applies its default name checking to the feature names.
# Stops with an error if the three files disagree on the number of rows or
# if the number of feature labels does not match the number of X columns.
read_data_set_func <- function(dataSetDir, dataSetName, featuresLabelDS, activityLabelDS)
{
  # setwd() returns the previous directory; on.exit() guarantees it is
  # restored even when one of the reads below fails.
  cwd <- setwd(dataSetDir)
  on.exit(setwd(cwd), add = TRUE)

  print(" Read subject order data...")
  subjectOrderDS <- read_subject_order_func(paste("subject", dataSetName, sep = "_"))
  print(" Read y order data...")
  yOrderDS <- read_y_order_func(paste("y", dataSetName, sep = "_"))
  print(" Read x order data...")
  XOrderDS <- read_X_order_func(paste("X", dataSetName, sep = "_"))

  # Ensure that all the tables describe the same number of observations.
  # Signalled as an error: a sentinel return value would be passed straight
  # to rbind() by the caller and corrupt the merged data.
  if (nrow(subjectOrderDS) != nrow(yOrderDS) || nrow(yOrderDS) != nrow(XOrderDS))
  {
    stop(" Error in data set : ", dataSetName,
         " (subject, y and X files have different numbers of rows)",
         call. = FALSE)
  }

  # Appropriately label the data set with descriptive variable names.
  print(" Assign feature labels to X Order")
  featureNames <- as.character(featuresLabelDS[["featureName"]])
  if (length(featureNames) != ncol(XOrderDS))
  {
    stop(" Error in data set : ", dataSetName,
         " (", length(featureNames), " feature labels for ",
         ncol(XOrderDS), " X columns)",
         call. = FALSE)
  }
  names(XOrderDS) <- featureNames

  # Extract only the mean and standard deviation measurements.
  XOrderDS <- XOrderDS[, validMeanStdData]

  print(" Generate data frame for set ...")
  data.frame(subjectOrderDS, yOrderDS, XOrderDS)
}
# This is the starting function; call it after sourcing this script.
#
# It expects "activity_labels.txt", "features.txt" and the "train/" and
# "test/" directories to be reachable from the current working directory.
# It reads and merges the training and test sets, averages every selected
# measurement per (Subject, ActivityDescription) pair, writes the result to
# "output.txt" in the current working directory, and also returns the
# summarised data frame.
main_func <- function()
{
  # Load the only non-base dependency first so a missing package fails
  # before the (slow) file reads.
  library(dplyr)

  print("Read activity label data...")
  activityLabelDS <- read_activity_label_func("activity_labels")
  str(activityLabelDS)
  print(class(activityLabelDS$activityName))

  print("Read features label data...")
  featuresLabelDS <- read_features_label_func("features")

  print("Read training data...")
  trainSetDS <- read_data_set_func("train/", "train", featuresLabelDS, activityLabelDS)
  print("Read test data...")
  testSetDS <- read_data_set_func("test/", "test", featuresLabelDS, activityLabelDS)

  print("Merge training and test data...")
  mergedSetDS <- rbind(trainSetDS, testSetDS)

  print("Extract summarised data (step-5)")
  # ddply() belongs to plyr, which this script never loads; group_by() +
  # summarise_all() are the dplyr equivalents and average every non-key
  # column within each Subject/ActivityDescription group.
  groupedSetDS <- group_by(mergedSetDS, Subject, ActivityDescription)
  # as.data.frame() drops the tibble class and leftover grouping so callers
  # get a plain data frame.
  summarySetDs <- as.data.frame(summarise_all(groupedSetDS, mean))

  write.table(summarySetDs, file = "output.txt", row.names = FALSE)
  summarySetDs
}