forked from DanielaGawehns/DementiaPhysicalActivity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean2021DataOfNotOnWrist.R
64 lines (46 loc) · 2.72 KB
/
clean2021DataOfNotOnWrist.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Clean the 2021 24hrs activity data.
#
# The 24hrs data was manually extracted from the watches and includes times
# that are non-wear times. This function compares every 24hrs entry with the
# start and end times of the accelerometer (Acc) recordings and removes all
# 24hrs entries that are outside of those recordings.
#
# Arguments:
#   Dat10_File, Dat11_File, Dat12_File, Dat13_File, Dat14_File:
#     currently NOT used; the five activity logs are read from the hard-coded
#     paths below.
#     NOTE(review): confirm with the callers whether these arguments should
#     replace the hard-coded paths.
#   AccTimesforFiltering:
#     data.frame with one row per Acc recording; the columns Date, startAcc,
#     endAcc and residentID are read here.
#
# Returns:
#   The rows of the combined 24hrs data (with the added columns Date24,
#   Start24 and End24) for which an Acc recording of the same resident on the
#   same date exists, and whose start lies after the earliest Acc start
#   (minus a 5 min tolerance) and whose end lies before the latest Acc end.
clean2021_24hrs <- function(Dat10_File, Dat11_File, Dat12_File, Dat13_File,
                            Dat14_File, AccTimesforFiltering) {
  # 24hrs data in need of cleaning
  logPaths <- c(
    "Data24hr/ActivityLog_20210510.csv",
    "Data24hr/ActivityLog_20210511.csv",
    "Data24hr/ActivityLog_20210512.csv",
    "Data24hr/ActivityLog_20210513.csv",
    "Data24hr/ActivityLog_20210514.csv"
  )
  # columns (per the original note): Date, Time (as chr), Activity, Minutes, ID
  DataActive21 <- do.call(rbind, lapply(logPaths, transformData21))

  # Parse the date directly. Going through POSIXct first and then as.Date()
  # converts local midnight via UTC, which can move the date back by one day
  # in time zones ahead of UTC.
  Date24 <- as.Date(DataActive21$Date, format = "%Y-%m-%d")

  # Time is split at "-" into a start part and an end part ("%H:%M" each)
  timeParts <- strsplit(DataActive21$Time, split = "-")
  start24 <- vapply(timeParts, function(x) x[1], character(1))
  end24 <- vapply(timeParts, function(x) x[2], character(1))
  Start24 <- as.POSIXct(paste(Date24, start24), format = "%Y-%m-%d %H:%M")
  End24 <- as.POSIXct(paste(Date24, end24), format = "%Y-%m-%d %H:%M")
  Full24Data <- data.frame(DataActive21, Date24, Start24, End24)

  # from BatchProcessing all datafiles with Acc Data, we get:
  # AccTimes <- data.frame(Date = as.Date(startAcc), startAcc, endAcc, residentID)
  AccTimes <- AccTimesforFiltering
  accIDs <- as.numeric(AccTimes$residentID)

  # the Acc start is moved 5 min earlier, as there seems to be a time lag /
  # recording issue between the two data sources
  startToleranceSecs <- 300

  # decide for each entry in Full24Data whether it should be kept;
  # entries without a matching Acc recording stay FALSE
  nEntries <- nrow(Full24Data)
  keepEntry <- logical(nEntries)
  for (i in seq_len(nEntries)) {
    sameIDSameDate <- which(
      accIDs == Full24Data$ID[i] & AccTimes$Date == Full24Data$Date24[i]
    )
    if (length(sameIDSameDate) == 0) {
      next
    }
    accMatch <- AccTimes[sameIDSameDate, ]

    # there can be several Acc measurements of one resident on one day,
    # so the earliest start and the latest end span the recorded period
    startsAfterAcc <- Full24Data$Start24[i] >
      (min(accMatch$startAcc) - startToleranceSecs)
    endsBeforeAcc <- Full24Data$End24[i] < max(accMatch$endAcc)

    # an NA comparison (e.g. an unparsable time) drops the entry
    keepEntry[i] <- isTRUE(startsAfterAcc && endsBeforeAcc)
  }

  # keep only those entries whose start and end times are ok
  Full24Data[keepEntry, ]
}