-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSendIgReader.R
87 lines (78 loc) · 3.06 KB
/
SendIgReader.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
### Functions to create and read SENDIG data.frame
#' Read all SENDIG 3.1 domain structures into the global `dfSENDIG`.
#'
#' On the first call the SENDIG variable workbook is looked up in
#' `<sourceDir>/downloads/`; when it is not there it is downloaded first.
#' The "SENDIG 3.1 Variables" sheet is then read, a few columns are renamed
#' (Domain, Column, Label, Codelist, Expectancy) and the result is stored in
#' the global `dfSENDIG`. The global flag `bSENDIGRead` makes later calls
#' no-ops.
#'
#' Relies on names that are not defined in this function: `sourceDir`,
#' `createOutputDirectory()`, `GET()`/`write_disk()`/`timeout()`/
#' `http_error()`/`status_code()` (presumably httr), `loadWorkbook()`/
#' `readWorksheet()` and `rename()`.
#'
#' @return Invisibly `TRUE` after a load, invisibly `NULL` when the data was
#'   already loaded. Stops with an error when the download fails.
readDomainStructures <- function() {
  # Create the "already loaded" flag the first time through.
  if (!exists("bSENDIGRead")) {
    bSENDIGRead <<- FALSE
  }
  if (!bSENDIGRead) {
    IGFile <- "sendig-3-1-excel.xls"
    IGDownloadsDir <- paste0(sourceDir, "/downloads/")
    IGPath <- paste0(IGDownloadsDir, IGFile)
    if (file.exists(IGPath)) {
      print(paste0("IG Loading... from ", IGDownloadsDir, IGFile))
    } else {
      print("IG Downloading...")
      # Create directory if not there
      createOutputDirectory(sourceDir, "downloads")
      # Reads from phuse github biocelerate location (pinned to a commit)
      path <- "https://github.com/phuse-org/BioCelerate/raw/a9022106134943bd46e06d064b629b6ef3488125/metadata/sendig-3-1-excel.xls"
      print(paste0("Downloading the IG file... ", path))
      # A failed or partial download must not stay on disk: the next call
      # would otherwise treat it as a valid cached workbook.
      response <- tryCatch(
        GET(path, write_disk(IGPath), timeout(40)),
        error = function(e) {
          unlink(IGPath)
          stop("Failed to download the IG file: ", conditionMessage(e),
               call. = FALSE)
        }
      )
      if (http_error(response)) {
        unlink(IGPath)
        stop("Failed to download the IG file (HTTP status ",
             status_code(response), "): ", path, call. = FALSE)
      }
    }
    df <- readWorksheet(loadWorkbook(IGPath), sheet = "SENDIG 3.1 Variables")
    # column renames (new name = old name)
    df <- rename(
      df,
      "Domain" = "Domain.Prefix",
      "Column" = "Variable.Name",
      "Label" = "Variable.Label",
      "Codelist" = "Controlled.Terms..Codelist.or.Format",
      "Expectancy" = "Core"
    )
    dfSENDIG <<- df
    bSENDIGRead <<- TRUE
  }
}
#' Decide which columns of a dataset should be kept, based on core
#' requirement.
#'
#' A column is dropped only when it is both entirely `NA` in `dataset` and
#' listed in the global `dfSENDIG` with Expectancy "Perm" for the dataset's
#' domain (taken from `dataset$DOMAIN`). Every other column is kept.
#'
#' @param dataset A data.frame with a `DOMAIN` column.
#' @return Integer vector with the positions of the columns to keep.
checkCore <- function(dataset) {
  # Domain code(s) of this dataset; missing values carry no domain.
  domain_i <- unique(dataset$DOMAIN)
  domain_i <- domain_i[!is.na(domain_i)]
  # Permissible columns for the domain. %in% never yields NA, so rows with a
  # missing Domain/Expectancy cannot leak NA entries into the selection.
  isPerm <- dfSENDIG$Domain %in% domain_i & dfSENDIG$Expectancy %in% "Perm"
  perms <- dfSENDIG[["Column"]][isPerm]
  # Names of the columns that contain nothing but NA (checked per column,
  # without coercing the data.frame to a matrix).
  isBlank <- vapply(dataset, function(x) all(is.na(x)), logical(1))
  blankCols <- names(isBlank)[isBlank]
  # Only blank columns that are also permissible get removed
  toRemove <- intersect(perms, blankCols)
  which(!(names(dataset) %in% toRemove))
}
#' Convert to numeric the columns that SENDIG declares as numeric.
#'
#' Looks up, in the global `dfSENDIG`, the columns of the dataset's domain
#' (from `dataset$DOMAIN`) whose Type is "Num" and converts those that are
#' present in `dataset` with `as.numeric()`. Values that cannot be converted
#' become `NA`; the coercion warning is suppressed for every column.
#'
#' @param dataset A data.frame with a `DOMAIN` column.
#' @return `dataset` with the numeric-typed columns converted.
setSENDNumeric <- function(dataset) {
  domain_i <- unique(dataset$DOMAIN)
  domain_i <- domain_i[!is.na(domain_i)]
  # %in% never yields NA, so incomplete metadata rows are simply skipped.
  isNum <- dfSENDIG$Domain %in% domain_i & dfSENDIG$Type %in% "Num"
  numerics <- intersect(dfSENDIG[["Column"]][isNum], names(dataset))
  printDebug(paste(" In setSENDNumeric set numeric for these variables...",numerics))
  printDebug(numerics)
  if (length(numerics) > 0) {
    # lapply keeps one vector per column whatever the number of rows or
    # columns, so a single code path covers every case.
    # NOTE(review): as.numeric() on a factor column returns its integer
    # codes, not the labels - confirm no factor columns reach this point.
    dataset[numerics] <- lapply(
      dataset[numerics],
      function(x) suppressWarnings(as.numeric(x))
    )
  }
  dataset
}
#' Make sure a sub-directory exists and switch the working directory to it.
#'
#' The working directory is first set to `aDir`; `aStudy` is then created
#' below `aDir` when it does not exist yet, and finally becomes the working
#' directory.
#'
#' @param aDir Base directory.
#' @param aStudy Name of the sub-directory of `aDir`.
#' @return Invisibly, the value returned by the final `setwd()` call.
createOutputDirectory <- function(aDir, aStudy) {
  # Move into the base directory so the relative existence check below is
  # resolved against it.
  setwd(aDir)
  studyDir <- file.path(aDir, aStudy)
  if (!file.exists(aStudy)) {
    dir.create(studyDir)
  }
  setwd(studyDir)
}
#' Pause execution and report how long the pause took.
#'
#' @param x Number of seconds to sleep, passed to `Sys.sleep()`.
#' @return The difference between `proc.time()` after and before the sleep.
sleepSeconds <- function(x) {
  startedAt <- proc.time()
  Sys.sleep(x)
  # Only the elapsed component is expected to grow; CPU usage while
  # sleeping should be negligible.
  timeSpent <- proc.time() - startedAt
  timeSpent
}