Skip to content

Commit

Permalink
Structure project as an R package
Browse files Browse the repository at this point in the history
  • Loading branch information
polynomialherder committed Apr 2, 2020
1 parent f7e5300 commit ccc2dfd
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 47 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^modelc\.Rproj$
^\.Rproj\.user$
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,5 @@ flycheck_*.el
.dir-locals.el

# network security
/network-security.data
/network-security.data
.Rproj.user
14 changes: 14 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Package: modelc
Title: A generalized linear model object to SQL compiler
Version: 0.0.0.9000
Authors@R:
person(given = "Spark",
family = "Fish",
email = "[email protected]",
role = c("aut", "cre"))
Description: modelc is an R model object to SQL compiler. It generates SQL select statements from linear and generalized linear models. Its interface currently consists of a single function, construct_select, which takes a single input, namely an lm or glm model object. It supports GLM family distributions using log or identity link functions.
License: Proprietary
Encoding: UTF-8
LazyData: true
Suggests:
testthat (>= 2.1.0)
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
exportPattern("^[^\\.]")
100 changes: 54 additions & 46 deletions modelc.r → R/modelc.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
extract_parameters <- function(model) {
return(names(coef(model)))
return(names(coef(model)))
}

extract_parameter_coefficient <- function(model, parameter) {
extract_parameter_coefficient <- function(model, parameter) {
coefficient_value <- coef(model)[[parameter]]
if (!is.na(coefficient_value)) {
return(coefficient_value)
if (!is.na(coefficient_value)) {
return(coefficient_value)
} else {
return(0)
return(0)
}
}

Expand All @@ -27,11 +27,11 @@ is_intercept <- function(parameter) {
return(parameter == '(Intercept)')
}

build_intercept <- function(model, parameter, first=FALSE) {
build_intercept <- function(model, parameter, first=FALSE) {
coefficient <- extract_parameter_coefficient(model, parameter)
if (!first) {
return(paste("+", coefficient))
} else {
} else {
return(coefficient)
}
}
Expand All @@ -52,7 +52,7 @@ is_interaction <- function(parameter) {

is_factor <- function(parameter, model) {
factorlist <- names(model$xlevels)
for (factor in factorlist) {
for (factor in factorlist) {
if (grepl(factor, parameter, fixed=T)) {
return(TRUE)
}
Expand All @@ -62,7 +62,7 @@ is_factor <- function(parameter, model) {

get_factor_name <- function(parameter, model) {
factorlist <- names(model$xlevels)
for (factor in factorlist) {
for (factor in factorlist) {
if (grepl(factor, parameter, fixed=T)) {
return(factor)
}
Expand All @@ -76,79 +76,80 @@ extract_level <- function(parameter, factor) {
return(substring(parameter, level_start, level_end))
}

has_parameter <- function(model, parameter) {
has_parameter <- function(model, parameter) {
parameter %in% names(coef(model));
}

build_interaction_term <- function(model, interaction_term, first=FALSE) {
build_interaction_term <- function(model, interaction_term, first=FALSE) {

split_interaction <- strsplit(interaction_term, ":")[[1]]
coefficient <- extract_parameter_coefficient(model, interaction_term)

sql <- paste(coefficient, "*", sep="")
if (first) {
sql <- paste("+", sql)
if (!first) {
sql <- paste("+", sql)
}

i = 0

for (interaction_variable in split_interaction) {
if (is_factor(interaction_variable, model)) {
factor <- get_factor_name(interaction_variable, model)
level <- extract_level(interaction_variable, factor)
sql = paste(sql, "(CASE WHEN", factor, "=", level, "THEN", level, "ELSE", 0, "END)")
sql = paste(sql, "(CASE WHEN", factor, "=", level, "THEN", 1, "ELSE", 0, "END)")
}
else {
sql = paste(sql, interaction_variable, sep="")
}
if (i == 0) {
sql = paste(sql, "*", sep="")

if (i == 0) {
sql = paste(sql, "*", sep="")
}

i = i + 1

}
return (sql)
}

build_factor_case_statements <- function(model, first=F) {
SQL_START_FIRST <- "(CASE"
SQL_START <- "+ (CASE"
factors <- model$xlevels
SQL_START <- "+ (CASE"
factors <- model$xlevels
factor_variables <- names(factors)
cases <- ""
for (factor in factor_variables) {
if (!first) {
sql = SQL_START
} else {
sql <- SQL_START_FIRST
} else {
sql <- SQL_START_FIRST
}
for (level in factors[[factor]]) {
formula_term <- paste(factor, level, sep="")
if (has_parameter(model, formula_term)) {
coefficient <- extract_parameter_coefficient(model, formula_term)
sql = paste(sql, "WHEN", factor, "=", level, "THEN", coefficient, "*", factor)
coefficient <- extract_parameter_coefficient(model, formula_term)
sql = paste(sql, "WHEN", factor, "=", level, "THEN", coefficient)
}
}

if (!(sql %in% c(SQL_START_FIRST, SQL_START))) {
cases <- paste(cases, sql, "END)")
cases <- paste(cases, sql, "END)")
}

}
return(cases)
}

apply_linkinverse <- function(model, sql) {
apply_linkinverse <- function(model, sql) {
if (is.null(model$family)) {
return(sql)
}

if (model$family$link == "identity") {
return(sql)
}

else if(model$family$link == "log") {

if (model$family$link == "identity") {
return(sql)
}

else if(model$family$link == "log") {
sql <- paste("EXP(", sql, ")", sep="")
return(sql)
}
Expand All @@ -161,32 +162,39 @@ apply_linkinverse <- function(model, sql) {
construct_select <- function(model) {
parameters <- extract_parameters(model)
select <- ""
count <- 0
count <- 0
for (parameter in parameters) {

if (is_intercept(parameter)) {
cat(paste(parameter, "is an intercept\n"))
build_term <- build_intercept
}
else if (is_factor(parameter, model)) {
next;
}
else if (is_interaction(parameter)) {
cat(paste(parameter, "is an interaction\n"))
build_term <- build_interaction_term
}
else if (is_factor(parameter, model)) {
cat(paste(parameter, "is a factor\n"))
next;
}
else {
build_term <- build_additive_term
}
select = paste(select, build_term(model, parameter, first=count==0))
count = count + 1
}

select <- paste(
select,
build_factor_case_statements(model, first=count==0)
" ",
build_factor_case_statements(model, first=count==0),
sep=""
)

select_with_linkinverse <- apply_linkinverse(model, select)

select <- paste("SELECT", select_with_linkinverse)

select <- gsub(" ", " ", trimws(select))

return(select)
}
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,18 @@ SELECT
```

Note that your R session should be configured with `options(scipen=999)` to disable rendering numbers in scientific notation; otherwise, `construct_select` may output invalid SQL.

# Installing

Ensure you have [R devtools](https://cran.r-project.org/web/packages/devtools/readme/README.html) installed globally.

```R
setwd("..") # This line assumes your working directory is the `modelc/` root. Otherwise set the working directory to the folder *containing* modelc/
devtools::install("modelc")
```

Alternatively, you can use `devtools::install_github`, passing a GitHub auth token.

```R
devtools::install_github("team-sparkfish/modelc", auth_token = my_secret_auth_token)
```
21 changes: 21 additions & 0 deletions modelc.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace

0 comments on commit ccc2dfd

Please sign in to comment.