Update API to deploy separate endpoints by run ID #2
The PR replaces the static, single-model API with a `get_model_from_run_id()` helper and one endpoint per valid run ID. The resulting script (with the env-file loading as agreed in review — secrets loaded conditionally, `.env` always loaded):

```r
# Setup ------------------------------------------------------------------------
library(arrow)
library(assertthat)
library(aws.s3)
library(ccao)
library(dplyr)

source("generics.R")

# Read AWS creds from Docker secrets
if (file.exists("/run/secrets/ENV_FILE")) {
  readRenviron("/run/secrets/ENV_FILE")
} else if (file.exists("secrets/ENV_FILE")) {
  readRenviron("secrets/ENV_FILE")
}
readRenviron(".env")

# Get the model run attributes at runtime from env vars
dvc_bucket <- Sys.getenv("AWS_S3_DVC_BUCKET")
run_bucket <- Sys.getenv("AWS_S3_MODEL_BUCKET")
api_port <- as.numeric(Sys.getenv("API_PORT", unset = "3636"))
default_run_id_var_name <- "AWS_S3_MODEL_RUN_ID"
default_run_id <- Sys.getenv(default_run_id_var_name)

# The list of run IDs that will be deployed as possible model endpoints
valid_run_ids <- c(
  "2024-02-06-relaxed-tristan",
  "2024-03-17-stupefied-maya"
)

assert_that(
  default_run_id %in% valid_run_ids,
  msg = sprintf(
    "%s must be a valid run_id - got '%s', expected one of: %s",
    default_run_id_var_name,
    default_run_id,
    paste(valid_run_ids, collapse = ", ")
  )
)

# Given a run ID, return a model object that can be used to power a
# vetiver API endpoint
get_model_from_run_id <- function(run_id) {
  run_year <- substr(run_id, 1, 4)

  # Download Files -------------------------------------------------------------

  # Grab model fit and recipe objects
  temp_file_fit <- tempfile(fileext = ".zip")
  aws.s3::save_object(
    object = file.path(
      run_bucket, "workflow/fit",
      paste0("year=", run_year),
      paste0(run_id, ".zip")
    ),
    file = temp_file_fit
  )

  temp_file_recipe <- tempfile(fileext = ".rds")
  aws.s3::save_object(
    object = file.path(
      run_bucket, "workflow/recipe",
      paste0("year=", run_year),
      paste0(run_id, ".rds")
    ),
    file = temp_file_recipe
  )

  # Grab metadata file for the specified run
  metadata <- read_parquet(
    file.path(
      run_bucket, "metadata",
      paste0("year=", run_year),
      paste0(run_id, ".parquet")
    )
  )

  # Load the training data used for this model
  training_data_md5 <- metadata$dvc_md5_training_data
  training_data <- read_parquet(
    file.path(
      dvc_bucket,
      substr(training_data_md5, 1, 2),
      substr(training_data_md5, 3, nchar(training_data_md5))
    )
  )

  # Load Model -----------------------------------------------------------------

  # Load fit and recipe from file
  fit <- lightsnip::lgbm_load(temp_file_fit)
  recipe <- readRDS(temp_file_recipe)

  # Extract a sample row of predictors to use for the API docs
  predictors <- recipe$var_info %>%
    filter(role == "predictor") %>%
    pull(variable)
  ptype_tbl <- training_data %>%
    filter(meta_pin == "15251030220000") %>%
    select(all_of(predictors))
  ptype <- vetiver_create_ptype(model = fit, save_prototype = ptype_tbl)

  # Create API -----------------------------------------------------------------

  # Create model object and populate metadata
  model <- vetiver_model(fit, "LightGBM", save_prototype = ptype)
  model$recipe <- recipe
  model$pv$round_type <- metadata$pv_round_type
  model$pv$round_break <- metadata$pv_round_break[[1]]
  model$pv$round_to_nearest <- metadata$pv_round_to_nearest[[1]]

  return(model)
}

default_model <- get_model_from_run_id(default_run_id)

router <- pr() %>%
  # Point the /predict endpoint to the default model
  vetiver_api(default_model)

# Create endpoints for each model based on run ID and add them to the router
for (run_id in valid_run_ids) {
  model <- get_model_from_run_id(run_id)
  vetiver_api(
    router,
    model,
    path = sprintf("/predict/%s", run_id)
  )
}

# Start API
pr_run(
  router,
  host = "0.0.0.0",
  port = api_port
)
```

Inline review threads:

On `default_run_id_var_name`:

> It might be nice to change the name of this env var to something that more clearly marks it as the default (like …)

> Let's change it now and make a list of things we actually need to change when redeploying.

> Done in abe5403, with an updated list of deploy steps in the PR body.

On `valid_run_ids`:

> Any other run IDs that we should include in this vector for now?

> Let's actually include the final 2022 and 2023 models. We can just reproduce and replace the old workbooks for those years.

> Cool, done in abe5403. Are the old workbooks even still operational, given that the model version in the (currently static) API has changed since 2022 and 2023?
Review discussion on `pr_run()`:

> I don't remember why we're running

> Done in d8bf46f, let's see what happens!

Review discussion on the env-file loading logic:

> The logic here was a bit confusing for local development, where neither `/run/secrets/ENV_FILE` nor `secrets/ENV_FILE` exist. I think the new conditional structure should make local development easier, but let me know if I'm misinterpreting something.

> I agree this is pretty confusing and underdocumented. If I recall, the file `secrets/ENV_FILE` is mounted to `/run/secrets/ENV_FILE` when using compose. This file contains AWS creds specific to the API. During development, these creds would be unnecessary, as the user would likely have active AWS creds via `aws-mfa`.
>
> The `.env` file is separate and not related. It contains the rest of the setup vars used by compose (`API_PORT`, `CCAO_REGISTRY_URL`, etc.) and the final model ID and year. In other words, it's just config stuff, not actually secret. This file is necessary to load during development, but NOT when deployed via compose (compose adds all vars in a `.env` file to the deployed container). So the logic here makes sense if you remove the change on line 15.

> That makes sense. I refactored in abe5403 to always load this file and only load `secrets/ENV_FILE` if it exists (similar to `/run/secrets/ENV_FILE`).