Skip to content

Commit

Permalink
Adding some extra documentation to cached fim template sql files.
Browse files Browse the repository at this point in the history
  • Loading branch information
TylerSchrag-NOAA committed Dec 20, 2023
1 parent ca3308e commit 015c2fc
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- This creates the four tables on a Redshift db needed for a cached fim pipeline run.
-- These four tables exist on both RDS and Redshift, so any changes here will need to be synced with the RDS version as well - 0b_rds_create_inundation_tables_if_not_exist.sql
CREATE TABLE IF NOT EXISTS {rs_fim_table}_flows
(
feature_id integer,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- This creates the four tables on an RDS db needed for a cached fim pipeline run.
-- These four tables exist on both RDS and Redshift, so any changes here will need to be synced with the Redshift version as well - 0a_redshift_create_inundation_tables_if_not_exist.sql
CREATE TABLE IF NOT EXISTS {db_fim_table}_flows
(
hydro_id integer,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
-- This populates a standardized fim_flows table, filtered to high water threshold, on RDS. This is essentially the domain of a given fim run.
-- The prc_status column is updated throughout the fim run with a status reflecting how fim is calculated for each reach (from ras2fim cache, from hand cache, hand processing, etc.)
-- This table is copied to Redshift in the next step (in order to query the cache there), but this table on RDS is the authoritative source as far as the prc_status column goes.
TRUNCATE {db_fim_table}_flows;
INSERT INTO {db_fim_table}_flows (feature_id, hydro_id, huc8, branch, reference_time, discharge_cms, discharge_cfs, prc_status)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- Copy the fim_flows table on RDS to Redshift - this allows querying the hand cache on redshift by joining to this table.
TRUNCATE {rs_fim_table}_flows;
INSERT INTO {rs_fim_table}_flows (feature_id, hydro_id, huc8, branch, reference_time, discharge_cms, discharge_cfs, prc_status)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
-- This is the query that pulls cached hand fim from the cache on Redshift. It does this by joining to the just-populated flows table, with WHERE clauses on discharge
-- As of right now, feature_id, hydro_id, huc8, branch, and stage combine to represent a primary key in the hand hydrotables, so all of those fields are used in joins
-- (I've asked the fim team to hash a single unique id for feature_id, hydro_id, huc8, branch combinations... which will simplify these queries, and hopefully help with performance.)
TRUNCATE {rs_fim_table};
TRUNCATE {rs_fim_table}_geo;
TRUNCATE {rs_fim_table}_zero_stage;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- This SQL queries the ras2fim cache on RDS, and inserts appropriate rows into the fim tables of the given run.
TRUNCATE {db_fim_table};
TRUNCATE {db_fim_table}_geo;
TRUNCATE {db_fim_table}_zero_stage;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- This SQL queries the just-updated hand cache table on Redshift (via dblink from RDS), and inserts appropriate rows into the fim tables of the given run.
INSERT INTO {db_fim_table}(
SELECT * FROM dblink('external_vpp_redshift', $REDSHIFT$
SELECT hydro_id, feature_id, huc8, branch, forecast_discharge_cfs, forecast_stage_ft, rc_discharge_cfs,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- This is a generic / standardized query to create a publish.fim table for fim_config product processing (works for NWM configurations, but may not work for special fim configurations like RnR or CatFIM)
DROP TABLE IF EXISTS {db_publish_table};

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
-- This template is designed to add freshly processed FIM polygons to the cached_fim tables on Redshift
-- This template is designed to add freshly processed FIM polygons (which don't already exist in the cache) in the current FIM run back into the cached hand tables on Redshift.
-- To ensure that no duplicates are added to the cache (which could be possible if multiple fim configurations are running at the same time), this query joins to the target table and ensures that
-- the current hydrotable record doesn't already exist in the cache. This slows down the query significantly, and there is likely a potential optimization here... possibly using the UPSERT functionality of Redshift.
-- As of right now, feature_id, hydro_id, huc8, branch, and stage combine to represent a primary key in the hand hydrotables, so all of those fields are used in joins
-- (I've asked the fim team to hash a single unique id for feature_id, hydro_id, huc8, branch combinations... which will simplify these queries, and hopefully help with performance.)

-- 1. Add unique feature_id/hydro_id records to the hydrotable_cached_max table
INSERT INTO fim.hydrotable_cached_max(hydro_id, feature_id, huc8, branch, fim_version, max_rc_discharge_cfs, max_rc_stage_ft)
Expand Down

0 comments on commit 015c2fc

Please sign in to comment.