Adding some extra documentation to cached fim template sql files.

NOAA-OWP · Dec 20, 2023 · 015c2fc · 015c2fc
1 parent ca3308e
commit 015c2fc
Show file tree

Hide file tree

Showing 9 changed files with 19 additions and 1 deletion.
diff --git a/...stprocess_sql/fim_caching_templates/0a_redshift_create_inundation_tables_if_not_exist.sql b/...stprocess_sql/fim_caching_templates/0a_redshift_create_inundation_tables_if_not_exist.sql
@@ -1,3 +1,5 @@
+-- This creates the four tables on a Redshift db needed for a cached fim pipeline run.
+-- These four tables exist on both RDS and Redshift, so any changes here will need to be synced with the RDS version as well - 0b_rds_create_inundation_tables_if_not_exist.sql
 CREATE TABLE IF NOT EXISTS {rs_fim_table}_flows
 (
     feature_id integer,

diff --git a/...db_postprocess_sql/fim_caching_templates/0b_rds_create_inundation_tables_if_not_exist.sql b/...db_postprocess_sql/fim_caching_templates/0b_rds_create_inundation_tables_if_not_exist.sql
@@ -1,3 +1,5 @@
+-- This creates the four tables on an RDS db needed for a cached fim pipeline run.
+-- These four tables exist on both RDS and Redshift, so any changes here will need to be synced with the Redshift version as well - 0a_redshift_create_inundation_tables_if_not_exist.sql
 CREATE TABLE IF NOT EXISTS {db_fim_table}_flows
 (
     hydro_id integer,

diff --git a/...ions/viz_db_postprocess_sql/fim_caching_templates/1a_rds_build_inundation_flows_table.sql b/...ions/viz_db_postprocess_sql/fim_caching_templates/1a_rds_build_inundation_flows_table.sql
@@ -1,3 +1,6 @@
+-- This populates a standardized fim_flows table, filtered to high water threshold, on RDS. This is essentially the domain of a given fim run.
+-- The prc_status column is updated throughout the fim run with a status reflecting how fim is calculated for each reach (from ras2fim cache, from hand cache, hand processing, etc.)
+-- This table is copied to Redshift in the next step (in order to query the cache there), but this table on RDS is the authoritative source as far as the prc_status column goes.
 TRUNCATE {db_fim_table}_flows;
 INSERT INTO {db_fim_table}_flows (feature_id, hydro_id, huc8, branch, reference_time, discharge_cms, discharge_cfs, prc_status)
 SELECT

diff --git a/...ctions/viz_db_postprocess_sql/fim_caching_templates/1b_redshift_copy_inundation_flows.sql b/...ctions/viz_db_postprocess_sql/fim_caching_templates/1b_redshift_copy_inundation_flows.sql
@@ -1,3 +1,4 @@
+-- Copy the fim_flows table on RDS to Redshift - this allows querying the hand cache on Redshift by joining to this table.
 TRUNCATE {rs_fim_table}_flows;
 INSERT INTO {rs_fim_table}_flows (feature_id, hydro_id, huc8, branch, reference_time, discharge_cms, discharge_cfs, prc_status)
 SELECT

diff --git a/...tions/viz_db_postprocess_sql/fim_caching_templates/2a_redshift_query_cached_fim_table.sql b/...tions/viz_db_postprocess_sql/fim_caching_templates/2a_redshift_query_cached_fim_table.sql
@@ -1,3 +1,6 @@
+-- This is the query that pulls cached hand fim from the cache on Redshift. It does this by joining to the just-populated flows table, with WHERE clauses on discharge
+-- As of right now, feature_id, hydro_id, huc8, branch, and stage combine to represent a primary key in the hand hydrotables, so all of those fields are used in joins
+-- (I've asked the fim team to hash a single unique id for feature_id, hydro_id, huc8, branch combinations... which will simplify these queries, and hopefully help with performance.)
 TRUNCATE {rs_fim_table};
 TRUNCATE {rs_fim_table}_geo;
 TRUNCATE {rs_fim_table}_zero_stage;

diff --git a/...A/viz_functions/viz_db_postprocess_sql/fim_caching_templates/3a_rds_ras2fim_insertion.sql b/...A/viz_functions/viz_db_postprocess_sql/fim_caching_templates/3a_rds_ras2fim_insertion.sql
@@ -1,3 +1,4 @@
+-- This SQL queries the ras2fim cache on RDS, and inserts appropriate rows into the fim tables of the given run.
 TRUNCATE {db_fim_table};
 TRUNCATE {db_fim_table}_geo;
 TRUNCATE {db_fim_table}_zero_stage;

diff --git a/...z_functions/viz_db_postprocess_sql/fim_caching_templates/3b_rds_cached_hand_insertion.sql b/...z_functions/viz_db_postprocess_sql/fim_caching_templates/3b_rds_cached_hand_insertion.sql
@@ -1,3 +1,4 @@
+-- This SQL queries the just-updated hand cache table on RDS, and inserts appropriate rows into the fim tables of the given run.
 INSERT INTO {db_fim_table}(
 	SELECT * FROM dblink('external_vpp_redshift', $REDSHIFT$
 	SELECT hydro_id, feature_id, huc8, branch, forecast_discharge_cfs, forecast_stage_ft, rc_discharge_cfs,

diff --git a/...unctions/viz_db_postprocess_sql/fim_caching_templates/4a_rds_create_fim_publish_table.sql b/...unctions/viz_db_postprocess_sql/fim_caching_templates/4a_rds_create_fim_publish_table.sql
@@ -1,3 +1,4 @@
+-- This is a generic / standardized query to create a publish.fim table for fim_config product processing (works for NWM configurations, but may not work for special fim configurations like RnR or CatFIM)
 DROP TABLE IF EXISTS {db_publish_table};
 
 SELECT  

diff --git a/...functions/viz_db_postprocess_sql/fim_caching_templates/5a_redshift_cache_fim_from_rds.sql b/...functions/viz_db_postprocess_sql/fim_caching_templates/5a_redshift_cache_fim_from_rds.sql
@@ -1,4 +1,8 @@
--- This template is designed to add freshly processed FIM polygons to the cached_fim tables on Redshift
+-- This template is designed to add freshly processed FIM polygons (which don't already exist in the cache) in the current FIM run back into the cached hand tables on Redshift.
+-- To ensure that no duplicates are added to the cache (which could be possible if multiple fim configurations are running at the same time), this query joins to the target table and ensures that
+-- the current hydrotable record doesn't already exist in the cache. This slows down the query significantly, and there is likely a potential optimization here... possibly using the UPSERT functionality of Redshift.
+-- As of right now, feature_id, hydro_id, huc8, branch, and stage combine to represent a primary key in the hand hydrotables, so all of those fields are used in joins
+-- (I've asked the fim team to hash a single unique id for feature_id, hydro_id, huc8, branch combinations... which will simplify these queries, and hopefully help with performance.)
 
 -- 1. Add unique feature_id/hydro_id records to the hydrotable_cached_max table
 INSERT INTO fim.hydrotable_cached_max(hydro_id, feature_id, huc8, branch, fim_version, max_rc_discharge_cfs, max_rc_stage_ft)