Skip to content

Commit

Permalink
Merge pull request #100 from Sage-Bionetworks/snow-155-userprofile_la…
Browse files Browse the repository at this point in the history
…test-dynamic-table

[SNOW-155] Converting userprofile_latest table to dynamic table
  • Loading branch information
danlu1 authored Jan 11, 2025
2 parents 4888855 + 8ab919c commit 22bc594
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Introduce the dynamic table
-- USERPROFILE_LATEST holds one row per user profile ID: the most recent
-- snapshot read from SYNAPSE_RAW.USERPROFILESNAPSHOT within the window
-- filtered below.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
CREATE OR REPLACE DYNAMIC TABLE USERPROFILE_LATEST
    TARGET_LAG = '1 day'
    WAREHOUSE = compute_xsmall
AS
-- Rank the snapshots of each ID from newest to oldest and keep only the first.
WITH dedup_userprofile AS (
    SELECT
        *
    FROM {{database_name}}.SYNAPSE_RAW.USERPROFILESNAPSHOT --noqa: TMP
    WHERE
        -- NOTE(review): the lower bound is a timestamp, not a date. If
        -- SNAPSHOT_DATE is a DATE column, the calendar day exactly 14 days
        -- back is excluded unless the refresh runs at midnight -- confirm
        -- that this is the intended window.
        SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '14 days'
    QUALIFY
        ROW_NUMBER() OVER (
            PARTITION BY ID
            -- Newest change first; SNAPSHOT_TIMESTAMP breaks ties between
            -- snapshots that share the same CHANGE_TIMESTAMP.
            ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
        ) = 1
)

SELECT
    -- The four free-text columns are excluded here and re-added below with
    -- empty strings normalised to NULL.
    * EXCLUDE (LOCATION, COMPANY, POSITION, INDUSTRY),
    -- TODO: Revisit this section once the mixture of NULL and empty strings is resolved in https://sagebionetworks.jira.com/browse/SWC-7215
    NULLIF(LOCATION, '') AS LOCATION,
    NULLIF(COMPANY, '') AS COMPANY,
    NULLIF(POSITION, '') AS POSITION,
    NULLIF(INDUSTRY, '') AS INDUSTRY
FROM
    dedup_userprofile;
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Attach descriptive metadata (table-level and per-column comments) to the
-- USERPROFILE_LATEST dynamic table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- Description of the dynamic table as a whole
COMMENT ON DYNAMIC TABLE USERPROFILE_LATEST
    IS 'This dynamic table contain the latest snapshot of user-profiles during the past 14 days. Snapshots are taken when user profiles are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';

-- Descriptions of the individual columns
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_TYPE
    IS 'The type of change that occurred to the user profile, e.g., CREATE, UPDATE (Snapshotting does not capture DELETE change).';
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_TIMESTAMP
    IS 'The time when any change to the user profile was made (e.g. create or update).';
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_USER_ID
    IS 'The unique identifier of the user who made the change to the user profile.';
COMMENT ON COLUMN USERPROFILE_LATEST.SNAPSHOT_TIMESTAMP
    IS 'The time when the snapshot was taken (It is usually after the change happened).';
COMMENT ON COLUMN USERPROFILE_LATEST.ID
    IS 'The unique identifier of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.USER_NAME
    IS 'The Synapse username.';
COMMENT ON COLUMN USERPROFILE_LATEST.FIRST_NAME
    IS 'The first name of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.LAST_NAME
    IS 'The last name of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.EMAIL
    IS 'The primary email of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.SNAPSHOT_DATE
    IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';
COMMENT ON COLUMN USERPROFILE_LATEST.CREATED_ON
    IS 'The creation time of the user profile.';
COMMENT ON COLUMN USERPROFILE_LATEST.IS_TWO_FACTOR_AUTH_ENABLED
    IS 'Indicates if the user had two factor authentication enabled when the snapshot was captured.';
COMMENT ON COLUMN USERPROFILE_LATEST.TOS_AGREEMENTS
    IS 'Contains the list of all the term of service that the user agreed to, with their agreed on date and version.';
COMMENT ON COLUMN USERPROFILE_LATEST.LOCATION
    IS 'The location of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.COMPANY
    IS 'The company where the user works.';
COMMENT ON COLUMN USERPROFILE_LATEST.POSITION
    IS 'The position of the user in the company.';
COMMENT ON COLUMN USERPROFILE_LATEST.INDUSTRY
    IS 'The industry/discipline that this person is associated with.';
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Keep a copy of the existing USERPROFILE_LATEST table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- The clone is named ``USERPROFILE_LATEST_BACKUP`` and is kept for validation;
-- OR REPLACE overwrites any backup of the same name that already exists.
CREATE OR REPLACE TABLE USERPROFILE_LATEST_BACKUP
    CLONE USERPROFILE_LATEST;
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop the ``USERPROFILE_LATEST`` table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
-- IF EXISTS: do not fail when the table has already been removed.
DROP TABLE IF EXISTS USERPROFILE_LATEST;
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop the snapshot stream
USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
-- IF EXISTS: do not fail when the stream has already been removed.
DROP STREAM IF EXISTS USERPROFILESNAPSHOT_STREAM;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Drop any scheduled tasks
USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
-- Suspend ROOT TASK
-- NOTE(review): presumably the root task has to be suspended before a task in
-- its graph can be changed or removed -- confirm against the Snowflake docs.
ALTER TASK REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK SUSPEND;
-- Drop LATEST_TABLE UPSERTING TASK
-- IF EXISTS: do not fail when the task has already been removed, so the root
-- task is still resumed by the statement below.
DROP TASK IF EXISTS UPSERT_TO_USERPROFILE_LATEST_TASK;
-- Resume the ROOT task and its child tasks
SELECT SYSTEM$TASK_DEPENDENTS_ENABLE('REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK');

0 comments on commit 22bc594

Please sign in to comment.