Skip to content

Commit

Permalink
Merge pull request #31 from umccr/implement-raw-vault-library-satellite-plab
Browse files Browse the repository at this point in the history

Implemented OrcaVault Library satellite model - plab
  • Loading branch information
victorskl authored Feb 5, 2025
2 parents d923b9a + 00d3d09 commit 3b9d9fe
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 0 deletions.
116 changes: 116 additions & 0 deletions orcavault/models/raw/sat_library_plab.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{#
  Model configuration: the table is built incrementally, each run appends
  its result rows, and on_schema_change='fail' asks dbt to stop the run when
  the model's columns no longer match the existing table.
#}
{{ config(
    materialized = 'incremental',
    incremental_strategy = 'append',
    on_schema_change = 'fail'
) }}

-- Read the library-level attributes from the lab metadata source table.
with source as (

    select
        lab.library_id,
        lab.workflow,
        lab.phenotype,
        lab.type,
        lab.assay,
        lab.quality,
        lab.source,
        lab.truseqindex
    from {{ source('ods', 'data_portal_labmetadata') }} as lab

),

-- Normalise every value: remove all runs of newline / carriage-return
-- characters, then trim the surrounding spaces.
-- The CTE is aliased so the column named `source` cannot be mistaken for the
-- CTE of the same name.
cleaned as (

    select
        trim(regexp_replace(src.library_id, E'[\\n\\r]+', '', 'g')) as library_id,
        trim(regexp_replace(src.workflow, E'[\\n\\r]+', '', 'g')) as workflow,
        trim(regexp_replace(src.phenotype, E'[\\n\\r]+', '', 'g')) as phenotype,
        trim(regexp_replace(src.type, E'[\\n\\r]+', '', 'g')) as type,
        trim(regexp_replace(src.assay, E'[\\n\\r]+', '', 'g')) as assay,
        trim(regexp_replace(src.quality, E'[\\n\\r]+', '', 'g')) as quality,
        trim(regexp_replace(src.source, E'[\\n\\r]+', '', 'g')) as source,
        trim(regexp_replace(src.truseqindex, E'[\\n\\r]+', '', 'g')) as truseqindex
    from source as src

),

-- Derive the two hash columns and carry the descriptive attributes along:
--   library_hk : hex-encoded SHA-256 of the cleaned library_id
--   hash_diff  : hex-encoded SHA-256 of the concatenated attribute values
-- NOTE(review): the attributes are concatenated without a delimiter and
-- PostgreSQL concat() skips NULL arguments, so different attribute
-- combinations can produce the same hash_diff. Changing the expression would
-- change every stored hash, so confirm the impact before touching it.
encoded as (

    select
        encode(sha256(cast(cln.library_id as bytea)), 'hex') as library_hk,
        encode(
            sha256(
                cast(
                    concat(
                        cln.workflow,
                        cln.phenotype,
                        cln.type,
                        cln.assay,
                        cln.quality,
                        cln.source,
                        cln.truseqindex
                    ) as bytea
                )
            ),
            'hex'
        ) as hash_diff,
        cln.workflow,
        cln.phenotype,
        cln.type,
        cln.assay,
        cln.quality,
        cln.source,
        cln.truseqindex
    from cleaned as cln

),

-- Distinct (library_hk, hash_diff) pairs that are not yet present in the
-- already-built table. When the model is not running incrementally the
-- `except` branch is omitted and every pair from `encoded` is kept.
-- NOTE(review): the comparison is made against all rows of the existing
-- table, not only the most recent row per key, so a library whose attributes
-- revert to a previously loaded state does not produce a new pair.
differentiated as (

    select
        enc.library_hk,
        enc.hash_diff
    from encoded as enc
    {% if is_incremental() %}
    except
    select
        loaded.library_hk,
        loaded.hash_diff
    from {{ this }} as loaded
    {% endif %}

),

-- Attach the load metadata and keep only the rows that must be written.
-- On incremental runs a row qualifies only when its exact
-- (library_hk, hash_diff) pair is listed in `differentiated`. Matching on
-- library_hk alone would also re-append already-loaded versions of any key
-- that has a new version in the same run, duplicating history and risking a
-- (library_hk, load_datetime) primary-key violation.
transformed as (

    select
        enc.library_hk,
        -- the same timestamp is stamped on every row written by this run
        cast('{{ run_started_at }}' as timestamptz) as load_datetime,
        -- constant name of the originating source table
        cast('data_portal_labmetadata' as varchar(255)) as record_source,
        enc.hash_diff,
        enc.workflow,
        enc.phenotype,
        enc.type,
        enc.assay,
        enc.quality,
        enc.source,
        enc.truseqindex
    from encoded as enc
    {% if is_incremental() %}
    where exists (
        select 1
        from differentiated as diff
        where diff.library_hk = enc.library_hk
          and diff.hash_diff = enc.hash_diff
    )
    {% endif %}

),

-- Pin every output column to an explicit data type; the types mirror the
-- column list declared for sat_library_plab in sat_schema.yml.
final as (

    select
        cast(tfm.library_hk as char(64)) as library_hk,
        cast(tfm.load_datetime as timestamptz) as load_datetime,
        cast(tfm.record_source as varchar(255)) as record_source,
        cast(tfm.hash_diff as char(64)) as hash_diff,
        cast(tfm.workflow as varchar(255)) as workflow,
        cast(tfm.phenotype as varchar(255)) as phenotype,
        cast(tfm.type as varchar(255)) as type,
        cast(tfm.assay as varchar(255)) as assay,
        cast(tfm.quality as varchar(255)) as quality,
        cast(tfm.source as varchar(255)) as source,
        cast(tfm.truseqindex as varchar(255)) as truseqindex
    from transformed as tfm

)

select * from final
34 changes: 34 additions & 0 deletions orcavault/models/raw/sat_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,37 @@ models:
data_type: varchar(255)
- name: quality
data_type: varchar(255)

- name: sat_library_plab
  # Enforce the column names and data types listed below as a model contract.
  config:
    contract:
      enforced: true
  constraints:
    # One row per library hash key and load timestamp.
    - type: primary_key
      columns:
        - library_hk
        - load_datetime
    # Each row points at its parent entry in hub_library.
    - type: foreign_key
      columns:
        - library_hk
      to: ref('hub_library')
      to_columns:
        - library_hk
  columns:
    - name: library_hk
      data_type: char(64)
    - name: load_datetime
      data_type: timestamptz
    - name: record_source
      data_type: varchar(255)
    - name: hash_diff
      data_type: char(64)
    - name: workflow
      data_type: varchar(255)
    - name: phenotype
      data_type: varchar(255)
    - name: type
      data_type: varchar(255)
    - name: assay
      data_type: varchar(255)
    - name: quality
      data_type: varchar(255)
    - name: source
      data_type: varchar(255)
    - name: truseqindex
      data_type: varchar(255)

0 comments on commit 3b9d9fe

Please sign in to comment.