From 83741f8ab5caaaba0ade8deba9f0b91af9f080fd Mon Sep 17 00:00:00 2001
From: pvannierop
Date: Wed, 25 Oct 2023 13:57:17 +0200
Subject: [PATCH] Add clickhouse development code

Moved and adapted from pvannierop/cbioportal-clickhouse-pilot repository
---
 .dockerignore | 1 +
 dev/clickhouse/DEV_SETUP.md | 32 +++
 .../a_column_schema.sql | 156 ++++++++++++++
 .../c_import_mysql_data.sql | 115 +++++++++++
 dev/clickhouse/docker-compose.yml | 27 +++
 .../cbio_database_views.sql | 190 ++++++++++++++++++
 6 files changed, 521 insertions(+)
 create mode 100644 dev/clickhouse/DEV_SETUP.md
 create mode 100644 dev/clickhouse/clickhouse_provisioning/a_column_schema.sql
 create mode 100644 dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
 create mode 100644 dev/clickhouse/docker-compose.yml
 create mode 100644 dev/clickhouse/mysql_provisioning/cbio_database_views.sql

diff --git a/.dockerignore b/.dockerignore
index c722aa46de9..a8db72d5712 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,3 +9,4 @@ docs
 docker
 # don't ignore entrypoint scripts
 !docker/web-and-data/docker-entrypoint.sh
+dev/clickhouse/clickhouse_data
\ No newline at end of file
diff --git a/dev/clickhouse/DEV_SETUP.md b/dev/clickhouse/DEV_SETUP.md
new file mode 100644
index 00000000000..40740f67743
--- /dev/null
+++ b/dev/clickhouse/DEV_SETUP.md
@@ -0,0 +1,32 @@
+# Setup for development on Clickhouse integration
+
+## Pre-requisites
+
+1. cBioPortal database:
+
+- provisioned with one or more studies in the _cbioportal_ database.
+- user: _cbio_ password: _P@ssword1_ with all permissions on the _cbioportal_ database.
+- available on port 3306 on the host system.
+
+2. System with docker and docker compose installed.
+
+## Setup
+
+All commands start from the root repository location.
+
+1. Start cBioPortal database.
+2. Create MySQL views in the cBioPortal database by running the commands
+   in [cbio_database_views.sql](mysql_provisioning/cbio_database_views.sql).
+3. Start Clickhouse (provisioned automatically from MySQL).
+
+```
+cd ./dev/clickhouse
+docker compose up -d
+```
+
+This will start a Clickhouse instance that is available on the host system:
+- port: _8123_
+- database: _cbioportal_
+- username: _cbio_
+- password: _P@ssword1_
+
diff --git a/dev/clickhouse/clickhouse_provisioning/a_column_schema.sql b/dev/clickhouse/clickhouse_provisioning/a_column_schema.sql
new file mode 100644
index 00000000000..ef4acaab3d3
--- /dev/null
+++ b/dev/clickhouse/clickhouse_provisioning/a_column_schema.sql
@@ -0,0 +1,156 @@
+-- MySQL Script generated by MySQL Workbench
+-- Wed 08 Mar 2023 07:24:40 PM CET
+-- Model: New Model Version: 1.0
+-- MySQL Workbench Forward Engineering
+
+-- -----------------------------------------------------
+-- Schema cbioportal
+-- -----------------------------------------------------
+
+-- -----------------------------------------------------
+-- Schema cbioportal
+-- -----------------------------------------------------
+CREATE DATABASE IF NOT EXISTS `cbioportal` ;
+USE `cbioportal` ;
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`genomic_event`
+-- -----------------------------------------------------
+-- This table records genomics events at the single gene level.
+-- For Structural Variants gene1 and gene2 are represented as separate rows.
+-- This table supports queries for the mutated-genes, cna-genes, and structvar-genes endpoints.
+DROP TABLE IF EXISTS `cbioportal`.`genomic_event` ;
+CREATE TABLE IF NOT EXISTS `cbioportal`.`genomic_event` (
+  `sample_unique_id` VARCHAR(45),
+  `variant` VARCHAR(45),
+  `variant_type` VARCHAR(45),
+  `hugo_gene_symbol` VARCHAR(45),
+  `gene_panel_stable_id` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45),
+  `genetic_profile_stable_id` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (`sample_unique_id`, `variant`, `hugo_gene_symbol`, `cancer_study_identifier`, `genetic_profile_stable_id`)
+PRIMARY KEY (`sample_unique_id`, `variant`, `hugo_gene_symbol`, `cancer_study_identifier`, `genetic_profile_stable_id`);
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`structural_variant`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`structural_variant` ;
+CREATE TABLE IF NOT EXISTS `cbioportal`.`structural_variant` (
+  `sample_unique_id` VARCHAR(45),
+  `hugo_symbol_gene1` VARCHAR(45),
+  `hugo_symbol_gene2` VARCHAR(45),
+  `gene_panel_stable_id` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45),
+  `genetic_profile_stable_id` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (`sample_unique_id`, `hugo_symbol_gene1`, `hugo_symbol_gene2`, `cancer_study_identifier`)
+PRIMARY KEY (`sample_unique_id`, `hugo_symbol_gene1`, `hugo_symbol_gene2`, `cancer_study_identifier`);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`sample_clinical_attribute_numeric`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`sample_clinical_attribute_numeric` ;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`sample_clinical_attribute_numeric` (
+  `patient_unique_id` VARCHAR(45),
+  `sample_unique_id` VARCHAR(45),
+  `attribute_name` VARCHAR(45),
+  `attribute_value` FLOAT,
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+PRIMARY KEY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`sample_clinical_attribute_categorical`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`sample_clinical_attribute_categorical` ;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`sample_clinical_attribute_categorical` (
+  `patient_unique_id` VARCHAR(45),
+  `sample_unique_id` VARCHAR(45),
+  `attribute_name` VARCHAR(45),
+  `attribute_value` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+PRIMARY KEY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`patient_clinical_attribute_categorical`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`patient_clinical_attribute_categorical` ;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`patient_clinical_attribute_categorical` (
+  `patient_unique_id` VARCHAR(45),
+  `attribute_name` VARCHAR(45),
+  `attribute_value` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+PRIMARY KEY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier);
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`patient_clinical_attribute_numeric`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`patient_clinical_attribute_numeric` ;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`patient_clinical_attribute_numeric` (
+  `patient_unique_id` VARCHAR(45),
+  `attribute_name` VARCHAR(45),
+  `attribute_value` FLOAT,
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+PRIMARY KEY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`sample_in_genetic_profile`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`sample_in_genetic_profile`;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`sample_in_genetic_profile` (
+  `sample_unique_id` VARCHAR(45),
+  `genetic_profile_stable_id_short` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (sample_unique_id, genetic_profile_stable_id_short)
+PRIMARY KEY (sample_unique_id, genetic_profile_stable_id_short);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`sample_list`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`sample_list` ;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`sample_list` (
+  `sample_unique_id` VARCHAR(45),
+  `sample_list_stable_id` VARCHAR(45),
+  `name` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (sample_unique_id, sample_list_stable_id, `name`, cancer_study_identifier)
+PRIMARY KEY (sample_unique_id, sample_list_stable_id, `name`, cancer_study_identifier);
+
+
+-- -----------------------------------------------------
+-- Table `cbioportal`.`sample`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `cbioportal`.`sample`;
+
+CREATE TABLE IF NOT EXISTS `cbioportal`.`sample` (
+  `sample_unique_id` VARCHAR(45),
+  `sample_unique_id_base64` VARCHAR(45),
+  `sample_stable_id` VARCHAR(45),
+  `patient_unique_id` VARCHAR(45),
+  `patient_unique_id_base64` VARCHAR(45),
+  `patient_stable_id` VARCHAR(45),
+  `cancer_study_identifier` VARCHAR(45))
+ENGINE = MergeTree
+ORDER BY (sample_unique_id, patient_unique_id, cancer_study_identifier)
+PRIMARY KEY (sample_unique_id, patient_unique_id, cancer_study_identifier);
diff --git a/dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql b/dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
new file mode 100644
index 00000000000..1dc2967f48b
--- /dev/null
+++ b/dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
@@ -0,0 +1,115 @@
+insert into cbioportal.sample_in_genetic_profile
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_sample_in_genetic_profile',
+    'cbio',
+    'P@ssword1'
+);
+
+insert into cbioportal.sample_list
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_sample_list',
+    'cbio',
+    'P@ssword1'
+);
+
+insert into cbioportal.structural_variant
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_structural_variant',
+    'cbio',
+    'P@ssword1'
+);
+
+insert into cbioportal.sample
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_sample',
+    'cbio',
+    'P@ssword1'
+);
+
+-- NOTE: `select *` maps columns by position, so each MySQL view must list its columns in the target table's column order.
+insert into cbioportal.sample_clinical_attribute_numeric
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_sample_clinical_attribute_numeric',
+    'cbio',
+    'P@ssword1'
+);
+
+
+insert into cbioportal.sample_clinical_attribute_categorical
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_sample_clinical_attribute_categorical',
+    'cbio',
+    'P@ssword1'
+);
+
+
+insert into cbioportal.patient_clinical_attribute_numeric
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_patient_clinical_attribute_numeric',
+    'cbio',
+    'P@ssword1'
+);
+
+
+
+insert into cbioportal.patient_clinical_attribute_categorical
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_patient_clinical_attribute_categorical',
+    'cbio',
+    'P@ssword1'
+);
+
+-- The four view_genomic_event_* views are appended into the single genomic_event table; all must share its column order.
+insert into cbioportal.genomic_event
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_genomic_event_mutation',
+    'cbio',
+    'P@ssword1'
+);
+
+
+insert into cbioportal.genomic_event
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_genomic_event_cna',
+    'cbio',
+    'P@ssword1'
+);
+
+insert into cbioportal.genomic_event
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_genomic_event_sv_gene1',
+    'cbio',
+    'P@ssword1'
+);
+
+
+insert into cbioportal.genomic_event
+select * from mysql(
+    '127.0.0.1:3306',
+    'cbioportal',
+    'view_genomic_event_sv_gene2',
+    'cbio',
+    'P@ssword1'
+);
\ No newline at end of file
diff --git a/dev/clickhouse/docker-compose.yml b/dev/clickhouse/docker-compose.yml
new file mode 100644
index 00000000000..8d389a1b38e
--- /dev/null
+++ b/dev/clickhouse/docker-compose.yml
@@ -0,0 +1,27 @@
+version: "3.1"
+
+services:
+
+  clickhouse:
+    image: clickhouse/clickhouse-server:22.6
+    container_name: clickhouse
+    network_mode: host
+    restart: unless-stopped
+    cap_add:
+      - SYS_NICE
+      - NET_ADMIN
+      - IPC_LOCK
+    environment:
+      - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1
+      - CLICKHOUSE_DB=cbioportal
+      - CLICKHOUSE_USER=cbio
+      - CLICKHOUSE_PASSWORD=P@ssword1
+    volumes:
+      - ./clickhouse_data/:/var/lib/clickhouse/
+      - ./clickhouse_provisioning/:/docker-entrypoint-initdb.d/
+    ulimits:
+      nofile:
+        soft: 1000000
+        hard: 1000000
+    # No `ports:` mapping on purpose: `network_mode: host` shares the host network stack, so the
+    # server's HTTP port (8123) is reachable on the host directly; published ports do not apply in host mode.
\ No newline at end of file
diff --git a/dev/clickhouse/mysql_provisioning/cbio_database_views.sql b/dev/clickhouse/mysql_provisioning/cbio_database_views.sql
new file mode 100644
index 00000000000..0556c06e86e
--- /dev/null
+++ b/dev/clickhouse/mysql_provisioning/cbio_database_views.sql
@@ -0,0 +1,190 @@
+# sample
+DROP VIEW IF EXISTS view_sample;
+CREATE VIEW view_sample AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id,
+    TO_BASE64(concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID)) as sample_unique_id_base64,
+    sample.STABLE_ID as sample_stable_id,
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id,
+    TO_BASE64(concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID)) as patient_unique_id_base64,
+    p.STABLE_ID as patient_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM sample
+    INNER JOIN patient p on sample.PATIENT_ID = p.INTERNAL_ID
+    INNER JOIN cancer_study cs on p.CANCER_STUDY_ID = cs.CANCER_STUDY_ID;
+
+# sample list
+DROP VIEW IF EXISTS view_sample_list;
+CREATE VIEW view_sample_list AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    sl.STABLE_ID as sample_list_stable_id,
+    sl.NAME as name,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM sample_list as sl
+    INNER JOIN sample_list_list as sll ON sll.LIST_ID = sl.LIST_ID
+    INNER JOIN sample as s ON s.INTERNAL_ID = sll.SAMPLE_ID
+    INNER JOIN cancer_study cs on sl.CANCER_STUDY_ID = cs.CANCER_STUDY_ID;
+
+# genomic_event
+DROP VIEW IF EXISTS view_genomic_event;
+DROP VIEW IF EXISTS view_genomic_event_mutation;
+-- NOTE: slow to evaluate. Column order mirrors the ClickHouse `genomic_event` table because the import (`select *`) is positional.
+CREATE VIEW view_genomic_event_mutation AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id,
+    me.PROTEIN_CHANGE as variant,
+    'mutation' as variant_type,
+    gene.HUGO_GENE_SYMBOL as hugo_gene_symbol,
+    gp.STABLE_ID as gene_panel_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier,
+    g.STABLE_ID as genetic_profile_stable_id
+FROM mutation
+    LEFT JOIN mutation_event as me ON mutation.MUTATION_EVENT_ID = me.MUTATION_EVENT_ID
+    LEFT JOIN sample_profile sp on mutation.SAMPLE_ID = sp.SAMPLE_ID and mutation.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID
+    LEFT JOIN gene_panel gp on sp.PANEL_ID = gp.INTERNAL_ID
+    LEFT JOIN genetic_profile g on sp.GENETIC_PROFILE_ID = g.GENETIC_PROFILE_ID
+    LEFT JOIN cancer_study cs on g.CANCER_STUDY_ID = cs.CANCER_STUDY_ID
+    LEFT JOIN sample on mutation.SAMPLE_ID = sample.INTERNAL_ID
+    LEFT JOIN gene ON mutation.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID;
+
+DROP VIEW IF EXISTS view_genomic_event_cna;
+CREATE VIEW view_genomic_event_cna AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id,
+    convert(ce.ALTERATION, char) as variant,
+    'cna' as variant_type,
+    gene.HUGO_GENE_SYMBOL as hugo_gene_symbol,
+    gene_panel.STABLE_ID as gene_panel_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier,
+    gp.STABLE_ID as genetic_profile_stable_id
+FROM sample_cna_event
+    INNER JOIN cna_event ce on sample_cna_event.CNA_EVENT_ID = ce.CNA_EVENT_ID
+    INNER JOIN gene on ce.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID
+    INNER JOIN genetic_profile gp on sample_cna_event.GENETIC_PROFILE_ID = gp.GENETIC_PROFILE_ID
+    INNER JOIN sample on sample_cna_event.SAMPLE_ID = sample.INTERNAL_ID
+    INNER JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID AND sp.SAMPLE_ID = sample.INTERNAL_ID
+    INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID
+    INNER JOIN gene_panel ON sp.PANEL_ID = gene_panel.INTERNAL_ID;
+
+DROP VIEW IF EXISTS view_genomic_event_sv_gene1;
+CREATE VIEW view_genomic_event_sv_gene1 AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    Event_Info as variant,
+    'sv' as variant_type,
+    hugo_gene_symbol,
+    g.STABLE_ID as gene_panel_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier,
+    gp.STABLE_ID as genetic_profile_stable_id
+FROM structural_variant as sv
+    INNER JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL as hugo_gene_symbol FROM gene) gene1 on gene1.ENTREZ_GENE_ID = sv.SITE1_ENTREZ_GENE_ID
+    INNER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID
+    INNER JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID
+    INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID
+    INNER JOIN sample_profile sp on sp.SAMPLE_ID = sv.SAMPLE_ID and sp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID
+    INNER JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID;
+
+DROP VIEW IF EXISTS view_genomic_event_sv_gene2;
+CREATE VIEW view_genomic_event_sv_gene2 AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    Event_Info as variant,
+    'sv' as variant_type,
+    hugo_gene_symbol,
+    g.STABLE_ID as gene_panel_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier,
+    gp.STABLE_ID as genetic_profile_stable_id
+FROM structural_variant as sv
+    INNER JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL as hugo_gene_symbol FROM gene) gene2 on gene2.ENTREZ_GENE_ID = sv.SITE2_ENTREZ_GENE_ID
+    INNER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID
+    INNER JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID
+    INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID
+    INNER JOIN sample_profile sp on sp.SAMPLE_ID = sv.SAMPLE_ID and sp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID
+    INNER JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID;
+
+-- structural variant
+DROP VIEW IF EXISTS view_structural_variant;
+CREATE VIEW view_structural_variant AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    gene1.HUGO_GENE_SYMBOL as hugo_symbol_gene1,
+    gene2.HUGO_GENE_SYMBOL as hugo_symbol_gene2,
+    g.STABLE_ID as gene_panel_stable_id,
+    cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier,
+    gp.STABLE_ID as genetic_profile_stable_id
+FROM structural_variant as sv
+    INNER JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL FROM gene) gene1 on gene1.ENTREZ_GENE_ID = sv.SITE1_ENTREZ_GENE_ID
+    INNER JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL FROM gene) gene2 on gene2.ENTREZ_GENE_ID = sv.SITE2_ENTREZ_GENE_ID
+    INNER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID
+    INNER JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID
+    INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID
+    INNER JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID AND sp.SAMPLE_ID = sv.SAMPLE_ID
+    INNER JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID;
+
+-- sample_clinical_attribute_numeric (patient id first: column order mirrors the ClickHouse table, which is filled positionally)
+DROP VIEW IF EXISTS view_sample_clinical_attribute_numeric;
+CREATE VIEW view_sample_clinical_attribute_numeric AS
+SELECT
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id,
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    ATTR_ID as attribute_name,
+    ATTR_VALUE as attribute_value,
+    cancer_study.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM cancer_study
+    INNER JOIN patient p on cancer_study.CANCER_STUDY_ID = p.CANCER_STUDY_ID
+    INNER JOIN sample s on p.INTERNAL_ID = s.PATIENT_ID
+    INNER JOIN clinical_sample cs on s.INTERNAL_ID = cs.INTERNAL_ID
+WHERE ATTR_VALUE REGEXP '^[0-9.]+$';
+
+-- sample_clinical_attribute_categorical (patient id first: column order mirrors the ClickHouse table, which is filled positionally)
+DROP VIEW IF EXISTS view_sample_clinical_attribute_categorical;
+CREATE VIEW view_sample_clinical_attribute_categorical AS
+SELECT
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id,
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id,
+    ATTR_ID as attribute_name,
+    ATTR_VALUE as attribute_value,
+    cancer_study.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM cancer_study
+    INNER JOIN patient p on cancer_study.CANCER_STUDY_ID = p.CANCER_STUDY_ID
+    INNER JOIN sample s on p.INTERNAL_ID = s.PATIENT_ID
+    INNER JOIN clinical_sample cs on s.INTERNAL_ID = cs.INTERNAL_ID
+WHERE ATTR_VALUE NOT REGEXP '^[0-9.]+$';
+
+-- patient_clinical_attribute_numeric
+DROP VIEW IF EXISTS view_patient_clinical_attribute_numeric;
+CREATE VIEW view_patient_clinical_attribute_numeric AS
+SELECT
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id,
+    ATTR_ID as attribute_name,
+    ATTR_VALUE as attribute_value,
+    cancer_study.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM cancer_study
+    INNER JOIN patient p on cancer_study.CANCER_STUDY_ID = p.CANCER_STUDY_ID
+    INNER JOIN clinical_patient cp on p.INTERNAL_ID = cp.INTERNAL_ID
+WHERE ATTR_VALUE REGEXP '^[0-9.]+$';
+
+-- patient_clinical_attribute_categorical
+DROP VIEW IF EXISTS view_patient_clinical_attribute_categorical;
+CREATE VIEW view_patient_clinical_attribute_categorical AS
+SELECT
+    concat(cancer_study.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id,
+    ATTR_ID as attribute_name,
+    ATTR_VALUE as attribute_value,
+    cancer_study.CANCER_STUDY_IDENTIFIER as cancer_study_identifier
+FROM cancer_study
+    INNER JOIN patient p on cancer_study.CANCER_STUDY_ID = p.CANCER_STUDY_ID
+    INNER JOIN clinical_patient cp on p.INTERNAL_ID = cp.INTERNAL_ID
+WHERE ATTR_VALUE NOT REGEXP '^[0-9.]+$';
+
+-- sample_in_genetic_profile
+DROP VIEW IF EXISTS view_sample_in_genetic_profile;
+CREATE VIEW view_sample_in_genetic_profile AS
+SELECT
+    concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id,
+    replace(gp.STABLE_ID, concat(cs.CANCER_STUDY_IDENTIFIER, '_'), '') as genetic_profile_stable_id_short
+FROM sample
+    INNER JOIN sample_profile sp on sample.INTERNAL_ID = sp.SAMPLE_ID
+    INNER JOIN genetic_profile gp on sp.GENETIC_PROFILE_ID = gp.GENETIC_PROFILE_ID
+    INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID;
\ No newline at end of file