forked from cBioPortal/cbioportal
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Moved and adapted from pvannierop/cbioportal-clickhouse-pilot repository
- Loading branch information
1 parent
a317afe
commit 83741f8
Showing
6 changed files
with
521 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Setup for development on Clickhouse integration | ||
|
||
## Pre-requisites | ||
|
||
1. cBioPortal database: | ||
|
||
- provisioned with one or more studies in the _cbioportal_ database. | ||
- user: _cbio_ password: _P@ssword1_ with all permissions on the _cbioportal_ database. | ||
- available on port 3306 on the host system. | ||
|
||
2. System with docker and docker compose installed. | ||
|
||
## Setup | ||
|
||
All commands start from the root repository location. | ||
|
||
1. Start cBioPortal database. | ||
2. Create MySQL views in the cBioPortal database by running the commands | ||
in [cbio_database_views.sql](mysql_provisioning/cbio_database_views.sql). | ||
3. Start Clickhouse (provisioned automatically from MySQL). | ||
|
||
``` | ||
cd ./dev/clickhouse | ||
docker compose up -d | ||
``` | ||
|
||
This will start a Clickhouse instance that is available on the host system: | ||
- port: _8123_ | ||
- database: _cbioportal_ | ||
- username: _cbio_ | ||
- password: _P@ssword1_ | ||
|
156 changes: 156 additions & 0 deletions
156
dev/clickhouse/clickhouse_provisioning/a_column_schema.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
-- MySQL Script generated by MySQL Workbench | ||
-- Wed 08 Mar 2023 07:24:40 PM CET | ||
-- Model: New Model Version: 1.0 | ||
-- MySQL Workbench Forward Engineering | ||
|
||
-- ----------------------------------------------------- | ||
-- Schema cbioportal | ||
-- ----------------------------------------------------- | ||
|
||
-- ----------------------------------------------------- | ||
-- Schema cbioportal | ||
-- ----------------------------------------------------- | ||
-- Make sure the target database exists, then select it for the statements that follow.
CREATE DATABASE IF NOT EXISTS cbioportal;
USE cbioportal;
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`genomic_event` | ||
-- ----------------------------------------------------- | ||
-- This table records genomics events at the single gene level. | ||
-- For Structural Variants gene1 and gene2 are represented as separate rows. | ||
-- This table supports queries for the mutated-genes, cna-genes, and structvar-genes endpoints. | ||
-- Rebuild genomic_event: discard any earlier copy, then create it empty.
-- Columns use ClickHouse-native type names (String is what VARCHAR(n) resolves to).
-- The sorting key and the primary key are declared identically.
DROP TABLE IF EXISTS cbioportal.genomic_event;

CREATE TABLE IF NOT EXISTS cbioportal.genomic_event
(
    sample_unique_id          String,
    variant                   String,
    variant_type              String,
    hugo_gene_symbol          String,
    gene_panel_stable_id      String,
    cancer_study_identifier   String,
    genetic_profile_stable_id String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    variant,
    hugo_gene_symbol,
    cancer_study_identifier,
    genetic_profile_stable_id
)
PRIMARY KEY (
    sample_unique_id,
    variant,
    hugo_gene_symbol,
    cancer_study_identifier,
    genetic_profile_stable_id
);
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`structural_variant` | ||
-- ----------------------------------------------------- | ||
-- Rebuild structural_variant: discard any earlier copy, then create it empty.
-- Each row carries both gene symbols (gene1 and gene2) of a structural variant.
-- Columns use ClickHouse-native type names (String is what VARCHAR(n) resolves to).
DROP TABLE IF EXISTS cbioportal.structural_variant;

CREATE TABLE IF NOT EXISTS cbioportal.structural_variant
(
    sample_unique_id          String,
    hugo_symbol_gene1         String,
    hugo_symbol_gene2         String,
    gene_panel_stable_id      String,
    cancer_study_identifier   String,
    genetic_profile_stable_id String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    hugo_symbol_gene1,
    hugo_symbol_gene2,
    cancer_study_identifier
)
PRIMARY KEY (
    sample_unique_id,
    hugo_symbol_gene1,
    hugo_symbol_gene2,
    cancer_study_identifier
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`sample_clinical_attribute_numeric` | ||
-- ----------------------------------------------------- | ||
-- Rebuild sample_clinical_attribute_numeric: discard any earlier copy, then create it empty.
-- attribute_value is a 32-bit float (Float32 is what FLOAT resolves to in ClickHouse);
-- the remaining columns are String (what VARCHAR(n) resolves to).
DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_numeric;

CREATE TABLE IF NOT EXISTS cbioportal.sample_clinical_attribute_numeric
(
    patient_unique_id       String,
    sample_unique_id        String,
    attribute_name          String,
    attribute_value         Float32,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
PRIMARY KEY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`sample_clinical_attribute_categorical` | ||
-- ----------------------------------------------------- | ||
-- Rebuild sample_clinical_attribute_categorical: discard any earlier copy, then create it empty.
-- Every column, including attribute_value, is a String (what VARCHAR(n) resolves to in ClickHouse).
DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_categorical;

CREATE TABLE IF NOT EXISTS cbioportal.sample_clinical_attribute_categorical
(
    patient_unique_id       String,
    sample_unique_id        String,
    attribute_name          String,
    attribute_value         String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
PRIMARY KEY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`patient_clinical_attribute_categorical` | ||
-- ----------------------------------------------------- | ||
-- Rebuild patient_clinical_attribute_categorical: discard any earlier copy, then create it empty.
-- Every column, including attribute_value, is a String (what VARCHAR(n) resolves to in ClickHouse).
DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_categorical;

CREATE TABLE IF NOT EXISTS cbioportal.patient_clinical_attribute_categorical
(
    patient_unique_id       String,
    attribute_name          String,
    attribute_value         String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
PRIMARY KEY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`patient_clinical_attribute_numeric` | ||
-- ----------------------------------------------------- | ||
-- Rebuild patient_clinical_attribute_numeric: discard any earlier copy, then create it empty.
-- attribute_value is a 32-bit float (Float32 is what FLOAT resolves to in ClickHouse);
-- the remaining columns are String (what VARCHAR(n) resolves to).
DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_numeric;

CREATE TABLE IF NOT EXISTS cbioportal.patient_clinical_attribute_numeric
(
    patient_unique_id       String,
    attribute_name          String,
    attribute_value         Float32,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
PRIMARY KEY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`sample_in_genetic_profile` | ||
-- ----------------------------------------------------- | ||
-- Rebuild sample_in_genetic_profile: discard any earlier copy, then create it empty.
-- Two String columns (what VARCHAR(n) resolves to in ClickHouse); both form the sorting/primary key.
DROP TABLE IF EXISTS cbioportal.sample_in_genetic_profile;

CREATE TABLE IF NOT EXISTS cbioportal.sample_in_genetic_profile
(
    sample_unique_id                String,
    genetic_profile_stable_id_short String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    genetic_profile_stable_id_short
)
PRIMARY KEY (
    sample_unique_id,
    genetic_profile_stable_id_short
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`sample_list` | ||
-- ----------------------------------------------------- | ||
-- Rebuild sample_list: discard any earlier copy, then create it empty.
-- All columns are String (what VARCHAR(n) resolves to in ClickHouse) and all are part of the key.
-- `name` stays back-quoted, as in the original key clauses.
DROP TABLE IF EXISTS cbioportal.sample_list;

CREATE TABLE IF NOT EXISTS cbioportal.sample_list
(
    sample_unique_id        String,
    sample_list_stable_id   String,
    `name`                  String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    sample_list_stable_id,
    `name`,
    cancer_study_identifier
)
PRIMARY KEY (
    sample_unique_id,
    sample_list_stable_id,
    `name`,
    cancer_study_identifier
);
|
||
|
||
-- ----------------------------------------------------- | ||
-- Table `cbioportal`.`sample` | ||
-- ----------------------------------------------------- | ||
-- Rebuild sample: discard any earlier copy, then create it empty.
-- All columns are String (what VARCHAR(n) resolves to in ClickHouse).
-- Only the sample id, patient id and study identifier take part in the sorting/primary key.
DROP TABLE IF EXISTS cbioportal.sample;

CREATE TABLE IF NOT EXISTS cbioportal.sample
(
    sample_unique_id         String,
    sample_unique_id_base64  String,
    sample_stable_id         String,
    patient_unique_id        String,
    patient_unique_id_base64 String,
    patient_stable_id        String,
    cancer_study_identifier  String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    patient_unique_id,
    cancer_study_identifier
)
PRIMARY KEY (
    sample_unique_id,
    patient_unique_id,
    cancer_study_identifier
);
115 changes: 115 additions & 0 deletions
115
dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
-- Populate the ClickHouse tables from the MySQL views of the cBioPortal database.
-- Every statement pulls a whole view through the mysql() table function, whose
-- arguments are: 'host:port', database, table/view, user, password.
-- NOTE(review): SELECT * relies on each view's column order lining up with the
-- target table's column order -- confirm against the view definitions.

-- Sample / profile / list look-up tables.
INSERT INTO cbioportal.sample_in_genetic_profile
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_sample_in_genetic_profile', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.sample_list
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_sample_list', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.structural_variant
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_structural_variant', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.sample
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_sample', 'cbio', 'P@ssword1');

-- Clinical attributes, sample level.
INSERT INTO cbioportal.sample_clinical_attribute_numeric
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_sample_clinical_attribute_numeric', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.sample_clinical_attribute_categorical
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_sample_clinical_attribute_categorical', 'cbio', 'P@ssword1');

-- Clinical attributes, patient level.
INSERT INTO cbioportal.patient_clinical_attribute_numeric
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_patient_clinical_attribute_numeric', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.patient_clinical_attribute_categorical
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_patient_clinical_attribute_categorical', 'cbio', 'P@ssword1');

-- genomic_event is filled from four views: mutations, CNA, and the two
-- structural-variant gene views (gene1 and gene2).
INSERT INTO cbioportal.genomic_event
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_genomic_event_mutation', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.genomic_event
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_genomic_event_cna', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.genomic_event
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_genomic_event_sv_gene1', 'cbio', 'P@ssword1');

INSERT INTO cbioportal.genomic_event
SELECT *
FROM mysql('127.0.0.1:3306', 'cbioportal', 'view_genomic_event_sv_gene2', 'cbio', 'P@ssword1');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
version: "3.1"

services:

  # ClickHouse server used for local development of the ClickHouse integration.
  clickhouse:
    image: clickhouse/clickhouse-server:22.6
    container_name: clickhouse
    # Host networking is required: the provisioning script reaches MySQL at
    # 127.0.0.1:3306, i.e. on the host. In this mode the server's HTTP port
    # (8123) is exposed on the host directly, so no `ports:` mapping is
    # declared -- Compose does not allow port mappings together with
    # `network_mode: host`.
    network_mode: host
    restart: unless-stopped
    cap_add:
      - SYS_NICE
      - NET_ADMIN
      - IPC_LOCK
    environment:
      # Database and account created for the container.
      - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1
      - CLICKHOUSE_DB=cbioportal
      - CLICKHOUSE_USER=cbio
      - CLICKHOUSE_PASSWORD=P@ssword1
    volumes:
      # Data directory kept on the host next to this compose file.
      - ./clickhouse_data/:/var/lib/clickhouse/
      # SQL provisioning scripts (schema, then import from MySQL) mounted into
      # the image's init directory.
      - ./clickhouse_provisioning/:/docker-entrypoint-initdb.d/
    ulimits:
      # Raise the open-file limit for the server process.
      nofile:
        soft: 1000000
        hard: 1000000
Oops, something went wrong.