Skip to content

Commit

Permalink
Add clickhouse development code
Browse files Browse the repository at this point in the history
Moved and adapted from the pvannierop/cbioportal-clickhouse-pilot repository
  • Loading branch information
pvannierop committed Oct 25, 2023
1 parent a317afe commit 83741f8
Show file tree
Hide file tree
Showing 6 changed files with 521 additions and 0 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ docs
docker
# don't ignore entrypoint scripts
!docker/web-and-data/docker-entrypoint.sh
dev/clickhouse/clickhouse_data
32 changes: 32 additions & 0 deletions dev/clickhouse/DEV_SETUP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Setup for development on Clickhouse integration

## Pre-requisites

1. cBioPortal database:

- provisioned with one or more studies in the _cbioportal_ database.
- user: _cbio_ password: _P@ssword1_ with all permissions on the _cbioportal_ database.
- available on port 3306 on the host system.

2. System with docker and docker compose installed.

## Setup

All commands start from the root repository location.

1. Start cBioPortal database.
2. Create MySQL views in the cBioPortal database by running the commands
in [cbio_database_views.sql](mysql_provisioning/cbio_database_views.sql).
3. Start Clickhouse (provisioned automatically from MySQL).

```
cd ./dev/clickhouse
docker compose up -d
```

This will start a Clickhouse instance that is available on the host system:
- port: _8123_
- database: _cbioportal_
- username: _cbio_
- password: _P@ssword1_

156 changes: 156 additions & 0 deletions dev/clickhouse/clickhouse_provisioning/a_column_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
-- MySQL Script generated by MySQL Workbench
-- Wed 08 Mar 2023 07:24:40 PM CET
-- Model: New Model Version: 1.0
-- MySQL Workbench Forward Engineering

-- -----------------------------------------------------
-- Schema cbioportal
-- -----------------------------------------------------

-- -----------------------------------------------------
-- Schema cbioportal
-- -----------------------------------------------------
-- Ensure the target database exists and make it the default database for
-- the remaining statements in this script.
CREATE DATABASE IF NOT EXISTS cbioportal;
USE cbioportal;


-- -----------------------------------------------------
-- Table `cbioportal`.`genomic_event`
-- -----------------------------------------------------
-- This table records genomic events at the single-gene level.
-- For Structural Variants, gene1 and gene2 are represented as separate rows.
-- This table supports queries for the mutated-genes, cna-genes, and structvar-genes endpoints.
-- Recreate genomic_event from scratch on every provisioning run.
-- Column types use ClickHouse's native names (String is what VARCHAR(n)
-- resolves to; the length argument is not enforced).
-- No PRIMARY KEY clause is given: MergeTree uses the ORDER BY sorting key as
-- the primary key when none is declared, which matches the previous
-- definition where both clauses listed the same columns.
DROP TABLE IF EXISTS cbioportal.genomic_event;
CREATE TABLE IF NOT EXISTS cbioportal.genomic_event
(
    sample_unique_id          String,
    variant                   String,
    variant_type              String,
    hugo_gene_symbol          String,
    gene_panel_stable_id      String,
    cancer_study_identifier   String,
    genetic_profile_stable_id String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    variant,
    hugo_gene_symbol,
    cancer_study_identifier,
    genetic_profile_stable_id
);

-- -----------------------------------------------------
-- Table `cbioportal`.`structural_variant`
-- -----------------------------------------------------
-- Recreate structural_variant from scratch on every provisioning run.
-- String is the native ClickHouse type behind VARCHAR(n).
-- The sorting key doubles as the primary key (MergeTree default when no
-- PRIMARY KEY clause is present); gene_panel_stable_id and
-- genetic_profile_stable_id are intentionally not part of it, as before.
DROP TABLE IF EXISTS cbioportal.structural_variant;
CREATE TABLE IF NOT EXISTS cbioportal.structural_variant
(
    sample_unique_id          String,
    hugo_symbol_gene1         String,
    hugo_symbol_gene2         String,
    gene_panel_stable_id      String,
    cancer_study_identifier   String,
    genetic_profile_stable_id String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    hugo_symbol_gene1,
    hugo_symbol_gene2,
    cancer_study_identifier
);


-- -----------------------------------------------------
-- Table `cbioportal`.`sample_clinical_attribute_numeric`
-- -----------------------------------------------------
-- Recreate sample_clinical_attribute_numeric from scratch on every
-- provisioning run.
-- Native type names are used: String (for VARCHAR(n)) and Float32 (for FLOAT).
-- All five columns form the sorting key, which MergeTree also uses as the
-- primary key when no PRIMARY KEY clause is given.
DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_numeric;
CREATE TABLE IF NOT EXISTS cbioportal.sample_clinical_attribute_numeric
(
    patient_unique_id       String,
    sample_unique_id        String,
    attribute_name          String,
    attribute_value         Float32,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);


-- -----------------------------------------------------
-- Table `cbioportal`.`sample_clinical_attribute_categorical`
-- -----------------------------------------------------
-- Recreate sample_clinical_attribute_categorical from scratch on every
-- provisioning run.
-- Same layout as the numeric sample-attribute table, except that
-- attribute_value is a String here.
-- All five columns form the sorting key, which MergeTree also uses as the
-- primary key when no PRIMARY KEY clause is given.
DROP TABLE IF EXISTS cbioportal.sample_clinical_attribute_categorical;
CREATE TABLE IF NOT EXISTS cbioportal.sample_clinical_attribute_categorical
(
    patient_unique_id       String,
    sample_unique_id        String,
    attribute_name          String,
    attribute_value         String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);


-- -----------------------------------------------------
-- Table `cbioportal`.`patient_clinical_attribute_categorical`
-- -----------------------------------------------------
-- Recreate patient_clinical_attribute_categorical from scratch on every
-- provisioning run.
-- String is the native ClickHouse type behind VARCHAR(n).
-- All four columns form the sorting key, which MergeTree also uses as the
-- primary key when no PRIMARY KEY clause is given.
DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_categorical;
CREATE TABLE IF NOT EXISTS cbioportal.patient_clinical_attribute_categorical
(
    patient_unique_id       String,
    attribute_name          String,
    attribute_value         String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);

-- -----------------------------------------------------
-- Table `cbioportal`.`patient_clinical_attribute_numeric`
-- -----------------------------------------------------
-- Recreate patient_clinical_attribute_numeric from scratch on every
-- provisioning run.
-- Native type names are used: String (for VARCHAR(n)) and Float32 (for FLOAT).
-- All four columns form the sorting key, which MergeTree also uses as the
-- primary key when no PRIMARY KEY clause is given.
DROP TABLE IF EXISTS cbioportal.patient_clinical_attribute_numeric;
CREATE TABLE IF NOT EXISTS cbioportal.patient_clinical_attribute_numeric
(
    patient_unique_id       String,
    attribute_name          String,
    attribute_value         Float32,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
);


-- -----------------------------------------------------
-- Table `cbioportal`.`sample_in_genetic_profile`
-- -----------------------------------------------------
-- Recreate sample_in_genetic_profile from scratch on every provisioning run.
-- Two String columns, both part of the sorting key; MergeTree uses that key
-- as the primary key as well when no PRIMARY KEY clause is given.
DROP TABLE IF EXISTS cbioportal.sample_in_genetic_profile;
CREATE TABLE IF NOT EXISTS cbioportal.sample_in_genetic_profile
(
    sample_unique_id                String,
    genetic_profile_stable_id_short String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    genetic_profile_stable_id_short
);


-- -----------------------------------------------------
-- Table `cbioportal`.`sample_list`
-- -----------------------------------------------------
-- Recreate sample_list from scratch on every provisioning run.
-- String is the native ClickHouse type behind VARCHAR(n).
-- All four columns form the sorting key, which MergeTree also uses as the
-- primary key when no PRIMARY KEY clause is given.
-- `name` stays back-quoted, as in the original key definition.
DROP TABLE IF EXISTS cbioportal.sample_list;
CREATE TABLE IF NOT EXISTS cbioportal.sample_list
(
    sample_unique_id        String,
    sample_list_stable_id   String,
    `name`                  String,
    cancer_study_identifier String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    sample_list_stable_id,
    `name`,
    cancer_study_identifier
);


-- -----------------------------------------------------
-- Table `cbioportal`.`sample`
-- -----------------------------------------------------
-- Recreate sample from scratch on every provisioning run.
-- String is the native ClickHouse type behind VARCHAR(n).
-- Only sample_unique_id, patient_unique_id and cancer_study_identifier are in
-- the sorting key; MergeTree uses that key as the primary key as well when no
-- PRIMARY KEY clause is given.
-- The table name stays back-quoted because SAMPLE is also a ClickHouse keyword.
DROP TABLE IF EXISTS cbioportal.`sample`;
CREATE TABLE IF NOT EXISTS cbioportal.`sample`
(
    sample_unique_id         String,
    sample_unique_id_base64  String,
    sample_stable_id         String,
    patient_unique_id        String,
    patient_unique_id_base64 String,
    patient_stable_id        String,
    cancer_study_identifier  String
)
ENGINE = MergeTree
ORDER BY (
    sample_unique_id,
    patient_unique_id,
    cancer_study_identifier
);
115 changes: 115 additions & 0 deletions dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
-- Load cbioportal.sample_in_genetic_profile from the MySQL view
-- view_sample_in_genetic_profile.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.sample_in_genetic_profile
(
    sample_unique_id,
    genetic_profile_stable_id_short
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_sample_in_genetic_profile',
    'cbio',
    'P@ssword1'
);

-- Load cbioportal.sample_list from the MySQL view view_sample_list.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.sample_list
(
    sample_unique_id,
    sample_list_stable_id,
    `name`,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_sample_list',
    'cbio',
    'P@ssword1'
);

-- Load cbioportal.structural_variant from the MySQL view
-- view_structural_variant.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.structural_variant
(
    sample_unique_id,
    hugo_symbol_gene1,
    hugo_symbol_gene2,
    gene_panel_stable_id,
    cancer_study_identifier,
    genetic_profile_stable_id
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_structural_variant',
    'cbio',
    'P@ssword1'
);

-- Load cbioportal.sample from the MySQL view view_sample.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.sample
(
    sample_unique_id,
    sample_unique_id_base64,
    sample_stable_id,
    patient_unique_id,
    patient_unique_id_base64,
    patient_stable_id,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_sample',
    'cbio',
    'P@ssword1'
);


-- Load cbioportal.sample_clinical_attribute_numeric from the MySQL view
-- view_sample_clinical_attribute_numeric.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.sample_clinical_attribute_numeric
(
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_sample_clinical_attribute_numeric',
    'cbio',
    'P@ssword1'
);


-- Load cbioportal.sample_clinical_attribute_categorical from the MySQL view
-- view_sample_clinical_attribute_categorical.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.sample_clinical_attribute_categorical
(
    patient_unique_id,
    sample_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_sample_clinical_attribute_categorical',
    'cbio',
    'P@ssword1'
);


-- Load cbioportal.patient_clinical_attribute_numeric from the MySQL view
-- view_patient_clinical_attribute_numeric.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.patient_clinical_attribute_numeric
(
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_patient_clinical_attribute_numeric',
    'cbio',
    'P@ssword1'
);



-- Load cbioportal.patient_clinical_attribute_categorical from the MySQL view
-- view_patient_clinical_attribute_categorical.
-- The target columns are named explicitly. INSERT ... SELECT matches the
-- selected columns to this list by position, so the view must expose its
-- columns in the same order. SELECT * is kept because the view definition
-- lives in the MySQL provisioning script and is not visible from this file.
INSERT INTO cbioportal.patient_clinical_attribute_categorical
(
    patient_unique_id,
    attribute_name,
    attribute_value,
    cancer_study_identifier
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_patient_clinical_attribute_categorical',
    'cbio',
    'P@ssword1'
);


-- Load cbioportal.genomic_event from four MySQL views, one INSERT per view:
-- mutations, CNA, and the gene1 / gene2 sides of structural variants.
-- Every INSERT names the target columns explicitly. INSERT ... SELECT matches
-- the selected columns to that list by position, so each view must expose its
-- columns in the same order. SELECT * is kept because the view definitions
-- live in the MySQL provisioning script and are not visible from this file.

-- Mutation events.
INSERT INTO cbioportal.genomic_event
(
    sample_unique_id,
    variant,
    variant_type,
    hugo_gene_symbol,
    gene_panel_stable_id,
    cancer_study_identifier,
    genetic_profile_stable_id
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_genomic_event_mutation',
    'cbio',
    'P@ssword1'
);

-- CNA events.
INSERT INTO cbioportal.genomic_event
(
    sample_unique_id,
    variant,
    variant_type,
    hugo_gene_symbol,
    gene_panel_stable_id,
    cancer_study_identifier,
    genetic_profile_stable_id
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_genomic_event_cna',
    'cbio',
    'P@ssword1'
);

-- Structural variant events, gene1 side.
INSERT INTO cbioportal.genomic_event
(
    sample_unique_id,
    variant,
    variant_type,
    hugo_gene_symbol,
    gene_panel_stable_id,
    cancer_study_identifier,
    genetic_profile_stable_id
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_genomic_event_sv_gene1',
    'cbio',
    'P@ssword1'
);

-- Structural variant events, gene2 side.
INSERT INTO cbioportal.genomic_event
(
    sample_unique_id,
    variant,
    variant_type,
    hugo_gene_symbol,
    gene_panel_stable_id,
    cancer_study_identifier,
    genetic_profile_stable_id
)
SELECT *
FROM mysql(
    '127.0.0.1:3306',
    'cbioportal',
    'view_genomic_event_sv_gene2',
    'cbio',
    'P@ssword1'
);
27 changes: 27 additions & 0 deletions dev/clickhouse/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
version: "3.1"

services:

  # Development ClickHouse instance for the cBioPortal ClickHouse integration.
  clickhouse:
    image: clickhouse/clickhouse-server:22.6
    container_name: clickhouse
    # Host networking is required: the import script connects to the MySQL
    # database at 127.0.0.1:3306 on the host. With host networking the
    # container's listening ports (including HTTP port 8123) are bound directly
    # on the host, so no "ports:" mapping is declared. The Compose
    # specification does not allow port mappings together with
    # network_mode: host.
    network_mode: host
    restart: unless-stopped
    cap_add:
      - SYS_NICE
      - NET_ADMIN
      - IPC_LOCK
    environment:
      - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1
      - CLICKHOUSE_DB=cbioportal
      - CLICKHOUSE_USER=cbio
      - CLICKHOUSE_PASSWORD=P@ssword1
    volumes:
      # Persistent ClickHouse data directory (excluded via .dockerignore).
      - ./clickhouse_data/:/var/lib/clickhouse/
      # Provisioning SQL scripts, mounted into the image's init directory.
      - ./clickhouse_provisioning/:/docker-entrypoint-initdb.d/
    ulimits:
      nofile:
        soft: 1000000
        hard: 1000000
Loading

0 comments on commit 83741f8

Please sign in to comment.