diff --git a/README.md b/README.md
index 922e9de..115accc 100755
--- a/README.md
+++ b/README.md
@@ -371,7 +371,7 @@ dataset is used as reference cohort (`--dataset TEST`).
 rnasum.R \
   --sample_name test_sample_WTS \
   --dataset TEST \
-  --dragen_rnaseq inst/rawdata/test_data/dragen \
+  --dragen_wts_dir inst/rawdata/test_data/dragen \
   --report_dir inst/rawdata/test_data/dragen/RNAsum \
   --umccrise inst/rawdata/test_data/umccrised/test_sample_WGS \
   --save_tables FALSE
@@ -396,7 +396,7 @@ pancreatic adenocarcinoma dataset is used as the reference cohort
 rnasum.R \
   --sample_name test_sample_WTS \
   --dataset TEST \
-  --dragen_rnaseq inst/rawdata/test_data/dragen \
+  --dragen_wts_dir inst/rawdata/test_data/dragen \
   --report_dir inst/rawdata/test_data/dragen/RNAsum \
   --save_tables FALSE
 ```
@@ -422,7 +422,7 @@ dataset is used as the reference cohort (`--dataset TEST`).
 rnasum.R \
   --sample_name test_sample_WTS \
   --dataset TEST \
-  --dragen_rnaseq $(pwd)/../rawdata/test_data/dragen \
+  --dragen_wts_dir $(pwd)/../rawdata/test_data/dragen \
   --report_dir $(pwd)/../rawdata/test_data/dragen/RNAsum \
   --umccrise $(pwd)/../rawdata/test_data/umccrised/test_sample_WGS \
   --save_tables FALSE \
diff --git a/inst/scripts/compare_piedb.R b/inst/scripts/compare_piedb.R
new file mode 100644
index 0000000..a162291
--- /dev/null
+++ b/inst/scripts/compare_piedb.R
@@ -0,0 +1,74 @@
+library(dplyr)
+library(readr)
+library(here)
+library(glue)
+library(tibble)
+library(tidyr)
+library(DT)
+
+# Compare the `genes.expr.perc.csv` tables written by the dev and pro RNAsum
+# runs for two subjects, keeping only genes from the somatic cancer gene panel.
+
+# Input locations, relative to the directory the script is run from.
+comp_dir <- "../../data/wts/RNAsum/reference_update_comp"
+panel_tsv <- "../../research/data/cancer_gene_list/somatic_panel-v24.03.0.tsv"
+
+# Read `genes.expr.perc.csv` for one subject (`sbj`) and one run (`env`,
+# "dev" or "pro") found under `base_dir`.
+read_expr_perc <- function(sbj, env, base_dir = comp_dir) {
+  readr::read_csv(file.path(base_dir, sbj, env, "genes.expr.perc.csv"))
+}
+
+# Join the dev and pro tables of one subject on `Gene` and keep the panel
+# genes whose patient value differs between the two runs.
+#
+# dev, pro: tables with `Gene`, `Patient` and `ref_col` columns.
+# genes:    character vector of gene symbols to keep.
+# ref_col:  reference cohort column to compare, e.g. "PANCAN (TCGA)" instead
+#           of the default.
+#
+# Returns a tibble sorted by decreasing `Ref_diff`, ties ordered by decreasing
+# `Pat_diff`.
+compare_expr_perc <- function(dev, pro, genes, ref_col = "BRCA (TCGA)") {
+  ref_dev <- paste0(ref_col, ".dev")
+  ref_pro <- paste0(ref_col, ".pro")
+  dplyr::left_join(dev, pro, by = "Gene", suffix = c(".dev", ".pro")) |>
+    dplyr::mutate(
+      # Exact `==` comparison, so any numeric difference counts as a change.
+      Ref_equal = .data[[ref_dev]] == .data[[ref_pro]],
+      Pat_equal = Patient.dev == Patient.pro,
+      Ref_diff = abs(.data[[ref_dev]] - .data[[ref_pro]]),
+      Pat_diff = abs(Patient.dev - Patient.pro)
+    ) |>
+    dplyr::select(
+      Gene, dplyr::contains("BRCA"), dplyr::contains("PANCAN"),
+      dplyr::all_of(c(ref_dev, ref_pro)), Ref_diff,
+      dplyr::contains("Patient"), Pat_diff, dplyr::contains("equal")
+    ) |>
+    # Rows with a missing Pat_diff are dropped as well (filter() removes NA).
+    dplyr::filter(Pat_diff > 0, Gene %in% genes) |>
+    # Ref_diff is the primary sort key; Pat_diff only orders ties.
+    dplyr::arrange(dplyr::desc(Ref_diff), dplyr::desc(Pat_diff))
+}
+
+SBJ04426_dev <- read_expr_perc("SBJ04426", "dev")
+SBJ04426_pro <- read_expr_perc("SBJ04426", "pro")
+SBJ04187_dev <- read_expr_perc("SBJ04187", "dev")
+SBJ04187_pro <- read_expr_perc("SBJ04187", "pro")
+cancer_genes <- readr::read_tsv(panel_tsv)
+
+# now explore expression differences in reference and patient columns
+# between dev and prod.
+SBJ04426_tbl <- compare_expr_perc(
+  SBJ04426_dev, SBJ04426_pro, cancer_genes$ensembl_gene_symbol
+)
+SBJ04187_tbl <- compare_expr_perc(
+  SBJ04187_dev, SBJ04187_pro, cancer_genes$ensembl_gene_symbol
+)
+
+# Interactive tables; the plain tibbles above stay available for plotting.
+SBJ04426_df <- DT::datatable(SBJ04426_tbl)
+SBJ04187_df <- DT::datatable(SBJ04187_tbl)
+
+# plot Ref_diff values
+hist(SBJ04426_tbl$Ref_diff, breaks = 100)