From 52e57074291ba62bc72ff88c262af89ae892c04b Mon Sep 17 00:00:00 2001 From: karissawhiting Date: Mon, 18 Dec 2023 23:24:01 -0500 Subject: [PATCH 1/2] Add extract patient function --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 1 + R/utils.R | 22 ++++++++++++++++++++++ _pkgdown.yml | 1 + man/extract_patient_id.Rd | 22 ++++++++++++++++++++++ tests/testthat/test-utils.R | 13 +++++++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 man/extract_patient_id.Rd create mode 100644 tests/testthat/test-utils.R diff --git a/DESCRIPTION b/DESCRIPTION index de957cb3..e913897b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gnomeR Title: Wrangle and analyze IMPACT and TCGA mutation data -Version: 1.3.0 +Version: 1.2.0.9004 Authors@R: c(person(given = "Karissa", family = "Whiting", diff --git a/NAMESPACE b/NAMESPACE index 680c7240..5fe01b1b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(add_pathways) export(annotate_any_panel) export(annotate_specific_panel) export(create_gene_binary) +export(extract_patient_id) export(ggcomut) export(gggenecor) export(ggsamplevar) diff --git a/NEWS.md b/NEWS.md index 85d7258c..9fa4351a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # gnomeR (development version) +- Added `extract_patient_id()` function to get IMPACT patient ID from sample ID - Deprecated `freq_cutoff`, `freq_cutoff_by_gene`, and `gene_subset` arguments in `tbl_genomic()`. It is now recommended that users use `subset_by_frequency()` instead before passing data to `tbl_genomic()`. - Added `other_vars` argument to `subset_by_frequency()`, `subset_by_panel()`, `summarize_by_gene()` and `add_pathways()` to allow retention of other clinical vars when using functions within pipeline. - Deprecated `count_pathways_by` argument of `add_pathways()` function. Now, user must specify which specific alteration to count towards the pathway via the `.mut`, `.Amp`, `.Del`, `.fus` suffix (e.g. `custom_pathways = c('TP53.mut', 'APC.Del)`). diff --git a/R/utils.R b/R/utils.R index 765d0e02..bb7a916e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -135,7 +135,29 @@ recode_cna <- function(alteration_vector){ } +#' Extract IMPACT Patient ID From Sample ID +#' +#' @param sample_id A character vector of IMPACT Tumor sample IDs +#' +#' @return Returns a vector of patient IDs +#' @export +#' +#' @examples +#' sample_id = c("P-0000071-T01-IM3", "P-0000072-T02-IM4", "P-0000073-T03-IM5") +#' extract_patient_id(sample_id) +#' +extract_patient_id <- function(sample_id) { + + # Checks ---------------------------------------------------------------- + wrong_format <- sample_id[!stringr::str_detect(sample_id, "^P-\\d{1,}-T.*")] + if (length(wrong_format) > 0) { + cli::cli_abort("Some {.code sample_id} values do not match the expected IMPACT sample format (e.g `P-0000XX-T01-IM3`)") + } + + patient_id = stringr::str_replace(sample_id, "-T.*", "") + return(patient_id) +} #' Create binary data.frames depending on type of mutation data #' diff --git a/_pkgdown.yml b/_pkgdown.yml index ea179a2c..b757b8ab 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -51,6 +51,7 @@ reference: - recode_cna - rename_columns - resolve_alias + - extract_patient_id - subtitle: Color Palette - contents: - gnomer_colors diff --git a/man/extract_patient_id.Rd b/man/extract_patient_id.Rd new file mode 100644 index 00000000..4878aa75 --- /dev/null +++ b/man/extract_patient_id.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{extract_patient_id} +\alias{extract_patient_id} +\title{Extract IMPACT Patient ID From Sample ID} +\usage{ +extract_patient_id(sample_id) +} +\arguments{ +\item{sample_id}{A character vector of IMPACT Tumor sample IDs} +} +\value{ +Returns a vector of patient IDs +} +\description{ +Extract IMPACT Patient ID From Sample ID +} +\examples{ +sample_id = c("P-0000071-T01-IM3", "P-0000072-T02-IM4", "P-0000073-T03-IM5") +extract_patient_id(sample_id) + +} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 00000000..517226e4 --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,13 @@ + +test_that("test extracting patient_id from sample_id works", { + + expect_no_error(extract_patient_id(gnomeR::mutations[1:10,]$sampleId)) + +}) + +test_that("test error thrown when non IMPACT ID", { + + sample_id = c("P-0000071-T01-IM3", "XX", "P-0000073-T03-IM5") + expect_error(extract_patient_id(sample_id)) + +}) From a7198a3e085e69a989effbd5a662f42b78160d51 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 20 Dec 2023 09:20:29 -0500 Subject: [PATCH 2/2] updated version --- codemeta.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codemeta.json b/codemeta.json index 35d4ef66..2c62b91e 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,13 @@ "codeRepository": "https://github.com/MSKCC-Epi-Bio/gnomeR", "issueTracker": "https://github.com/MSKCC-Epi-Bio/gnomeR/issues", "license": "https://spdx.org/licenses/MIT", - "version": "1.3.0", + "version": "1.2.0.9004", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.2.2 (2022-10-31 ucrt)", + "runtimePlatform": "R version 4.2.3 (2023-03-15)", "author": [ { "@type": "Person", @@ -371,7 +371,7 @@ }, "SystemRequirements": null }, - "fileSize": "2582.696KB", + "fileSize": "2349.143KB", "releaseNotes": "https://github.com/MSKCC-Epi-Bio/gnomeR/blob/master/NEWS.md", "readme": "https://github.com/MSKCC-Epi-Bio/gnomeR/blob/main/README.md", "contIntegration": ["https://github.com/MSKCC-Epi-Bio/gnomeR/actions", "https://app.codecov.io/gh/MSKCC-Epi-Bio/gnomeR?branch=main"],