diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 99b8f41..e925ed1 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -17,6 +17,6 @@ jobs: with: path: NanoStringNorm - run: apt-get update && apt-get install -y libxml2-dev cmake - - run: R -e "install.packages(c('gdata', 'XML', 'googleVis', 'lme4', 'RUnit'))" + - run: R -e "install.packages(c('readxl', 'XML', 'googleVis', 'lme4', 'RUnit'))" - run: R CMD build --compact-vignettes="gs+qpdf" NanoStringNorm - run: R CMD check --as-cran --run-donttest NanoStringNorm_*.tar.gz diff --git a/DESCRIPTION b/DESCRIPTION index 9235802..0ae8dec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,18 @@ Package: NanoStringNorm Type: Package Title: Normalize NanoString miRNA and mRNA Data -Version: 2.0.0 -Date: 2023-03-21 +Version: 3.0.0 +Date: 2025-01-10 Authors@R: c( person(c("Daryl", "M."), "Waggott", role = "aut"), person("Paul", "Boutros", email = "PBoutros@mednet.ucla.edu", role = "cre"), person("Dan", "Knight", role = "ctb")) -Depends: R (>= 2.14.0), gdata (>= 2.8.2), XML (>= 3.98-1.5) -Imports: methods +Depends: + R (>= 2.14.0), + XML (>= 3.98-1.5) +Imports: + methods, + readxl Suggests: googleVis (>= 0.2.14), lme4, RUnit (>= 0.4.26) Description: A set of tools for normalizing, diagnostics and visualization of NanoString nCounter data. License: GPL-2 diff --git a/NAMESPACE b/NAMESPACE index 586431d..ad89d2c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,7 +21,7 @@ importFrom("utils", "download.file", "packageDescription", "read.table") importFrom("utils", "read.csv") import( - 'gdata', + 'readxl', 'XML' ) diff --git a/NEWS b/NEWS index 9286f42..026ba19 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +NanoStringNorm 3.0.0 2025-01-10 +---------------------------------------------------------------- + +UPDATED +- Replaced gdata dependency with readxl for loading Excel files due to deprecated Excel support. 
+ NanoStringNorm 2.0.0 2023-03-21 ---------------------------------------------------------------- REMOVED diff --git a/R/read.xls.RCC.R b/R/read.xls.RCC.R index 74c28a1..d7eca17 100644 --- a/R/read.xls.RCC.R +++ b/R/read.xls.RCC.R @@ -22,102 +22,100 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") { } # check if worksheet exists - sheet.names <- gdata::sheetNames(xls = xls, perl = perl); + sheet.names <- readxl::excel_sheets(xls); cat(paste("\nYou have chosen to import worksheet ", sheet, " named ", sheet.names[sheet], ". Does that sound correct?\n", sep = "")); cat(paste("The other sheet names are: \n")); cat(paste(paste(1:length(sheet.names), sheet.names, sep = ":"), collapse = "\n")); cat("\n\n"); - # define pattern of first line of sample names - pattern.first.line.header <- "File"; - - # call gdata::read.excel and load header with sample names - header <- gdata::read.xls( - xls = xls, - sheet = sheet, - pattern = pattern.first.line.header, - method = "tab", - perl = perl, - header = FALSE, - as.is = TRUE, - row.names = 1, - nrow = 16, - strip.white = TRUE - ); - - if (is.null(header)) { - stop("READ.XLS.RCC: There appears to be a problem with RCC file. No header found."); + prep.rcc <- function(path, sheet) { + data <- as.data.frame(readxl::read_excel( + xls, + sheet = sheet, + col_names = FALSE, + col_types = 'text', + trim_ws = TRUE + )); + + data.start.index <- min(which(data[, 1] == 'Reporter Counts')); + header <- data[1:(data.start.index - 1), ]; + data <- data[data.start.index:nrow(data), ]; + + return(list( + header = header, + x = data + )); + } + rcc <- prep.rcc(xls, sheet); + + if (is.null(rcc$header)) { + stop("READ.XLS.RCC: There appears to be a problem with RCC file. 
No rcc$header found."); } - rownames(header) <- gsub(" $", "", rownames(header)); - rownames(header) <- gsub(" ", ".", rownames(header)); - rownames(header) <- tolower(rownames(header)); - if ("id" %in% rownames(header)) {rownames(header)[rownames(header) == "id"] <- "sample.id"} - + rcc$header <- rcc$header[!is.na(rcc$header[1]), ]; + rownames(rcc$header) <- rcc$header[, 1]; + rcc$header <- rcc$header[, -1]; + + rownames(rcc$header) <- gsub(" $", "", rownames(rcc$header)); + rownames(rcc$header) <- gsub(" ", ".", rownames(rcc$header)); + rownames(rcc$header) <- tolower(rownames(rcc$header)); + + if ('id' %in% rownames(rcc$header)) { + rownames(rcc$header)[rownames(rcc$header) == 'id'] <- 'sample.id'; + } - if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(header))) { - stop("READ.XLS.RCC: There appears to be a problem with RCC file. Rownames in header are missing File name , Sample id, Binding density"); + if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(rcc$header))) { + stop("READ.XLS.RCC: There appears to be a problem with RCC file. 
Rownames in rcc$header are missing File name , Sample id, Binding density"); } - # parse the header - - # drop missing rows - header <- header[!rownames(header) %in% c('file.attributes','lane.attributes'),]; - # drop missing columns - header <- header[,-c(1,2)]; - # drop trailing columns - header <- header[,!is.na(header[1,]) & !is.na(header[2,])]; - # get sample IDs - sample.ids <- header[rownames(header) %in% tolower(sample.id.row),]; - - # change spaces to dots in sample names + # parse the header + rcc$header <- rcc$header[!rownames(rcc$header) %in% c('file.attributes', 'lane.attributes'), ]; + rcc$header['sample.date', ] <- format( + as.Date( + as.integer(rcc$header['sample.date', ]), + origin = '1899-12-30' + ), + format = '%Y/%m/%d' + ); + rcc$header['binding.density', ] <- as.numeric(rcc$header['binding.density', ]); + + prep.file.versions <- function(file.versions) { + result <- as.character(file.versions) + numeric.versions <- as.numeric(result); + result[!is.na(numeric.versions)] <- numeric.versions[!is.na(numeric.versions)]; + return(as.character(result)); + } + rcc$header['file.version', ] <- prep.file.versions(rcc$header['file.version', ]); + rcc$header <- rcc$header[, -c(1,2)]; + + sample.ids <- rcc$header[rownames(rcc$header) %in% tolower(sample.id.row),]; sample.ids <- gsub(" ", ".", sample.ids); - sample.ids <- gsub("^([0-9])", "X\\1" ,sample.ids); - - # add sample names - colnames(header) <- sample.ids; - - # define pattern of first line of count data - pattern.first.line.counts <- "Code"; - - # call gdata::read.excel and load counts - x <- gdata::read.xls( - xls = xls, - sheet = sheet, - pattern = pattern.first.line.counts, - method = "tab", - perl = perl, - header = TRUE, - strip.white = TRUE, - as.is = TRUE - ); - - if (is.null(x)) { - stop("READ.XLS.RCC: There appears to be a problem with RCC file. 
Likely couldnt find the count header specifically `Code Class`"); + sample.ids <- gsub("^([0-9])", "X\\1", sample.ids); + colnames(rcc$header) <- sample.ids; + + if (is.null(rcc$x)) { + stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count rcc$header specifically `Code Class`"); } - # drop any trailing columns - x <- x[,1:(3+length(sample.ids))]; + colnames(rcc$x) <- rcc$x[2, ]; + rcc$x <- rcc$x[-c(1:2), 1:(3 + length(sample.ids))]; # drop rows that have a missing code class or gene name - rows.with.missing.anno <- (x[,1] == '' | x[,2] == ''); + rows.with.missing.anno <- (rcc$x[, 1] == '' | rcc$x[, 2] == ''); if (any(rows.with.missing.anno)) { + rcc$x <- rcc$x[!rows.with.missing.anno,]; cat(paste("The following row(s)", paste(which(rows.with.missing.anno), collapse = ", "), "have been dropped due to missing annotation.\n\t You may want to double check the excel file.\n\n")); } - if (any(rows.with.missing.anno)) { - x <- x[!rows.with.missing.anno,]; - } - - # add sample names - colnames(x) <- c(colnames(x)[1:3], sample.ids); + colnames(rcc$x) <- gsub(" ", ".", colnames(rcc$x)); + colnames(rcc$x) <- c(colnames(rcc$x)[1:3], sample.ids); # print summary of samples cat(paste("There were", length(sample.ids), "samples imported. \nNote that spaces in sample names will be replaced by dots. 
\n")); - if ( length(sample.ids) > 5) { + if (length(sample.ids) > 5) { cat("The first and last 3 sample names found in the dataset are:\n"); - cat(paste(c(sample.ids[1:3],rev(sample.ids)[1:3]))); + cat(paste(c(sample.ids[1:3], rev(sample.ids)[1:3]))); } else { cat("The sample names found in the dataset are:\n"); @@ -125,10 +123,9 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") { } # print summary of genes - cat(paste("\n\nThere were", nrow(x), "genes imported with the following Code Class breakdown:")); - print(table(x$Code.Class)); + cat(paste("\n\nThere were", nrow(rcc$x), "genes imported with the following Code Class breakdown:")); + print(table(rcc$x[, 'Code.Class'])); - x <- list(x = x, header = header); - class(x) <- 'NanoString'; - return(x); + class(rcc) <- 'NanoString'; + return(rcc); } diff --git a/metadata.yaml b/metadata.yaml index c760d94..e2a18b3 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -8,7 +8,7 @@ Contributors: - Julie Livingstone Languages: R (>= 2.14.0) Dependencies: - - gdata (>= 2.8.2) + - readxl - XML (>= 3.98-1.5) - googleVis (>= 0.2.14) - lme4 diff --git a/vignettes/NanoStringNorm_Introduction.Rnw b/vignettes/NanoStringNorm_Introduction.Rnw index a63705f..b46fbb0 100644 --- a/vignettes/NanoStringNorm_Introduction.Rnw +++ b/vignettes/NanoStringNorm_Introduction.Rnw @@ -1,5 +1,5 @@ %\VignetteIndexEntry{Introduction to NanoStringNorm} -%\VignetteDepends{googleVis,gdata} +%\VignetteDepends{googleVis,readxl} %\VignetteKeywords{Expression Analysis} %\VignettePackage{NanoStringNorm} @@ -95,7 +95,7 @@ dev.off(); The input data usually comes in the form of a structured Excel spreadsheet. You can export the raw count data from Excel as a delimited text file for use with R. Start by opening the \emph{raw} worksheet in a blank Excel page for editing. Copy the count data (row 23) for each sample including the first 3 annotation columns (Code.Class, Name and Accession) to a separate worksheet or text file. 
Don't forget to add the sample IDs (row 5), and remove any incomplete rows or columns. The resulting tabular data can be saved as a tab delimited file for import into R. \\ \\ -Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{gdata} package. +Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{readxl} package. <>= # directly import the nCounter output