uclahs-cds · dan-knight · Jan 10, 2025 · Jan 10, 2025 · Jan 11, 2025 · Jan 11, 2025
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -17,6 +17,6 @@ jobs:
         with:
           path: NanoStringNorm
       - run: apt-get update && apt-get install -y libxml2-dev cmake
-      - run: R -e "install.packages(c('gdata', 'XML', 'googleVis', 'lme4', 'RUnit'))"
+      - run: R -e "install.packages(c('readxl', 'XML', 'googleVis', 'lme4', 'RUnit'))"
       - run: R CMD build --compact-vignettes="gs+qpdf" NanoStringNorm
       - run: R CMD check --as-cran --run-donttest NanoStringNorm_*.tar.gz
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,18 @@
 Package: NanoStringNorm
 Type: Package
 Title: Normalize NanoString miRNA and mRNA Data
-Version: 2.0.0
-Date: 2023-03-21
+Version: 3.0.0
+Date: 2025-01-10
 Authors@R: c(
 	person(c("Daryl", "M."), "Waggott", role = "aut"),
 	person("Paul", "Boutros", email = "[email protected]", role = "cre"),
 	person("Dan", "Knight", role = "ctb"))
-Depends: R (>= 2.14.0), gdata (>= 2.8.2), XML (>= 3.98-1.5)
-Imports: methods
+Depends: 
+    R (>= 2.14.0),
+    XML (>= 3.98-1.5)
+Imports: 
+    methods,
+    readxl
 Suggests: googleVis (>= 0.2.14), lme4, RUnit (>= 0.4.26)
 Description: A set of tools for normalizing, diagnostics and visualization of NanoString nCounter data.
 License: GPL-2

diff --git a/NAMESPACE b/NAMESPACE
@@ -21,7 +21,7 @@ importFrom("utils", "download.file", "packageDescription",
          "read.table")
 importFrom("utils", "read.csv")
 import(
-    'gdata',
+    'readxl',
     'XML'
 	)
 

diff --git a/NEWS b/NEWS
@@ -1,3 +1,9 @@
+NanoStringNorm 3.0.0 2025-01-10
+----------------------------------------------------------------
+
+UPDATED
+- Replaced the gdata dependency with readxl for loading Excel files, because Excel support in gdata is deprecated.
+
 NanoStringNorm 2.0.0 2023-03-21
 ----------------------------------------------------------------
 REMOVED

diff --git a/R/read.xls.RCC.R b/R/read.xls.RCC.R
@@ -22,113 +22,110 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 		}
 
 	# check if worksheet exists
-	sheet.names <- gdata::sheetNames(xls = xls, perl = perl);
+	sheet.names <- readxl::excel_sheets(xls);
 	cat(paste("\nYou have chosen to import worksheet ", sheet, " named ", sheet.names[sheet], ". Does that sound correct?\n", sep = ""));
 	cat(paste("The other sheet names are: \n"));
 	cat(paste(paste(1:length(sheet.names), sheet.names, sep = ":"), collapse = "\n"));
 	cat("\n\n");
 
-	# define pattern of first line of sample names
-	pattern.first.line.header <- "File";
-
-	# call gdata::read.excel and load header with sample names
-	header <- gdata::read.xls(
-		xls = xls,
-		sheet = sheet,
-		pattern = pattern.first.line.header,
-		method = "tab",
-		perl = perl,
-		header = FALSE,
-		as.is = TRUE,
-		row.names = 1,
-		nrow = 16,
-		strip.white = TRUE
-		);
-
-	if (is.null(header)) {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No header found.");
+	# Read one worksheet of an RCC Excel export and split it at the first
+	# 'Reporter Counts' row found in the first column.
+	#
+	# Args:
+	#   path:  path to the Excel file, passed to readxl::read_excel
+	#   sheet: worksheet to read, passed to readxl::read_excel
+	#
+	# Returns:
+	#   list with `header` (the rows above the 'Reporter Counts' row) and
+	#   `x` (the 'Reporter Counts' row and every row below it)
+	#
+	# Stops with an error when no 'Reporter Counts' row can be located.
+	prep.rcc <- function(path, sheet) {
+	    # every cell is read as text and without column names
+	    data <- as.data.frame(readxl::read_excel(
+	        path,
+	        sheet = sheet,
+	        col_names = FALSE,
+	        col_types = 'text',
+	        trim_ws = TRUE
+	        ));
+
+	    # `which` skips the NA comparisons produced by empty cells
+	    marker.rows <- integer(0);
+	    if (ncol(data) > 0) {
+	        marker.rows <- which(data[, 1] == 'Reporter Counts');
+	        }
+
+	    if (length(marker.rows) == 0) {
+	        stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No `Reporter Counts` row found in the first column.");
+	        }
+	    data.start.index <- min(marker.rows);
+
+	    # seq_len gives an empty header when the marker is on the first row (1:0 would select row 1)
+	    header <- data[seq_len(data.start.index - 1), ];
+	    data <- data[data.start.index:nrow(data), ];
+
+	    return(list(
+	        header = header,
+	        x = data
+	        ));
+	    }
+	rcc <- prep.rcc(xls, sheet);
+
+	if (is.null(rcc$header)) {
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No rcc$header found.");
 		}
 
-	rownames(header) <- gsub(" $", "", rownames(header));
-	rownames(header) <- gsub(" ", ".", rownames(header));
-	rownames(header) <- tolower(rownames(header));
-	if ("id" %in% rownames(header)) {rownames(header)[rownames(header) == "id"] <- "sample.id"}
-
+	rcc$header <- rcc$header[!is.na(rcc$header[1]), ];
+	rownames(rcc$header) <- rcc$header[, 1];
+	rcc$header <- rcc$header[, -1];
+
+	rownames(rcc$header) <- gsub(" $", "", rownames(rcc$header));
+	rownames(rcc$header) <- gsub(" ", ".", rownames(rcc$header));
+	rownames(rcc$header) <- tolower(rownames(rcc$header));
+
+	if ('id' %in% rownames(rcc$header)) {
+	    rownames(rcc$header)[rownames(rcc$header) == 'id'] <- 'sample.id';
+	    }
 
-	if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(header)))  {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  Rownames in header are missing File name , Sample id, Binding density");
+	if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(rcc$header)))  {
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  Rownames in rcc$header are missing File name , Sample id, Binding density");
 		}
 
-	# parse the header
-
-	# drop missing rows
-	header <- header[!rownames(header) %in% c('file.attributes','lane.attributes'),];
-	# drop missing columns
-	header <- header[,-c(1,2)]; 
-	# drop trailing columns
-	header <- header[,!is.na(header[1,]) & !is.na(header[2,])];
-	# get sample IDs
-	sample.ids <- header[rownames(header) %in% tolower(sample.id.row),];
-
-	# change spaces to dots in sample names
+	# parse the rcc$header
+	rcc$header <- rcc$header[!rownames(rcc$header) %in% c('file.attributes', 'lane.attributes'), ];
+	rcc$header['sample.date', ] <- format(
+	    as.Date(
+	        as.integer(rcc$header['sample.date', ]),
+	        origin = '1899-12-30'
+	        ),
+	    format = '%Y/%m/%d'
+	    );
+    rcc$header['binding.density', ] <- as.numeric(rcc$header['binding.density', ]);
+
+	# Normalise file version values: entries that parse as numbers are rewritten
+	# through R's numeric-to-character conversion, all other entries are kept
+	# as they are. Always returns a character vector.
+	prep.file.versions <- function(file.versions) {
+	    versions <- as.character(file.versions);
+	    parsed <- as.numeric(versions);
+	    parsed.ok <- !is.na(parsed);
+	    versions[parsed.ok] <- parsed[parsed.ok];
+	    return(as.character(versions));
+	    }
+    rcc$header['file.version', ] <- prep.file.versions(rcc$header['file.version', ]);
+    rcc$header <- rcc$header[, -c(1,2)];
+
+	sample.ids <- rcc$header[rownames(rcc$header) %in% tolower(sample.id.row),];
 	sample.ids <- gsub(" ", ".", sample.ids);
-	sample.ids <- gsub("^([0-9])", "X\\1" ,sample.ids);  
-
-	# add sample names
-	colnames(header) <- sample.ids;
-
-	# define pattern of first line of count data
-	pattern.first.line.counts <- "Code";
-
-	# call gdata::read.excel and load counts
-	x <- gdata::read.xls(
-		xls = xls,
-		sheet = sheet,
-		pattern = pattern.first.line.counts,
-		method = "tab",
-		perl = perl,
-		header = TRUE,
-		strip.white = TRUE,
-		as.is = TRUE
-		);
-
-	if (is.null(x)) {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count header specifically `Code Class`");
+	sample.ids <- gsub("^([0-9])", "X\\1", sample.ids);
+	colnames(rcc$header) <- sample.ids;
+
+	if (is.null(rcc$x)) {
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count rcc$header specifically `Code Class`");
 		}
 
-	# drop any trailing columns 
-	x <- x[,1:(3+length(sample.ids))];
+    colnames(rcc$x) <- rcc$x[2, ];
+    rcc$x <- rcc$x[-c(1:2), 1:(3 + length(sample.ids))];
 
 	# drop rows that have a missing code class or gene name
-	rows.with.missing.anno <- (x[,1] == '' | x[,2] == '');
+	rows.with.missing.anno <- (rcc$x[, 1] == '' | rcc$x[, 2] == '');
 	if (any(rows.with.missing.anno)) {
+	    rcc$x <- rcc$x[!rows.with.missing.anno,];
 		cat(paste("The following row(s)", paste(which(rows.with.missing.anno), collapse = ", "), "have been dropped due to missing annotation.\n\t  You may want to double check the excel file.\n\n"));
 		}
 
-	if (any(rows.with.missing.anno)) {
-		x <- x[!rows.with.missing.anno,];
-		}
-
-	# add sample names
-	colnames(x) <- c(colnames(x)[1:3], sample.ids);
+    colnames(rcc$x) <- gsub(" ", ".", colnames(rcc$x));
+	colnames(rcc$x) <- c(colnames(rcc$x)[1:3], sample.ids);
 
 	# print summary of samples
 	cat(paste("There were", length(sample.ids), "samples imported. \nNote that spaces in sample names will be replaced by dots. \n"));
 
-	if ( length(sample.ids) > 5) {
+	if (length(sample.ids) > 5) {
 		cat("The first and last 3 sample names found in the dataset are:\n");
-		cat(paste(c(sample.ids[1:3],rev(sample.ids)[1:3])));
+		cat(paste(c(sample.ids[1:3], rev(sample.ids)[1:3])));
 		}
 	else {
 		cat("The sample names found in the dataset are:\n");
 		cat(paste(sample.ids));
 		}
 
 	# print summary of genes 
-	cat(paste("\n\nThere were", nrow(x), "genes imported with the following Code Class breakdown:"));
-	print(table(x$Code.Class));
+	cat(paste("\n\nThere were", nrow(rcc$x), "genes imported with the following Code Class breakdown:"));
+	print(table(rcc$x[, 'Code.Class']));
 
-	x <- list(x = x, header = header);
-	class(x) <- 'NanoString';
-	return(x);
+	class(rcc) <- 'NanoString';
+	return(rcc);
 	}
diff --git a/metadata.yaml b/metadata.yaml
@@ -8,7 +8,7 @@ Contributors:
   - Julie Livingstone
 Languages: R (>= 2.14.0)
 Dependencies:
-  - gdata (>= 2.8.2)
+  - readxl
   - XML (>= 3.98-1.5)
   - googleVis (>= 0.2.14)
   - lme4

diff --git a/vignettes/NanoStringNorm_Introduction.Rnw b/vignettes/NanoStringNorm_Introduction.Rnw
@@ -1,5 +1,5 @@
 %\VignetteIndexEntry{Introduction to NanoStringNorm}
-%\VignetteDepends{googleVis,gdata}
+%\VignetteDepends{googleVis,readxl}
 %\VignetteKeywords{Expression Analysis}
 %\VignettePackage{NanoStringNorm}
 
@@ -95,7 +95,7 @@ dev.off();
 
 The input data usually comes in the form of a structured Excel spreadsheet.  You can export the raw count data from Excel as a delimited text file for use with R.  Start by opening the \emph{raw} worksheet in a blank Excel page for editing.  Copy the count data (row 23) for each sample including the first 3 annotation columns (Code.Class, Name and Accession) to a separate worksheet or text file.  Don't forget to add the sample IDs (row 5), and remove any incomplete rows or columns.  The resulting tabular data can be saved as a tab delimited file for import into R.
 \\ \\
-Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{gdata} package.
+Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{readxl} package.
 
 <<eg.read.xls>>=
 # directly import the nCounter output