From cf4fc8dbc9450629c64155cdba103cb8756263cb Mon Sep 17 00:00:00 2001
From: Dan Knight <danknight@mednet.ucla.edu>
Date: Fri, 10 Jan 2025 15:49:22 -0800
Subject: [PATCH 1/5] Load RCC header info with readxl

---
 R/read.xls.RCC.R | 80 +++++++++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/R/read.xls.RCC.R b/R/read.xls.RCC.R
index 74c28a1..cf957d7 100644
--- a/R/read.xls.RCC.R
+++ b/R/read.xls.RCC.R
@@ -22,59 +22,77 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 		}
 
 	# check if worksheet exists
-	sheet.names <- gdata::sheetNames(xls = xls, perl = perl);
+	sheet.names <- readxl::excel_sheets(xls);
 	cat(paste("\nYou have chosen to import worksheet ", sheet, " named ", sheet.names[sheet], ". Does that sound correct?\n", sep = ""));
 	cat(paste("The other sheet names are: \n"));
 	cat(paste(paste(1:length(sheet.names), sheet.names, sep = ":"), collapse = "\n"));
 	cat("\n\n");
 
-	# define pattern of first line of sample names
-	pattern.first.line.header <- "File";
-
-	# call gdata::read.excel and load header with sample names
-	header <- gdata::read.xls(
-		xls = xls,
-		sheet = sheet,
-		pattern = pattern.first.line.header,
-		method = "tab",
-		perl = perl,
-		header = FALSE,
-		as.is = TRUE,
-		row.names = 1,
-		nrow = 16,
-		strip.white = TRUE
-		);
+	# Read one RCC worksheet as text and split it at the 'Reporter Counts' marker row.
+	prep.rcc <- function(path, sheet) {
+	    # read from the `path` argument; every cell is read as text, without column names
+	    data <- as.data.frame(readxl::read_excel(
+	        path, sheet = sheet, col_names = FALSE, col_types = 'text', trim_ws = TRUE
+	        ));
+
+	    data.start.index <- which(data[, 1] == 'Reporter Counts');
+	    if (length(data.start.index) == 0) stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No 'Reporter Counts' row found.");
+	    data.start.index <- min(data.start.index);
+	    header <- data[seq_len(data.start.index - 1), ];
+	    data <- data[data.start.index:nrow(data), ];
+
+	    return(list(
+	        header = header,
+	        counts = data
+	        ));
+	    }
+	rcc <- prep.rcc(xls, sheet);
+	
+	header <- rcc$header;
 
 	if (is.null(header)) {
 		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No header found.");
 		}
 
+	header <- header[!is.na(header[1]), ];
+	rownames(header) <- header[, 1];
+	header <- header[, -1];
+	
 	rownames(header) <- gsub(" $", "", rownames(header));
 	rownames(header) <- gsub(" ", ".", rownames(header));
 	rownames(header) <- tolower(rownames(header));
-	if ("id" %in% rownames(header)) {rownames(header)[rownames(header) == "id"] <- "sample.id"}
-
+	
+	if ('id' %in% rownames(header)) {
+	    rownames(header)[rownames(header) == 'id'] <- 'sample.id';
+	    }
 
 	if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(header)))  {
 		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  Rownames in header are missing File name , Sample id, Binding density");
 		}
 
 	# parse the header
+	header <- header[!rownames(header) %in% c('file.attributes', 'lane.attributes'), ];
+	header['sample.date', ] <- format(
+	    as.Date(
+	        as.integer(header['sample.date', ]),
+	        origin = '1899-12-30'
+	        ),
+	    format = '%Y/%m/%d'
+	    );
+    header['binding.density', ] <- as.numeric(header['binding.density', ]);
+	
+	prep.file.versions <- function(file.versions) {
+	    result <- as.character(file.versions)
+	    numeric.versions <- as.numeric(result);  # NA where a version does not parse as a number; such entries keep their text
+	    result[!is.na(numeric.versions)] <- numeric.versions[!is.na(numeric.versions)];
+	    return(as.character(result));
+	    }
+    header['file.version', ] <- prep.file.versions(header['file.version', ]);
+    header <- header[, -c(1,2)];
 
-	# drop missing rows
-	header <- header[!rownames(header) %in% c('file.attributes','lane.attributes'),];
-	# drop missing columns
-	header <- header[,-c(1,2)]; 
-	# drop trailing columns
-	header <- header[,!is.na(header[1,]) & !is.na(header[2,])];
-	# get sample IDs
 	sample.ids <- header[rownames(header) %in% tolower(sample.id.row),];
-
-	# change spaces to dots in sample names
 	sample.ids <- gsub(" ", ".", sample.ids);
-	sample.ids <- gsub("^([0-9])", "X\\1" ,sample.ids);  
-
-	# add sample names
+	sample.ids <- gsub("^([0-9])", "X\\1", sample.ids);
 	colnames(header) <- sample.ids;
 
 	# define pattern of first line of count data

From 5bcfc0e0f6c7bf869731f588100402b8392f26c3 Mon Sep 17 00:00:00 2001
From: Dan Knight <danknight@mednet.ucla.edu>
Date: Fri, 10 Jan 2025 15:54:28 -0800
Subject: [PATCH 2/5] Restructure RCC Excel loading implementation

---
 R/read.xls.RCC.R | 53 +++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/R/read.xls.RCC.R b/R/read.xls.RCC.R
index cf957d7..60f07a7 100644
--- a/R/read.xls.RCC.R
+++ b/R/read.xls.RCC.R
@@ -43,43 +43,41 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 
 	    return(list(
 	        header = header,
-	        counts = data
+	        x = data
 	        ));
 	    }
 	rcc <- prep.rcc(xls, sheet);
-	
-	header <- rcc$header;
 
-	if (is.null(header)) {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No header found.");
+	if (is.null(rcc$header)) {  # message refers to the RCC header section, not the internal variable name
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  No header found.");
 		}
 
-	header <- header[!is.na(header[1]), ];
-	rownames(header) <- header[, 1];
-	header <- header[, -1];
+	rcc$header <- rcc$header[!is.na(rcc$header[1]), ];
+	rownames(rcc$header) <- rcc$header[, 1];
+	rcc$header <- rcc$header[, -1];
 	
-	rownames(header) <- gsub(" $", "", rownames(header));
-	rownames(header) <- gsub(" ", ".", rownames(header));
-	rownames(header) <- tolower(rownames(header));
+	rownames(rcc$header) <- gsub(" $", "", rownames(rcc$header));
+	rownames(rcc$header) <- gsub(" ", ".", rownames(rcc$header));
+	rownames(rcc$header) <- tolower(rownames(rcc$header));
 	
-	if ('id' %in% rownames(header)) {
-	    rownames(header)[rownames(header) == 'id'] <- 'sample.id';
+	if ('id' %in% rownames(rcc$header)) {
+	    rownames(rcc$header)[rownames(rcc$header) == 'id'] <- 'sample.id';
 	    }
 
-	if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(header)))  {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  Rownames in header are missing File name , Sample id, Binding density");
+	if (!all(c("file.name", "sample.id", "binding.density") %in% rownames(rcc$header)))  {  # all three rows must be present
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file.  Rownames in header are missing File name , Sample id, Binding density");
 		}
 
-	# parse the header
-	header <- header[!rownames(header) %in% c('file.attributes', 'lane.attributes'), ];
-	header['sample.date', ] <- format(
+	# parse the rcc$header
+	rcc$header <- rcc$header[!rownames(rcc$header) %in% c('file.attributes', 'lane.attributes'), ];
+	rcc$header['sample.date', ] <- format(
 	    as.Date(
-	        as.integer(header['sample.date', ]),
+	        as.integer(rcc$header['sample.date', ]),
 	        origin = '1899-12-30'
 	        ),
 	    format = '%Y/%m/%d'
 	    );
-    header['binding.density', ] <- as.numeric(header['binding.density', ]);
+    rcc$header['binding.density', ] <- as.numeric(rcc$header['binding.density', ]);
 	
 	prep.file.versions <- function(file.versions) {
 	    result <- as.character(file.versions)
@@ -87,13 +85,13 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 	    result[!is.na(numeric.versions)] <- numeric.versions[!is.na(numeric.versions)];
 	    return(as.character(result));
 	    }
-    header['file.version', ] <- prep.file.versions(header['file.version', ]);
-    header <- header[, -c(1,2)];
+    rcc$header['file.version', ] <- prep.file.versions(rcc$header['file.version', ]);
+    rcc$header <- rcc$header[, -c(1,2)];
 
-	sample.ids <- header[rownames(header) %in% tolower(sample.id.row),];
+	sample.ids <- rcc$header[rownames(rcc$header) %in% tolower(sample.id.row),];
 	sample.ids <- gsub(" ", ".", sample.ids);
 	sample.ids <- gsub("^([0-9])", "X\\1", sample.ids);
-	colnames(header) <- sample.ids;
+	colnames(rcc$header) <- sample.ids;
 
 	# define pattern of first line of count data
 	pattern.first.line.counts <- "Code";
@@ -111,7 +109,7 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 		);
 
 	if (is.null(x)) {
-		stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count header specifically `Code Class`");
+		stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count rcc$header specifically `Code Class`");
 		}
 
 	# drop any trailing columns 
@@ -146,7 +144,6 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 	cat(paste("\n\nThere were", nrow(x), "genes imported with the following Code Class breakdown:"));
 	print(table(x$Code.Class));
 
-	x <- list(x = x, header = header);
-	class(x) <- 'NanoString';
-	return(x);
+	class(rcc) <- 'NanoString';
+	return(rcc);
 	}

From e94ed5803fdb866cb4888a6ed1a7cafc47eefa7e Mon Sep 17 00:00:00 2001
From: Dan Knight <danknight@mednet.ucla.edu>
Date: Fri, 10 Jan 2025 16:27:07 -0800
Subject: [PATCH 3/5] Replace gdata Excel dependency with readxl

---
 DESCRIPTION                               |  8 +++--
 NAMESPACE                                 |  2 +-
 R/read.xls.RCC.R                          | 40 +++++++----------------
 metadata.yaml                             |  2 +-
 vignettes/NanoStringNorm_Introduction.Rnw |  4 +--
 5 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9235802..474b39b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -7,8 +7,12 @@ Authors@R: c(
 	person(c("Daryl", "M."), "Waggott", role = "aut"),
 	person("Paul", "Boutros", email = "PBoutros@mednet.ucla.edu", role = "cre"),
 	person("Dan", "Knight", role = "ctb"))
-Depends: R (>= 2.14.0), gdata (>= 2.8.2), XML (>= 3.98-1.5)
-Imports: methods
+Depends: 
+    R (>= 2.14.0),
+    XML (>= 3.98-1.5)
+Imports: 
+    methods,
+    readxl
 Suggests: googleVis (>= 0.2.14), lme4, RUnit (>= 0.4.26)
 Description: A set of tools for normalizing, diagnostics and visualization of NanoString nCounter data.
 License: GPL-2
diff --git a/NAMESPACE b/NAMESPACE
index 586431d..ad89d2c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -21,7 +21,7 @@ importFrom("utils", "download.file", "packageDescription",
          "read.table")
 importFrom("utils", "read.csv")
 import(
-    'gdata',
+    'readxl',
     'XML'
 	)
 
diff --git a/R/read.xls.RCC.R b/R/read.xls.RCC.R
index 60f07a7..d7eca17 100644
--- a/R/read.xls.RCC.R
+++ b/R/read.xls.RCC.R
@@ -93,47 +93,29 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 	sample.ids <- gsub("^([0-9])", "X\\1", sample.ids);
 	colnames(rcc$header) <- sample.ids;
 
-	# define pattern of first line of count data
-	pattern.first.line.counts <- "Code";
-
-	# call gdata::read.excel and load counts
-	x <- gdata::read.xls(
-		xls = xls,
-		sheet = sheet,
-		pattern = pattern.first.line.counts,
-		method = "tab",
-		perl = perl,
-		header = TRUE,
-		strip.white = TRUE,
-		as.is = TRUE
-		);
-
-	if (is.null(x)) {
+	if (is.null(rcc$x)) {
 		stop("READ.XLS.RCC: There appears to be a problem with RCC file. Likely couldnt find the count rcc$header specifically `Code Class`");
 		}
 
-	# drop any trailing columns 
-	x <- x[,1:(3+length(sample.ids))];
+    colnames(rcc$x) <- rcc$x[2, ];
+    rcc$x <- rcc$x[-c(1:2), 1:(3 + length(sample.ids))];
 
 	# drop rows that have a missing code class or gene name
-	rows.with.missing.anno <- (x[,1] == '' | x[,2] == '');
+	rows.with.missing.anno <- (rcc$x[, 1] %in% c(NA, '') | rcc$x[, 2] %in% c(NA, ''));  # readxl yields NA for blank cells; %in% keeps the mask NA-free for if()
 	if (any(rows.with.missing.anno)) {
+	    rcc$x <- rcc$x[!rows.with.missing.anno,];
 		cat(paste("The following row(s)", paste(which(rows.with.missing.anno), collapse = ", "), "have been dropped due to missing annotation.\n\t  You may want to double check the excel file.\n\n"));
 		}
 
-	if (any(rows.with.missing.anno)) {
-		x <- x[!rows.with.missing.anno,];
-		}
-
-	# add sample names
-	colnames(x) <- c(colnames(x)[1:3], sample.ids);
+	colnames(rcc$x) <- c(gsub(" ", ".", colnames(rcc$x)[1:3]), sample.ids);  # dotted annotation names, then sample names
+	rcc$x[-(1:3)] <- lapply(rcc$x[-(1:3)], as.numeric);  # sheet was read with col_types = 'text'; make the count columns numeric
 
 	# print summary of samples
 	cat(paste("There were", length(sample.ids), "samples imported. \nNote that spaces in sample names will be replaced by dots. \n"));
 	
-	if ( length(sample.ids) > 5) {
+	if (length(sample.ids) > 5) {
 		cat("The first and last 3 sample names found in the dataset are:\n");
-		cat(paste(c(sample.ids[1:3],rev(sample.ids)[1:3])));
+		cat(paste(c(sample.ids[1:3], rev(sample.ids)[1:3])));
 		}
 	else {
 		cat("The sample names found in the dataset are:\n");
@@ -141,8 +123,8 @@ read.xls.RCC <- function(xls, sheet = 1, perl, sample.id.row = "File.Name") {
 		}
 
 	# print summary of genes 
-	cat(paste("\n\nThere were", nrow(x), "genes imported with the following Code Class breakdown:"));
-	print(table(x$Code.Class));
+	cat(paste("\n\nThere were", nrow(rcc$x), "genes imported with the following Code Class breakdown:"));
+	print(table(rcc$x[, 'Code.Class']));
 
 	class(rcc) <- 'NanoString';
 	return(rcc);
diff --git a/metadata.yaml b/metadata.yaml
index c760d94..e2a18b3 100644
--- a/metadata.yaml
+++ b/metadata.yaml
@@ -8,7 +8,7 @@ Contributors:
   - Julie Livingstone
 Languages: R (>= 2.14.0)
 Dependencies:
-  - gdata (>= 2.8.2)
+  - readxl
   - XML (>= 3.98-1.5)
   - googleVis (>= 0.2.14)
   - lme4
diff --git a/vignettes/NanoStringNorm_Introduction.Rnw b/vignettes/NanoStringNorm_Introduction.Rnw
index a63705f..b46fbb0 100644
--- a/vignettes/NanoStringNorm_Introduction.Rnw
+++ b/vignettes/NanoStringNorm_Introduction.Rnw
@@ -1,5 +1,5 @@
 %\VignetteIndexEntry{Introduction to NanoStringNorm}
-%\VignetteDepends{googleVis,gdata}
+%\VignetteDepends{googleVis,readxl}
 %\VignetteKeywords{Expression Analysis}
 %\VignettePackage{NanoStringNorm}
 
@@ -95,7 +95,7 @@ dev.off();
 
 The input data usually comes in the form of a structured Excel spreadsheet.  You can export the raw count data from Excel as a delimited text file for use with R.  Start by opening the \emph{raw} worksheet in a blank Excel page for editing.  Copy the count data (row 23) for each sample including the first 3 annotation columns (Code.Class, Name and Accession) to a separate worksheet or text file.  Don't forget to add the sample IDs (row 5), and remove any incomplete rows or columns.  The resulting tabular data can be saved as a tab delimited file for import into R.
 \\ \\
-Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{gdata} package.
+Alternatively, you can import data directly from xls format into R using the function \Rfunction{read.xls.RCC} based on core functionality in the \Rpackage{readxl} package.
 
 <<eg.read.xls>>=
 # directly import the nCounter output

From 1a2c0ca4f2b3a4e10aff18b1dc911e47b8f32f14 Mon Sep 17 00:00:00 2001
From: Dan Knight <danknight@mednet.ucla.edu>
Date: Fri, 10 Jan 2025 16:28:49 -0800
Subject: [PATCH 4/5] Update changelog

---
 DESCRIPTION | 4 ++--
 NEWS        | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 474b39b..0ae8dec 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: NanoStringNorm
 Type: Package
 Title: Normalize NanoString miRNA and mRNA Data
-Version: 2.0.0
-Date: 2023-03-21
+Version: 3.0.0
+Date: 2025-01-10
 Authors@R: c(
 	person(c("Daryl", "M."), "Waggott", role = "aut"),
 	person("Paul", "Boutros", email = "PBoutros@mednet.ucla.edu", role = "cre"),
diff --git a/NEWS b/NEWS
index 9286f42..026ba19 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+NanoStringNorm 3.0.0 2025-01-10
+----------------------------------------------------------------
+
+UPDATED
+- Replaced the gdata dependency with readxl for loading Excel files, because gdata's Excel support is deprecated.
+
 NanoStringNorm 2.0.0 2023-03-21
 ----------------------------------------------------------------
 REMOVED

From b120f154649de0f0b82cafd501970c5a75270671 Mon Sep 17 00:00:00 2001
From: Dan Knight <danknight@mednet.ucla.edu>
Date: Fri, 10 Jan 2025 16:38:46 -0800
Subject: [PATCH 5/5] Update readxl dependency in GitHub action

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 99b8f41..e925ed1 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -17,6 +17,6 @@ jobs:
         with:
           path: NanoStringNorm
       - run: apt-get update && apt-get install -y libxml2-dev cmake
-      - run: R -e "install.packages(c('gdata', 'XML', 'googleVis', 'lme4', 'RUnit'))"
+      - run: R -e "install.packages(c('readxl', 'XML', 'googleVis', 'lme4', 'RUnit'))"
       - run: R CMD build --compact-vignettes="gs+qpdf" NanoStringNorm
       - run: R CMD check --as-cran --run-donttest NanoStringNorm_*.tar.gz