Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

functions to test different ranks and algorithms,extract results info #126

Open
wants to merge 1 commit into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: NMF
Type: Package
Title: Algorithms and framework for Nonnegative Matrix Factorization (NMF)
Version: 0.23.6
Date: 2017-06-29
Version: 0.24.0
Date: 2018-10-19
Author: Renaud Gaujoux, Cathal Seoighe
Maintainer: Renaud Gaujoux <[email protected]>
Description: This package provides a framework to perform Non-negative Matrix
Expand Down Expand Up @@ -110,5 +110,6 @@ Collate:
'setNMFClass.R'
'simulation.R'
'tests.R'
'exploration.R'
RoxygenNote: 6.0.1.9000
Roxygen: list(markdown = TRUE, namespace_unsorted = TRUE, roclets = c('collate', 'namespace', 'roclets::rd2_roclet'))
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,8 @@ import(reshape2)
export(nmfReport)
export(syntheticNMF)
export(nmfCheck)
export(exploreNmf)
export(exportAnalysis)
export(getFeatures)
export(nmfExplore)
export(runNmf)
202 changes: 202 additions & 0 deletions R/exploration.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
# Functions to easily test and compare ranks and algorithms.
# Functions to extract information from the final NMF analysis.
#
# Author: Cintia C Palu
# Creation: 25 May 2018
# Integrated to NMF: 19 Oct 2018
###############################################################################

#' Explore NMF algorithms and factorization ranks
#'
#' Runs `nmf()` on `data` with every algorithm returned by `nmfAlgorithm()`
#' over the ranks in `r` (50 runs each), repeats the same runs on a randomized
#' copy of the data, and saves the fits and the diagnostic plots in a newly
#' created folder named after `prefix`.
#'
#' @param data table to be analysed
#' @param ann information on the data to be displayed on the heatmaps
#'   (passed as `annCol` to `consensusmap()`)
#' @param r range of ranks to be tested
#' @param prefix a string used to name the generated plots; the output folder
#'   is this string with every character other than letters, digits and
#'   spaces replaced by a dot
#' @param seed a number to initialise the algorithm; used for the runs on the
#'   original data and for the runs on the randomized data
#' @param .opt run options, passed unchanged as the `.opt` argument of `nmf()`
#'
#' @return the object returned by `nmf()` for the original data
#'
#' @export
#' @rdname nmfExplore
#' @aliases nmfExplore

nmfExplore <- function(data, ann, r, prefix, seed = 123456, .opt = "vP") {
  # Function to support the decision on algorithms and rank value

  # Output folder: the sanitised prefix, with a numeric suffix appended when
  # that name is already taken, so earlier results are never overwritten.
  base.folder <- gsub("[^[:alnum:] ]", ".", prefix)
  folder <- base.folder
  i <- 1
  while (file.exists(folder)) {
    folder <- paste0(base.folder, i)
    i <- i + 1
  }
  dir.create(folder)

  postfix <- paste(r[1], r[length(r)], sep = "to")

  # Evaluates `code` with a PNG device open on the plot file of `algorithm`.
  # on.exit() guarantees the device is closed even when plotting fails.
  toPng <- function(algorithm, suffix, code) {
    png(filename = paste0(folder, "/", prefix, "_", gsub("\\/", ".", algorithm),
                          "_", postfix, suffix),
        width = 1600, height = 1200, units = "px")
    on.exit(dev.off(), add = TRUE)
    force(code)
    invisible(NULL)
  }

  nrange <- nmf(data, r, method = nmfAlgorithm(), nrun = 50, .opt = .opt,
                seed = seed)
  save(nrange, file = paste0(folder, "/", prefix, "_nrange.rda"))

  for (alg in names(nrange)) {
    toPng(alg, "_consensusmap.png", {
      # The layout applies to the PNG device only and disappears with it, so
      # nothing has to be reset once the device is closed.
      par(mfrow = c(4, 3))
      consensusmap(nrange[[alg]], annCol = ann)
    })
  }

  N.rand <- randomize(data)
  # estimate quality measures from the shuffled data
  # only run NMF using the algorithms that didn't have errors in nrange
  # The caller's seed is used here too, so both surveys start identically.
  nrand <- nmf(N.rand, r, method = names(nrange), nrun = 50, .opt = .opt,
               seed = seed)
  save(nrand, file = paste0(folder, "/", prefix, "_nrand.rda"))
  # NOTE(review): writing into the user's global environment is a side effect
  # kept only for backward compatibility; the same object is already saved in
  # the .rda file above. Consider removing this assignment.
  assign(paste0(prefix, "_nrand"), nrand, .GlobalEnv)

  for (alg in names(nrange)) {
    toPng(alg, "_ranksurvey.png", {
      print(plot(nrange[[alg]], nrand[[alg]]))
    })
  }

  nrange
}

#' Assign each column of a matrix to its dominant row and export the result
#'
#' For every column of `meta`, finds the row holding the largest value, adds
#' that row index as a first column `g` to `data`, sorts the rows of `data` by
#' `g` and writes the table to `<fileID>metagene.tsv` (tab-separated).
#'
#' @param meta numeric matrix; one assignment is computed per column
#' @param fileID string pasted directly in front of `"metagene.tsv"` to build
#'   the output file name
#' @param data table with one row per column of `meta`, in the same order
#'
#' @return integer vector with the index of the dominant row of each column of
#'   `meta`, named after the column names of `meta` when it has any; the first
#'   row wins on ties and `NA` is returned for a column without any
#'   comparable value
#'
#' @export
#' @rdname exportAnalysis
#' @aliases exportAnalysis

exportAnalysis <- function(meta, fileID, data) {
  # which.max() yields exactly one index per column, so `g` stays an atomic
  # vector even when the maximum is tied between several rows.
  g <- vapply(
    seq_len(ncol(meta)),
    function(j) {
      top <- which.max(meta[, j])
      if (length(top) == 1L) unname(top) else NA_integer_
    },
    integer(1)
  )
  names(g) <- colnames(meta)

  data <- cbind(g, data)
  # Reorder by position rather than by name: this also works when `meta` has
  # no (or duplicated) column names, and drop = FALSE keeps a one-row table.
  data <- data[order(g), , drop = FALSE]
  write.table(data, file = paste0(fileID, "metagene.tsv"), row.names = TRUE,
              col.names = NA, sep = "\t")
  g
}

#' Extract and print the features selected for each metagene
#'
#' Calls `extractFeatures()` on `nmf.result` with the requested `method` and
#' prints, group by group, the row names of `original.data` that correspond to
#' the selected features.
#'
#' @param nmf.result object passed as first argument to `extractFeatures()`
#' @param method selection method passed to `extractFeatures()`; its
#'   upper-case form is printed in the report header
#' @param original.data table whose row names label the selected features;
#'   when it has no row names the feature indices are printed instead
#'
#' @return the value returned by `extractFeatures()`, unchanged
#'
#' @export
#' @rdname getFeatures
#' @aliases getFeatures

getFeatures <- function(nmf.result, method = 'max', original.data) {
  groups <- extractFeatures(nmf.result, method)
  cat(paste('\n\nExtract Groups -', toupper(method), '\n'))

  if (any(!is.na(groups))) {
    feature.names <- rownames(original.data)
    for (g in seq_along(groups)) {
      cat(paste('\nGroup', g, '\n'))
      if (is.na(groups[g])) {
        cat('No top most contributing feature associated\n')
      } else if (is.null(feature.names)) {
        # Without row names there is nothing to look up: report the indices
        # rather than printing an uninformative NULL.
        print(groups[[g]])
      } else {
        print(feature.names[groups[[g]]])
      }
    }
  } else {
    cat('No metagene has a top most contributing feature associated\n')
  }

  groups
}

#' Export the main results of an NMF fit
#'
#' Extracts information on the main features that contribute to defining
#' each metagene, as well as the cluster each sample is assigned to, and
#' writes everything inside the existing folder `prefix`. All file names start
#' with `<prefix>.<postfix>.`.
#'
#' @param original.data the original data matrix
#' @param nmf.result nmf object from which we will extract information
#' @param prefix the name of the folder to save the results; it must already
#'   exist below the current working directory
#' @param postfix a string informing the algorithm and rank used, it will also
#'   be used in the file names
#'
#' @return `NULL`, invisibly; the function is called for the files it writes
#'
#' @export
#' @rdname exploreNmf
#' @aliases exploreNmf

exploreNmf <- function(original.data, nmf.result, prefix, postfix) {
  # Work inside the result folder, and always return to the caller's working
  # directory afterwards, even when one of the steps below fails.
  old.wd <- setwd(dir = paste0('./', prefix))
  on.exit(setwd(old.wd), add = TRUE)

  # Every output file shares the "<prefix>.<postfix>." stem.
  outFile <- function(suffix) paste(prefix, postfix, suffix, sep = '.')

  rankSum <- summary(nmf.result)
  write.table(rankSum, file = outFile("rankSum.tsv"), sep = "\t",
              row.names = TRUE, col.names = NA, dec = ".")

  ###################
  ## Data analysis ##
  ###################
  cat('\nStarting analysis\n')
  # Getting the matrix factors and the fitted (reconstructed) matrix
  w <- basis(nmf.result)
  h <- coef(nmf.result)
  V.hat <- fitted(nmf.result)
  write.table(V.hat, file = outFile("V.HAT.tsv"), row.names = TRUE,
              col.names = NA, sep = "\t")

  if (is.null(rownames(original.data))) {
    cat('\nNaming rows in the original dataset\n')
    rownames(original.data) <- paste0('R', seq_len(nrow(original.data)))
  }

  groups_kim <- getFeatures(nmf.result, "kim", original.data)
  groups_max <- getFeatures(nmf.result, "max", original.data)

  ####################
  # Exporting        #
  # relevant Genes   #
  # for each cluster #
  ####################
  cat('\nExporting metagenes information\n')
  # exportAnalysis() appends "metagene.tsv" directly to fileID, so the stem
  # ends with a dot to give "<prefix>.<postfix>.H.metagene.tsv" (and "W").
  groups_h <- exportAnalysis(h, fileID = outFile("H."), t(original.data))
  groups_w <- exportAnalysis(t(w), fileID = outFile("W."), original.data)
  write.table(w, file = outFile("W.tsv"), row.names = TRUE, col.names = NA,
              sep = "\t")
  write.table(h, file = outFile("H.tsv"), row.names = TRUE, col.names = NA,
              sep = "\t")

  cat('\nSaving variables\n')
  # save() stores the objects under these exact local variable names.
  save(list = c('w', 'h', 'V.hat', 'groups_w', 'groups_h', 'groups_max',
                'groups_kim'),
       file = outFile("exploreNMF.rda"))

  cat('\nExploratory Analysis finished. ')
  cat('Environment objects saved in the file:\n"')
  cat(paste(prefix, postfix, 'exploreNMF.rda"\n', sep = '.'))
  invisible(NULL)
}

#' Run NMF with a chosen algorithm and rank and export the results
#'
#' Runs `nmf()` on `original.data`, saves the fit in the output folder and
#' calls `exploreNmf()` to export the result tables into the same folder.
#'
#' @param original.data the data matrix to factorize
#' @param ann currently not used by this function
#' @param r factorization rank passed to `nmf()`; it is also pasted into the
#'   output file names
#' @param prefix a string naming the output folder; every character other
#'   than letters, digits and spaces is replaced by a dot. The folder is
#'   created when it does not exist yet
#' @param .opt run options, passed unchanged as the `.opt` argument of `nmf()`
#' @param alg algorithm, passed as the `method` argument of `nmf()`; it is
#'   also pasted into the output file names
#' @param maxIter passed as the `maxIter` argument of `nmf()`
#' @param nrun number of runs, passed as the `nrun` argument of `nmf()`
#'
#' @return the object returned by `nmf()`
#'
#' @export
#' @rdname runNmf
#' @aliases runNmf

runNmf <- function(original.data, ann, r, prefix, .opt = "vP", alg,
                   maxIter = 30000, nrun = 1000) {
  # Prepare the output folder before the (potentially very long) NMF run, so
  # that a problem with the folder cannot cost the computed result.
  folder <- gsub("[^[:alnum:] ]", '.', prefix)
  if (!dir.exists(folder) && !dir.create(folder)) {
    stop("could not create the output folder '", folder, "'", call. = FALSE)
  }

  nmf.result <- nmf(original.data, r, method = alg, .opt = .opt,
                    maxIter = maxIter, nrun = nrun)

  postfix <- paste0(alg, '.r', r)

  save(nmf.result, file = paste0(folder, '/', folder, '.', postfix, '.rda'))
  exploreNmf(original.data = original.data, nmf.result = nmf.result,
             prefix = folder, postfix = postfix)

  nmf.result
}


27 changes: 27 additions & 0 deletions man/exploreNmf.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
\name{exploreNmf}
\alias{exploreNmf}
\title{Function to extract information on the metagenes obtained after running \code{\link{nmf}}}
\description{It identifies the main features that contribute to defining each metagene,
as well as identifying to which metagene each sample belongs, based on the frequency with which it was assigned to it. This function is mainly called by the function \code{\link{runNmf}}.}
\usage{
exploreNmf(original.data, nmf.result, prefix, postfix)
}
\arguments{
\item{original.data}{Matrix with the original data submitted to \code{\link{nmf}}.}
\item{nmf.result}{Object returned by \code{\link{nmf}} function, run with a single rank and a single algorithm.}
\item{prefix}{A string used to name the generated files and the folder in which they are saved.}
\item{postfix}{A string that informs the algorithm and rank used, to be included as part of the file names.}
}
\value{
It generates files to be saved in the folder named after the \code{prefix}.
\item{*.rankSum.tsv}{Tab-separated table with the \code{summary} information of \code{nmf.result}.}
\item{*.V.HAT.tsv}{Tab-separated table with the \code{fitted} results of \code{nmf.result}.}
\item{*.W.tsv}{Tab-separated table with the \code{basis} results for \code{nmf.result}.}
\item{*.H.tsv}{Tab-separated table with the \code{coef} results for \code{nmf.result}.}
\item{*.metagene.tsv}{Tab-separated table informing to which metagene a row or a column of the original data matrix was assigned based on the \code{\link{nmf}} analysis. It is generated by the \code{exportAnalysis} function.}
\item{*.exploreNMF.rda}{File with R objects that contain relevant information regarding the metagenes.
They are exported to enable the user to review the information in the future.}
}
\author{
Cintia C Palu
}
20 changes: 20 additions & 0 deletions man/exportAnalysis.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
\name{exportAnalysis}
\alias{exportAnalysis}
\title{Function to find to which metagene a row or column of a matrix was most frequently assigned according to the \code{\link{nmf}} results.}
\description{
It searches for the highest value in each column of a matrix. In the context of \code{\link{nmf}}, the highest value indicates the metagene most frequently assigned to the variable represented by that column during the multiple \code{\link{nmf}} runs. The metagene information is added to the original data matrix and saved as a .tsv file. It is designed for users interested in using \code{\link{nmf}} to find clusters in their data. \code{exportAnalysis} is automatically called by the \code{\link{exploreNmf}} function.}
\usage{
exportAnalysis(meta, fileID, data)
}
\arguments{
\item{meta}{Data matrix generated by the \code{coef} or \code{basis} functions.}
\item{fileID}{A string providing the file name prefix to be used to save the results.}
\item{data}{Original matrix submitted to \code{\link{nmf}} analysis.}
}
\value{
It returns a vector with the index of the metagene assigned to each column of \code{meta}, and generates a file whose name starts with \code{fileID}.
\item{*.metagene.tsv}{Tab-separated table informing to which metagene a row or a column of the original data matrix was assigned based on the \code{\link{nmf}} analysis.}
}
\author{
Cintia C Palu
}
19 changes: 19 additions & 0 deletions man/getFeatures.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
\name{getFeatures}
\alias{getFeatures}
\title{It retrieves the top most contributing feature associated with a metagene estimated by \code{\link{nmf}}}
\description{It applies the \code{\link{extractFeatures}} function with the method of choice (default is 'max')
to identify the top most contributing features of the metagenes.}
\usage{
getFeatures(nmf.result, method = "max", original.data)
}

\arguments{
\item{nmf.result}{Object returned by \code{\link{nmf}} function, run with a single rank and a single algorithm.}
\item{method}{Scoring or selection method ("kim" or "max").}
\item{original.data}{The original data matrix submitted to the \code{\link{nmf}} analysis.}
}
\value{list with the selected features.}

\author{
Cintia C Palu
}
Loading