Skip to content

Commit

Permalink
update doc
Browse files Browse the repository at this point in the history
  • Loading branch information
fsoubes committed Mar 6, 2019
1 parent 17a9504 commit 1cc5448
Show file tree
Hide file tree
Showing 89 changed files with 2,572 additions and 574 deletions.
97 changes: 71 additions & 26 deletions function/EnhancedVolcano.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,51 @@
# Github: https://github.com/kevinblighe/EnhancedVolcano
# Modified by Franck Soubès (add 74-85; modify 98-107, add 3 parameters displayLab, findfamily, topgenes)

#' Volcano plots represent a useful way to visualise the results of differential expression analyses. Here, we present a highly-configurable function that
#' produces publication-ready volcano plots. EnhancedVolcano will attempt to fit as many transcript names in the plot window as possible,
#' thus avoiding 'clogging' up the plot with labels that could not otherwise have been read.
#'
#' @param toptable Requires at least the following: column for transcript names (can be rownames); a column for log2 fold changes; a column for nominal or adjusted p-value. REQUIRED.
#' @param lab A column name in toptable containing transcript names. Can be rownames(toptable). REQUIRED.
#' @param x A column name in toptable containing log2 fold changes. REQUIRED.
#' @param y A column name in toptable containing nominal or adjusted p-values. REQUIRED.
#' @param selectLab A vector containing a subset of lab. DEFAULT = NULL. OPTIONAL.
#' @param displaylab Comma-separated values corresponding to the genes to display. DEFAULT = NULL. OPTIONAL.
#' @param findfamily A character string used to parse a family of genes. DEFAULT = NULL. OPTIONAL.
#' @param topgenes A numeric value to display the top n genes based on the regulation. DEFAULT = NULL. OPTIONAL.
#' @param regulationvolc A character string for the regulation ("up", "down", "both"). DEFAULT = NULL. OPTIONAL.
#' @param xlim Limits of the x-axis. DEFAULT = c(min(toptable[,x], na.rm=TRUE),max(toptable[,x], na.rm=TRUE)). OPTIONAL.
#' @param ylim Limits of the y-axis. DEFAULT = c(0, max(-log10(toptable[,y]), na.rm=TRUE) + 5). OPTIONAL.
#' @param xlab Label for x-axis. DEFAULT = bquote(Log[2]~ "fold change"). OPTIONAL.
#' @param ylab Label for y-axis. DEFAULT = bquote(-Log[10]~ P). OPTIONAL.
#' @param axisLabSize Size of x- and y-axis labels. DEFAULT = 18. OPTIONAL.
#' @param pCutoff Cut-off for statistical significance. A horizontal line will be drawn at -log10(pCutoff). DEFAULT = 10e-6. OPTIONAL.
#' @param pLabellingCutoff Labelling cut-off for statistical significance. DEFAULT = pCutoff. OPTIONAL.
#' @param FCcutoff Cut-off for absolute log2 fold-change. Vertical lines will be drawn at the negative and positive values of FCcutoff. DEFAULT = 1.0. OPTIONAL.
#' @param title Plot title. DEFAULT = 'Volcano plot'. OPTIONAL.
#' @param titleLabSize Size of the plot title (numeric). OPTIONAL.
#' @param transcriptPointSize Size of plotted points for each transcript. DEFAULT = 0.8. OPTIONAL.
#' @param transcriptLabSize Size of labels for each transcript. DEFAULT = 3.0. OPTIONAL.
#' @param col Colour shading for plotted points, corresponding to < abs(FCcutoff) && > pCutoff, > abs(FCcutoff), < pCutoff, > abs(FCcutoff) && < pCutoff. DEFAULT = c("grey30", "forestgreen", "royalblue", "red2"). OPTIONAL.
#' @param colAlpha Alpha for purposes of controlling colour transparency of transcript points. DEFAULT = 1/2. OPTIONAL.
#' @param legend Plot legend text. DEFAULT = c("NS", "Log2 FC", "P", "P & Log2 FC"). OPTIONAL.
#' @param legendPosition Position of legend ("top", "bottom", "left", "right"). DEFAULT = "top". OPTIONAL.
#' @param legendLabSize Size of plot legend text. DEFAULT = 14. OPTIONAL.
#' @param legendIconSize Size of plot legend icons / symbols. DEFAULT = 4.0. OPTIONAL.
#' @param DrawConnectors Logical, indicating whether or not to connect plot labels to their corresponding points by line connectors. DEFAULT = FALSE. OPTIONAL.
#' @param widthConnectors Line width of connectors. DEFAULT = 0.5. OPTIONAL.
#' @param colConnectors Line colour of connectors. DEFAULT = 'grey10'. OPTIONAL.
#' @param cutoffLineType Line type for FCcutoff and pCutoff ("blank", "solid", "dashed", "dotted", "dotdash", "longdash", "twodash"). DEFAULT = "longdash". OPTIONAL.
#' @param cutoffLineCol Line colour for FCcutoff and pCutoff. DEFAULT = "black". OPTIONAL.
#' @param cutoffLineWidth Line width for FCcutoff and pCutoff. DEFAULT = 0.4. OPTIONAL.
#'
#' @author Kevin Blighe <[email protected]>
#'
#' @return A list of two elements: the volcano plot (a ggplot object) and the subset of toptable passing the labelling cut-offs
#' @export
#'
#' @examples

EnhancedVolcano <- function(
toptable,
lab,
Expand Down Expand Up @@ -38,7 +83,7 @@ EnhancedVolcano <- function(
cutoffLineType = "longdash",
cutoffLineCol = "black",
cutoffLineWidth = 0.4)

{
if(!requireNamespace("ggplot2")) {
stop("Please install ggplot2 first.", call.=FALSE)
Expand All @@ -60,7 +105,7 @@ EnhancedVolcano <- function(
requireNamespace("ggrepel")
requireNamespace("dplyr")
i <- xvals <- yvals <- Sig <- NULL

toptable <- as.data.frame(toptable)
toptable$GeneName <- sapply(toptable$GeneName, function(v) {
if (is.character(v)) return(toupper(v))
Expand All @@ -74,9 +119,9 @@ EnhancedVolcano <- function(
(abs(toptable[,x])>FCcutoff)] <- "FC_P"
toptable$Sig <- factor(toptable$Sig,
levels=c("NS","FC","P","FC_P"))



if(is.na(topgenes) && !is.na(displaylab) ){
selectLab <- as.character(displaylab)
}
Expand All @@ -94,32 +139,32 @@ EnhancedVolcano <- function(
else{
toptable$abs <- unlist((toptable[x]))
}

toptable$X <- rownames(toptable)
myval <- toptable %>% dplyr::filter(Sig =="FC_P") %>% dplyr::select(GeneName,X,abs) %>%
myval <- toptable %>% dplyr::filter(Sig =="FC_P") %>% dplyr::select(GeneName,X,abs) %>%
{if (regulationvolc == "down") top_n(.,-topgenes) else top_n(.,topgenes)}
myvalueind <- myval$X
selectLab <- as.character(myval$GeneName)

}


if (min(toptable[,y], na.rm=TRUE) == 0) {
warning("One or more P values is 0. Converting to minimum possible value...", call. = FALSE)
toptable[which(toptable[,y] == 0), y] <- .Machine$double.xmin
}

toptable$lab <- sapply(toptable$GeneName, function(v) {
if (is.character(v)) return(toupper(v))
else return(v)
})



toptable$xvals <- toptable[,x]
toptable$yvals <- toptable[,y]


if (!is.null(selectLab)) {
if(!is.na(topgenes) && is.na(displaylab)&& is.na(findfamily)){
names.new <- rep("", length(toptable$lab))
Expand All @@ -134,15 +179,15 @@ EnhancedVolcano <- function(
toptable$lab <- names.new
}
}

subdata = subset(toptable,
toptable[,y]<pLabellingCutoff &
abs(toptable[,x])>FCcutoff)
abs(toptable[,x])>FCcutoff)









plot <- ggplot2::ggplot(toptable,
ggplot2::aes(x=xvals, y=-log10(yvals))) +

Expand Down Expand Up @@ -207,9 +252,9 @@ EnhancedVolcano <- function(
linetype=cutoffLineType,
colour=cutoffLineCol,
size=cutoffLineWidth)



if (DrawConnectors == TRUE) {
plot <- plot + ggrepel::geom_text_repel(max.iter = 100,
data=subdata ,
Expand Down Expand Up @@ -237,7 +282,7 @@ EnhancedVolcano <- function(
check_overlap = F,
vjust = 1.0)
}

if(!is.na(topgenes)) subdata <- filter(subdata, lab != "")
mylist = list(plot, subdata)
return(mylist)
Expand Down
65 changes: 36 additions & 29 deletions function/PCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,36 @@

#' res.pca is a function that computes a PCA of non-normalized data with the FactoMineR package
#'
#' @param workingset a data frame corresponding to the WorkingSet
#' @param scale a boolean; by default this value is set to False non-normalized data
#' @param workingset A data frame corresponding to the WorkingSet
#' @param scale A boolean value; by default this value is set to False for non-normalized data
#'
#' @return A data frame with PCA attributes
#'
#' @return \PCAres a data frame with PCA attributes
#'
#' @export

res.pca <- function(workingset, scale = FALSE) {
  # Compute a PCA on the transposed working set.
  #
  # workingset: data frame whose column names become the row names of the
  #             transposed table handed to PCA().
  # scale:      logical forwarded to PCA() as `scale.unit`; the default FALSE
  #             leaves the data unscaled.
  # Returns the object produced by PCA().

  # NOTE(review): `transpose` is presumably data.table::transpose, which drops
  # dimnames -- hence the explicit row-name assignment below; confirm.
  myt <- transpose(workingset)
  row.names(myt) <- colnames(workingset)

  # Honour the caller's `scale` argument instead of a fixed value, so that
  # res.pca(x, scale = TRUE) actually standardises the variables.
  PCAres <- PCA(myt,
                scale.unit = scale,
                graph = FALSE)

  return(PCAres)
}


#' eboulis is a function whose aim is to display the eigenvalues of the data with the package factoextra
#'
#' @param PCAresa a data frame with PCA attributes
#' @param PCAres A data frame with PCA attributes
#'
#' @return A ggplot object
#'
#' @return \p a factoextra object
#'
#' @export

eboulis <- function(PCAres){

p <- fviz_eig(PCAres, addlabels=TRUE, hjust = -0.3, barfill="white", barcolor ="darkblue", linecolor ="red")
p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank(),
panel.background = element_blank())
Expand All @@ -48,45 +48,52 @@ eboulis <- function(PCAres){

#' PCAplot is a function that returns a factoextra object of PCA type
#'
#' @param PCAres a data frame with PCA attributes
#' @param myax a numeric vector of length 2 specifying the dimensions to be plotted
#' @param elips a boolean value to add ellipse to the data distribution for the different groups; default = False
#' @param rep a boolean value to avoid overlaps between the label points
#' @param mylevel a data frame corresponding to the pData
#' @param mylabsize a numeric value representing the police size to display for the different labels
#' @param dispelip a numeric value representing the ellipsoid dispersion
#' @param labeled a character to display labels and/or points
#' @param pal a color object from the RcolorBrewer package
#' @param PCAres A data frame with PCA attributes
#' @param myax A numeric vector of length 2 specifying the dimensions to be plotted
#' @param elips A boolean value to add ellipse to the data distribution for the different groups; default = False
#' @param rep A boolean value to avoid overlaps between the label points
#' @param mylevel A data frame corresponding to the pData
#' @param mylabsize A numeric value representing the font size to display for the different labels
#' @param dispelip A numeric value representing the ellipsoid dispersion
#' @param labeled A character to display labels and/or points
#' @param pal A color object from the RColorBrewer package
#'
#' @return A ggplot object
#'
#' @return
#'
#' @export

PCAplot <- function(PCAres, myax = c(1,2), elips = TRUE , rep = TRUE , mylevel = groups$Grp, mylabsize = 4, dispelip = 0.8 , labeled = 'all', pal = brewer.pal(8, "Dark2")){
  # Draw the individuals of a PCA result, coloured by group.
  #
  # PCAres:    PCA result passed to fviz_mca_ind().
  # myax:      the two dimensions to plot (forwarded as `axes`).
  # elips:     whether to add group ellipses (forwarded as `addEllipses`).
  # rep:       whether to repel overlapping labels (forwarded as `repel`).
  # mylevel:   grouping used to colour individuals (forwarded as `habillage`).
  #            NOTE(review): the default reads `groups$Grp` from the calling
  #            environment, so a `groups` object must exist when it is used.
  # mylabsize: label size (forwarded as `labelsize`).
  # dispelip:  ellipse level (forwarded as `ellipse.level`).
  # labeled:   which elements get labels (forwarded as `label`).
  # pal:       colours applied through scale_color_manual().
  # Returns the plot with the manual colour scale added.

  # Forward `dispelip` rather than a fixed 0.8 so the argument takes effect;
  # its default of 0.8 keeps the default plot unchanged.
  p <- fviz_mca_ind(PCAres, label = labeled, habillage = mylevel, addEllipses = elips,
                    ellipse.level = dispelip, repel = rep, axes = myax, pointsize = 2,
                    labelsize = mylabsize)

  return(p + scale_color_manual(values = pal))
}


# res.pca <- function(workingset, restab ,scale = F, variable = F) {
#
#
# myt = transpose(workingset)
# row.names(myt) = colnames(workingset)
#
#
# if(variable)
# colnames(myt) = make.names(restab$GeneName, unique=TRUE)
#
#
# PCAres = PCA(myt,
# scale.unit = F,
# graph = F)
#
#
# return(PCAres)
# }










45 changes: 22 additions & 23 deletions function/cutheat.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,22 @@
#' cutHeatmaps is a function that takes as input a heatmap object and, depending on the cut height and the cluster
#' chosen, renders a ggplot object or a heatmap object
#'
#' @param hmp an heatmap object
#' @param height a numeric value to cut the dendogram
#' @param exprData a data frame with specific columns depending on the user's choices
#' @param DEGres a data frame corresponding to the xxx_topTableAll
#' @param groups a data frame of the choosen groups
#' @param cexcol a positive numbers, used as cex.axis in for the row or column axis labeling
#' @param cexrow a positive numbers, used as cex.axis in for the row or column axis labeling
#' @param labrow a character vectors with row and column labels to use
#' @param fileType a character to select the plot to display heatmap, boxplot or stripchart
#' @param scale a character indicating if the values should be centered and scaled in either the row direction or the column direction, or none
#' @param meanGrp a boolean value to computes the mean for each groups; default = F
#' @param col.hm a character vector
#' @param type a character to select the plot to display heatmap, boxplot or stripchart
#' @param las a numeric value
#' @param distfun function used to compute the distance (dissimilarity) between both rows and columns.
#' @param palette.col a character vector
#' @param num an item of the heatmap object corresponding to a specific cluster choosen by the user
#' @param hmp A heatmap object
#' @param height A numeric value to cut the dendrogram
#' @param exprData A data frame with specific columns depending on the user's choices
#' @param groups A data frame of the chosen groups
#' @param cexcol A positive number, used as cex.axis for the row or column axis labeling
#' @param cexrow A positive number, used as cex.axis for the row or column axis labeling
#' @param labrow A character vector with row and column labels to use
#' @param fileType A character to select the plot to display heatmap, boxplot or stripchart
#' @param meanGrp A boolean value to compute the mean for each group; default = F
#' @param type A character to select the plot to display heatmap, boxplot or stripchart
#' @param las A numeric value
#' @param distfun Function used to compute the distance (dissimilarity) between both rows and columns.
#' @param palette.col A character vector of colors
#' @param num An item of the heatmap object corresponding to a specific cluster chosen by the user
#' @param genename A character vector of gene symbols
#' @param scales A character indicating if the values should be centered and scaled in either the row direction or the column direction, or none. The default is "none"
#' @param ...
#'
#' @return a ggplot object or heatmapply object
Expand Down Expand Up @@ -92,7 +91,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la
###=======================
## cut the heatmap
###=======================

# Cut the dendrogram in 2 parts at the desired height

cut02 = cut(hmp$rowDendrogram, h = height)
Expand All @@ -114,7 +113,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la

## scaling
# HCgroupsLabExrsCenterScale <- ifelse(scales=="row",lapply(HCgroupsLabExrs,function(y){t(scale(t(y),center=T,scale=T))}),HCgroupsLabExrs)

if(scales=="row"){
HCgroupsLabExrsCenterScale <- lapply(HCgroupsLabExrs,function(y){t(scale(t(y),center=T,scale=T))})
}else HCgroupsLabExrsCenterScale <- HCgroupsLabExrs
Expand Down Expand Up @@ -252,7 +251,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la
sep = "")

if (!probes.boxplot) {

##=============
## plot stripchart
##
Expand Down Expand Up @@ -293,7 +292,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la
title = paste("Cluster", i),
y = "Expression Z-score",
caption = footnote
) +
) +
theme(
plot.title = element_text(size = 20, hjust = 0.5),
plot.caption = element_text(size = 10, hjust = 0.5),
Expand Down Expand Up @@ -344,7 +343,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la
title = paste("Cluster", i),
y = "Expression Z-score",
caption = footnote
) +
) +
theme(
plot.title = element_text(size = 24, hjust = 0.5),
axis.title.x = element_text(size = 10),
Expand Down Expand Up @@ -379,7 +378,7 @@ cutHeatmaps = function(hmp, height, exprData, groups, cexcol = 1, cexrow = 1, la
if(length(labels(cut02$lower[[i]]))>1){
rowIds=NA;


useRasterTF=T;
hm02gp=heatmap(exprData[labels(cut02$lower[[1]]),], Rowv=str(cut02$lower[[1]]),
Colv=hmp$colDendrogram,
Expand Down
Loading

0 comments on commit 1cc5448

Please sign in to comment.