Skip to content

Commit

Permalink
Comparative functions added to the vignette, vignette data generated
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasBrazier committed Sep 24, 2024
1 parent 49bb42a commit cdc32e3
Show file tree
Hide file tree
Showing 27 changed files with 716 additions and 111 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ jobs:
r-version: ${{ matrix.r-version }}
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
install.packages(c("remotes", "rcmdcheck", "gatepoints", "ggplot2", "pbmcapply", "paralle", "BiocManager", "dineq", "npreg", "reshape2", "segmented", "knitr"))
remotes::install_deps(dependencies = TRUE)
BiocManager::install("GenomicRanges")
shell: Rscript {0}
- name: Check
run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
Expand Down
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Imports:
dineq,
S4Vectors,
npreg,
methods
methods,
reshape2,
segmented
VignetteBuilder: knitr
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
S3method(mean,mareyMap)
S3method(median,mareyMap)
S3method(plot,mareyMap)
S3method(summary,comparative_marey_map)
S3method(summary,mareyMap)
S3method(weighted.mean,mareyMap)
export(bootstrapMareyMap)
export(bootstrapRecMap)
export(brokenstick)
export(calibrateSmoothing)
export(coefficientVariation)
export(comparative_interpolation_to_dataframe)
export(comparative_marey_map)
export(comparative_marey_to_dataframe)
export(comparative_recmap_to_dataframe)
export(comparative_recombination_maps)
export(compute_stats_marey)
export(cvResampling)
export(fitLoess)
export(fitSpline)
Expand All @@ -19,11 +27,15 @@ export(gini)
export(lorenz)
export(mareyMap)
export(mask.marker)
export(merge_comparative_marey)
export(outlierSelection)
export(peripherybias)
export(plot_comparative_marey)
export(plot_comparative_recmap)
export(plot_recombinationMap)
export(predictGeneticMap)
export(recombinationMap)
export(subset_comparative_marey)
export(unmask.marker)
export(variance)
export(veller)
Expand All @@ -38,6 +50,8 @@ import(grDevices)
import(npreg)
import(parallel)
import(pbmcapply)
import(reshape2)
import(segmented)
import(stats)
import(utils)
importFrom(ggplot2,.data)
Expand Down
170 changes: 170 additions & 0 deletions R/broken_stick.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#' Plot the broken stick
#'
#' @description
#' Plot a broken stick to compare, among a large set of species,
#' the heterogeneity in the distribution of recombination rates along the genome.
#' For a description of the method, see Brazier, T., & Glémin, S. (2022).
#' Diversity and determinants of recombination landscapes in flowering plants.
#' PLOS Genetics, 18(8), Article 8. https://doi.org/10.1371/journal.pgen.1010141
#'
#' @param marey a `comparative_marey_map` object
#' @param k the number of segments (default = 10)
#' @param method the method to infer the breakpoints of segments, either `strict` to cut the chromosome in segments of equal physical size, or `segmented` to estimate the segment breakpoints with a segmented regression
#' @param plot (logical) whether to plot the broken stick directly or return a data frame (default = `TRUE` will plot the figure)
#'
#'
#' @details
#' Broken Stick model (see White & Hill 2020 for details).
#' Each map is cut in K segments and the relative genetic size of each segment
#' is evaluated. One bar is drawn per map (sample = set + map name), and bars
#' are grouped by set.
#'
#' @returns a `ggplot` of the broken stick if `plot = TRUE`, otherwise a data frame
#' in long format with the columns `set`, `chromosome`, `segment`,
#' `proportion.length`, `sample`, `color` and `ratio`
#'
#' @import segmented
#' @import ggplot2
#' @import reshape2
#'
#' @export
#'
brokenstick = function(marey, k = 10, method = "strict", plot = TRUE) {
  method = match.arg(method, c("strict", "segmented"))

  # the list of set and names to process
  s = marey$set
  n = marey$map

  if (length(s) == 0) {
    stop("The comparative Marey map is empty: no map to process.", call. = FALSE)
  }

  # One vector of k proportions per (set, map) pair
  list_bs = lapply(seq_along(s), function(i) {
    idx = (s == s[i] & n == n[i])
    cat(s[i], n[i], "\n")

    subs = subset_comparative_marey(marey, subset = idx)
    subs = comparative_marey_to_dataframe(subs)

    brokenstick_one_map(subs, k = k, method = method)
  })

  df = data.frame(set = s,
                  name = n)

  res = as.data.frame(do.call("rbind", list_bs))
  df = cbind(df, res)

  # Format columns in proper way
  colnames(df) = c("set", "name", as.character(seq_len(k)))

  # Tidy data frame (long format, one row per map and segment).
  # The id columns are given explicitly so that a numeric set or map name
  # is never mistaken for a measured variable.
  bs_long = melt(df, id.vars = c("set", "name"))
  colnames(bs_long) = c("set", "chromosome", "segment", "proportion.length")

  # The sample label must be built AFTER the renaming: the `chromosome` column
  # does not exist before, and the label would not identify the map.
  bs_long$sample = paste(bs_long$set, bs_long$chromosome, sep = "_")

  # Set a vector of gradient color
  # Value of color is simply expected - p, the departure from the expected proportion 1/k
  bs_long$color = (1/k) - bs_long$proportion.length

  # Besides, estimates the ratio observed/expected
  bs_long$ratio = bs_long$proportion.length/(1/k)

  # No need to build the figure when only the data frame is requested
  if (!plot) {
    return(bs_long)
  }

  ggplot(data = bs_long, aes(x = sample, y = proportion.length, fill = log10(ratio))) +
    geom_bar(stat = 'identity', width = 1) +
    scale_fill_viridis_c(breaks = c(-1, 0, 1), labels = c("-1", "0", "1"), direction = -1, limits = c(-1, 1),
                         values = c(0,0.7,0.8,1), option = "D") +
    facet_grid(~set, scales = "free", space = "free_x") +
    labs(x = "Chromosome", y = "Proportion of\ntotal physical length", fill = "Segment") +
    theme(axis.line = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          plot.title = element_text(color = "black", size = 24, face = "bold.italic", hjust = 0.5),
          plot.subtitle = element_text(color = "black", size = 24, hjust = 0.5),
          axis.title.x = element_text(color = "black", size = 24),
          axis.title.y = element_text(color = "black", size = 24),
          axis.text = element_text(size = 24, colour = "black"),
          axis.text.x = element_blank(), # No samples names
          axis.ticks.x = element_blank(), # No x axis
          strip.text = element_text(size = 18, colour = "black", angle = 90),
          legend.key = element_rect(fill = "white", linewidth = 1),
          legend.text = element_text(size = 24),
          legend.title = element_text(size = 24))
}




#' Compute the broken stick model for a single Marey map
#'
#' @description
#' Estimate the proportions of a broken stick model for a single Marey map,
#' i.e. the proportion of the total genetic length (cM) found in each of
#' k consecutive segments along the chromosome.
#'
#' @param marey a data frame of markers with at least the columns `phys`
#' (physical position) and `gen` (genetic position), as passed by `brokenstick()`
#' @param k the number of segments (default = 10)
#' @param method either `strict` (k segments of equal physical size) or
#' `segmented` (k - 1 breakpoints estimated by a segmented regression of
#' `gen` on `phys`)
#'
#' @returns a numeric vector of length `k`, the share of the total genetic
#' length in each segment (sums to 1). A vector of `NA` is returned when no
#' marker is usable or when the segmented regression fails.
brokenstick_one_map = function(marey, k = 10, method = "strict") {
  method = match.arg(method, c("strict", "segmented"))

  phys = as.numeric(as.character(marey$phys))
  gen = as.numeric(as.character(marey$gen))

  # Only markers with both coordinates can be placed in a segment
  keep = !is.na(phys) & !is.na(gen)
  phys = phys[keep]
  gen = gen[keep]

  failure = rep(NA_real_, k)
  if (length(phys) == 0) {
    return(failure)
  }

  max_phys = max(phys)

  if (method == "strict") {
    # Upper bounds of k segments of equal genomic size.
    # The last bound is set to the exact chromosome end so that the terminal
    # marker always belongs to the last segment.
    breakpoints = max_phys * seq_len(k) / k
    breakpoints[k] = max_phys
  } else {
    if (k > 1) {
      map_df = data.frame(phys = phys, gen = gen)
      # Breakpoints are estimated in genomic distances, hence the genetic
      # position is the response and the physical position the covariate
      lin.mod = lm(gen ~ phys, data = map_df)
      segmented.mod = tryCatch(
        segmented(lin.mod, seg.Z = ~phys, npsi = (k - 1),
                  control = seg.control(n.boot = 100, fix.npsi = TRUE)),
        error = function(e) {
          warning("Segmentation failed: ", conditionMessage(e), call. = FALSE)
          NULL
        }
      )
      # In cases of segmentation failure (e.g. not enough data)
      if (is.null(segmented.mod) || !inherits(segmented.mod, "segmented")) {
        return(failure)
      }
      psi = summary(segmented.mod)$psi[, 2]
      if (length(psi) != (k - 1) || anyNA(psi)) {
        return(failure)
      }
      breakpoints = c(sort(as.numeric(psi)), max_phys)
    } else {
      # A single segment has no breakpoint to estimate
      breakpoints = max_phys
    }
  }

  # Cumulative genetic length reached at the end of each segment.
  # A segment boundary located before the first marker has a length of 0.
  cumulative = vapply(breakpoints, function(b) {
    inside = gen[phys <= b]
    if (length(inside) == 0) 0 else max(inside)
  }, numeric(1))

  # Genetic length of each segment = increment of the cumulative length
  p = diff(c(0, cumulative))

  # Proportion of the total genetic length in each segment
  stick_proportions = p/sum(p)

  return(stick_proportions)
}
83 changes: 65 additions & 18 deletions R/comparative_marey_map.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ comparative_marey_map = function(x = data.frame(),
#'
comparative_recombination_maps = function(x,
method = 'loess',
verbose = TRUE) {
verbose = TRUE, ...) {
l = x$data

x$data = lapply(l, function(x) recombinationMap(x, method = method, verbose = verbose))
Expand Down Expand Up @@ -85,10 +85,10 @@ comparative_recombination_maps = function(x,
#' @return a list of summary statistics
#' @export
#'
compute_stats_marey = function(x, statistics = c('mean', 'median')) {
compute_stats_marey = function(x, statistics = c('mean', 'median'), ...) {
list_stats = list()
list_stats$sets = as.character(x$set)
list_stats$maps = as.character(x$map)
list_stats$set = as.character(x$set)
list_stats$map = as.character(x$map)

# marey = comparative_marey_to_dataframe(x)
recmap = comparative_recmap_to_dataframe(x)
Expand Down Expand Up @@ -170,24 +170,24 @@ plot_comparative_marey = function(x, group = 'set + map') {
if (group == 'set') {
grouping = as.formula(~as.factor(set))
facet = facet_wrap(grouping, scales = "free")
point_rec = geom_point(aes(group = as.factor(map)), alpha = 0.4)
line_rec = geom_line(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, group = as.factor(map)), colour = "black")
point_rec = geom_point(aes(colour = as.factor(map), fill = as.factor(map)), alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, group = as.factor(map)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, ymin = lowerGeneticPositioncM, ymax = upperGeneticPositioncM, group = as.factor(map)),
fill = "darkorange", colour = "darkorange", alpha = 0.3)
alpha = 0.4)

}
if (group == 'map') {
grouping = as.formula(~as.factor(map))
grouping = as.formula(~ as.factor(map))
facet = facet_wrap(grouping, scales = "free")
point_rec = geom_point(aes(group = as.factor(set)), alpha = 0.4)
line_rec = geom_line(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, group = as.factor(set)), colour = "black")
point_rec = geom_point(aes(colour = as.factor(set), fill = as.factor(set)), alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, group = as.factor(set)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, ymin = lowerGeneticPositioncM, ymax = upperGeneticPositioncM, group = as.factor(set)),
fill = "darkorange", colour = "darkorange", alpha = 0.3)
alpha = 0.4)
}
if (group == 'set + map') {
grouping = as.formula(~as.factor(map) + as.factor(set))
facet = facet_grid(grouping, scales = "free")
point_rec = geom_point(alpha = 0.4)
point_rec = geom_point(alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM), colour = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = physicalPosition/10^6, y = geneticPositioncM, ymin = lowerGeneticPositioncM, ymax = upperGeneticPositioncM),
fill = "darkorange", colour = "darkorange", alpha = 0.3)
Expand All @@ -197,11 +197,10 @@ plot_comparative_marey = function(x, group = 'set + map') {
if (nrow(marey) > 0) {
marey$vld = TRUE

p = ggplot2::ggplot(data = df, aes(x = phys/10^6, y = gen, colour = vld)) +
p = ggplot2::ggplot(data = df, aes(x = phys/10^6, y = gen)) +
point_rec +
line_rec +
ribbon_rec +
scale_colour_manual(values=c("TRUE" = "dodgerblue4", "FALSE" = "firebrick4")) +
facet +
labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)") +
theme(axis.line = element_line(),
Expand All @@ -216,11 +215,10 @@ plot_comparative_marey = function(x, group = 'set + map') {
axis.text=element_text(colour="black"),
legend.position = "right")
} else {
p = ggplot2::ggplot(data = df, aes(x = phys/10^6, y = gen, colour = vld)) +
p = ggplot2::ggplot(data = df, aes(x = phys/10^6, y = gen)) +
point_rec +
scale_colour_manual(values=c("TRUE" = "dodgerblue4", "FALSE" = "firebrick4")) +
facet +
labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)") +
labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)", colour = "dataset") +
theme(axis.line = element_line(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
Expand Down Expand Up @@ -310,7 +308,23 @@ plot_comparative_recmap = function(x, group = 'set + map') {
#' @export
#'
merge_comparative_marey = function(x = list()) {
  # x is a list of `comparative_marey_map` objects; their `data`, `set` and
  # `map` components are concatenated, in order, in a new empty object.
  new_object = comparative_marey_map()

  # Iterating over the elements (rather than over 1:length(x)) makes an
  # empty list return the empty object instead of failing
  for (d in x) {
    new_object$data = c(new_object$data, d$data)
    new_object$set = c(new_object$set, d$set)
    new_object$map = c(new_object$map, d$map)
  }

  # check consistency: one set name and one map name per Marey map
  stopifnot(length(new_object$set) == length(new_object$data))
  stopifnot(length(new_object$set) == length(new_object$map))

  return(new_object)
}


Expand All @@ -336,6 +350,39 @@ subset_comparative_marey = function(x,



#' Summary of a `comparative_marey_map` object
#'
#' @description
#' Print the number of maps and the mean, across maps, of the linkage map
#' length, chromosome size, number of markers and marker density.
#'
#' @param x a `comparative_marey_map` object to summarize
#' @param ... unused, for compatibility with the `summary` generic
#'
#' @return `NULL`, invisibly. The summary is printed to the console.
#'
#' @method summary comparative_marey_map
#' @export
#'
summary.comparative_marey_map = function(x, ...) {
  # Names of the distinct datasets; "..." only when the list is truncated
  sets = unique(as.character(x$set))
  dataset = head(sets, 3)
  if (length(sets) > 3) {
    dataset = c(dataset, "...")
  }
  n_maps = length(x$set)

  # Largest non-missing value, NA when there is none
  max_or_na = function(v) {
    v = as.numeric(v[!is.na(v)])
    if (length(v) == 0) NA_real_ else max(v)
  }

  # The markers of each map are read from the first element of each entry of `x$data`
  length_linkage_map = vapply(x$data, function(m) max_or_na(m[[1]]$gen), numeric(1))
  length_genome_Mb = vapply(x$data, function(m) max_or_na(m[[1]]$phys), numeric(1))
  # Physical positions are divided by 10^6 to be reported in Mb
  length_genome_Mb = length_genome_Mb / 1000000
  n_markers = vapply(x$data, function(m) as.numeric(length(m[[1]]$gen)), numeric(1))
  density_markers = n_markers / length_genome_Mb

  cat("============== Summary of the comparative marey map ==============\n",
      "Datasets: ", dataset, "\n",
      "Number of maps: ", n_maps, "\n",
      "Mean linkage map length (cM): ", mean(length_linkage_map, na.rm = TRUE), "\n",
      "Mean chromosome size (Mb): ", mean(length_genome_Mb, na.rm = TRUE), "\n",
      "Mean number of markers: ", mean(n_markers, na.rm = TRUE), "\n",
      "Mean marker density (marker/Mb): ", mean(density_markers, na.rm = TRUE), "\n",
      "==================================================================\n")
  invisible(NULL)
}






#' Convert Marey maps in a `comparative_marey_map` objects to `data.frame`
#'
#' @param x a `comparative_marey_map` object
Expand Down
2 changes: 1 addition & 1 deletion R/lorenz.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ lorenz = function(x, return.plot = TRUE) {
y = seq(0, 1, by = 0.01))

p = ggplot(data = out, aes(x = relativePhys, y = relativeGen, fill = as.factor(map), colour = as.factor(set))) +
geom_line() +
geom_line(alpha = 0.3) +
# facet_wrap(~ as.factor(set)) +
geom_line(data = diagonal, aes(x = x, y = y, fill = NA, colour = NA), color = "black") +
xlim(0, 1) +
Expand Down
Loading

0 comments on commit cdc32e3

Please sign in to comment.