Skip to content

Commit

Permalink
Add relative distances in plots
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasBrazier committed Sep 26, 2024
1 parent 9d4e436 commit 71fc6f2
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 52 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: EasyMareyMap
Type: Package
Title: Estimate local recombination rates using the Marey map method
Version: 0.2.0
Version: 0.3.0
Author: Thomas Brazier
Maintainer: Thomas Brazier <[email protected]>
Description: Estimate local recombination rates using the Marey map method. Recombination rates are estimated by interpolating a mathematical function over the Marey map (genetic distances in cM expressed as a function of the genomic position in Mb).
Expand Down
2 changes: 1 addition & 1 deletion R/broken_stick.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ brokenstick = function(x, k = 10, method = "strict", plot = TRUE) {
# Tidy data frame
# brokenstick = brokenstick[!(is.na(df$p1) | is.na(df$p2) | is.na(df$p3) | is.na(df$p4) | is.na(df$p5) | is.na(df$p6) | is.na(df$p7) | is.na(df$p8) | is.na(df$p9) | is.na(df$p10)),]
brokenstick = melt(df)
brokenstick$sample = paste(brokenstick$set, brokenstick$chromosome, sep = "_")
brokenstick$sample = paste(brokenstick$set, brokenstick$name, sep = "_")
colnames(brokenstick)=c("set","chromosome","segment","proportion.length","sample")

# Set a vector of gradient color
Expand Down
86 changes: 74 additions & 12 deletions R/comparative_marey_map.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ compute_stats_marey = function(x,
#'
#' @param x a `comparative_marey_map` object
#' @param group the grouping factor in `ggplot`, either `set`, `map` or `set + map`
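#' @param relative_distance_x logical, whether to plot the X axis as relative genomic positions (physical positions divided by the maximum position within each `set`/`map` combination), to compare genomes of different sizes. Default is `FALSE`.
#' @param relative_distance_y logical, whether to plot the Y axis as relative genetic distances (genetic distances divided by the maximum within each `set`/`map` combination), to compare genetic maps of different lengths. Default is `FALSE`.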
#' @param ... arguments passed to the generic summary function.
#'
#' @import ggplot2
Expand All @@ -164,49 +166,84 @@ compute_stats_marey = function(x,
#' @return a `ggplot2` object of Marey maps
#' @export
#'
plot_comparative_marey = function(x, group = 'set + map', ...) {
plot_comparative_marey = function(x,
group = 'set + map',
relative_distance_x = FALSE,
relative_distance_y = FALSE, ...) {

df = comparative_marey_to_dataframe(x)

scaling = 10^6 # Convert bp to Mb

# Relative distances
if (relative_distance_x) {
df$max_phys = unlist(lapply(1:nrow(df), function(x) {max(df$phys[which(df$set == df$set[x] & df$map == df$map[x])], na.rm = T)}))
df$phys = df$phys / df$max_phys
scaling = 1
lab = labs(x = "Relative genomic position", y = "Genetic distance (cM)")
}
if (relative_distance_y) {
df$max_gen = unlist(lapply(1:nrow(df), function(x) {max(df$gen[which(df$set == df$set[x] & df$map == df$map[x])], na.rm = T)}))
df$gen = df$gen / df$max_gen
lab = labs(x = "Genomic position (Mb)", y = "Relative genetic distance")
}
if (relative_distance_x & relative_distance_y){
lab = labs(x = "Relative genomic position", y = "Relative genetic distance")
}
if (!relative_distance_x & !relative_distance_y){
lab = labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)")
}

# Add the Marey interpolated function
marey = comparative_interpolation_to_dataframe(x)
if (relative_distance_x) {
marey$max_phys = unlist(lapply(1:nrow(marey), function(x) {max(marey$physicalPosition[which(marey$set == marey$set[x] & marey$map == marey$map[x])], na.rm = T)}))
marey$physicalPosition = marey$physicalPosition / marey$max_phys
}
if (relative_distance_y) {
marey$max_gen = unlist(lapply(1:nrow(marey), function(x) {max(marey$upperGeneticPositioncM[which(marey$set == marey$set[x] & marey$map == marey$map[x])], na.rm = T)}))
marey$geneticPositioncM = marey$geneticPositioncM / marey$max_gen
marey$upperGeneticPositioncM = marey$upperGeneticPositioncM / marey$max_gen
marey$lowerGeneticPositioncM = marey$lowerGeneticPositioncM / marey$max_gen
}


if (group == 'set') {
grouping = as.formula(~as.factor(.data$set))
facet = facet_wrap(grouping, scales = "free")
point_rec = geom_point(aes(colour = as.factor(.data$map), fill = as.factor(.data$map)), alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM, group = as.factor(.data$map)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM, group = as.factor(.data$map)),
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM, group = as.factor(.data$map)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM, group = as.factor(.data$map)),
alpha = 0.4)

}
if (group == 'map') {
grouping = as.formula(~ as.factor(.data$map))
facet = facet_wrap(grouping, scales = "free")
point_rec = geom_point(aes(colour = as.factor(.data$set), fill = as.factor(.data$set)), alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM, group = as.factor(.data$set)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM, group = as.factor(.data$set)),
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM, group = as.factor(.data$set)), fill = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM, group = as.factor(.data$set)),
alpha = 0.4)
}
if (group == 'set + map') {
grouping = as.formula(~as.factor(.data$map) + as.factor(.data$set))
facet = facet_grid(grouping, scales = "free")
point_rec = geom_point(alpha = 0.2)
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM), colour = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/10^6, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM),
line_rec = geom_line(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM), colour = "black")
ribbon_rec = geom_ribbon(data = marey, aes(x = .data$physicalPosition/scaling, y = .data$geneticPositioncM, ymin = .data$lowerGeneticPositioncM, ymax = .data$upperGeneticPositioncM),
fill = "darkorange", colour = "darkorange", alpha = 0.3)
}


if (nrow(marey) > 0) {
marey$vld = TRUE

p = ggplot2::ggplot(data = df, aes(x = .data$phys/10^6, y = .data$gen)) +
p = ggplot2::ggplot(data = df, aes(x = .data$phys/scaling, y = .data$gen)) +
point_rec +
line_rec +
ribbon_rec +
facet +
labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)") +
lab +
theme(axis.line = element_line(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
Expand All @@ -219,7 +256,7 @@ plot_comparative_marey = function(x, group = 'set + map', ...) {
axis.text=element_text(colour="black"),
legend.position = "right")
} else {
p = ggplot2::ggplot(data = df, aes(x = .data$phys/10^6, y = .data$gen)) +
p = ggplot2::ggplot(data = df, aes(x = .data$phys/scaling, y = .data$gen)) +
point_rec +
facet +
labs(x = "Genomic position (Mb)", y = "Genetic distance (cM)", colour = "dataset") +
Expand All @@ -245,13 +282,18 @@ plot_comparative_marey = function(x, group = 'set + map', ...) {
#'
#' @param x a `comparative_marey_map` object
#' @param group the grouping factor in `ggplot`, either `set`, `map` or `set + map`
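#' @param relative_distance_x logical, whether to plot the X axis as relative genomic positions (window positions divided by the maximum window end within each `set`/`map` combination), to compare genomes of different sizes. Default is `FALSE`.
#' @param relative_distance_y logical, whether to plot the Y axis as relative recombination rates (rates and their confidence bounds divided by the maximum upper bound within each `set`/`map` combination), to compare maps with different average recombination rates. Default is `FALSE`.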
#'
#' @return a `ggplot2` object of Marey maps
#' @export
#'
#' @import ggplot2
#'
plot_comparative_recmap = function(x, group = 'set + map') {
plot_comparative_recmap = function(x,
group = 'set + map',
relative_distance_x = FALSE,
relative_distance_y = FALSE) {

x = comparative_recmap_to_dataframe(x)

Expand All @@ -261,6 +303,26 @@ plot_comparative_recmap = function(x, group = 'set + map') {
x$upperRecRate = x$upperRecRate * 10^6
x$lowerRecRate = x$lowerRecRate * 10^6

# Relative distances
if (relative_distance_x) {
x$max_phys = unlist(lapply(1:nrow(x), function(y) {max(x$end[which(x$set == x$set[y] & x$map == x$map[y])], na.rm = T)}))
x$point = x$point / x$max_phys
lab = labs(x = "Relative genomic position", y = "Recombination rate (cM/Mb)")
}
if (relative_distance_y) {
x$max_rec = unlist(lapply(1:nrow(x), function(y) {max(x$upperRecRate[which(x$set == x$set[y] & x$map == x$map[y])], na.rm = T)}))
x$recRate = x$recRate / x$max_rec
x$upperRecRate = x$upperRecRate / x$max_rec
x$lowerRecRate = x$lowerRecRate / x$max_rec
lab = labs(x = "Genomic position (Mb)", y = "Relative recombination rate")
}
if (relative_distance_x & relative_distance_y){
lab = labs(x = "Relative genomic position", y = "Recombination rate")
}
if (!relative_distance_x & !relative_distance_y){
lab = labs(x = "Genomic position (Mb)", y = "Recombination rate (cM/Mb)")
}

if (group == 'set') {
grouping = as.formula(~as.factor(.data$set))
facet = facet_wrap(grouping, scales = "free")
Expand All @@ -286,7 +348,7 @@ plot_comparative_recmap = function(x, group = 'set + map') {
# facet_grid(~as.factor(set)) +
# facet_wrap(grouping, scales = "free") +
facet +
labs(x = "Genomic position (Mb)", y = "Recombination rate (cM/Mb)") +
lab +
theme(axis.line = element_line(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
Expand Down
12 changes: 11 additions & 1 deletion man/plot_comparative_marey.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion man/plot_comparative_recmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 6 additions & 36 deletions vignettes/RecombinationMap.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -640,52 +640,22 @@ lorenz(compar)
The broken stick model, as described in Brazier and Glémin (2022), is also an appropriate representation for comparing the distribution of recombination rates along the genome across a large set of Marey maps.


```{r brokenStick, message=FALSE, warning=FALSE, echo=TRUE, fig.height = 4, fig.width = 8, fig.align="center", fig.cap = "The broken stick model applied to *Arabidopsis thaliana* and maize."}
```{r brokenStick, message=FALSE, warning=FALSE, echo=TRUE, fig.height = 6, fig.width = 8, fig.align="center", fig.cap = "The broken stick model applied to *Arabidopsis thaliana* and maize."}
brokenstick(compar)
```


Moreover, when comparing species, it can be useful to plot relative genomic positions and relative recombination rates instead of absolute ones.


```{r relativeCompare, message=FALSE, warning=FALSE, echo=TRUE, eval=FALSE, fig.height = 4, fig.width = 8, fig.align="center", fig.cap = "Comparative recombination landscapes between Arabidopsis chromosome 1 and maize chromosome 1. Genomic distances are relative distances scaled by the total chromosome size."}
relative_arabido = res$recMap
relative_maize = maize$recMap
relative_arabido$relative_start = relative_arabido$start / res$chromosomeLength
relative_arabido$relative_end = relative_arabido$end / res$chromosomeLength
relative_arabido$relative_recRate = relative_arabido$recRate / max(relative_arabido$recRate, na.rm = TRUE)
relative_arabido$relative_lowerRecRate = relative_arabido$lowerRecRate / max(relative_arabido$lowerRecRate, na.rm = TRUE)
relative_arabido$relative_upperRecRate = relative_arabido$upperRecRate / max(relative_arabido$upperRecRate, na.rm = TRUE)
relative_maize$relative_start = relative_maize$start / maize$chromosomeLength
relative_maize$relative_end = relative_maize$end / maize$chromosomeLength
relative_maize$relative_recRate = relative_maize$recRate / max(relative_maize$recRate, na.rm = TRUE)
relative_maize$relative_lowerRecRate = relative_maize$lowerRecRate / max(relative_maize$lowerRecRate, na.rm = TRUE)
relative_maize$relative_upperRecRate = relative_maize$upperRecRate / max(relative_maize$upperRecRate, na.rm = TRUE)
df = rbind(relative_arabido,
relative_maize)
df$point = (df$relative_start + df$relative_end)/2
ggplot(data = df, aes(x = point, y = recRate)) +
geom_line(aes(group = set, color = set)) +
geom_ribbon(aes(x = point, ymin = lowerRecRate, ymax = upperRecRate, fill = set), alpha = 0.2) +
labs(x = "Relative genomic position", y = "Recombination rate (cM/Mb)")
ggplot(data = df, aes(x = point, y = relative_recRate)) +
geom_line(aes(group = set, color = set)) +
geom_ribbon(aes(x = point, ymin = relative_lowerRecRate, ymax = relative_upperRecRate, fill = set), alpha = 0.2) +
labs(x = "Relative genomic position", y = "Relative recombination rate")
```{r relativeCompareMarey, message=FALSE, warning=FALSE, echo=TRUE, eval=FALSE, fig.height = 4, fig.width = 8, fig.align="center", fig.cap = "Comparative Marey maps between *Arabidopsis* and maize. Genomic distances are relative distances scaled by the total chromosome size. Genetic distances are scaled by the maximum genetic distance."}
plot_comparative_marey(compar, group = 'set', relative_distance_x = T, relative_distance_y = T)
```



```{r relativeCompareRec, message=FALSE, warning=FALSE, echo=TRUE, eval=FALSE, fig.height = 4, fig.width = 8, fig.align="center", fig.cap = "Comparative recombination landscapes between Arabidopsis chromosome 1 and maize chromosome 1. Genomic distances are relative distances scaled by the total chromosome size. Recombination rates are scaled by the max recombination rate."}
plot_comparative_recmap(compar, group = 'set', relative_distance_x = T, relative_distance_y = T)
```



Expand Down

0 comments on commit 71fc6f2

Please sign in to comment.