From 14d38560187bee6f41bfccc3219e007118b344db Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 25 Feb 2022 07:47:03 +0000 Subject: [PATCH] bridgeRegions maps unaligned regions flanked by colinear regions. Fixes #13 --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 2 ++ R/bridgeRegions.R | 53 ++++++++++++++++++++++++++++++ man/GOC.Rd | 1 + man/bridgeRegions.Rd | 62 +++++++++++++++++++++++++++++++++++ man/chain_contigs.Rd | 1 + man/coalesce_contigs.Rd | 2 ++ man/dist2next.Rd | 1 + man/filterColinearRegions.Rd | 1 + man/flagColinearAlignments.Rd | 1 + man/flagPairs.Rd | 1 + man/forceSeqLengths.Rd | 1 + man/guessSeqLengths.Rd | 1 + man/mergeSeqLevels.Rd | 1 + man/reverse.Rd | 1 + man/swap.Rd | 1 + 17 files changed, 132 insertions(+) create mode 100644 R/bridgeRegions.R create mode 100644 man/bridgeRegions.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 8a8217c..ad2eac5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,6 +52,7 @@ Collate: 'bp_coverage.R' 'bp_heatmap.R' 'bp_pair_analysis.R' + 'bridgeRegions.R' 'chain_contigs.R' 'cleanGaps.R' 'zipWithNext.R' diff --git a/NAMESPACE b/NAMESPACE index 90a6c06..b9c0e89 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(as) export(bp_coverage) export(bp_heatmap) export(bp_pair_analysis) +export(bridgeRegions) export(chain_contigs) export(cleanGaps) export(coalesce_contigs) diff --git a/NEWS.md b/NEWS.md index b3e5fc9..a95119d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,8 @@ will call `getSeq()` automatically. * New "mid" option to the `direction` argument of `get_bps()`, to output breakpoints at mid-distance between ranges. + * New `bridgeRegions()` function that maps unaligned regions of the _target_ to + the _query_ genome when they are flanked by colinear regions. 
## Backwards-incompatible changes diff --git a/R/bridgeRegions.R b/R/bridgeRegions.R new file mode 100644 index 0000000..681a11c --- /dev/null +++ b/R/bridgeRegions.R @@ -0,0 +1,53 @@ +#' Bridge regions +#' +#' Maps unaligned regions of the _target_ to the _query_ genome when they are +#' flanked by colinear regions. +#' +#' @note Because some aligned regions can be directly adjacent (no gaps), the +#' returned `GBreaks` object may contain ranges of width zero, where the _start_ +#' coordinate is 1 nucleotide higher than the _end_ coordinate. +#' +#' @references Bridge regions have also been called \dQuote{simultaneous gaps} +#' in the comparison of the mouse and human genomes by Kent WJ, Baertsch R, +#' Hinrichs A, Miller W, Haussler D. (_Evolution's cauldron: duplication, +#' deletion, and rearrangement in the mouse and human genomes._ Proc Natl Acad +#' Sci U S A. 2003;100(20):11484-11489. doi:10.1073/pnas.1932072100) +#' +#' @param gb A [`GBreaks`] object. +#' +#' @return Returns a new `GBreaks` object of shorter length. +#' +#' @family Colinearity functions +#' @family modifier functions +#' +#' @author Charles Plessy +#' +#' @examples +#' exampleColinear5 +#' bridgeRegions(exampleColinear5) +#' +#' # Note the zero-width ranges when aligned regions are directly adjacent. +#' exampleColinear3 +#' bridgeRegions(exampleColinear3) +#' +#' @export + +bridgeRegions <- function(gb) { + # Collect colinear regions and discard the rest + colinearRegions <- filterColinearRegions(flagColinearAlignments(gb), rename = FALSE) + # Turn the runs of [(TRUE)n, FALSE]n into indices identifying each colinear region + idx <- c(0, head(cumsum(!colinearRegions$colinear), -1)) + # Split into a GRangesList + gbl <- split(colinearRegions, idx) + # Collect gap ranges in the target and query genomes of each colinear region. + br <- endoapply(gbl, \(gb) { + # Check strand. NB: `|>` binds tighter than `-`, hence the parentheses below. + onMinus <- all(strand(gb) == "-") + # Need to subtract 1 temporarily because some ranges are adjacent (no gap).
+ GBreaks(target = cleanGaps(gb - 1) -1, + query = (cleanGaps(gb$query -1) -1) |> sort(decreasing = onMinus)) + }) |> unlist() + # Remove names and return + names(br) <- NULL + br +} diff --git a/man/GOC.Rd b/man/GOC.Rd index 759f73b..65d2bbf 100644 --- a/man/GOC.Rd +++ b/man/GOC.Rd @@ -49,6 +49,7 @@ loss of gene order in bacteria.} \emph{Trends in genetics : TIG} vol. 19,11 } \seealso{ Other Colinearity functions: +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{coalesce_contigs}()}, \code{\link{dist2next}()}, diff --git a/man/bridgeRegions.Rd b/man/bridgeRegions.Rd new file mode 100644 index 0000000..3295ff4 --- /dev/null +++ b/man/bridgeRegions.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bridgeRegions.R +\name{bridgeRegions} +\alias{bridgeRegions} +\title{Bridge regions} +\usage{ +bridgeRegions(gb) +} +\arguments{ +\item{gb}{A \code{\link{GBreaks}} object.} +} +\value{ +Returns a new \code{GBreaks} object of shorter length. +} +\description{ +Maps unaligned regions of the \emph{target} to the \emph{query} genome when they are +flanked by colinear regions. +} +\note{ +Because some aligned regions can be directly adjacent (no gaps), the +returned \code{GBreaks} object may contain ranges of width zero, where the \emph{start} +coordinate is 1 nucleotide higher than the \emph{end} coordinate. +} +\examples{ +exampleColinear5 +bridgeRegions(exampleColinear5) + +# Note the zero-width ranges when aligned regions are directly adjacent. +exampleColinear3 +bridgeRegions(exampleColinear3) + +} +\references{ +Bridge regions have also been called \dQuote{simultaneous gaps} +in the comparison of the mouse and human genomes by Kent WJ, Baertsch R, +Hinrichs A, Miller W, Haussler D. (\emph{Evolution's cauldron: duplication, +deletion, and rearrangement in the mouse and human genomes.} Proc Natl Acad +Sci U S A. 2003;100(20):11484-11489.
doi:10.1073/pnas.1932072100) +} +\seealso{ +Other Colinearity functions: +\code{\link{GOC}()}, +\code{\link{chain_contigs}()}, +\code{\link{coalesce_contigs}()}, +\code{\link{dist2next}()}, +\code{\link{filterColinearRegions}()}, +\code{\link{flagColinearAlignments}()}, +\code{\link{flagPairs}()} + +Other modifier functions: +\code{\link{coalesce_contigs}()}, +\code{\link{forceSeqLengths}()}, +\code{\link{guessSeqLengths}()}, +\code{\link{mergeSeqLevels}()}, +\code{\link{reverse}()}, +\code{\link{swap}()} +} +\author{ +Charles Plessy +} +\concept{Colinearity functions} +\concept{modifier functions} diff --git a/man/chain_contigs.Rd b/man/chain_contigs.Rd index 4860bf4..741e144 100644 --- a/man/chain_contigs.Rd +++ b/man/chain_contigs.Rd @@ -27,6 +27,7 @@ chain_contigs(exampleInversion) \seealso{ Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{dist2next}()}, \code{\link{filterColinearRegions}()}, diff --git a/man/coalesce_contigs.Rd b/man/coalesce_contigs.Rd index f4514bf..4700890 100644 --- a/man/coalesce_contigs.Rd +++ b/man/coalesce_contigs.Rd @@ -62,6 +62,7 @@ coalesce_contigs(gb4) \seealso{ Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{dist2next}()}, \code{\link{filterColinearRegions}()}, @@ -69,6 +70,7 @@ Other Colinearity functions: \code{\link{flagPairs}()} Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{forceSeqLengths}()}, \code{\link{guessSeqLengths}()}, \code{\link{mergeSeqLevels}()}, diff --git a/man/dist2next.Rd b/man/dist2next.Rd index 4a1771f..f0ec8b9 100644 --- a/man/dist2next.Rd +++ b/man/dist2next.Rd @@ -45,6 +45,7 @@ dist2next(exampleInversion, 2) \seealso{ Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{coalesce_contigs}()}, \code{\link{filterColinearRegions}()}, diff --git 
a/man/filterColinearRegions.Rd b/man/filterColinearRegions.Rd index 93f623f..f3c9cd4 100644 --- a/man/filterColinearRegions.Rd +++ b/man/filterColinearRegions.Rd @@ -27,6 +27,7 @@ filterColinearRegions(flagColinearAlignments(exampleColinear)) \seealso{ Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{coalesce_contigs}()}, \code{\link{dist2next}()}, diff --git a/man/flagColinearAlignments.Rd b/man/flagColinearAlignments.Rd index 55bdaa5..4fe1440 100644 --- a/man/flagColinearAlignments.Rd +++ b/man/flagColinearAlignments.Rd @@ -90,6 +90,7 @@ Other Flagging functions: Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{coalesce_contigs}()}, \code{\link{dist2next}()}, diff --git a/man/flagPairs.Rd b/man/flagPairs.Rd index 7051917..0b871c5 100644 --- a/man/flagPairs.Rd +++ b/man/flagPairs.Rd @@ -64,6 +64,7 @@ Other Inversion functions: Other Colinearity functions: \code{\link{GOC}()}, +\code{\link{bridgeRegions}()}, \code{\link{chain_contigs}()}, \code{\link{coalesce_contigs}()}, \code{\link{dist2next}()}, diff --git a/man/forceSeqLengths.Rd b/man/forceSeqLengths.Rd index 56ff52b..ae7fe41 100644 --- a/man/forceSeqLengths.Rd +++ b/man/forceSeqLengths.Rd @@ -44,6 +44,7 @@ forceSeqLengths(granges(gb)) |> seqlengths() } \seealso{ Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{guessSeqLengths}()}, \code{\link{mergeSeqLevels}()}, diff --git a/man/guessSeqLengths.Rd b/man/guessSeqLengths.Rd index 4e32dd6..a5a2c03 100644 --- a/man/guessSeqLengths.Rd +++ b/man/guessSeqLengths.Rd @@ -29,6 +29,7 @@ guessSeqLengths(gb2$query) } \seealso{ Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{forceSeqLengths}()}, \code{\link{mergeSeqLevels}()}, diff --git a/man/mergeSeqLevels.Rd b/man/mergeSeqLevels.Rd index b6e2474..16718b0 100644 
--- a/man/mergeSeqLevels.Rd +++ b/man/mergeSeqLevels.Rd @@ -45,6 +45,7 @@ mergeSeqLevels(gb, seqlevelsInUse(gb), "AllMerged") } \seealso{ Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{forceSeqLengths}()}, \code{\link{guessSeqLengths}()}, diff --git a/man/reverse.Rd b/man/reverse.Rd index 454132b..f67bed3 100644 --- a/man/reverse.Rd +++ b/man/reverse.Rd @@ -36,6 +36,7 @@ reverse(exampleInsertion, query = TRUE) See also the \code{\link[IRanges:reverse-methods]{IRanges::reverse}} function. Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{forceSeqLengths}()}, \code{\link{guessSeqLengths}()}, diff --git a/man/swap.Rd b/man/swap.Rd index 1a84b09..4c3eab6 100644 --- a/man/swap.Rd +++ b/man/swap.Rd @@ -24,6 +24,7 @@ swap(exampleColinear3) } \seealso{ Other modifier functions: +\code{\link{bridgeRegions}()}, \code{\link{coalesce_contigs}()}, \code{\link{forceSeqLengths}()}, \code{\link{guessSeqLengths}()},