Skip to content

Commit

Permalink
Merge pull request #154 from BvAdrichem/Improved-documentation
Browse files Browse the repository at this point in the history
Improved documentation
  • Loading branch information
thackl authored Jun 16, 2023
2 parents d196250 + a630ed8 commit f90efab
Show file tree
Hide file tree
Showing 21 changed files with 687 additions and 67 deletions.
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ export(genes)
export(geom_bin_label)
export(geom_coverage)
export(geom_feat)
export(geom_feat_label)
export(geom_feat_note)
export(geom_feat_tag)
export(geom_feat_text)
Expand All @@ -123,7 +122,6 @@ export(geom_gene_note)
export(geom_gene_tag)
export(geom_gene_text)
export(geom_link)
export(geom_link_label)
export(geom_seq)
export(geom_seq_label)
export(geom_variant)
Expand Down
125 changes: 118 additions & 7 deletions R/geom.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,72 @@
#' draw seqs
#'
#' @param data seq_layout
#' @description
#' `geom_seq()` draws contigs for each sequence/chromosome supplied in the `seqs` track.
#' Several sequences belonging to the same bin will be plotted next to one another.
#'
#' If `seqs` track is empty, sequences are inferred from the `feats` or `links` track respectively.
#'
#' (*The length of sequences can be deduced from the axis and is typically indicated in base pairs.*)
#'
#' @details
#' `geom_seq()` uses `ggplot2::geom_segment()` under the hood. As a result,
#' different aesthetics such as *alpha*, *linewidth*, *color*, etc.
#' can be called upon to modify the visualization of the data.
#'
#' Note: The `seqs` track indicates the length/region of the sequence/contigs that will be plotted.
#' *Feats* or *links* data that falls outside of this region is ignored!
#' @returns Sequence data drawn as contigs is added as a layer/component to the plot.
#'
#' @param data seq_layout: Uses the first data frame stored in the `seqs` track, by default.
#' @param arrow set to non-NULL to generate default arrows
#' @inheritParams ggplot2::geom_segment
#' @importFrom ggplot2 geom_segment
#' @export
#' @examples
#' # Simple example of geom_seq
#' gggenomes(seqs = emale_seqs) +
#' geom_seq() + #creates contigs
#' geom_bin_label() #labels bins/sequences
#'
#' # No sequence information supplied, will inform/warn that seqs are inferred from feats.
#' gggenomes(genes = emale_genes) +
#' geom_seq() + #creates contigs
#' geom_gene() + #draws genes on top of contigs
#' geom_bin_label() #labels bins/sequences
#'
#' # Sequence data controls what sequences and/or regions will be plotted. Here one sequence is filtered out,
#' # Notice that the genes of the removed sequence are silently ignored and thus not plotted.
#' missing_seqs <- emale_seqs |> filter(seq_id != "Cflag_017B") |> arrange(seq_id) #`arrange` to restore alphabetical order.
#'
#' gggenomes(seqs = missing_seqs, genes = emale_genes) +
#' geom_seq() + #creates contigs
#' geom_gene() + #draws genes on top of contigs
#' geom_bin_label() #labels bins/sequences
#'
#' # Several sequences belonging to the same *bin* are plotted next to one another
#' seqs <- tibble(
#' bin_id = c("A", "A", "A", "B", "B", "B", "B", "C", "C"),
#' seq_id = c("A1", "A2", "A3", "B1", "B2", "B3", "B4", "C1", "C2"),
#' start = c(0, 100, 200, 0, 50, 150, 250, 0, 400),
#' end = c(100, 200, 400, 50, 100, 250, 300, 300, 500),
#' length = c(100, 100, 200, 50, 50, 100, 50, 300, 100))
#'
#' gggenomes(seqs = seqs) +
#' geom_seq() +
#' geom_bin_label() + #label bins
#' geom_seq_label() #label individual sequences
#'
#' # Wrap bins up to a certain amount.
#' gggenomes(seqs = seqs, wrap=300) +
#' geom_seq() +
#' geom_bin_label() + #label bins
#' geom_seq_label() #label individual sequences
#'
#' # Change the space between sequences belonging to one bin
#' gggenomes(seqs = seqs, spacing = 100) +
#' geom_seq() +
#' geom_bin_label() + #label bins
#' geom_seq_label() #label individual sequences
geom_seq <- function(mapping = NULL, data = seqs(),
arrow = NULL, ...){

Expand All @@ -18,8 +80,47 @@ geom_seq <- function(mapping = NULL, data = seqs(),
geom_segment(mapping = mapping, data = data, arrow = arrow, ...)
}

#' draw feat labels
#' Draw seq labels
#' @description
#' This function will put labels at each individual sequence.
#' By default it will plot the `seq_id` as label, but users are able to change this manually.
#'
#' Position of the label/text can be adjusted with the different arguments (e.g. `vjust`, `hjust`, `angle`, etc.)
#'
#' @details
#' This labeling function uses [ggplot2::geom_text()] under the hood.
#' Any changes to the aesthetics of the text can be performed in a ggplot2 manner.
#'
#'
#' @inheritParams geom_gene_text
#' @examples
#' # example data
#' seqs <- tibble(
#' bin_id = c("A", "A", "A", "B", "B", "B", "B", "C", "C"),
#' seq_id = c("A1", "A2", "A3", "B1", "B2", "B3", "B4", "C1", "C2"),
#' start = c(0, 100, 200, 0, 50, 150, 250, 0, 400),
#' end = c(100, 200, 400, 50, 100, 250, 300, 300, 500),
#' length = c(100, 100, 200, 50, 50, 100, 50, 300, 100))
#'
#' # example plot using geom_seq_label
#' gggenomes(seqs = seqs) +
#' geom_seq() +
#' geom_seq_label()
#'
#' # changing default label to `length` column
#' gggenomes(seqs = seqs) +
#' geom_seq() +
#' geom_seq_label(aes(label=length))
#'
#' # with horizontal adjustment
#' gggenomes(seqs = seqs) +
#' geom_seq() +
#' geom_seq_label(hjust = -5)
#'
#' # with wrapping at 300
#' gggenomes(seqs=seqs, wrap = 300) +
#' geom_seq() +
#' geom_seq_label()
#' @export
geom_seq_label <- function(mapping = NULL, data = seqs(),
hjust = 0, vjust = 1, nudge_y = -0.15, size = 2.5, ...){
Expand Down Expand Up @@ -91,8 +192,20 @@ geom_bin_label <- function(mapping = NULL, data=bins(), hjust = 1, size = 3,
}
r
}
#' draw feat labels
#' Draw feat/link labels
#'
#' @description
#' These `geom_..._label()` functions enable the user to plot labels/text at individual features and/or links.
#' Users have to indicate how to label the features/links by specifying `label = ...` or `aes(label = ...)`.
#'
#' Position of labels can be adjusted with arguments such as `vjust`, `hjust`, `angle`, `nudge_y`, etc.
#' Also check out [gggenomes::geom_bin_label()], [gggenomes::geom_seq_label()] or [gggenomes::geom_feat_text()] given their resemblance.
#'
#' @details
#' These labeling functions use [ggplot2::geom_text()] under the hood.
#' Any changes to the aesthetics of the text can be performed in a ggplot2 manner.
#'
#' @inheritParams geom_gene_text
#' @export
geom_gene_label <- function(mapping = NULL, data = genes(),
angle = 45,hjust = 0, nudge_y = 0.1, size = 6, ...){
Expand All @@ -103,7 +216,7 @@ geom_gene_label <- function(mapping = NULL, data = genes(),
geom_text(mapping = mapping, data = data, angle = angle, hjust = hjust,
nudge_y = nudge_y, size = size, ...)
}
#' @export
#' @rdname geom_gene_label
geom_feat_label <- function(mapping = NULL, data = feats(),
angle = 45,hjust = 0, nudge_y = 0.1, size = 6, ...){

Expand All @@ -114,9 +227,7 @@ geom_feat_label <- function(mapping = NULL, data = feats(),
nudge_y = nudge_y, size = size, ...)
}

#' draw link labels
#'
#' @export
#' @rdname geom_gene_label
geom_link_label <- function(mapping = NULL, data = links(),
angle = 0,hjust = 0.5, vjust = 0.5, size = 4, repel=FALSE, ...){

Expand Down
19 changes: 13 additions & 6 deletions R/geom_feat.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
#' Draw feats
#'
#' Geom_feat allows the user to draw (additional) features to the plot/graph.
#' For example, specific regions within a sequence can be highlighted by color, size, etc..
#' The function uses data from the feats' track.
#'@description
#' `geom_feat()` allows the user to draw (additional) features to the plot/graph.
#' For example, specific regions within a sequence (e.g. transposons, introns, mutation hotspots)
#' can be highlighted by color, size, etc.
#'
#' geom_feat uses `ggplot2::geom_segment` under the hood. As a result, different aesthetics such as `alpha`, `linewidth`, `color`, etc.
#' can be called upon to modify the data visualization.
#' @details
#' `geom_feat` uses `ggplot2::geom_segment` under the hood. As a result,
#' different aesthetics such as *alpha*, *linewidth*, *color*, etc.
#' can be called upon to modify the visualization of the data.
#'
#' @param data feat_layout
#' *By default, the function uses the first feature track.*
#'
#' @param data feat_layout: Uses first data frame stored in the `feats` track by default.
#' @param position describes how the position of different plotted features is adjusted. By default it uses `"pile"`,
#' but different ggplot2 position adjustments, such as `"identity"` or `"jitter"` can be used as well.
#' @inheritParams ggplot2::geom_segment
#' @importFrom ggplot2 geom_segment
#' @export
#' @examples
#' # Plotting data from the feats' track with adjusted linewidth and color
Expand Down
63 changes: 62 additions & 1 deletion R/geom_gene_text.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,69 @@
#' Add text to genes, features, etc.
#'
#'
#' @description
#' The functions below are useful for labeling features/genes in plots.
#' Users have to call on `aes(label = ...)` or `(label = ...)` to define the label's text.
#' Based on the function, the label will be placed at a specific location:
#' - `geom_..._text()` will plot **text in the middle of the feature**.
#' - `geom_..._tag()` will plot **text on top of the feature, with a 45 degree angle**.
#' - `geom_..._note()` will plot **text under the feature at the left side**.
#'
#' *The `...` can be either replaced with `feat` or `gene` depending on which*
#' *track the user wants to label.*
#'
#'
#' With arguments such as `hjust`, `vjust`, `angle`, and `nudge_y`, the user
#' can also manually change the position of the text.
#'
#' @details
#' These labeling functions use `ggplot2::geom_text()` under the hood.
#' Any changes to the aesthetics of the text can be performed in a ggplot2 manner.
#' @param hjust Moves the text horizontally
#' @param vjust Moves the text vertically
#' @param nudge_y Moves the text vertically an entire contig/sequence.
#' (e.g. `nudge_y = 1` places the text to the contig above)
#' @param angle Defines the angle at which the text will be placed.
#'
#' @inheritParams ggplot2::geom_text
#' @export
#' @examples
#' # example data
#' genes <- tibble(
#' seq_id = c("A", "A", "A", "B", "B", "C"),
#' start = c(20, 40, 80, 30, 10, 60),
#' end = c(30, 70, 85, 40, 15, 90),
#' feat_id = c("A1", "A2", "A3", "B1", "B2", "C1"),
#' type = c("CDS", "CDS", "CDS", "CDS", "CDS", "CDS"),
#' name = c("geneA", "geneB", "geneC", "geneA", "geneC", "geneB"))
#'
#' seqs <- tibble(
#' seq_id = c("A", "B", "C"),
#' start = c(0,0,0),
#' end = c(100, 100, 100),
#' length = c(100, 100, 100))
#'
#' # basic plot creation
#' plot <- gggenomes(seqs=seqs, genes=genes) +
#' geom_bin_label() +
#' geom_gene()
#'
#' # geom_..._text
#' plot + geom_gene_text(aes(label=name))
#'
#' # geom_..._tag
#' plot + geom_gene_tag(aes(label=name))
#'
#' # geom_..._note
#' plot + geom_gene_note(aes(label=name))
#'
#' # with horizontal adjustment (`hjust`), vertical adjustment (`vjust`)
#' plot + geom_gene_text(aes(label=name), vjust = -2, hjust = 1)
#'
#' # using `nudge_y` and `angle` adjustment
#' plot + geom_gene_text(aes(label=name), nudge_y= 1, angle = 10)
#'
#' # labeling with manual input
#' plot + geom_gene_text(label = c("This", "is", "an", "example", "test", "test"))
geom_feat_text <- function(mapping = NULL, data = feats(), stat="identity", position="identity",
..., parse = FALSE, check_overlap = FALSE, na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE){
Expand Down
19 changes: 15 additions & 4 deletions R/geom_link.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
#' Draw links
#'
#' Note that by default only links between adjacent genomes are computed and
#' shown. Set `gggenomes(..., adjacent_only=TRUE)` to compute and show all links
#' between all genomes.
#'
#' @description
#' `geom_link()` allows the user to link loci/regions between two sequences/genomes with one another.
#'
#' *Note that by default only links between adjacent sequences are computed and shown.*
#' *To compute and show all links between all genomes, set `gggenomes(..., adjacent_only=FALSE)`.*
#'
#' @details
#' The function calls upon the data stored within the `links` track. Data frames added to
#' this track have `seq_id` and `seq_id2` as required variables. Optional and recommended variables include
#' `start`, `start2`, `end`, `end2`, `bin_id`, `bin_id2` and `strand`.
#'
#' *Keep in mind: when start/end is not specified, links will be created between the entire contigs of `seq_id` and `seq_id2`*
#' @param offset distance between seq center and link start. Use two values
#' `c(<offset_top>, <offset_bottom>)` for different top and bottom offsets
#' @export
#' @examples
#' p <- gggenomes(seqs=emale_seqs, links = emale_ava) + geom_seq()
#' p + geom_link()
#'
#' # change offset from seqs
#' p + geom_link(aes(fill=de, color=de), offset = 0.05) +
#' scale_fill_viridis_b() + scale_colour_viridis_b()
#'
#' # combine with flip
#' p %>% flip(3,4,5) + geom_link()
#' # compute & show all links among all genomes (not recommended for large dataset)
Expand Down
24 changes: 24 additions & 0 deletions R/gggenomes.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
#' Plot genomes, features and synteny maps
#'
#' @description
#' `gggenomes()` initializes a gggenomes-flavored ggplot object.
#' It is used to declare the input data for gggenomes' track system.
#'
#' (*For more details on the track system, see the gggenomes vignette or the Details/Arguments section.*)
#'
#'
#' @details
#' `gggenomes::gggenomes()` resembles the functionality of `ggplot2::ggplot()`.
#' It is used to construct the initial plot object, and is often followed by "+" to add components to the plot (*e.g. "+ geom_gene()"*).
#'
#' A big difference between the two is that gggenomes has a multi-track setup (*`'seqs'`, `'feats'`, `'genes'` and `'links'`*).
#' `gggenomes()` pre-computes a layout and adds coordinates (`y,x,xend`) to each data frame prior to the actual plot construction.
#' This has some implications for the usage of gggenomes:
#' - **Data frames for tracks have required variables.** These predefined variables are used during import
#' to compute x/y coordinates (*see arguments*).
#' - **gggenomes' geoms can often be used without explicit `aes()` mappings.** This works because
#' we always know the names of the plot variables ahead of time: they originate from the pre-computed layout,
#' and we can use that information to set sensible default aesthetic mappings for most cases.
#'
#' @param genes,feats A data.frame, a list of data.frames, or a character vector
#' with paths to files containing gene data. Each item is added as feature
Expand Down Expand Up @@ -35,6 +55,10 @@
#' By default subregions of sequences from the first to the last feat/link
#' are generated. Set `infer_start` to 0 to show all sequences from their
#' true beginning.
#' @param adjacent_only Indicates whether links should be created between adjacent sequences/chromosomes only.
#' By default it is set to `adjacent_only = TRUE`. If `FALSE`, links will be created between all sequences.
#'
#' (*not recommended for large data sets*)
#' @inheritParams layout_seqs
#' @param theme choose a gggenomes default theme, NULL to omit.
#' @param .layout a pre-computed layout from [layout_genomes()]. Useful for
Expand Down
17 changes: 17 additions & 0 deletions R/shift.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,23 @@
#' @param bins to shift left/right, select-like expression
#' @param by shift each bin by this many bases. Single value or vector of the
#' same length as bins.
#' @examples
#' # Basic example plot
#' gggenomes(seqs = emale_seqs) +
#' geom_seq() +
#' geom_bin_label()
#'
#' # All bins have been shifted by 10000 base pairs
#' gggenomes(seqs = emale_seqs) |>
#' shift(bins = everything(), by = 10000) +
#' geom_seq() +
#' geom_bin_label()
#'
#' # Only the RCC970_016B bin has been shifted by 5000 base pairs
#' gggenomes(seqs = emale_seqs) |>
#' shift(bins = RCC970_016B, by = 5000, center = FALSE) +
#' geom_seq() +
#' geom_bin_label()
#' @export
shift <- function(x, bins=everything(), by=0, center=FALSE){
# split by bin_id and select bins
Expand Down
15 changes: 13 additions & 2 deletions R/tracks.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,19 @@ track_ids.gggenomes_layout <- function(x, track_type=c("seqs", "feats", "links")

#' Basic info on tracks in a gggenomes object
#'
#' Call on a gggenomes or gggenomes_layout object to get a short tibble
#' with ids, types, index and size of loaded tracks.
#' Use `track_info()` to call on a gggenomes or gggenomes_layout object to return a short tibble
#' with ids, types, index and size of the loaded tracks.
#'
#' @details The short tibble contains basic information on the tracks within the entered gggenomes object.
#' - **id** : Shows original name of inputted data frame (only when more than one data frame is present in a track).
#' - **type** : The track in which the data frame is present.
#' - **i** (index) : The chronological order of data frames in a specific track.
#' - **n** (size) : Amount of objects **plotted** from the data frame.
#' (**not** the amount of objects *in* the inputted data frame)
#' @examples
#' gggenomes(seqs = emale_seqs, feats= list(emale_genes, emale_tirs, emale_ngaros), links = emale_ava) |>
#' track_info()
#' @return Short tibble with ids, types, index and size of loaded tracks.
#' @export
#' @inheritParams track_ids
track_info <- function(x, ...){
Expand Down
Loading

0 comments on commit f90efab

Please sign in to comment.