From 765697ad37fe6570c848aa86ebf8307056f5e87d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Dec 2021 02:46:24 +0000 Subject: [PATCH] first commit --- .Rbuildignore | 10 + .gitignore | 4 + CODE_OF_CONDUCT.md | 126 ++++++++++++ DESCRIPTION | 39 ++++ LICENSE | 2 + LICENSE.md | 21 ++ NAMESPACE | 12 ++ R/amino_acid_index.R | 6 + R/amino_acid_pairs.R | 60 ++++++ R/amino_acids.R | 16 ++ R/amino_acids_properties.R | 13 ++ R/grantham-package.R | 7 + R/grantham_distance.R | 281 ++++++++++++++++++++++++++ R/grantham_distances_matrix.R | 13 ++ R/ij2k.R | 12 ++ R/is_amino_acid.R | 9 + R/sltm_k.R | 20 ++ R/sysdata.rda | Bin 0 -> 1368 bytes R/utils-pipe.R | 14 ++ README.Rmd | 133 ++++++++++++ README.md | 228 +++++++++++++++++++++ data-raw/amino_acids_properties.csv | 21 ++ data-raw/amino_acids_properties.ods | Bin 0 -> 14096 bytes data-raw/data.R | 66 ++++++ data-raw/grantham_distance_matrix.csv | 21 ++ data-raw/grantham_distance_matrix.ods | Bin 0 -> 23335 bytes data/amino_acids_properties.rda | Bin 0 -> 507 bytes data/grantham_distances_matrix.rda | Bin 0 -> 853 bytes grantham.Rproj | 21 ++ man/amino_acid_pairs.Rd | 45 +++++ man/amino_acids.Rd | 19 ++ man/amino_acids_properties.Rd | 25 +++ man/grantham-package.Rd | 26 +++ man/grantham_distance.Rd | 88 ++++++++ man/grantham_distance_exact.Rd | 63 ++++++ man/grantham_distance_original.Rd | 26 +++ man/grantham_distances_matrix.Rd | 25 +++ man/grantham_equation.Rd | 90 +++++++++ man/ij2k.Rd | 24 +++ man/pipe.Rd | 20 ++ 40 files changed, 1606 insertions(+) create mode 100644 .Rbuildignore create mode 100644 .gitignore create mode 100644 CODE_OF_CONDUCT.md create mode 100644 DESCRIPTION create mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 NAMESPACE create mode 100644 R/amino_acid_index.R create mode 100644 R/amino_acid_pairs.R create mode 100644 R/amino_acids.R create mode 100644 R/amino_acids_properties.R create mode 100644 R/grantham-package.R create mode 100644 R/grantham_distance.R create mode 
100644 R/grantham_distances_matrix.R create mode 100644 R/ij2k.R create mode 100644 R/is_amino_acid.R create mode 100644 R/sltm_k.R create mode 100644 R/sysdata.rda create mode 100644 R/utils-pipe.R create mode 100644 README.Rmd create mode 100644 README.md create mode 100644 data-raw/amino_acids_properties.csv create mode 100644 data-raw/amino_acids_properties.ods create mode 100644 data-raw/data.R create mode 100644 data-raw/grantham_distance_matrix.csv create mode 100644 data-raw/grantham_distance_matrix.ods create mode 100644 data/amino_acids_properties.rda create mode 100644 data/grantham_distances_matrix.rda create mode 100644 grantham.Rproj create mode 100644 man/amino_acid_pairs.Rd create mode 100644 man/amino_acids.Rd create mode 100644 man/amino_acids_properties.Rd create mode 100644 man/grantham-package.Rd create mode 100644 man/grantham_distance.Rd create mode 100644 man/grantham_distance_exact.Rd create mode 100644 man/grantham_distance_original.Rd create mode 100644 man/grantham_distances_matrix.Rd create mode 100644 man/grantham_equation.Rd create mode 100644 man/ij2k.Rd create mode 100644 man/pipe.Rd diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..74f9ad9 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,10 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^data-raw$ +^README\.Rmd$ +^CODE_OF_CONDUCT\.md$ +^LICENSE\.md$ +^docs$ +^pkgdown$ +^\.github$ +^CRAN-RELEASE$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..b8d5f67 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,126 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, 
ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity and +orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, +and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall +community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or +advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email +address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards +of acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. 
+ +## Scope + +This Code of Conduct applies within all community spaces, and also applies +when an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at CINTESIS. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+ +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, +available at <https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +<https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>. diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..4c80325 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,39 @@ +Package: grantham +Type: Package +Title: Grantham distance +Version: 0.1.0 +Authors@R: c( + person(given = "Ramiro", family = "Magno", + email = "ramiro.magno@gmail.com", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0001-5226-3441")), + person(given = "Isabel", family = "Duarte", + email = "iduarte.scientist@gmail.com", + role = "aut", + comment = c(ORCID = "0000-0003-0060-2936")), + person(given = "Ana-Teresa", family = "Maia", + email = "maia.anateresa@gmail.com", role = "aut", + comment = c(ORCID = "0000-0002-0454-9207")), + person("CINTESIS", + role = c("cph", "fnd")) + ) +Description: A minimal set of routines to calculate the Grantham distance.
+ The Grantham distance attempts to provide a proxy for the evolutionary + distance between two amino acids based on three key chemical + properties: composition, polarity and molecular volume. In turn, + evolutionary distance is used as a proxy for the impact of missense + mutations. The higher the distance, the more deleterious the + substitution is expected to be. +License: MIT + file LICENSE +Encoding: UTF-8 +LazyData: true +RoxygenNote: 7.1.2 +Depends: + R (>= 2.10) +Imports: + tibble, + magrittr, + vctrs, + dplyr, + tidyr, + rlang diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5cb59a6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021 +COPYRIGHT HOLDER: Ramiro Magno diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..a242c45 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2021 Ramiro Magno + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..9488cd3 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,12 @@ +# Generated by roxygen2: do not edit by hand + +export("%>%") +export(amino_acid_pairs) +export(amino_acids) +export(grantham_distance) +export(grantham_distance_exact) +export(grantham_distance_original) +export(grantham_equation) +importFrom(magrittr,"%>%") +importFrom(rlang,.data) +importFrom(tibble,tibble) diff --git a/R/amino_acid_index.R b/R/amino_acid_index.R new file mode 100644 index 0000000..d78a11a --- /dev/null +++ b/R/amino_acid_index.R @@ -0,0 +1,6 @@ +amino_acid_index <- function(amino_acid) { + match(amino_acid, amino_acids()) +} + +# aa_idx: abbreviated form of `amino_acid_index`. +aa_idx <- amino_acid_index diff --git a/R/amino_acid_pairs.R b/R/amino_acid_pairs.R new file mode 100644 index 0000000..b6fd0ee --- /dev/null +++ b/R/amino_acid_pairs.R @@ -0,0 +1,60 @@ +#' Generate amino acid pairs +#' +#' This function generates combinations of amino acids in pairs. By default, it +#' generates all pair combinations of the 20 standard amino acids. +#' +#' @param x A character vector of amino acids (three-letter codes). +#' @param y Another character vector of amino acids (three-letter codes). +#' @param keep_self Whether to keep pairs involving the same amino acid. +#' @param keep_duplicates Whether to keep duplicated pairs. +#' @param keep_reverses Whether to keep pairs that are reversed versions of +#' others. E.g. if `keep_reverses` is `TRUE` the pairs `"Ser"`-`"Arg"` and +#' `"Arg"`-`"Ser"` will be kept in the returned tibble; however, if +#' `keep_reverses` is `FALSE`, only the first pair is preserved in the output. +#' +#' @return A [tibble][tibble::tibble-package] of amino acid pairs, with columns `x` and `y`.
+#' +#' @examples +#' # Generate all pairs of the 20 standard amino acids +#' amino_acid_pairs() +#' +#' # Remove the self-to-self pairs +#' amino_acid_pairs(keep_self = FALSE) +#' +#' # Generate specific combinations of Ser against Ala and Trp. +#' amino_acid_pairs(x = 'Ser', y = c('Ala', 'Trp')) +#' @md +#' @importFrom rlang .data +#' @export +amino_acid_pairs <- + function(x = amino_acids(), + y = amino_acids(), + keep_self = TRUE, + keep_duplicates = TRUE, + keep_reverses = TRUE) { + + if(!all_amino_acids(x)) + stop('`x` must be a vector of three-letter code amino acids') + + if (!all_amino_acids(y)) + stop('`y` must be a vector of three-letter code amino acids' + ) + + tbl <- tidyr::expand_grid(x = x, y = y) + tbl <- `if`(keep_self, tbl, dplyr::filter(tbl, x != y)) + tbl <- `if`(keep_duplicates, tbl, dplyr::distinct(tbl)) + + tbl <- + if (keep_reverses) { + tbl # do nothing + } else { + tbl %>% + dplyr::rowwise() %>% + dplyr::mutate(key = paste(sort(c(x, y)), collapse = '-')) %>% + dplyr::ungroup() %>% + dplyr::distinct(.data$key, .keep_all = TRUE) %>% + dplyr::select(-'key') + } + + return(tbl) +} diff --git a/R/amino_acids.R b/R/amino_acids.R new file mode 100644 index 0000000..5ca2122 --- /dev/null +++ b/R/amino_acids.R @@ -0,0 +1,16 @@ +#' The 20 standard amino acids +#' +#' The 20 amino acids that are encoded directly by the codons of the universal +#' genetic code. +#' +#' @return A character vector with the three-letter codes of the 20 standard amino acids.
+#' +#' @examples +#' amino_acids() +#' +#' @export +amino_acids <- function() { + c("Ser", "Arg", "Leu", "Pro", "Thr", "Ala", "Val", "Gly", "Ile", + "Phe", "Tyr", "Cys", "His", "Gln", "Asn", "Lys", "Asp", "Glu", + "Met", "Trp") +} diff --git a/R/amino_acids_properties.R b/R/amino_acids_properties.R new file mode 100644 index 0000000..335e575 --- /dev/null +++ b/R/amino_acids_properties.R @@ -0,0 +1,13 @@ +#' Amino acid side chain property values +#' +#' A dataset containing the amino acid side chain property values +#' ---composition, polarity and molecular volume. These values were obtained +#' from Table 1, Grantham (1974), \doi{10.1126/science.185.4154.862}. +#' +#' @examples +#' amino_acids_properties +#' +#' @source +#' Table 1, Grantham (1974), \doi{10.1126/science.185.4154.862}. +#' +"amino_acids_properties" diff --git a/R/grantham-package.R b/R/grantham-package.R new file mode 100644 index 0000000..404c70c --- /dev/null +++ b/R/grantham-package.R @@ -0,0 +1,7 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom tibble tibble +## usethis namespace: end +NULL diff --git a/R/grantham_distance.R b/R/grantham_distance.R new file mode 100644 index 0000000..4c3df62 --- /dev/null +++ b/R/grantham_distance.R @@ -0,0 +1,281 @@ +#' Grantham distance +#' +#' @description +#' This function calculates Grantham's distance \eqn{d_{i,j}} between two +#' amino acids (\eqn{i} and \eqn{j}) based on their chemical properties: +#' +#' \deqn{d_{i,j} = \rho (\alpha (c_i-c_j)^2 + \beta (p_i-p_j)^2 + \gamma (v_i-v_j)^2)^{1/2}} +#' +#' This calculation is based on three amino acid side chain properties that were +#' found to be the three strongest correlators with the relative substitution +#' frequency (RSF) (references cited in Grantham (1974)), namely: +#' +#' - composition \eqn{c}, meaning the atomic weight ratio of hetero (noncarbon) +#' elements in end groups or rings to carbons in the side chain;
+#' - polarity \eqn{p}; +#' - molecular volume \eqn{v}. +#' +#' Each property difference is weighted by dividing by the mean distance found +#' with it alone in the formula. The constants \eqn{\alpha}, \eqn{\beta} and +#' \eqn{\gamma} are squares of the inverses of mean distances of each property, +#' respectively. +#' +#' The distances reported by Grantham (1974) are further scaled by a factor +#' ---here coined \eqn{\rho}--- such that the mean of all distances is 100. +#' Although this factor is not explicitly included in Grantham's distance +#' formula, it is actually used for calculating the amino acid pair distances +#' reported in Table 2 of Grantham's paper. So, for all intents and purposes, +#' this factor should be regarded as part of the formula used to calculate +#' Grantham distance, and therefore we include it explicitly in the equation +#' above. +#' +#' If you want to calculate Grantham's distance directly from the identity of +#' the amino acids, instead of using their chemical properties, then use +#' [grantham_distance()]. +#' +#' @param c_i composition value for the _ith_ amino acid. +#' @param c_j composition value for the _jth_ amino acid. +#' @param p_i polarity value for the _ith_ amino acid. +#' @param p_j polarity value for the _jth_ amino acid. +#' @param v_i molecular volume value for the _ith_ amino acid. +#' @param v_j molecular volume value for the _jth_ amino acid. +#' @param alpha The constant \eqn{\alpha} in the equation of Grantham's +#' paper, on page 863. +#' @param beta The constant \eqn{\beta} in the equation of Grantham's +#' paper, on page 863. +#' @param gamma The constant \eqn{\gamma} in the equation of Grantham's +#' paper, on page 863. +#' @param rho Grantham's distances reported in Table 2, Science (1974). +#' 185(4154): 862--4 by R. Grantham, are scaled by a factor (here named +#' \eqn{\rho}) such that the mean value of all distances is 100. The `rho` +#' parameter allows this factor \eqn{\rho} to be changed.
By default +#' \eqn{\rho=50.723}, the same value used by Grantham. This value is +#' originally mentioned in the caption of Table 2 of the aforementioned paper. +#' +#' @return A double vector of Grantham's distances. +#' +#' @seealso Check [amino_acids_properties] for a table of the three property +#' values that can be used with this formula. This data set is from Table 1, +#' Science (1974). 185(4154): 862--4 by R. Grantham. +#' +#' @md +#' @export +grantham_equation <- + function(c_i, + c_j, + p_i, + p_j, + v_i, + v_j, + alpha = 1.833, + beta = 0.1018, + gamma = 0.000399, + rho = 50.723) { + + d_ij <- rho * + (alpha * (c_i - c_j) ^ 2 + + beta * (p_i - p_j) ^ 2 + + gamma * (v_i - v_j) ^ 2) ^ 0.5 + + return(d_ij) + } + +#' Grantham distance +#' +#' @description +#' This function calculates the Grantham distance for pairs of amino acids. +#' Amino acid identities should be provided as three-letter codes in `x` and +#' `y`. Amino acids identified in `x` and `y` are matched element-wise, i.e. the +#' first element of `x` is paired with the first element of `y`, and so on. +#' +#' The Grantham distance attempts to provide a proxy for the evolutionary +#' distance between two amino acids based on three key chemical properties: +#' composition, polarity and molecular volume. In turn, evolutionary distance is +#' used as a proxy for the impact of missense substitutions. The higher the +#' distance, the more deleterious the substitution is. +#' +#' The distance calculation is provided by two methods. The so-called _original_ +#' method uses the amino acid distances provided by +#' Grantham in Table 2 of his original publication. This is the default method. +#' In addition, you may choose the _exact_ method, which uses the chemical +#' properties provided in Grantham's Table 1 to compute the amino acid +#' differences anew.
The distances calculated with the _exact_ method are not +#' rounded to the nearest integer and will differ by ~1 unit for some amino acid +#' pairs from the _original_ method. +#' +#' If you want to calculate Grantham's distance by providing the values of the +#' amino acid properties explicitly, then use [grantham_equation()] instead. +#' +#' @param x A character vector of amino acid three-letter codes. +#' @param y A character vector of amino acid three-letter codes. +#' @param method Either `"original"` (default) or `"exact"`, see description for +#' more details. +#' @param alpha The constant \eqn{\alpha} in the equation of Grantham's +#' paper, on page 863. Only used by the `"exact"` method. +#' @param beta The constant \eqn{\beta} in the equation of Grantham's +#' paper, on page 863. Only used by the `"exact"` method. +#' @param gamma The constant \eqn{\gamma} in the equation of Grantham's +#' paper, on page 863. Only used by the `"exact"` method. +#' @param rho Grantham's distances reported in Table 2, Science (1974). +#' 185(4154): 862--4 by R. Grantham, are scaled by a factor (here named +#' \eqn{\rho}) such that the mean value of all distances is 100. The `rho` +#' parameter allows this factor \eqn{\rho} to be changed. By default +#' \eqn{\rho=50.723}, the same value used by Grantham. This value is +#' originally mentioned in the caption of Table 2 of the aforementioned paper. Only used by the `"exact"` method. +#' +#' @return A [tibble][tibble::tibble-package] of Grantham's distances for each +#' amino acid pair, with columns `x`, `y` and `d` (the distance). +#' +#' @md +#' +#' @source \doi{10.1126/science.185.4154.862}.
+#' +#' @examples +#' # Grantham's distance between Serine (Ser) and Glutamate (Glu) +#' grantham_distance('Ser', 'Glu') +#' +#' # Grantham's distance between Serine (Ser) and Glutamate (Glu) +#' # with the "exact" method +#' grantham_distance('Ser', 'Glu', method = 'exact') +#' +#' # `grantham_distance()` is vectorised +#' # amino acids are paired element-wise between `x` and `y` +#' grantham_distance(x = c('Pro', 'Gly'), y = c('Glu', 'Arg')) +#' +#' # Use `amino_acid_pairs()` to generate pairs (by default generates all pairs) +#' aa_pairs <- amino_acid_pairs() +#' grantham_distance(x = aa_pairs$x, y = aa_pairs$y) +#' +#' @export +grantham_distance <- + function(x, + y, + method = c('original', 'exact'), + alpha = 1.833, + beta = 0.1018, + gamma = 0.000399, + rho = 50.723) { + + if(!all_amino_acids(x)) + stop('`x` should contain only amino acid three-letter codes.') + + if(!all_amino_acids(y)) + stop('`y` should contain only amino acid three-letter codes.') + + # `rec`: recycled vectors `x` and `y`: + rec <- vctrs::vec_recycle_common(x = x, y = y) + + # Check that `method` is either 'original' or 'exact'. + method <- match.arg(method) + + if(identical(method, 'original')) + return(grantham_distance_original(x = rec$x, + y = rec$y)) + else + return( + grantham_distance_exact( + x = rec$x, + y = rec$y, + alpha = alpha, + beta = beta, + gamma = gamma, + rho = rho + ) + ) +} + +#' Grantham's distance (original) +#' +#' This function calculates Grantham's distance for pairs of amino acids. It +#' uses the pre-calculated distances for each amino acid pair as published in +#' Table 2 of Science (1974). 185(4154): 862--4 by R. Grantham. +#' +#' @param x A character vector of amino acid three-letter codes. +#' @param y A character vector of amino acid three-letter codes (expected to have the same length as `x`). +#' +#' @return A [tibble][tibble::tibble-package] of Grantham's distances for each +#' amino acid pair. +#' +#' @md +#' @source \doi{10.1126/science.185.4154.862}.
+#' @keywords internal +#' @export +grantham_distance_original <- function(x, y) { + + amino_acid_pairs <- matrix(c(aa_idx(x), aa_idx(y)), ncol = 2) + tbl <- tibble::tibble(x = x, y = y, d = grantham_distances_matrix[amino_acid_pairs]) + + return(tbl) +} + +#' Grantham's distance (exact) +#' +#' @md +#' +#' @description +#' This function calculates Grantham's distance for pairs of amino acids. It +#' uses the values for the amino acid properties as published in Table 1 of +#' Science (1974). 185(4154): 862--4 by R. Grantham. +#' +#' @details +#' Contrary to Grantham's distances presented in Table 2 of Grantham's paper, the +#' distances returned by this function are calculated anew starting from the +#' amino acid properties (composition, polarity and molecular volume). No +#' rounding to the nearest integer is performed. +#' +#' @param x A character vector of amino acid three-letter codes, e.g. `"Ala"` +#' (Alanine). +#' @param y A character vector of amino acid three-letter codes. +#' @param alpha The constant \eqn{\alpha} in the equation of Grantham's +#' paper, on page 863. +#' @param beta The constant \eqn{\beta} in the equation of Grantham's +#' paper, on page 863. +#' @param gamma The constant \eqn{\gamma} in the equation of Grantham's +#' paper, on page 863. +#' @param rho Grantham's distances reported in Table 2, Science (1974). +#' 185(4154): 862--4 by R. Grantham, are scaled by a factor (here named +#' \eqn{\rho}) such that the mean value of all distances is 100. The `rho` +#' parameter allows this factor \eqn{\rho} to be changed. By default +#' \eqn{\rho=50.723}, the same value used by Grantham. This value is +#' originally mentioned in the caption of Table 2 of the aforementioned paper. +#' +#' @return A [tibble][tibble::tibble-package] of Grantham's distances for each +#' amino acid pair. +#' @source \doi{10.1126/science.185.4154.862}.
+#' +#' @seealso [grantham_equation()] +#' +#' @examples +#' grantham_distance_exact(c('Ser', 'Ser'), c('Pro', 'Trp')) +#' +#' @keywords internal +#' @export +grantham_distance_exact <- function(x, + y, + alpha = 1.833, + beta = 0.1018, + gamma = 0.000399, + rho = 50.723) { + + # Look up the property rows of the queried amino acids by index (assumes the rows of `amino_acids_properties` follow the order of `amino_acids()` -- TODO confirm) + x_tbl <- amino_acids_properties[aa_idx(x), ] + y_tbl <- amino_acids_properties[aa_idx(y), ] + + # Grantham's distance computed from the amino acids' properties as provided in + # Table 1 of Grantham (1974). + d <- grantham_equation(c_i = x_tbl$c, + c_j = y_tbl$c, + p_i = x_tbl$p, + p_j = y_tbl$p, + v_i = x_tbl$v, + v_j = y_tbl$v, + alpha = alpha, + beta = beta, + gamma = gamma, + rho = rho + ) + + tbl <- tibble::tibble(x = x, y = y, d = d) + + return(tbl) +} diff --git a/R/grantham_distances_matrix.R b/R/grantham_distances_matrix.R new file mode 100644 index 0000000..873572b --- /dev/null +++ b/R/grantham_distances_matrix.R @@ -0,0 +1,13 @@ +#' Grantham distance matrix +#' +#' A dataset containing Grantham distances in the format of a matrix. These +#' values were obtained from Table 2, Grantham (1974), +#' \doi{10.1126/science.185.4154.862}. +#' +#' @examples +#' grantham_distances_matrix +#' +#' @source +#' Table 2, Grantham (1974), \doi{10.1126/science.185.4154.862}. +#' +"grantham_distances_matrix" diff --git a/R/ij2k.R b/R/ij2k.R new file mode 100644 index 0000000..e0ffe98 --- /dev/null +++ b/R/ij2k.R @@ -0,0 +1,12 @@ +#' Convert an (i, j) index to a linear index. +#' +#' Converts an (i, j) index to a linear index. Converts the double index of +#' a square matrix to the corresponding linear one. The linear index is column-major, +#' as is the default in R. +#' +#' @param i i index, i.e. row position; indexing starts at 1. +#' @param j j index, i.e. column position; indexing starts at 1. +#' @param n size of the square matrix. +#' @return Linear position.
+#' @keywords internal +ij2k <- function(i, j, n) (j - 1) * n + i diff --git a/R/is_amino_acid.R b/R/is_amino_acid.R new file mode 100644 index 0000000..56ff4d8 --- /dev/null +++ b/R/is_amino_acid.R @@ -0,0 +1,9 @@ +#' @keywords internal +is_amino_acid <- function(x) { + x %in% amino_acids() +} + +#' @keywords internal +all_amino_acids <- function(x) { + all(is_amino_acid(x)) +} diff --git a/R/sltm_k.R b/R/sltm_k.R new file mode 100644 index 0000000..3917e26 --- /dev/null +++ b/R/sltm_k.R @@ -0,0 +1,20 @@ +#' Linear positions of the entries of a strictly lower triangular matrix +#' +#' Returns the linear indices of the non-zero entries of a strictly lower +#' triangular matrix. +#' +#' @param n Dimension of a `n` by `n` square matrix. +#' +#' @return An integer vector of linear positions in column-major order. +#' @md +#' +#' @examples +#' sltm_k(3) +#' +#' @noRd +#' @keywords internal +sltm_k <- function(n) { + if(!(n > 1)) stop('`n` must be greater than 1') + + utils::combn(seq_len(n), 2, function(ij) {ij2k(i = ij[2], j = ij[1], n)}) +} diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000000000000000000000000000000000000..0584f580b1f594390e32817f0fe36f8c798fe2a5 GIT binary patch literal 1368 zcmV-e1*iH#T4*^jL0KkKS-07F8UP1U|NsC0-|6?!8{Yr_|NsB@|M)_J!SGH209a6G z0^t3Q3xm)E{^7a6Bmh)32C3wvdWL{#4FEI%G(A874KxisK+&K813&-(05kwJ^n@T2 zLqRl{H9az#jMViF8e|%6LqG!{0MUqO&>8?eL7-`&plASSBA<#CLl7A;G8$>400bB& zhK&sj00A%p28;j*05Sm6ARq%HO*F*9F)}eUX{JU-1Yk@78Z^kn&@e`ZMhIkLU`7B5 zfJvzoB-HTIn^R1hJfV|JnHp#TrkWaL^)&SXpa26PG-+96y^D7DTSrv$44aErV4*s&Ni zpft6qPI9XF)9z(cRV#3fTq|Bv#5KyEuH~0xu_Q4^%a+{+j=}L-G)+l#+;Y^8ChS2S zXkn_01&k?0lk4x_-`}Ky1Pgn++Vfs9es4a=Lv%<^{M_Q^o^Bgf=PG7Qpx$EQ9PJ!%fCeUP^0h2Lfk^;Ath^S;oE`0h7?J@6 z0zzvx+mjja6Nxh=Mp|(tPCVUAGmSCEXH7}QmzwG2acer%(r9K-XXnBfAVB~HgpyC3 zpK&b$!eFQzF*0g^fMQ5U9*#{&9fP~7kRYKqT*9RHnKA}HT}jlstcg(EG3K(tY5CNz zf0TYs$HmvzL29A3WDxQhz8TVTb(-iNuNPAhCsvT{$LZ#B_8dOGrrK4NIk$PfBBG4~ 
zdP^aC99+(g#v<`gagmMFREIMWj>eY<3?U{n8hvP1OQ$$F6BOI)VL>QVO(!xaXQFt4 zTyic2Thd?wF@)tQN;>pfHh) z0LtN1fE!UHG>!g|<>~k(@PWHJ%?*{7t#V9_RKA&VZBFNJGJZT>CS29=^R|XrhckhX z7zV=$L^@$1x63to=5v4y81QslIxm3F%gvec@Uh-oZ#-+vhO!1cp=<_)xj-H$TJ9VM zpwXhwLI7Z?WMQ&0qyQ(J6D;7Xo5^E05NJ(c{Zl8$oGEmi{d3yd*o@6)4H>eQHQL=( zfg(>dBTxx!Knsv;bdJCM?*@!v9C58BF?vy(U2re95D+I3(V!#}%;*gi0l*5Nv6o4> zrd1(W(+BCX0|?y?P}vS~2SUE`*5V0_a&Jjv2J<^|hVQplmxm?ujUBQ%;lqF(2lvm( zGEZu8s6ykxzg!R7Feeu}a}`9zYMnY!KE|6{m|6pY#LxTw$))yX>VF-85Df3me-D$D z;U-bCfkYsTxhsOq!8$SO7SBB|saO{byUj_7l4dAo^cOGGYTF^rg0v@U*^u)}b6S=$ zB|=iOys&`IsH%N$wr8-QLX8$+`#vC$3s@=eev;uQf$`WShwBGySeg(JU&#d4AS6\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..7819ff0 --- /dev/null +++ b/README.Rmd @@ -0,0 +1,133 @@ +--- +output: github_document +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# grantham + + + + +The goal of `{grantham}` is to provide a minimal set of routines to calculate +the Grantham distance [1]. + +The Grantham distance attempts to provide a proxy for the evolutionary distance +between two amino acids based on three key chemical properties: composition, +polarity and molecular volume. In turn, evolutionary distance is used as a proxy +for the impact of missense mutations. The higher the distance, the more +deleterious the substitution is expected to be. 
+ +## Installation + +You can install the development version of `{grantham}` like so: + +``` r +# install.packages("remotes") +remotes::install_github("maialab/grantham") +``` + +## Usage + +Grantham distance between two amino acids: + +```{r} +library(grantham) + +grantham_distance(x = 'Ser', y = 'Phe') +``` + +The function `grantham_distance()` is vectorised with amino acids being matched element-wise to form pairs for comparison: + +```{r} +grantham_distance(x = c('Ser', 'Arg'), y = c('Phe', 'Leu')) +``` + +The two vectors of amino acids must have compatible sizes in the sense of +[vec_recycle()](https://vctrs.r-lib.org/reference/vec_recycle.html) for element +recycling to be possible, i.e., either the two vectors have the same length, or +one of them is of length one, and it is recycled up to the length of the other. + +```{r} +# `'Ser'` is recycled to match the length of the second vector, i.e. 3. +grantham_distance(x = 'Ser', y = c('Phe', 'Leu', 'Arg')) +``` + +Use the function `amino_acid_pairs()` to generate all 20 x 20 amino acid pairs: + +```{r} +aa_pairs <- amino_acid_pairs() +aa_pairs +``` + +And now calculate all Grantham distances for all pairs `aa_pairs`: + +```{r} +grantham_distance(x = aa_pairs$x, y = aa_pairs$y) +``` + +Because distances are symmetric, and pairs formed by the same amino acid are +trivially zero, you might want to exclude these pairs: + +```{r} +# `keep_self = FALSE`: excludes pairs such as ("Ser", "Ser") +# `keep_reverses = FALSE`: excludes reversed pairs, e.g. ("Arg", "Ser") will be +# removed because ("Ser", "Arg") already exists. +aa_pairs <- amino_acid_pairs(keep_self = FALSE, keep_reverses = FALSE) + +# These amino acid pairs are the 190 pairs shown in Table 2 of Grantham's +# original publication. 
+aa_pairs + +# Grantham distance for the 190 unique amino acid pairs +grantham_distance(x = aa_pairs$x, y = aa_pairs$y) +``` + +The Grantham distance $d_{i,j}$ for two amino acids $i$ and $j$ is: + +$$d_{i,j} = \rho (\alpha (c_i-c_j)^2+\beta (p_i-p_j)^2+ \gamma (v_i-v_j)^2)^{1/2}\ .$$ + +The distance is based on three chemical properties of amino acid side chains: + +- composition ($c$) +- polarity ($p$) +- molecular volume ($v$) + +We provide a data set with these properties: + +```{r} +amino_acids_properties +``` + +If you want to calculate the Grantham distance from these property values you +may use the function `grantham_equation()`. + + +## Related software + +Other sources we've found in the R ecosystem that also provide code for calculation of the Grantham distance: + +- A GitHub Gist by Daniel E Cook provides the function `calculate_grantham()`, see [Fetch_Grantham.R](https://gist.github.com/danielecook/501f03650bca6a3db31ff3af2d413d2a). +- The `{midasHLA}` package includes the unexported function `distGrantham()` in [utils.R](https://github.com/Genentech/midasHLA/blob/ec29296f9bfd7c4fae9e2040592b618e5f2a99a1/R/utils.R). +- The `{HLAdivR}` package exports a data set with the Grantham distances in the format of a matrix, see [data.R]( https://github.com/rbentham/HLAdivR/blob/master/R/data.R). + +## Code of Conduct + +Please note that the `{grantham}` package is released with a [Contributor Code +of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). +By contributing to this project, you agree to abide by its terms. + + +## References + +1. Grantham, R. _Amino acid difference formula to help explain protein evolution_. Science 185, 862--864 +(1974). doi: [10.1126/science.185.4154.862](https://doi.org/10.1126/science.185.4154.862). 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..22d106d --- /dev/null +++ b/README.md @@ -0,0 +1,228 @@ + + + +# grantham + + + + +The goal of `{grantham}` is to provide a minimal set of routines to +calculate the Grantham distance \[1\]. + +The Grantham distance attempts to provide a proxy for the evolutionary +distance between two amino acids based on three key chemical properties: +composition, polarity and molecular volume. In turn, evolutionary +distance is used as a proxy for the impact of missense mutations. The +higher the distance, the more deleterious the substitution is expected +to be. + +## Installation + +You can install the development version of `{grantham}` like so: + +``` r +# install.packages("remotes") +remotes::install_github("maialab/grantham") +``` + +## Usage + +Grantham distance between two amino acids: + +``` r +library(grantham) + +grantham_distance(x = 'Ser', y = 'Phe') +#> # A tibble: 1 × 3 +#> x y d +#> +#> 1 Ser Phe 155 +``` + +The function `grantham_distance()` is vectorised with amino acids being +matched element-wise to form pairs for comparison: + +``` r +grantham_distance(x = c('Ser', 'Arg'), y = c('Phe', 'Leu')) +#> # A tibble: 2 × 3 +#> x y d +#> +#> 1 Ser Phe 155 +#> 2 Arg Leu 102 +``` + +The two vectors of amino acids must have compatible sizes in the sense +of [vec_recycle()](https://vctrs.r-lib.org/reference/vec_recycle.html) +for element recycling to be possible, i.e., either the two vectors have +the same length, or one of them is of length one, and it is recycled up +to the length of the other. + +``` r +# `'Ser'` is recycled to match the length of the second vector, i.e. 3. 
+grantham_distance(x = 'Ser', y = c('Phe', 'Leu', 'Arg')) +#> # A tibble: 3 × 3 +#> x y d +#> +#> 1 Ser Phe 155 +#> 2 Ser Leu 145 +#> 3 Ser Arg 110 +``` + +Use the function `amino_acid_pairs()` to generate all 20 x 20 amino acid +pairs: + +``` r +aa_pairs <- amino_acid_pairs() +aa_pairs +#> # A tibble: 400 × 2 +#> x y +#> +#> 1 Ser Ser +#> 2 Ser Arg +#> 3 Ser Leu +#> 4 Ser Pro +#> 5 Ser Thr +#> 6 Ser Ala +#> 7 Ser Val +#> 8 Ser Gly +#> 9 Ser Ile +#> 10 Ser Phe +#> # … with 390 more rows +``` + +And now calculate all Grantham distances for all pairs `aa_pairs`: + +``` r +grantham_distance(x = aa_pairs$x, y = aa_pairs$y) +#> # A tibble: 400 × 3 +#> x y d +#> +#> 1 Ser Ser 0 +#> 2 Ser Arg 110 +#> 3 Ser Leu 145 +#> 4 Ser Pro 74 +#> 5 Ser Thr 58 +#> 6 Ser Ala 99 +#> 7 Ser Val 124 +#> 8 Ser Gly 56 +#> 9 Ser Ile 142 +#> 10 Ser Phe 155 +#> # … with 390 more rows +``` + +Because distances are symmetric, and pairs formed by the same amino acid +are trivially zero, you might want to exclude these pairs: + +``` r +# `keep_self = FALSE`: excludes pairs such as ("Ser", "Ser") +# `keep_reverses = FALSE`: excludes reversed pairs, e.g. ("Arg", "Ser") will be +# removed because ("Ser", "Arg") already exists. +aa_pairs <- amino_acid_pairs(keep_self = FALSE, keep_reverses = FALSE) + +# These amino acid pairs are the 190 pairs shown in Table 2 of Grantham's +# original publication. 
+aa_pairs +#> # A tibble: 190 × 2 +#> x y +#> +#> 1 Ser Arg +#> 2 Ser Leu +#> 3 Ser Pro +#> 4 Ser Thr +#> 5 Ser Ala +#> 6 Ser Val +#> 7 Ser Gly +#> 8 Ser Ile +#> 9 Ser Phe +#> 10 Ser Tyr +#> # … with 180 more rows + +# Grantham distance for the 190 unique amino acid pairs +grantham_distance(x = aa_pairs$x, y = aa_pairs$y) +#> # A tibble: 190 × 3 +#> x y d +#> +#> 1 Ser Arg 110 +#> 2 Ser Leu 145 +#> 3 Ser Pro 74 +#> 4 Ser Thr 58 +#> 5 Ser Ala 99 +#> 6 Ser Val 124 +#> 7 Ser Gly 56 +#> 8 Ser Ile 142 +#> 9 Ser Phe 155 +#> 10 Ser Tyr 144 +#> # … with 180 more rows +``` + +The Grantham distance $d_{i,j}$ for two amino acids $i$ and +$j$ is: + +$$d_{i,j} = \rho (\alpha (c_i-c_j)^2+\beta (p_i-p_j)^2+ \gamma (v_i-v_j)^2)^{1/2}\ .$$ + +The distance is based on three chemical properties of amino acid side +chains: + +- composition (*c*) +- polarity (*p*) +- molecular volume (*v*) + +We provide a data set with these properties: + +``` r +amino_acids_properties +#> # A tibble: 20 × 4 +#> amino_acid c p v +#> +#> 1 Ser 1.42 9.2 32 +#> 2 Arg 0.65 10.5 124 +#> 3 Leu 0 4.9 111 +#> 4 Pro 0.39 8 32.5 +#> 5 Thr 0.71 8.6 61 +#> 6 Ala 0 8.1 31 +#> 7 Val 0 5.9 84 +#> 8 Gly 0.74 9 3 +#> 9 Ile 0 5.2 111 +#> 10 Phe 0 5.2 132 +#> 11 Tyr 0.2 6.2 136 +#> 12 Cys 2.75 5.5 55 +#> 13 His 0.58 10.4 96 +#> 14 Gln 0.89 10.5 85 +#> 15 Asn 1.33 11.6 56 +#> 16 Lys 0.33 11.3 119 +#> 17 Asp 1.38 13 54 +#> 18 Glu 0.92 12.3 83 +#> 19 Met 0 5.7 105 +#> 20 Trp 0.13 5.4 170 +``` + +If you want to calculate the Grantham distance from these property +values you may use the function `grantham_equation()`. + +## Related software + +Other sources we’ve found in the R ecosystem that also provide code for +calculation of the Grantham distance: + +- A GitHub Gist by Daniel E Cook provides the function + `calculate_grantham()`, see + [Fetch_Grantham.R](https://gist.github.com/danielecook/501f03650bca6a3db31ff3af2d413d2a). 
+- The `{midasHLA}` package includes the unexported function + `distGrantham()` in + [utils.R](https://github.com/Genentech/midasHLA/blob/ec29296f9bfd7c4fae9e2040592b618e5f2a99a1/R/utils.R). +- The `{HLAdivR}` package exports a data set with the Grantham + distances in the format of a matrix, see + [data.R](https://github.com/rbentham/HLAdivR/blob/master/R/data.R). + +## Code of Conduct + +Please note that the `{grantham}` package is released with a +[Contributor Code of +Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). +By contributing to this project, you agree to abide by its terms. + +## References + +1. Grantham, R. *Amino acid difference formula to help explain protein + evolution*. Science 185, 862–864 (1974). doi: + [10.1126/science.185.4154.862](https://doi.org/10.1126/science.185.4154.862). diff --git a/data-raw/amino_acids_properties.csv b/data-raw/amino_acids_properties.csv new file mode 100644 index 0000000..92bf7df --- /dev/null +++ b/data-raw/amino_acids_properties.csv @@ -0,0 +1,21 @@ +amino_acid,c,p,v +Ser,1.42,9.2,32 +Arg,0.65,10.5,124 +Leu,0,4.9,111 +Pro,0.39,8,32.5 +Thr,0.71,8.6,61 +Ala,0,8.1,31 +Val,0,5.9,84 +Gly,0.74,9,3 +Ile,0,5.2,111 +Phe,0,5.2,132 +Tyr,0.2,6.2,136 +Cys,2.75,5.5,55 +His,0.58,10.4,96 +Gln,0.89,10.5,85 +Asn,1.33,11.6,56 +Lys,0.33,11.3,119 +Asp,1.38,13,54 +Glu,0.92,12.3,83 +Met,0,5.7,105 +Trp,0.13,5.4,170 diff --git a/data-raw/amino_acids_properties.ods b/data-raw/amino_acids_properties.ods new file mode 100644 index 0000000000000000000000000000000000000000..b6b653fa295c9b8e38a3d4ab434127fb1da0fd64 GIT binary patch literal 14096 zcmbum1#lh7vMnlRX4xW37Fo<-3oT}5W@ct)W@fU`VvCuXnQ@C@3oUr^xiQx>=grLj zKYs40=;|Gvt2nDWE0>HoI0Oa=2n-0wP-Ucipan-5EeHt6uk`j8h^48ep@XZ9p{|XM zg{i)-gQ>L@os*RTt+lSbsXeW=jiHr+wZ5aJp_K!zy^Woru7SOYp`nA!UohWb{#Wqc za$l{jj7*Ik?f!#i&q$}MuWx8!_;zb;NB7T~gny%HYN=~%Xivv)>R_pBWB)J8@BT*F z!P?rwM%T*F;$QUsuDUjM*2Z>*_V#+Z|5n|)P}>UkLpv#uh7G}DqBf7h 
zCGGl)bSrV!9J{RuLVsNp3@kw;zv<1c$2*SkZCE2^S%>4C7}q-@uJAR^Z#gw^x(#w| z!3{*QzPypJ62k!w{&>w&;(bg>*RmX4xu@F$I&Cxld0%ucsG=qo*FzAB_Ov$JzMpCM zZ+Ik{xSi>{InMN5ZjdDCWH6pdAGLgTeLZ-gop{CFuBvkqu;3lfH-3ycsJkLm6t0pH zhlZ)UGns}21p)B~2Lbt?`Tuv__n*~h=%7pMVrda6KVr2)kJ5Ta1;MHuo2QioqYCOq zVJ8v;=IFbQkh)r8VLDCpZSdje5#YUHpz%)~3ugC8L@G z`;jATM39fz!%RhXV#q$L)s*Xq1_8*z@)?Kn!Zc(R{A#18Q);LU>pCAOM#Gt_2F22g z{bIa++{-vP9VvRROtmn(i^QFeBzWVT2R;Q-*|0o!YHg;vKLlqGskMP{1?+ z>Im)lmTQuDQO}4&J%G zYyEH{JsmIDcswVuy=5^ zFtq>?3Ob8ELkTB!k!`(QR+`XtU6b%ZT$s#)L%kHTL;!X`6(L<7 zwQ4ej)Tx8LUlDPKz00^WZ61ZT#+uP=OP2;plJ?`&M<7=Q30C}OI zl{l0DY=*Dt2nUQPS5bOflv^6qa8ZHp0;UfaFFd+(i1cE65W)im;FUI4Lur>8;cLtB zh51K!3pMGjxL8-Gd0d*GOsvWY#5(^k#L_pnUi1LULBRtE4Ik;r+V3x(H`4c#8V zWtna#f{#1+%62oQGKyqpjG4uX(PE2a(U#uT%NLfu98>Z!@2UmAehrV@+8px*oeOuA zA%gSmXZIN1NpafClj8y}rK61Ud+*mckpW={{X>;ZRPrl{uFAYVKLiGGCtEX}5(^)_ zLKB5P9~oF&S+ikZ9+{^07k|4o`G|wfiZtfeR?k{0(X2>G)JRL9ey=D)^LsUi zeEV|4d0d(m4q*9tV!*6b5tBFIECW{Fc?^OWuNOzQy7fDx45It3TDl{k<3nkhghbmN zy)vi_8!Yq~Oj`@EBM=gu=WOdF=bS~s+#_aNBT%^Bp_-JR83i&{q;Ct#PVWjAA^kQ? 
ze^p=J(yY_x^9S2o9Qbv#-c+4X;7d>y1Ax>RnJp+ow!ybOBx%RVeAgqy7L2WKm7Zg) zFtf-P0!IGmvkNGP1PApzU#=Lmz343Mh!kbrbaxmL8GpCN21WOfecbu|wDV;{Ms+QD z6tOeLuInb9VrGi0u`byPk*)1$!sN<$I zxSXg}m?*(TG5_0Hd(wv8pe#*EA3^ z_1w<$d?@(hnI;^R-@G+I$= zk63l9uTFxMQuGJ%q4S0h{4DWQ_DVPMSuzb*b?>tkOBp*0jdx50Uj*Y>&Nbdj_7xvWNg3zLLDfu#j+@p{wDi$Jt!#~^wbrToqNbx zOe(IZLGah=xNC_*l0UO(8}W8>Xm!@>Xb!HMeegfa(fXYoL9b~Gs=vVh+Sm*f9cgl* zKtP7c{_SN2{%vE^x3+Tl^*GQ&clAx6Qz zZO~*G*ew8`T3gW;!;#Rn= zlEEJ>oFCe8rEf;zx@Gax(Xv`S9atbppFQ!M*BC@5SJ-0qh2F;uX_&8Ca~z{_Cs*$S zDGmxQc|cDcMJgQ<(z5=FsaKHs=IibK1xAIcSXVGBbGV9S;Z=_Zp37{Kymqz5C;9xH z63~P9q;M*D4-)j^N$DH|f}EX>DT zR1${pfc6&3#;-G9C_2=O8hfCT+{-3=H$#n3;G-hmrP)RI6k=u=#mpcAct!+{VmNlj!?GP+@lWW~{K7S!znxIS{&k zLTVfpiHU>4>ZJ*@Vj0jc67xA)WUE;9XuFmvP6SHwdzf3S?xqQ9I`jMaq*I}|o+N`| znlwySQN@mfFc@NrkGBRn?JM&aI^!m-kmnn?40 zWpjOh=!Fju9TG12xUo%Jl$ZsUma{=yuJ7H_oYzviIeIaM0AaDy+wMt|k@Uj;W{4$U ze(F`)^W9-)52sfs7~||znOr`KT;WX@gBsp!Op4HVlq0j8lJrUu^gh8 zjXo9;sZz^%FX4iP&X+8~dn8se$y?LA7^-LiNr2(|1z8y}{kv3dgGMi^Vm0@u+mfn> zQm5T~7Ijw+D=m}&64hjNS%N#5nuX7>xvaE;45Ej<5TZZ&eyLFJ9%pO_EZ?%$5#&it z{X0(7>7&OG{Co2_4$SvLqukI1tZ-y*>?PrW=$@N{x4X_cbrLUpBm^7xiZhYkc)J_O z?>*;D9*iSz?HJO$>!!7>0NE*zyRM=E1xAa2>YLuVd*(p2b^7nTY#93lQJ|ZvC@GuU z(zL;^=OKGq6{`}ngXxm8@`k=qK+Y>04C+bLz)Q@iuKQAQKUbai!hT)f`Xn!?tfSzN z*@gLG!7^n0a>)4~L`9WT4gJ)q;T^ty5a>hJ#OE8VGxFs^9l2hX9b>$zw0%DUh9BCO zzO}nO_EzAQk1m*k$YLsbf`e5n$#iNmvsg=pKKmcP?M zoUVQ;z-C!OCek10g9nXj7rK{gN5utUi!SJlZIJhitL zJ$2%T8tzKkN^4^sp2!kD6d>^mE3(iwjsA>N=MEW7)Zjl6w>&)3d>(lvlnEh0xg~8` zH5aynZG}A>H}S2kBpDn~gLiPi*@BVy8K+60p-4+ot|3Ib=chmoqAA;1sM2{OK!5i* zm`rN;tR9&`;wha^D>DuFI zurJ9yrb$vYu~2DdVG+8PjCQBW!RaSNa0T(5SJ{q_0IafdqKn0<1?0?nxNuDly^b+- zrGwG>xl7<6`P5PWVdd^|nmM4yKx)a1?W-`b0bDn%TL60Whx!#rE2j+N>vVXHh7QpRzJT^@Xypg z2D9U7b$Hi%Cj+JZKV9rBW}I3N^jFfH*0klKa0!kLr{CF)gC{z4=@9vhGJHouo}-;a zrc_qyaGZO>+HLV6|E^#Z^O~EbcEES1wst#pNRm_LSb5ME-k4c)ftU?}ILx)^m&^hw z`@$x+Z_*y3i5vXkojWdFAzB)U5>L@r0Pja7{mF=I`#kU~@2K{S?Q&3s7KqoN+!%@? 
zif_}v*97bnt9s-Iv{s|ZF5SdKe z$5n6edvN6`#E*we-O7+#l$p77UC1x<WJO1>IU*a4=SPMkP9wC68W z5g`l(3U0pu9}x0UQKX&ugd#q2<^}eHJgTe*iaAGZo;*P^0(hNVtNKK`?_+5RR5W-^ zY@a&y*Qg6-P(@jjkjT66j0HXo#+4$0Q-1d?3qSp#p8!8-HEx`TO zn)@Dpa{C+_p`m#+*SC3&G#WHj90Bno>1u0gC{hpX=&Ef)OY&x$S-uczsVb>d%>|Ak zyno<`C>*^1T;5WeVQEX*eXSo{Z3`4U@z&LLkR7p;(lxZIsbmsdk$M;(%11_$b#y_g zm>kD>xMKBja*@{5arxOai%k+_{=l!@t1+mGB6%dq=<*{WUs>t3gJ1m6hrNp09jX`! zZv?P!b5jLYG!34=G1@i@-9Cy zxq7z*i$lMrNJ@E6P8$@^A`=dRiO*a_`bytae;$aC$$12%N-9)#&Wv6`rL(|D-^jRU z3-^Mbdd2=_fdVVS)Z|pBIBEJC);tfc#3oyrc>yj+S~>x~3NPbPj)H zX>F{GLuI6d5#iqb^8X-;iU`QPUEAK$E6kg_2qZq0+zl)kX z7`p14+38t$SXkLv+B!Lz8@Sq<+u7OMJGwc!dbl~+dAK;ZySr;x`Dob&>09|4I|W;K z#@f64+j)e#dHTND4R0TBOaEkdzcA;(MAwKMuec&VD~(_~Enh3W5GUgR52si!ix5Be z1RtvefBX1A*Mtz))Ij^JP{)jLkK9Pt!dOp#e}BKgu%M9e&_Mr?z>tWL!0_;JpTIc3 zums=8^sva-(72Suh=BO0u(U|ul*o{@*pTF;ug zfP%!3f|Q8TM8ERX!1DC){EUQ}tgzhd+|sCCPbtdHF@9g(VfGMR{c<1?A-x z1?BYx-&@P8>q={y>&lCos!JOi8d55{GOK%w8~aPzrs`XJsyfFjx@POTM%%jjTY5$s z1{XRe*M5v{woPq!SETk;XAb@-?r*D`YWp_PSvAtrG}l!=+gm%;-#k0iw$xv zd44zs0)bbz57!S*54TrO55Sj~mtV`_^;KCj^UaHjWh^Sdr{J=Dnu)BSaFE!!bjCW@ zXCN&VH#2`~`$6{Q@S_Mg)+C0^kwHFvK07mSJQkv1XH-2!sJcujEx4b)2!yRjM9IBF zFAwNmv{b@J#GdS2%WX7Np@wq%D@Hzp6af8~^;Y-sb{?6Ttg|h&j)J`tci~R^{kHSx zefJIL^ItztFZy)k4!Wn;L+uBTr{34gt_}R>N$t;Fu$`1IUI6{ikH&CW;C1>}+*o=`lParcY$hcg$&ON6v4jZ(3m>}SBcNtV%wb|&v7fDG%w zbW7idr#aE;WF6EI@5w|$Lm0m|pWeZ|J?P!IHM5T+)EFVFNX3U}6J_SlOpl$Rmc^Jk z$NK<)LdXulKL86%GcgEKrGF?!RlXf7<9j3+X>>3uG# z5nOO#SjIY4rkpPDXl$WFa5}ugCf6Jd($@Ac(4FnihZwv#EKk9K4$G0;*+N*?F|g)W z=`Tob9TgY~+$X9Jg?CiM;y(O1^^je7)d2->*V2_d8QrvbO^#b}!2)n8&&ZE=dA6iR z1>oi|N!l|HJ+cS{jT`fC(9H;AMwg#;YRZ;{?(7|H4$Gd<;20}-%U%{VaLyAb6Rydc zlkuN7T{%b>kF4ji2&R_zl!(`?V2(St`eISloNs8)5U9ki>tX`9$dOrmTAIgagakU1 zkIYBE2>rBCG=;3ZKArPig|z;rwGrJaZ2WDmoUS5vuT8?62dIe)v2uMjdC`;nQ+jM{ z)iEu7M*3=3)?^AMTx~80n|^t?QaAV00}iiiqd+#MH~8r61uA@~Y0C{BJ|*=r03L3h z?HQoR(6(+P|EPrI#Cv>}D_jx5TU{&y`tB>YrI&XFnT8{P^+M%*W9tqBm z2zrS+HkZeKMlHxL)&!c)^r+kWmkj-8#v#VK+vjZwie61?u@Af*rw$9(&P!}k$h2ej 
z1oRRe+ry8UY>TOLT)T@y26_l~WM;GYUctGgOf&bvn+N-aFN5lHCptoqq3s7@TDUCh zSD0-c0~w9hkETDAD@+fkGd34Y>eKXba$_pAtd-1}X@599hojXpFaS=dlkRAR=9RSB z+A^QR(yWA6;JfXn76AO&Bd_<5A0LgaJXtQ(Jb{j@snCSm#_lqX^hNI*?`v0$ypy8D zAIt#$n-`R;sjzVC+#c^3EoZVhvrxyPumx(*6*uw^2+Tjnq`;}N`dysC;P7(ja3oS? zVF&uo=Brpj!eQav9ePH~Du{pTjVrhd`pPtHe9xD%pMLPLw*Z-;NIQ95208F9)Jrg6 zIi^L+R5jsJGY%+Ii7=p6dZ`bI5`Hz}65o+(vUV{h@afD~)B6KM(-JX->QX}-;5=EY zQAu<%8lXhU1BRpU>3v!v-FU%LUbd8VPV?L+bZ>C<&cu6xA(EPHEDJ98x;$&?53tfY zgWhq{K8uAYT_L*n9{Vb0|GN50oGCVxr-EU$I=RV6$w>uBrG zr@I&uNLQ*;<|%v6jm3}cH(wkw4}gjvp^#jhTPG(P3$iz9dg>2sA8A@9wFfxV6Eh5t`B(Yu+gL2UD&O$T(B&;{m&8$|5%N^vqJt7Chc|a4MePhbFS`H-q ztQqjs1#-uVpQ2iKRtl4R555L>6?O~3`AMD{Csb4OninG+01;=NHWl>wZ&%pO{pW1o zp7KLrd@(75X9kg;tUp9v&3jY4cu?L&dwDIcGkN8|Ao6s0C)FoPyzT+kT0pGegM3L6 zEk+h*aeoNvE9IqOvnd`uFD7YY^6NRs1;Eo$*wZdwo14@CGyM8an{Bu95TbRM+-+&R zzoX`9D7RDfh2S8r>##Fs*e6i8xs!T?py};IfBP!5RVy+{ywlevo5phX4F=m9r%JI7 z7ygb%T1iK!)0&ldf@^do)_sBQt6I=`v8$dQA_tvlE$e|hEm>!6Uc$wX=%cAzoBnwY zrsIn5?gG9F4aqYpCGW&qM1lH#Z9!}eP7T;uMB>s1B=4CIQd?P`&ZDbovPa#ab83Z? 
z?iin+C$mRqS}NnzwCylk9)^rPe3dDZt}X9z}R6Ni6a$7JPi*mr>6DFg#~M#7ozw4|Ye=P|vOl1#Fbp zIhLqNKhXne=anwoe>W0Qtnkr-%5(v-GQlOWhiSNZM6K~+6+x;PxiB2a0o9Lw-_TXN zOa8IHtuiah0HkhV?eo<^ni0J=1*;VlN%52Z1Xyg=O1{>@MVD)cIT7Fakk*2>m492e zea2LIg-p89^v%%GBc0gj@_|upYoqk2(W7c<1&+z9S@T-xYMWEkD(XwbCPhX(4{a#- zwcEIL2i!dkoxo1OAW4T;AEbtb7QK)j_CxJODbI0M*udyhRmKKur8pib!Z~8Fvo`;d zQdd2GYwt;?;Sr)>^=gdY4Z?c<`HNI`bqYP#a`e!%uzG@yZsTToehE*s$*!=}T~DDK z{6l+3#F6_rrCLxVb=|cCFCreo*rM&ZiKV4R zV=LWst97r~^&vMhok3BrWuy-alIZEOVV148j| z%cYJqQk_Dz5hHh_l4}MQ2{_SBIB9S_E6(u}ucD=)f~K9-I-g4O$A98Dg67(@UpF3) zqAKXfelkP_e1_b0%K2$p8oe8FB`bnFqFFj&d`ZBi^*PKGcxaoPd34U{1@tSgF!*I} z*Di;nlta!g{nfoSVtgLoodEpuJE&gq69x*?0+XJQOepcNetI27Zh!FdQa$aRPp!sp zDh5R>=U_)3yNV*lwnjpr`HB;_8uf6Z^EmmmUq@>ih|teCsp(K)P;?jXdr*I3IVCln z+uE(K>qbfE!ZD1nUxAKX&uF}1H55$UtG_N)@kLx>%1&2~G69@je}=(YWyD)jD%@QT z%yVv-9ULD-{xSunC2kIF#6i8AHBq9}Zk7{T>i}8}YZ?`ItRao)teA{eC9lIH=@p8{ zpI8rAF$@e<{+X{JrtxJzu^w;+ChWtBT79}NiKMoYTtRJ6lX&-(qB+Hy5*aKYy%AeK zw-JcYIDBc&AEz~ug(z{Cd?oIu4S z@X!SN7T>o(s0kSgiPTBlsjqq?@#qT3lYs9J0(fTVF*7nF&0S@=Si1UPOeK)Qw~B&c zz;Geoe55mXR8BVZ5VWnh-o36{H=8c^_p#`LlgS9JQFtAz5KkG(meL_WWpE&Daew>V zT7A3Q-t)|>nIr&qT|~_*JbX$xfA&B{2Cy^#d^RBNAH!)(R8Cw@;dw12x=*iEEW-m5 zR1R?~a~S1xyat=&N`j?10J<`M301}j<{Cai$2V(U%vWmnW1HjVYBq+afSoQTCmM}J!)~TF z%wCy>H-y*noHy;Ta;%|}e|@`+d4DPgTM;UByBthv)-j5<;C@_X3{}V!uD*;XFS* zU8Gl5ceJv9GasY=L1Sd%8Xh5@y9ZI%X+3nI>GQ!be|R^0D|Y2I>J6@>GhMd4?HGF$BsvnKb!tsL^O8jt%S~ij zHk?_7Wv;CemD@q9a7PU~e$=y{I5T{tIKRasw*2WD&nEnBt6&SAJkWR~u}&OVZ2e$4 zzA&!$nAN!5s0HDSe;5Ol?2Bo#cF+hL02)vw7Eu-Dc@5Bl0g(!?^7CGl>K{GkmXnLK zgVe#4RU?MMgh36;v;@|>Tli{g@HxJ7LIH{vOO)K&Hhr#Dc1EkbrT zNp(k0JAkW(+8c%`5bxcp`h+8@LZeEKQeQ|g%<_3H?F$%bJ%CnsWcVbHgbpNcJB6hd zyPW^9`R=F5L?^FX`tc1DQC}fq!E|hER>O=mKb|i>hHVd`6t@O)vunus?5&^5{58{S z_NN%o3Wx1jNLzrYTxB||#7$?(^8)BC2Js$O-yQd27ydMPNg@DXE%I;?CE@~}-IS|o zC(6wG+}b*BMy;WaJ=Y!t(bo2q+Z<1;*;V#!q*T9nz*JRbRhX-JLJhpyI*2&mpBttO zsU1s|mR;uQi|3PHgK>dnN-Y?u@ctTWZQmJ~`Dgmm5T8_5`<2Y`2KcW^o1DQ@cP0nP 
zb6BeDZ*%8swj^_Fptimfhy!IJVt;+e-P7V%OM0A7CS`3u8WB@_R83%wO%vsYskCk3 z@yJY0O9n||YZ9*2bGeO94^IcbUi1T(!?x=&NzKZhePFLQOGv4ODHk)uyS;`CSPuGP zX0P;-Ly0-%Zy1tid)9RV9#iA)It_($u&CvpJf+%Jyu%3Wj?r>OM2VmIY7lT6SmJb1 z&IwQeeQ3)O(7L@n3M|IXS5lNh27>-KNL+Uo%81VxppQL$Q5A+W=FdOe z4kl}$iu*1T965}3)v!H#RkFV~%raY{0n*`bFtCUDX;PJ&GCjf>KBtyG)4D?vSGAf& zx95v>47h=DQqf=1Yqe>;!oD(j6a;0)rD!FZHi${6sy`zF)*L^+Pe_u+VA^g5DtL1qV`vLUgq=m1o0L0H&JJh+9_feM_OhWJErODOM5bL<(os5Hz^b%L0p+^;bX)I00AEYJ;h>VItyH4DIB6Vh77lVs z%I~SYcQxvbrg|!w=RC_xJIz?|naC>9NVh-+QjkIwxHXQLc9T6iBHrq~5aN1s=Y+YW zhNhH`RIaRt&R0Vys}`p?p?lm-M(KPxf0DhIcwO{pk#wguVo{Xc*F3+IRtR+Ple5hd zK}3~R?<2pM0jPO+^ze~xw^>ynukbc1UFevhD-#>g#4l?h?Ms`xuqgz7 zH0;XcE&swN@(!S#9cMw2HEqxjsD7GLT-IP&LGkoq2hygGhw0Nu7G}7X)Z-lW-=R>` zR~jaDUTtR&u%ympx(3>NLKW2liR@8!6Udw$IU>*6q60nRzgFneHJCmOF zB&rma7eq}j7d(z$C*L#a>KkiGmHNVc=-7G~dlJH$N0UA`c=y!%ZH^X4zAT>s@I$jz zib^iC8LcX*26~&gli?sihRB{)v?D}@5WVo_#}KBMM6!!SRHZkz@F4-N(Pm9*-KMQ1sW+J({373UrHHy4j9NLnIapT zycWj-w~6JG>&9~z%^SL+0bDOQKltgpm^PHM5yjMZSNfC8SOs+D8x62(Lmqdm@!(5E=q+ZD#K z@18B{b%thLpl-{K5aN+q{e&W*M&5N#(lDysa33uYB}iAegB(QW6^->d107VrnZlH*`zYDoO_Y|G&H(e|f=PtqxlqgTxVj zb3TKJ3Q7r7^6U8hb4WJk+f$N)3Vbx8QbKh98`}OB?^F>lY1Ko7B6Pjjp)8-t_0ekrGE16 zw(Yf)XVI!<836qwG!0$NFh#-qV#;@VF;+x`BdY3$lxG zT9bnzW%_wO&EcH~GMyTGM;Ql` zlpn{A=3pjeW}MI_DR5m&P8evyvmPg)+1i=KD0~115F)aq5)U`#h7sF>Vz$d1{^3D! 
zXqSf8O!TBcY4a~;*St30+$eM5nS5q*+=8X|o_qC7@@oBo4i=`Qh*b{`nna&EP9M5| zem(P+QB8)B{7Ue>1zpd5&KKuad|2q|=jDp_uT!;<(C1f%H)lWf+sT@YI4Bqf$UoCk zevQv>`GchL-&(()00H^M>CZHlf5!QB{P|lP{|4t*%l?Nl z^8Xg4>~Bzh(_H=;<*#kp`Wuu#(_a1==dW$T1^d5y`8N&be^CBla{QU&@hddtw@AOC z{IBhJqs9C&iT}|2Gn3#K2jsUzy&V?6v9tce2>EZVKZj}lI%WPXdXRtS9Q?Q5pMz_D z1#$kC&u`V%{~gl#-->@e6aQ-i+tB_RiSvi%Kb@KXE|LfPpHI;LK>4Ga{%k#d+2!Bz cf#Dylw2V09TNn`t2zKQL9pRsaA1 literal 0 HcmV?d00001 diff --git a/data-raw/data.R b/data-raw/data.R new file mode 100644 index 0000000..ecbd60d --- /dev/null +++ b/data-raw/data.R @@ -0,0 +1,66 @@ +library(readr) +library(here) +library(grantham) + +# Grantham distances' matrix: drop the first (row-label) CSV column and reuse the column names as row names. +grantham_distances_matrix <- + readr::read_csv( + file = here::here('data-raw', 'grantham_distance_matrix.csv'), + col_types = 'ciiiiiiiiiiiiiiiiiii', + col_select = -1 + ) %>% + as.matrix() %>% + `rownames<-`(., colnames(.)) + +# Sort the rows and columns by the order present in `amino_acids()`. This +# ordering should already be as in the return value of `amino_acids()`, but just +# in case... +grantham_distances_matrix <- + grantham_distances_matrix[amino_acids(), amino_acids()] + +# The values for the amino acid properties in "amino_acids_properties.csv" were +# directly obtained from Table 1 of Grantham (1974). +amino_acids_properties <- + readr::read_csv( + file = here::here('data-raw', 'amino_acids_properties.csv'), + col_types = 'cddd' + ) %>% # Next line just ensures that the order comes out the same as in `amino_acids()`. + dplyr::left_join(tibble::tibble(amino_acid = amino_acids()), ., by = 'amino_acid') + +# The number of amino acids (20). 
+n_amino_acids <- length(amino_acids()) + +mean_chemical_distance <- + with(amino_acids_properties, + c( + 'c' = mean(outer(c, c, function(x, y) abs(x - y))[grantham:::sltm_k(n_amino_acids)]), + 'p' = mean(outer(p, p, function(x, y) abs(x - y))[grantham:::sltm_k(n_amino_acids)]), + 'v' = mean(outer(v, v, function(x, y) abs(x - y))[grantham:::sltm_k(n_amino_acids)]) + ) + ) %>% + signif(digits = 4) %>% + round(digits = 3) + +# The mean weighting factors (as they are referred to in the caption of Table 1 +# of R. Grantham (1974)) are used as indicated in that caption. If we were to +# calculate them here from `mean_chemical_distance`, we would find that the +# alpha value (1.833) is off by a small percentage, 0.11% (the calculated +# value is 1.831). +# As the difference is relatively minor, we stick with the values reported in +# the original paper to avoid confusion. +mean_weighting_factors <- c('alpha' = 1.833, 'beta' = 0.1018, 'gamma' = 0.000399) + +# These variables end up in R/sysdata.rda +usethis::use_data( + amino_acids_properties, + grantham_distances_matrix, + mean_chemical_distance, + mean_weighting_factors, + internal = TRUE, + overwrite = TRUE +) + +# These end up in data/*.rda +usethis::use_data(amino_acids_properties, overwrite = TRUE) +usethis::use_data(grantham_distances_matrix, overwrite = TRUE) + diff --git a/data-raw/grantham_distance_matrix.csv b/data-raw/grantham_distance_matrix.csv new file mode 100644 index 0000000..2cba7cf --- /dev/null +++ b/data-raw/grantham_distance_matrix.csv @@ -0,0 +1,21 @@ +,Ser,Arg,Leu,Pro,Thr,Ala,Val,Gly,Ile,Phe,Tyr,Cys,His,Gln,Asn,Lys,Asp,Glu,Met,Trp +Ser,0,110,145,74,58,99,124,56,142,155,144,112,89,68,46,121,65,80,135,177 +Arg,110,0,102,103,71,112,96,125,97,97,77,180,29,43,86,26,96,54,91,101 +Leu,145,102,0,98,92,96,32,138,5,22,36,198,99,113,153,107,172,138,15,61 +Pro,74,103,98,0,38,27,68,42,95,114,110,169,77,76,91,103,108,93,87,147 +Thr,58,71,92,38,0,58,69,59,89,103,92,149,47,42,65,78,85,65,81,128 
+Ala,99,112,96,27,58,0,64,60,94,113,112,195,86,91,111,106,126,107,84,148 +Val,124,96,32,68,69,64,0,109,29,50,55,192,84,96,133,97,152,121,21,88 +Gly,56,125,138,42,59,60,109,0,135,153,147,159,98,87,80,127,94,98,127,184 +Ile,142,97,5,95,89,94,29,135,0,21,33,198,94,109,149,102,168,134,10,61 +Phe,155,97,22,114,103,113,50,153,21,0,22,205,100,116,158,102,177,140,28,40 +Tyr,144,77,36,110,92,112,55,147,33,22,0,194,83,99,143,85,160,122,36,37 +Cys,112,180,198,169,149,195,192,159,198,205,194,0,174,154,139,202,154,170,196,215 +His,89,29,99,77,47,86,84,98,94,100,83,174,0,24,68,32,81,40,87,115 +Gln,68,43,113,76,42,91,96,87,109,116,99,154,24,0,46,53,61,29,101,130 +Asn,46,86,153,91,65,111,133,80,149,158,143,139,68,46,0,94,23,42,142,174 +Lys,121,26,107,103,78,106,97,127,102,102,85,202,32,53,94,0,101,56,95,110 +Asp,65,96,172,108,85,126,152,94,168,177,160,154,81,61,23,101,0,45,160,181 +Glu,80,54,138,93,65,107,121,98,134,140,122,170,40,29,42,56,45,0,126,152 +Met,135,91,15,87,81,84,21,127,10,28,36,196,87,101,142,95,160,126,0,67 +Trp,177,101,61,147,128,148,88,184,61,40,37,215,115,130,174,110,181,152,67,0 diff --git a/data-raw/grantham_distance_matrix.ods b/data-raw/grantham_distance_matrix.ods new file mode 100644 index 0000000000000000000000000000000000000000..07acbd7368741d7833cd33fbfc88d52f8a05297e GIT binary patch literal 23335 zcmb4q1CVIH(&x;bnLD;^+qP}nwr$(CZQHhO+rG2+|F*twYpdS(wzjKsPLj$m>61>< zo$fkvl0d*n003YB02m-KiosUw5i|e*0RN=FRRGrJ*2YfmcEUuwsyugMz)5|*2XqYG>&!-#`;E%X2!-&a{pJKf4li_!up#Nw6!rYH+6RS zS8t9CwEBjI##Y9Ei?$B5|1%Qz-yqGc^-YZ(X$8!kto7|2{{tA~-+-NLZLRF|ZH%q{ z1Mc6kYv*8V>R{~XXrTX}*!_1SIy&h)IsZR!X#WGJ|9P~3hcdCU)ps)f|K-un*3Q}P zuRNXq!>s(f(f)z_A0ENL!2Y$I{(j7V6V%_FwZ4tHiLs*-jf0WNbmFv4ARS!LEmyGd z%o-mp2#5pOM7u?9A9IKr2<<}#!cI?2qKbS`#YOJYVT}mmZcJ0=wLaEZWJ6iM5|bf0 z@r0OJ^Eo9)Crx|tCdbBF3ngv__Ps@5aV|xNs|^LaZb~pUE0oo2V*#04#v_+(7eW6r zo4pv+NK*_LI8L;H`SXz%2K&qbxQVK~(`8t_Fk{N2JbQos^}(oK_hMiHm);#^%C?DbTSY5O+HBInb-hvov#6Sjr{4$0A>)PSC^Ye5GHusS#^eLKnP9q*K3RABqDsS!Xs!PQ7C|NTA9ERv0ArSXlKYlo#6W 
zB#db8SL6lOEyD+fDYPMOxCgr(M)0qwQM#);TXdnLkb|4m*+TR&{APT$YVvL`iLz$F zM6)riaJ&(?QjYR5#l#x<7{ICcX{Iw~O-q}({iPe{Ak+G)fg%w>!$9Sj(U^2he#j{S zQhMMLzg^?jqxDEksaJw5C{X5fl|!W*U2DMF{?&fC>-bUWce#q&d8BJjmvn{b86bLd zd@gt;5>#f)uvgE4CRJ={b#)a7pJX-_WiLazg#9Nkictn!s?f4&?cAZqYYsG)H-t$q zx!x-Im=$(|<@9^eqh0sm2wUIxp(YnJYG;deql_u{sdt)kPq4}E8Z-6hub@D|I823n z0{;pQ_&*8`;_te5baJ;ccKkaS>ZxQh*G`TBB?P^WZZobl1!THf+a^n${aSD-x*zjIPuy9A zD2ZI%PrN>E$XTDNleRQ!32;HE^n#gCrwnipF$VkO`&v{GF~Jx?3ukM0Uj2D^2JwSX zk-#cLt3uJpt7{!phEULOp1Ja< zAhn`l3jkM(AEjFcyq-4X23&B64zX1o7AxeGNp2XjOO<0K)<|Nled{;wto_;N72`1K zg#`s8qxbiw{Q;LFo#pT#{72ZlCJxhFjtdmHfhuUpV*)S(T4%F>jX~SgNyMcE3H8-h z3Mt%XjPc`>73t{-B;Vm4EafZB~*484;PSV>v9D;Y%4~c?p0?n{d9RZQi|t9 zOT$N7KN${*(|2HMI2Afp8LyyIZ?ZpC-6jVu*_1H)p5J7FE4oet6JQOY$=7$)fyhC7 zzG!4RpZDTdWJpPMztX7!%CUljPJ?xKKJ^BJAoAYqU*+90D_MHQ9cTrMHapc53oyZf z#ET8@|8_8VK!?hFi7?zURJ69}^W(&~e?f!XK^RKc3xE0(QcHhMYzoU7k|p2b-xHQ{ z=wf-~6=wg7wP}lveY!Zi#2*+;@q*J0kX?$MYK1>v0@P7_iDpuUylG(|0*^#sKx>z5 zVB9g`_HDuSz9p-^k+c?}C67*)l}@>ljTK0Y8EHJ(9e4Nhlj4^$C*5s?e?Pl6%!~WA ziA1r@LF#3Tg*RyNTT8ZR@(M&f$)`tUId6@06vvpZ&RVCEd!vQW(1IiOhbLZb*MykO zhI+-3K>`A~l6z(_hf{}L6YAUY#^LKJ;2EkSbLsASKb|s z8D7^J-Y&K+=qFA#Z{MrK7u;>>;ErtW*ylNX_fKAxyOIFx(j;@@8{YOvSwyXbL2NU! 
z&`*(;ZO<=Jy#gMv>jE&v6hVwCGraZ_lF@4R@{&sS>g0Ph+oHj7NoD&Sy`-U?#z(oI zC)N)QIMb6w2m`kvydDFo6Wx$2pI!-wkvHKboTNNjxeV6{m`^!3p;4!4{jWT~Wv7ZK zMizrmdh|u0^#nEB_I;5C9HCtKP=z-gfp+-Oio0F(NZ1AK3}>-uLL&<=De{p0NL1|a zr?g_XXx4;Vg(Ib+q-a|x^2hga@ptqbE=eYALU8`F#~M#5fj?NXoLj~baLiFb!|_|)5|j%A*Q%q2+C)RjLEfDgqfn{!YJ?9c`x_{ zKH#M-Q2ZkX#G>was+|tnhG@66>M_I@>vG@bCJ`_C+Q0F#q)ul|hj6$};z})I4uocz zK;6S_V`|cpK8Quwz)w$2U+9pYG?gX~)hiQ4jLu1^7%Q4W0KN92nf{VelV^~p5xTQ`qX_Z+sDNSnS1O|S;- zd*J-!6(_0|yncSA7Qmon36WyHR)-fn_hPo)x%9+!Mtr4^vjaL8>4gI?m4}lKJe$%D z!KdrhSJTM%pTy-dt$QGVLOp3^=%9?wc;VKzVISew?&~twoH=y{FG!F`>Ya@YGg zv2;z2arpsP=XIE6r6X}8UF?CC)8_ln;cUq2`}vJ6>$@FVr~};bK2fr|age*pxo_>$ zYB~l|vC@aC+%!dc(mEcjQq^zuI0@#m0e+n^tNZJ{Z`MHc?GyL1B*kr)5oKj4DMyZM z9og*5F0Ay}AI3{p5n^2mM|lq=5&4GdCTsN=c80<{h^SLFnV+7w17odX&0e^k+sww+ z$b!KxDczIndFZrP{zbBPg)xSjO+=zVu2oz#cP1u-ZE9m3C>7+Y)@$KmPB-L}TG-d! z!Fnkm?a+z)^N*)wlmuUqQcXetB(ftNHdh$f|dNL+s0!x!*@|xj>*8zJu zl_kJTWF6aR4;=a4W#Q(rqF|isQlT1EgyjAt4M}a4$>WdHW;!|E+Q}N<%v1NaQbTP< zs06$9wF~C>qmz~3-MEY!VrFVXYPQIO@~U4H)g4zMQN;Eyqc-m~yER9d5=X+sBXqm; zK^^vrok(umpiaQ#w*4s$BAD6C8vTt zDuW|;r{^-r8kG9)T9)m_L8A=eW*A(o3ePIutGc!qw`Ydm-C0BY%sce3+>iXdS>Nt$ zy1Le73->Pi^7`ngM#5`JJ7#Ly`5W^rn^romH7DP#zfrShwtrzVHB&gg>y^<5&_fBB z3o%G7ZA&v^r^MD~>@t_OW`C;9R;Np8dBZYDOVXl87MN#s?YR7bM{55#&2*R^oLLrG zc6K);V&v}9=&v-dS`rB-s1e|1n&n5IE!y0g-Iy&-;&jvSY!3BTRUUt0#Nry#Pp|Y+ z)BZ;BO1z($aaW94U?<*Y*APb0R%Aoy29okP+zQbfu%@1u6W}r;ef0>o*Zp}Qh zc(*L}D7KoEB=Y=}qwIK^l+o)pQMj5sv)oyRr(j54*_j#Q)IBGgkRHO6RAmtXS)<|% zb4bOIuqRHUaSj_OWKNRF;5?e*P-IC4-7Rn-tx9;yF%)+j7?^Fv*uO-XGo3Z?MRrrX76pKa1jUqcOwnorzYI97%Y+jtRXUNxO`v zylLR;iE#2wvcT`;C9u907~)D{XUJ)fk1l=L)z=imZ?kp)&~YJ*Wy^eCl@7v2`BMwP zyOyp+IdJ(Y(GoxBfk-txooqql9B^MO?{ zoV%0X-|hEehPi|NbL4or`U#-QrialxHCCm&N!kgmZefu8gte3O^m{JmP;J^czdvZg zeRhxWcb~&Ab0La`uf~`R1lMZw+6tWu%`Ymg%Kgd_ts=|D{C& z4i~@%9AFN{wSn}MR-zYM;^!d+F;*-yWqpV zZArFQ)-Z3E7cpxiWcZ3Hp8$nSbI5q$K%<;Gbj)uMILSPtVYY~D|FTI|v)rpCIiif> z8doWKI&fPQ8QTMP-l@D-HyMW}kLdx|>|qcztStqiW_YOtP&^Y{tb^(Mh@qS`mW7+U 
z*y2FQG`wM4?N7+{pl$M4wp8z&!+4vYU3S;Z}I7`t5`OXAIz6;K1rIy)dxg zQN4b`10Wi=x#IX+hImj;h=EnpyDn53`0NfX(5t5B`@rLSEmu=L(QzK>$LDmWTv5yJ zQDaVE&+-L;(9veb`fyAj&!4kg7zS%Xs#m62_dgpl0eYYKXgC7-_OT}2m-Ohj9!(>! zZCH|K)vxPPsb`PcO$YEom(6<&x`E%c^COb{PJl>OD~cLFmkwHZ;bQ=)kW^cP{e|az zv67%2LP-}nkP_d;!A3NkM#xw$U#l-dz_^V6aGwVjQ$V;eM^!QdIy&ZL-o1Owu#2c| z*Up6%hN9JPJOENVcLpX9+9vMM2KBb~#y3wKp_^(M8zRHPS2QcUQ&|1E)4sRJlJH}_ zy0}-xq?JG7Bn{%S{5#j#A2y@EMTkvoNWjQV0?DmRLNZ1#_YANAz(Ja?4wPpN9xgjE z%q@(hO=iB&lw5B=(6txu24NUZA@F-yS%PiYXOka(954 zkhA4s?`uUb2tvj)FxFfIw7QQQv0rq?&XNF)%D|t&wSmHRatQt6PfD*E=f53Odf%uUEOC z*e(7mMnsvtGBs%OwZ8um5F!m4}TKQ2zlNHk@>cO!5xnUq5R%(7`JEoJb zf?bMzfA$po7x`;;4M>h4UQ~fOp#oUn^d=eD90}N%VK^?(y-v|KG~vB~pBnjDEIz8tMae2JaKnC>{xs+tW`||rFu!$g=ZA*x~J_x#qX*q&m?2(+!6c9 z?dK#Aa3%s?WQB0K3Z>Qb{J=m)X{2AYYB(>=FJV%3Jvi~XLLqV3{FVty1{&HPjyZz<)-`hJO+?N##Lg*VTuBhH`O?d{;)!J#y{5`2}RTj%%D@X5tR z%{U4mdS{A1^pC*1`4qOu$jX2g?=-rX3?yDIG%XG5_yWQts^e>ptTB zW6dxPaHSWeGf1e9$rc3q<`$h~u#jzjCy8rz&V4?U<@exqm&&uNCL_v>Hgn9AI4O8;qYHeb8Iw ze_E$oIRg!iP^$&ocsSItqWE>Ng@`NFv&dhen>a3<1BH;xqzgdumOz$ULgGyWN^sZD zwY3w)@>WPI;eXmELdp_kTI~;t7icKeClhq0`B8@=OeZ&_kw`l%=DR~mLvm;Aqb3Q#_JW<|h2xuw^OAvNcVg)zVCdw6;pT+RF#>$e6UO5dvWYNH z2M>|p*>t|jnOam5OS~A)`=<}Oy1Wlm=5FaSAPfl@RSAwJ9x!$C(qClv(85&8kyoJ$NYC)33uzQJ=w#`h`0&JjIba9mmZ0^IE^E2hdpg2Y21lVKM!q3h( z_&?W=G|{|Dk-zR@;OqZ%{rJb9>}c%dWNu^nFVEVwhP>kj8=OzCZu~tbew-Et7dW;+ z=0GwF6v?@^F2{flAENqYyb;v*bF$SKQOq_%U7E{D4#MU_tL6Eot*bH3E{&i|697K> z17)eb*8vvbD}A1q%kfsUtp)1L=8JABTebr37QiK#6?4-MvuZ}i{-`A%>1x7 zwmCi1oZ`StQc~0Bs}Fl8kLGH_%T_6D&7U75bD8WH6Ks~+-5#x8$(K?QpGD;hGQ*vb>6X-}Th90?BnIe5MiH1^C;3*__AB+6262X$ZY2Z}vFsbuUUS|+(i zg0m6f%6eWe*fT4gGmp!wf_7>zY9jgQ2Y?i~BWZqxF7b0LX|(!{5q+@_{u#;gAQQhu z7B91|2}PicM@OlrfUk-9Q$>>iJ7&`TRzC^hHrQQ(0&NW&{&5>Bq6mD63MZj2&9?4N z>ublOpNP0jPJvgsx!W;heVw7Qk08J!SECpM*o8p3rPkn_N1sLk<=@weKh%4!Uc#tx z1+iec$yBo|^q*K>hql84E&kZaIC*`!}iq*l_t!9JY?0fM5m7o zSGVYh80j276`vll`@7jmaDk^O-+|)+_hmH0(OjTlZZ3y|^q?f2D=DMN2aNz24{9f@ zq`?!3M<8r?rd5n%7n>Tau}Z;9i3*dAQ@WF9QODMT!WIe&+*d5VW$(SoZJ@L+4=(h{ 
z`jkjH&Ge zodbe9)fx?lv4YBJcv9uP9$QyA`GW6IWZ@;^95MP3iztk|I&t?)N#{R5aI-3CNR)C1 zRMB$F6wt?nDaDa0BmZX5`<77`G?^X%DwsI!Le=CoM5H2%FW)nN8uRuY8ObtXrL8%n( zWS*2`)iTlKAAS!E0*|-zagk<+r!4)GX{3=^9B`ddfGAu(73L}w4+LzM+&r+q@D3nO zw_qQkkw|UjK&rAvJQa&BUre;Yn7t-Jh@;-;h7*b?L-v{YvCur>wSqcK%A+|piD4*4 z0LS`kP++`5FTT@}Agq=;T|>f;DPHNlea7!0w3BQOiHi4>_nRBjS#aRK6I%Ea^q=F% zT@v<>GB5x@F#dlXNBGBP?g5hgw* z8Zi?w7AhSk1Qj6$9X=8{IW-v>6Acq5D>WGdEdvJ~6&njH1&as;uM!KVAh)0dyNHqi z6S*)Ks{}u%3O|*S2&cXjhnT3ih@_IRyq1)lvZSJxvXrR2jG~6Tq^gRVq?)Oew!Mm` zk&=#`w1KmxzJ-Q~i=K>#iK>*oih`c1nxTfWftH4awvw5Snw_?^y|%obj-jQ#hKqrW zv$2MmshP2*ldYYNrGuNZrLmojy}P}Ii;Ii4jlYgVsG+sLnM;VdSG==Vu$@o5jeoMc ze}rpjnr~c=TU5SJe6hccRc_R#9wTWpqJH zT7G$QXSmG!h+(8qOz)r;)3e(!m6sOqN=9yhK{DH zqNe()uKJ?R#>(Edn#RA>+|kt5+1cFC)z;YA*^yq|pVcr@(mGn!GvC}j)X~>f+c#U) zztlc3(KxhNKeFC3x!pN3-!;C}F}2wET%} z5ql?+wK#0OikPXLh3xF=FZ$1drXjE|a{rx4hPZrna_sxsuijH#DJX-7X;`0BgxS zuN8`J`Z3OvP9Mc~+6@{4t7B(}2OP}I7b9QwY_;0CYLj&^ZrHQ$U2lcBX7M4FKjAh(2puiMfCpd4OU|sp5hk9cT!l3w%;uv^13}&x16N)@fLiniaNL? 
za`)Y}%nGyRN*XSh$eR7Ufr(|1((DRzm4?7_LRLCF6O)X>fqAR02EMo z5HA@B-ra)A6c0}p8U$0t&z<^e33!YUa2v<<3n3*KC!GTc?)Rx5BiKe)Nwtw5hiAMQ zm#l~kv5~gL8CVo^R|mB6Si(Q*Hxmlr*t4;q*%U4C-m~s}!a9K~<2oP3 z;RUHELrhq(8I(F8`FOB8ias8u%n8&xJxWCQO9`;YF7+DCtuQU(baKJYuY9i`*>t+% zKG=k-xcV#Zl-M%gZbdi(T7wZrRp{;8>>cnk9=^^;gzU}M??N<1Zk$G(STOXs9Q^c5 z*VyFYA8=wgNEkT1t$1Z8+jWm*wJ>@1;_WXg1YjId%-}L4*1Z&k#6WKtvA-R${rwq4 zEfG9ZDCwq6Z*fe-RdNjJb;++W_+nS8Inu+*l^8e%ngS3;jBTlzmWR9mS;%!ANU;_W zketiGr#!=i}A^;-5Y19qQiGwJ>kQ6Ehw8eNI z{S;DqYq?6;9QTGsg8A7Cb`m|0kG`|R1yffwuHZ|Jbn4mkS73bRO1XrxWW2FENsci# zSF!!imxJ3wi#xh%l-H_Qlty0BV60yLbYZRQX}kB>cGVKku*Po>nM@}n=b^dao0Jy*eT~2JXLv}p@SQA54k~{YCH7|B} z_eELqr)J#e=WLGD#Rh%w8!rm!5W`?Rrx15Q#Dc47h-b{Avon={3#cKQ+yfYfG$VP0 zP!k^vir5NXKF@bq7gGoxkA;uOVNNjG!75XF7qvTR_N>kFIGxXfvEqa%2`itsD}6wm z$Gr(+U+n6vVx~l2_+53s6o}g0VAZAvWaaBk#No(+yL!64a+JxlFW=-cp$`%G(h!bZ zV4?^!Oy4!474MN%T;g2f#!9m)ND67$$FC6E*X*6^jGz+M1V7m33K6oG8=3eE#I$i4zD5^0%wb<*QX@KKCyV_vYe52q-PED z2FL3py^}ypW=?7{;hcL#^OiaSic*E-dcdb>$2J7#z5@MpY!`GN{Z{bryp1Tkl{}*~*dYCwOL4@x4Z>tT zV53ah$`NMm1dVz8qX>Q(C)zK{`l)9Jv-Ba``%Cdyp{e*%C7POc-`U(!42LX&=#?NM z;5!VbSUd(D!{pl0ePS!kFijmPJAE4=`>$b2T0HVrPp0_G1VIET*eN94?eeWJ;V_R* zSna4dfp#d_su!kI0eEB*>`EGz#SXsVY z58Lr%DS7sQC~G|G`pi|ume&XLkppR^jhM8xuQE*p{$@kQlrYifhP0Y07dRi zJjrZccD;$2e#lt*NYuj3xU@kFx_tF?kt|Kkv+3~oz8UvET-F+E*`Xrq zb|5HVv{9+ef47KQ{mKArN~J;KmAi$o#*O)p8I!)ERY{$$^3lUqI=ZLH|Q zRzDa+kZ3{4TfB#-@$>%fkiP+soTz5e5y2#$)labA3}H3ikC=YTu16Q)Gr3Gwz1IbI z`H-M8B({62<^|lj^vq4;!-0m{Wu^uUBnD432w_UPhZ*>ce-ovFTwvZ{$*|oM&{_Bk z0_`QZvkeroHEeMpuRBpm0IBLyf3{g@6x8d)cLcL+(zTqp^{#v(kPlM^1;kG}0 zY1|NI+Ef$)MoVqq^R*^I!AXoj;35@-f-^)l8s}$0uTnz+3e*z18*3v$ZMbYRauMp zJ8ps=xf6^2Alu`U2g zTe4T7{nT4BmkFy~)be2>{*Fi^dWGyLQsuWIW_GXn?SRooVMvtV_RNxjwN+>ceA^MBC0wc8)$oZv_38TZi^0tYkb`^HU7k1l zbx($W11{@r<|sQO%!!(tAo;u*8 zI62UH*k}~c0FP1?%7!}ed-@$+T|%5BYGMqXqfi%N%0G>DQbyM?T~_sFa@e-f=|@Sr zN2seQ>`;zr^oTdwI&6HrTNgqETcb_$Cc`i%T^1|vU`2dw=KABWdTzQ6Fa&h1B3$N0 
z0xJcXOm7P%VKQjKGRprTQfoZ1B!cWP-s|%bGG%{aNUwUXfBI!X_pV*u>iB61oh>vbb5lGhX;6D`JvoNoH#0qo`NR&t0W)|9pqE~m#f_3^%}XpV`5-h7yA<&L2cNu+i=8Ut96Px>)-fecdJumc!FT0F^XZgv@xf^!}{x4{x2>{AwjY2HR&;WRnPhay1Q8 zTipC)fiHddK52P?^-^5KEpi#`u*Rj%n>%S!lsLdQ1}GJsLsa?$Q|aVZ#%`Jkg6h%%xcLC5Dw(kR zREa%|l=Jd?bg_fOHOLmB?!jE!<{MZgl$p)dM3wz+ZCFoL(~F!?j=trNHW{~tsQgOh z8jI|b9?eEidKmJ&5yU9Hv+va4(5^rR@Nq;*c11=SGNc#cJs6+jSDCA&!0)O{hO;^FI1A>xCwy=2sB*V=C76;>pybX za8E8M>#$H1?cTGH6u$M4N05dFM~Y_zN)hA8G7)V;qvc(BIeKLlk< zoxmg7{V3H`WNz&E!AXlAZwvp}v?r2Sq{}DtXHE1#aP#RmhYSTbde;GV zi~&t@o?Np~feDRFnYtxM)0}@dT8}`K3?^fsVY+!Ws~iqbebPEqjEpqm9Ti10MoV(FNe`Ie!L7JaRjmCwk-K2_mwYV~J0Z#{$S$i<3Jv$d!dbyuUDNYI5lF@r ztZQ@rrk{zYb+Ti=(k@cr`2#)!-Xz`X<`{F8EY>fc z5+!jq_wL6>@`CL=&X;x%nyz$YuBPdqEh=(tUAQYc)p?t#a# zxT}spT+r*3bi~m`t)W=-wF)5-y}C}6ErjS_Dmpd_rB&)uhK%#Gq$yU5rb+E@Ixr0x z6^cfaCc&9|bTOKF;)-?thJ;sgJ2TN!Zwubz|+bif$z*jzmQJC>+NJDkdnlH z9-mVE9O0cU4d^}wP3mHh5hzN?XErMQ@LF^&3?4apekRx*E+f4<7S%J+1?Sm}imS}5 z2+3Hke{E5p+1V^2geBq$hbVpIgs(p~Rknlp<+=TlojdD8W3&VRGrA%I8Esc+^Ej~q ze~UklYZXU2%ibU;&E3^j?am32q+=sNM5TqbX}=#|BlBpxf~q=A)Kk+l6ZB@wXNes5 z%l=)m)gE_+1@e$C?5#LTQaYUelxqq37juMDPGHC@bnOgK1zVy=dI3U6pEp=*`#B_C z0HZ&hK2wzG0)fj+m!(6H4>)cRghv>C`dxww#yK9V?{RM&h)tbe0ToLJ4$PhvzLC6= zAZPTW5{naIC_QUmF({w+V;;;x3A*L>fR+b|a3{^;FK6w0%Cr2*w(#lNB?(BjsE~7= z4k@Sn_YE@bpz1vR2ns{#-&1{|E=_FH10MR(igMu+6lT6Q&B-V$+3o<;r>R-#J$TDI z3~XZZr&dkoHqz&bBbux%w>*$Hk&t%hi!vfUsihy3im!D;+Q;ngKskkVCPacK?I&*I zPcB>JX4*?-7)ypw*E<>Jr9C4NrIF*)Yx#B07Dfp!<~pwd7{sX8n}#*v?~h-Pw>U+aD)bYSJdQcrDximX zhrzd+U`n;~d`qQf-L~|3@M`}I1w(z&~JRjECXsZV2RM@ zTIq?bsiFNkQv2k2t7eTtkym>FjRlt!I6-$L3l9;GSOI#{?`s{y*z`8~1w-0sD;-F= zkQ~Hujn?z-Rv55|IEisY?!V2?SA|PFuoq6=rTIuzbLkVH%Z&E$IdXHq zhk@mF#1melh;H{U@sGi=iP+a=5(M8;eDF4&uM-thO@Rhp|LR^+r&%hkQ9|grt}fb) zJVcr{U)KTKnGI5*7{e1lI#=OTd!+U>lGiVU?C4*eh+8i6*Ye{-+MT|mLm)6Q{;wh? 
zC0gRW4R|D%2u-%PU-*QjgLRseXJz0SIv1(-#6ct4R@D3S&X(p{oh>4!B+cT?X>2{@ zqMgv4zf%N9H|UYmA`@3oPfZ>Je7hO&*Ho?tp8m2In4=S2+h2`%D>Z1pFZr@{-T;2mkfL zWkqT!SDqTT$~@pcmwq5o`(gcXUjU?S+MmI;L*yq>tHJ!o>hP=VA+}@u-t1|QJ|h^M zy|?GL%Ud+Pq$@@J@oe9#htQe86R0#>A^=GCtu zaQhe|H^(>c26jipx7l}nHx_r#KDN`(`z|h;f#!jF#}~jydUJH13%*#f&he8tQ1t@$ zDz_QZ`Q{Z?X7CNjE{3;jL%YD{W%HW#_ch?;D=qBOTPu9hm*#Ja@6x>5L^M|EjPeFi z74~Z0i?EwqoXxPyuC4m>Lj%%hOUA(s1^kg!lWhRP>w7J+_EwnBn5kk*qhnH%+q=?Y zj%39tPKC2~IMh)qr5_-px_g+#*E#Q_Z&)kay%`&_;FBy@f^Y1OhxOLhDSwU?Y;8e18}9gR#170<$P&5aFiBHz#i7|zB!cH)e4eA4mqaoe+SHz%I8Ph`t?8jdr1yO< ztq-ily94{IEpcv$OQF{JFDMZ&q+wGHng>s-di!_){u7B$Fr&jgxHhL?&?G~Y*^hyF zVXziB$us+jR$>`W``y4mPU_M-)S85L!YctSk4F5e#V0ZFt;`fBf-5_r(kD6=g}jQC zsjx%*g+!SwXmZ|t<;7dpYA(mku8XAanypHyM$!OS<%-Bd*PD$|+1{AAhx@_)+qCA- zdZjQ|qUHQ${ZIGmLe2DTTFxReh)S3d0{N8_7np*|@1aT>&g(ExCDv92nLz6?73b%8 zpl6%&Kas7KA$mm+s2MG)r2g>3F|JCnO^$y< z#?TS@^QIR49H4V0W|X;~5SrK>7fc@ppTvd~8TwKJ**HbCuJ%x#>^my;<{{gr2dJf; zV<0R<*sy4KYi++Wa7xd${M$q;m$VrK7Tp@qGS7|A3hX}-w|9=|j%T8m-1J}EaQl9` z=>P1^r%@Q0vC4vrR`A?dHf(NAp3^+88aZTrZe(-7+V72s6 zqHqNto#M`G5(~B*sM_#-v!nVlX}Xo4icX5|8(zw0n7ifx5;(y4w#O9qX}u{5!z}2> zNSQQVc}8Cr7egFBaqV?ss*=8!yBp_9gD_g)(1Y50|6tm(3V9Mthj~iBy&yzw)-@{Y>;$oQy`wa^guY2bw!Ca(B zs;6qMRMCZ`Y!zKs{5rw={xV*U`TA?%l@@SmReJEy>n!y%A7Uny*52D$a3(YpCAm)K zw9w`j>E|oKmJBT9*kV1P$Nh4FP_E7_LOzabngy!?)WGch6oX>IGG}9jBrrXW)u!-R zXLBZh3!ucG29}a-M5}ssAj->TVP0`tEt?0r26sizmZaiAdp~mWS>5(A{q${VYTTT+>ZjM|+>aCCY$% zA940#-iaDUOkl$!g}){Zu+Wn(@{HQ^WlZD41*Ekm2t}{2Ej3kWi}b!#gesJ`U3UDK zsYr7~ZcaKTd2Fc`i{Mk$O)pce2w?nKdJ|*?zavhO5N)dse6f|ZxtT_~4A|&Ye3pXE z0vf^0c?(#|E$U#nbn`nq_OH~MEDM<{9j7rl_>)N+i*2`e&s>4}{!;lmrQ`xL=N|-J z7ezHse%#u*s)JgCXJ&-9Mvh7*ZyI>BGiN>B?au|17uNKPcYgfN%`7sZeAZj^=stFkFY8%XKX_TJ= zw~yz9jye7h2wJ+CxHvkcWW2`nwqM?YAD*!0lI;T-!_;hhX_zEvf)Uo*IFqsF zZ0N`J(!PgJaelvqAb%6_`qvdk`y$YYgmo!QzIi z>`!oF7h^zWP--LOj3nsw%XSk}^tVXl9!!N^DJ&kH;mg!3Kemil=Yw)C3%le>e^>E} z-%M)fvoURwFG;Zhq%x-bWGGS%j^s*dnDs1B4b(}Z4Qy0lW@BHass*VvU9qe=sYf8n 
z`K0r-U@rP82o)Q#Y9$*hJ>(A&4hn9|aODK^={$KTo-7SHG-kAQN30S~jK5FVxAXk| z%2NYDUqL+8io|_$7_+T)I;e$~hfzzFEO9Q#t2g6K+-(`x3bj(ZijM^uCAwgZAmNYj zXje?4&5q}qBOPaA6Gr$jb@Un`{*qhcSj}FB|GSB&&7)K6kSnww12I+_&UtA|jMI2u zNgDcKiOA>|9fKG~loi~ff=CN15n^j3){B=qgyNrrnPt>b7T`>TV3OggZO}y+S`{|K zj7%X;dd~_N0#!2tXy5t9;qdTjwi&f`>-=-h^X&CKd+nKL?Q{0o>r=~xf7;M`Ex^1!f2E%S*YSBohiAW+ z>|7`C;!FY^10!ll1y{T2s`+5B1A<>sDl~MM)lih$Aj+;v=t=X^_Z_iH3Ok`EN(&=7 zn9`;^GbukmFOg`b-tyRR;g~JIv&^WQv`t@(2Yn|xED+0nMNp!Hl7dCbzJ%GN@yF`> zA>$Cut2o`Ed3|wUQyHXi;I;G4j0?^X9Jy>K3MtQbAMdlk)6HTQMr)Cw^C8j1rn4x6EBTe z)q-o*-POgo7VYRe^OP&G4*d`okJXIqwQM{<1|gh&AhqCF&MKiuAJ>Rh4pn@(^Vym> z{qT{sfRIGweW!iyla-5p*G9e2k1pfP0NF$cc8=RZ!)W)CX))_?m_+o07{nv*ngACL zO3b>DWiyd9{BYvn{i``mV8;`vfmY>OkFM-maAnUl?3}bXl871$jzLK%&qOTb$~kd8 zSZ2h)GxyLTDPTA}aNxk%*%o@#>y+!5JQQR7;1LO>LPeru{r#n6qf&tG0b8u19GGFo zVR@Y?D9S<9ZBtoZ~g^Ow;O>KZkZLQBFmat!DYWGQUq+RadPpKmTEvrUhir7#j{FxW%b$%MyLiYV@hExbFMDIjrg+kV=AGgl*c zZHor$84&0*@aSPAYkqkdT*%Yrj#yvm)PO5ap7a}$?~&R87rG^SZ#^U{>Bb*WzOHDZ z(i|$#h${4-9u8zFPXuZ~P42x9*?U)=0Dx#NPMO|O0M+51>sGRtHs`3R+SvQfwlV>P z@}2@%no8-!@Aj6UKu{nz)ZjO7PbrE;2PDvg*q=YLq-4zDRNGT12GAv&HKo*ik4!a1 zB=bLeUTVYQjLVCj=metOoyH>n2Wp578_l_GkqAC=)el7eSY7^*9vOejGg&80>lu5hhbmihV0J;&MT0W_t4J zk)0yupvc;!Keb)| zq-SSZJca&s!{VIohPB(4&8B9+#?F^QK{%GYa~6ossbN=vm6276GW1L4#?yj_@gK!y z!p%?t;l7*k^IuW|w~1TNA_mHKXEgP4$SF!y2fDy}xt_Iv5|+O8ot&N(t*M?T6mPHv z|8TpKWx6Nu4VNnc&4Xiwn*O?Gq!p2&db(*cvXar+3Oh3nB7G`yX?8!p1ueMjQ@ONM*V(m+w;xYQE*JhLe37=I66DV3y&39D$2 zy=|Ih=Ycp@M@|o9C$)C;&7Is`FIr8|(qR}XI(Kih%M{d4`}#SbZTVn@CShqlI)S+n zh*6~YR3R)~V0{p`y6_>--K3eLi9?5HO7FMS0OoJ@Kh)4TuUWq&op0l-yGiYNzfCRy zr^a$IPBy-;Q`PL`g_Fkj;vR=8OLF?|+r;ve)9MAC%S>hjmG#G^9s(9*cq|@XZx;wL zS-yW~lv@)!W{qY4U}1#17T{}&GU`V#G1^S}{32|X&eH@HAvSD!LdVv8P+T7#e8a~o z#x+5klOrgs+HA4&ePteU_;&SkOBCF$J_uJgbav-n$W=;v-A%m8n5BdgHe(zduDJ`6 z*c>YQ*Eg?jsUI2V(X99~^cva39jyNP@_7b7>|<8Zv}Z*2P4!3vq8O|K^-`X*O=v-1 zzMIffa;ea=`y?lyR541Q>6Q!?7Esg%n8SU%N_tm8lct(?CBs>d`AYn!*v6^2d%$8k zVmUInA+@e}>E&1cP0<-5ViadAdc#%((d)eNm|vkHo4w&FFEvDqFfNFAL>M`*S4>ph 
zc!+8RYulmCgwBs=UK}rQ+a)sh;H8sj`xyD2tY7!Cc-&*F7_e52s`q5ja@nU6&wifG zFZDcU2Ym`Bh(o%5im1{jUMD=%e18Rzzwy!XZC*=}m)yM`AOcp^5v6LF0^_nCplv%7 zD(jBRl&gPWIe??{#fDN1N4`ChP32L%cd(@qMpthx(xDY|*&fxK| zi)jLfFCKU^DXUuSy9nzltdN~Dq$j{#Nd<1B(gldC-5Y{6$V-DAKYy)Eok1fB9&B+j~t&5 zsJis)oXkv*6GFPL_YhsV_-m601=L`Zx;53LgDd#;5MI5g=;%(0iQ0a@uI6QZ`MW2E zLc1uj{DB{ythFN|O}2cNF|WC#bVp&Kjg`m9g{wb=OSij2e{6`Kr4YhUmcNWo(Nmrm z2kB=44kroB*)y$@c(3N(XB(S^Zg`#f*;!qAtQE5=12|)70)`IPKX?k+^5|sVeJ7QS z1M`4`O>}p%eCKwIlk>7?brJ4(=%9)YJxo%rCTH@LWx5s%%o);8`clcmd=u(?I8^zg zp^tLM=}ELQVt2q3>xAjkUXMbbgkURKbu8;srlC zGBF-h_fMQ<^5-7E8hPLTog4vtvscE*NG4Z8^WAaXgbCyYJdT;Wo>UiiVq>|_+JXov zA5nO)*U)NUlaZ9RQN}GiD1y8-HLn)@rLRgJez0Ty zcr>gNtN5sC*oU(-)o`SSlH}b)V%KRNS1@PG;dk37sZ4<8;41&N-CiI?nl>=!LyjOr zV#7O0_h_N;HD5cpv+kCl#wAL1Gz&S=WH^xPVENNKAc?qr* zSB)@5Jd=bl2NIJPman9ac$E~UGEz@Fz)l#}XTs+24I&YS)*+!DeSl}Qh*?A;Rnxck z#6eg`_aTgAK>}t(l5R%AFAI z#h={tnvgF7go9p<7j<9f=P4ux$uikdG=D=q8k$-@@YU8)AUEe>DQ{+ zWq*!`)u&K`lz}6iU9x#*SEI8}kA((cK3I+lIac+E{ej(Jp4yKxAS>qjXoS5k*3 zoMM!GoB*M;Lxf@-QHMeSx?A}REwigF^+Z3NkX>j@DjdE@y_2G~}+6 zY7{8KQmeo)elHm;#8>)=-yO&6&3G}82NdpKu|GZ1Bk&w=mBd4UCh!JTlAEB_80;AK zY;LI8ro*5#LPvy`uil0|Y?@&zENr#k5yC*yQ^h}XO-Hl3mEubMwnrwqlyp+zG zt^~x^c4jN_j}9)|NOHKFH+2^SwtGRTJ?x#Da1qFNh)p63lL!OM(ZV;E)Tqg>8Z<~( z`PZ;s_%#4477i+B{G@2rv#Nse91nmclaN-dg~-*+udvj<-6{5yb6!zC6~95+vXBbEZqm|wjU6X16d6rPjW z^)7AV%sT1@V0315jMdmo*A8QVptYb{sGlI5raC%5AupEpB`3mmEqcLYWB7FI3rOhO z0s1<1&%oIad*zQnT17U2HHoh5F}v~lC+OR*{M}a1wl|WV@-I#l+zHwxJe$@V*mvq1 zz0{YVq!3=-j-RMwa-vmp>FM-d)=?+(<~+J4KFl|{;EQ(i)}!-U+2pyl6fIx-2yuCk3 z*FqBGsyW}Ci-O!6=5TflT30yYq0qj>bFKy&kGXwhGh^)|XbTjIi?#}B#S;wE+&0$M zCP%zk4b|Gw)uz2D`O@WWgss>5ul)y98kwVFct^8xx@P9^mx(i7%wr@1gM;>Hu?u~z ziRh(wUNgg%kh>527;s{leAEgta9zEV^C0eRfg9F^$#u|hMRQEBMr}z9!?>9;Z^>&l zrKNjE2~b)#g+UyXRr?2qNj3#A$C!TV!3(R$GVaJN-!;qeWf3)O71B!;5?jo@$FSw2 zOD^D3H%dhAnBWsLLAG6|v=~L7oVy*wr4JX*a$DFPH!kYS&$5;+I)7G0(bP)dUojc$U+VufNJ>?$OI5lNkh3Oms|m@ik6IRP+0RAW{1?*#Tgc z#q(r{eZRi6o%`xf{ReAzeMEZ{@01Ab6Eep;e)>LR5*rZ%;toMHcCyigH>54=)zKSL 
z$LG2t>b37PM+hyG+ePfs_~pj0QkeRTy>NsCZKF#KjGY&zHAm*f5zs%*iA)lb_t3xu*L&ZjG@M1m5x9aUOY<0RtSe6COBAFyfM^Yr>i3SV?(rzvo)Gj@jxE5-~T zR^fTCeRglfPo7)QYBkFxZMpMuD@B2&^(SKTJ>dRsrW(C;BIj2iMc zR1U8~+Qh7+1ciCopOt{5M(?%c*Rhcec{U85j(GlS>g@#U6$mot z6Ex3i#plHm3$xX4kELZeg}94{8sCrBejRmQ``#>l{xX4H$;)fKXmhi;#oj-__OWy` zg`1cIPHRz}EnYuv7+QCM(dLKYoyQXDgG|Ymywtf)!(=WqS)MQEN2|${?;Q9vP1lr7 zObY6l>l>~vkFcc)gp`6Nv7Z^8uTk<)R)3CaESqn5t6$@UoNd=$r?Dx;Y@B6yGOhI< zTYC207uXT2tC1(qAnjvW6**cUai41J5kP@>wzc!XfL5H=-b#%=Ckk)PJz0xsU@5a{ zV=-&KiScajYwG+~KbJ_r`>}lZ-o)%;w@03+_?34H4C;L?j1?pj126TYMai$e3gcpe z2Pybw9YicQ_ymZDhDKjzfyXO_>jc^}yw|uElymvg3XB{5*e*;$%1_x%(tJGxFKMuC z_Ixm}2(LNipKv6$%sx6qC!{}>55K$s&&P;uxoppQ=ZM&Eqosy=0%rr)#D?VE2vB%W zvQB8a6oa)&)omlx%o=IpBt}Cb3>xo05tBi<6aJB~&uwbdm>6r6#_F#=TP8F?X$0%? z*`)}#;OCk9S`xE|mS6(ziH;{xRGa7RwV8VZ+v&;Kc7ahEW-AO-82kpEC2N1xmIls4cvyEPMX_6-v(~Q7h(r*AjS%tky$OkWrB?e`*@?dr~7QvX!!omK2AAiY(WEh?hu0QdzRH zb0-a<>myI$k9SYqsvu*;g;-!~*M!{{rjbe0jKImBtKO{w#TTyf4PU{;yP5uRfdG=2wRv zdOF%M-m5tCG^Sx~`*HT^;*afMHT`sSIT`Y&4T@5O%7V{Z+HJpFsOex=v`Ugxjjp&^Iy?>c{_+5TSXuRRj`2c=)>w!hc; zStfgHUjLx;GqQh^WdE-!@BcyNmVWztmA}Tc_zx<-(r|yT^VgUjqWK+&}-se|rAP9{q`iy|p-GQ85zQ{4Y@K-@SeXyZx+2zBM_F zUn!-3_xqK|@DuxXYc$AiTiin6{_gl|?et#*%temculR;PJ%3kQeS1qzs^3>y|Ecol kF#Vc6e&*#{6Vm=omR3{5K%(zZQ0^lSD`e6s)%kh#KjD2$VE_OC literal 0 HcmV?d00001 diff --git a/data/amino_acids_properties.rda b/data/amino_acids_properties.rda new file mode 100644 index 0000000000000000000000000000000000000000..324606a7fd50dfff0eb1668b9c5ed54bd83c0bfc GIT binary patch literal 507 zcmV)m7y|6?e_sFR|F}W`fG`XJ06Cwn0|BrBb``aTq*D_n2*#5DBTX0~f?_b4FdCT}U;;dbr>TT(Of@o-)J#U18$vt? 
z6GlTrK+wbh8eud5$kC=i83c+Xr-?B%^d?N6lS2Rv36o4CA&A3B!J|V!1P79JS{Xph z5xdo8h2kk1l-$YmRnmMp{VjPz0uVr13zz^h*|S^-QH(+2cY9 zjU<0)z!E1kgi)hqNZ^|X9r$tYc&XRCo+@%+(#j{hn;2SJ0U}8-6KGuG>l8v%g8B@b z2qa5l5}MM$ZGae^rle9ON`?zgvmy~|r0#G?raL~0`xGAiPptxFuAki1osOH0+~hsi zz9kz|4HM92W?-EdP>`foDKOxHAb|i%=^RadO3Z;RG}$?2z#^OTZ^2oN!!W2sh9;R( zL{P4`9Hk+bU^h~gg`lYg=XoazGuK}%mhl<%eIU(X?`D9`pyP;?2yH%DQ2mfdVase^ z^9@boS`JH8*K{!mVL&Ebh)FEI*fA2RoJ(Gx6nd02V*8SjHhz`v2m#xX%cT5DyQrajVGl%kYWu7 zricJwjDW}h(?BIXBubiS00000XaE`j00YtnQbC{q0001bfChj701W|D)QWnh#Ay#C z4KxOt4KxEl00TfgqfSAnSA72|R77@U6kSbS#ZX+0%;N>zCRrHPBed3uhVdqn(ZIz> z<`<@t^HUmGZMSn0BFmO4#?s`2S-4t|T^lTzbBLMEw&L9`(bFWkjWK#J+ZwU z)7pPL&S-{O};l2#xQiA4}d9aZt^OL{`AzceI}PNWsk2>^vq0R#Zi z1d^If42vTZLe3|3gNfEgNBP9PrSq+ggXvVHppunJsZn$Rbfqc76sqmYud-pTn9=6h z+)Y;?+{kMnR^~dUlil6g_-pw#{(N2T@<0gj<9izpMM}P>wxf-nFCtWPNXVe+0Z43_ zsiQ2#IBVPQivcZBCAN4l`4^AP+A&As%csj6ckd0sDx2k!xa=3wT*xf zL_e`rWkf_pAqa{g3g3drq*PL(0}pMAQ@z@-={EB0-6gKPt>jkP{?6|HObRc9FYBXo z8xqFTS1*z9Y6i3SWo6uME*|ET<6LKGe4eFs@6Ror&St%gxw74GI5pX`Gcdufi(p-5 zEv;;;+Lmg$Pg_58hkGA<3sD(O-`rLaxyP-_?r)^!K-03wk|lDgiFV7aVH@dqM^~gy ziDcNfO=t*qGDsBh(Bf-aG74}3kZu;CNS8+lqgO#(T!\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling `rhs(lhs)`. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal}