diff --git a/NAMESPACE b/NAMESPACE
index 12c299b..d4cc69b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -48,4 +48,5 @@ importFrom(stats,density)
 importFrom(stats,dnorm)
 importFrom(syntenet,interspecies_synteny)
 importFrom(syntenet,intraspecies_synteny)
+importFrom(utils,head)
 importFrom(utils,read.table)
diff --git a/R/data.R b/R/data.R
index 5b418b8..d92a959 100644
--- a/R/data.R
+++ b/R/data.R
@@ -102,6 +102,7 @@
 #'   \item{dup1}{Character, duplicated gene 1.}
 #'   \item{dup2}{Character, duplicated gene 2.}
 #'   \item{Ks}{Numeric, Ks values.}
+#'   \item{type}{Factor, duplication mode.}
 #' }
 #' @examples 
 #' data(gmax_ks)
diff --git a/R/data_validation.R b/R/data_validation.R
new file mode 100644
index 0000000..e0c84e5
--- /dev/null
+++ b/R/data_validation.R
@@ -0,0 +1,37 @@
+
+#' Check if gene names in set 1 are present in set 2
+#' 
+#' @param ref_ids Character vector of reference gene set.
+#' @param test_ids Character vector of test gene set.
+#' @param setnames Character vector of length 2 with set names. 
+#' Default: \code{c("gene pairs", "CDS")}
+#' 
+#' @return TRUE if all IDs in \code{ref_ids} are in \code{test_ids} (or if
+#' \code{ref_ids} is empty); otherwise, an error listing example IDs is thrown.
+#' @importFrom utils head
+#' @details 
+#' This internal function can be used, for instance, to check if CDS names
+#' match gene IDs in the gene pair list.
+#' @noRd
+check_geneid_match <- function(
+        ref_ids, test_ids, setnames = c("gene pairs", "CDS")
+) {
+    
+    mismatch_ids <- ref_ids[!ref_ids %in% test_ids]
+    
+    # Test the count (not the rounded %) so that rare mismatches are not missed
+    if(length(mismatch_ids) > 0) {
+        mismatch_perc <- round(length(mismatch_ids) / length(ref_ids) * 100, 2)
+        stop(
+            mismatch_perc, "%", " (N=", length(mismatch_ids), ") of the IDs in ", setnames[1], 
+            " were not found in ", setnames[2], ".\n", 
+            "All gene IDs in ", setnames[1], " must be in ", setnames[2], 
+            ". Did you check if gene IDs match?",
+            "\n\nHere are some examples of nonmatching IDs (from ", setnames[1], ") :\n",
+            paste0(head(mismatch_ids, n = 5), collapse = "\n"),
+            "\n\nAnd here are some examples of IDs in ", setnames[2], ":\n",
+            paste0(head(test_ids, n = 5), collapse = "\n")
+        )
+    }
+    
+    return(TRUE)
+}
\ No newline at end of file
diff --git a/R/ka_ks_analyses.R b/R/ka_ks_analyses.R
index 0bd76bd..8773005 100644
--- a/R/ka_ks_analyses.R
+++ b/R/ka_ks_analyses.R
@@ -11,6 +11,8 @@
 #' "YN", "MYN", "MS", "MA", "GNG", "GLWL", "GLPB", "GMLWL", "GMLPB", "GYN", 
 #' and "GMYN". Default: "MYN".
 #' @param threads Numeric indicating the number of threads to use. Default: 1.
+#' @param verbose Logical indicating whether progress messages should be 
+#' printed on screen. Default: FALSE.
 #' 
 #' @return A list of data frames containing gene pairs and their Ka, Ks,
 #' and Ka/Ks values.
@@ -42,16 +44,24 @@
 #' 
 #' kaks <- pairs2kaks(gene_pairs_list, cds)
 #' 
-pairs2kaks <- function(gene_pairs_list, cds, model = "MYN", threads = 1) {
+pairs2kaks <- function(
+        gene_pairs_list, cds, model = "MYN", threads = 1, verbose = FALSE
+) {
     
     kaks_list <- lapply(seq_along(gene_pairs_list), function(x) {
         
-        # Get pairs for species x
+        # Get pairs and CDS for species x
         species <- names(gene_pairs_list)[x]
+        if(verbose) { message("Calculating rates for species '", species, "'") }
+        
         pairs <- gene_pairs_list[[x]]
         names(pairs)[c(1, 2)] <- c("dup1", "dup2")
         pairs$dup1 <- gsub("^[a-zA-Z]{2,5}_", "", pairs$dup1)
         pairs$dup2 <- gsub("^[a-zA-Z]{2,5}_", "", pairs$dup2)
+        fcds <- cds[[species]]
+        
+        # Check if IDs in pairs are all present in CDS
+        c1 <- check_geneid_match(unique(c(pairs$dup1, pairs$dup2)), names(fcds))
         
         # Remove CDS that are not multiple of 3
         fcds <- cds[[species]]
diff --git a/data/gmax_ks.rda b/data/gmax_ks.rda
index 42a56f2..466f379 100644
Binary files a/data/gmax_ks.rda and b/data/gmax_ks.rda differ
diff --git a/dev/01_create_pkg.R b/dev/01_create_pkg.R
deleted file mode 100644
index c2cef92..0000000
--- a/dev/01_create_pkg.R
+++ /dev/null
@@ -1,72 +0,0 @@
-## ********************
-## Create the R package
-## ********************
-
-## To get started, install R from https://cran.r-project.org/
-## and RStudio Desktop https://rstudio.com/products/rstudio/download/#download
-## You can install both of them for free.
-
-## Next, open RStudio as the code that will run benefits from running inside
-## RStudio for interactivity purposes.
-
-## Next, you might need to install several R packages that you can install with
-## the following code:
-if (!requireNamespace("remotes", quietly = TRUE)) {
-    install.packages("remotes")
-}
-remotes::install_cran(
-    c(
-        "available",
-        "BiocManager",
-        "biocthis",
-        "devtools",
-        "knitr",
-        "pkgdown",
-        "RefManageR",
-        "rmarkdown",
-        "rstudioapi",
-        "sessioninfo",
-        "styler",
-        "usethis"
-    )
-)
-if (!requireNamespace("BiocStyle", quietly = TRUE)) {
-    BiocManager::install("BiocStyle")
-}
-## In case you want the development version of biocthis from GitHub
-# BiocManager::install("lcolladotor/biocthis")
-
-## Here's a very quick summary of why these packages are useful:
-## * available: to check the name of your package
-## * BiocManager: to install Bioconductor packages
-## * BiocStyle: for styling your vignette and linking to other packages
-## * devtools: to develop R packages
-## * knitr: for making your vignette
-## * pkgdown: for creating beautiful documentation websites
-## * RefManageR: for citing utilities in your package vignette
-## * rmarkdown: for making the README.md and processing your vignette
-## * remotes: for installing R packages from several locations
-## * rstudioapi: for navigating across files in RStudio
-## * sessioninfo: for detailed R session information useful to you and your users
-## * usethis: for creating templates that will jump start your R package work
-
-
-## Package names have some properties. You can also use:
-available::available("doubletrouble")
-## to check that your package name is not taken and that it doesn't have
-## a meaning that you might not be aware of.
-
-usethis::create_package("doubletrouble")
-## This opens a new window in RStudio
-
-## Note: If you create packages frequently, check the help file for
-## usethis::use_description() for more information on how to set some R author
-## defaults.
-
-## Add package development files from biocthis
-biocthis::use_bioc_pkg_templates()
-
-## Move to the next step: setting up Git and GitHub for your package
-rstudioapi::navigateToFile(usethis::proj_path("dev", "02_git_github_setup.R"))
-
-## This template was made using https://lcolladotor.github.io/biocthis/
diff --git a/dev/02_git_github_setup.R b/dev/02_git_github_setup.R
deleted file mode 100644
index 9260dcc..0000000
--- a/dev/02_git_github_setup.R
+++ /dev/null
@@ -1,50 +0,0 @@
-## Did you miss the previous step? The one about creating your package
-rstudioapi::navigateToFile(usethis::proj_path("dev", "01_create_pkg.R"))
-
-## ********************
-## Setup Git and GitHub
-## ********************
-
-## Note that Bioconductor doesn't allow *.Rproj files
-## So we have to ignore it before anything else
-usethis::use_git_ignore("*.Rproj")
-usethis::use_git() ## Choose the option to make the commit, then to restart RStudio
-
-## After the restart, continue by connecting your local git repository to
-## GitHub. You might want to use the `organisation` and `private` arguments
-args(usethis::use_github)
-
-## If this is your first time running use_github(), you might have to also run:
-usethis::gh_token_help()
-## The above command will suggest that you read more at
-## https://usethis.r-lib.org/articles/articles/git-credentials.html
-## which contains the latest recommendations by the usethis authors for
-## configuring your R to GitHub connection.
-usethis::create_github_token()
-gitcreds::gitcreds_set()
-## Type your GitHub token, not your password! Otherwise you might run into this
-## problem: https://github.com/r-lib/usethis/issues/1347
-
-## In some situations, gitcreds::gitcreds_set() might not work. For example,
-## gitcreds::gitcreds_set() is not supported on Linux as discussed at
-## https://github.com/r-lib/gitcreds/issues/29. In these situations,
-## you have to rely on the old workflow of editing your
-## .Renviron file with contents like (note the empty line at the end!):
-# GITHUB_PAT=YOUR_40_CHARACTERS_TOKEN
-#
-usethis::edit_r_environ()
-## Then re-start your R session.
-rstudioapi::restartSession()
-## Editing the .Renviron is strongly discouraged now since it stores as
-## simple text your GitHub personal access token (PAT) instead of the
-## more secure approach provided by gitcreds.
-
-## Now run use_github()
-usethis::use_github()
-## Follow any prompts, such as running on the terminal:
-## git push --set-upstream origin master
-
-## Move to the next step: setting up your package core files
-rstudioapi::navigateToFile(usethis::proj_path("dev", "03_core_files.R"))
-
-## This template was made using https://lcolladotor.github.io/biocthis/
diff --git a/dev/03_core_files.R b/dev/03_core_files.R
deleted file mode 100644
index db8f20a..0000000
--- a/dev/03_core_files.R
+++ /dev/null
@@ -1,108 +0,0 @@
-## Did you miss the previous step? The one about setting up Git and GitHub
-rstudioapi::navigateToFile(usethis::proj_path("dev", "02_git_github_setup.R"))
-
-## ***********************************************************
-## Setup the core files for your Bioconductor-friendly package
-## ***********************************************************
-
-## Edit your package DESCRIPTION file
-## Check http://r-pkgs.had.co.nz/description.html for details
-## as well as http://bioconductor.org/developers/package-guidelines/#description
-
-## Check https://github.com/lcolladotor/biocthis/blob/master/DESCRIPTION
-## for an example.
-
-## You'll at least want to edit the version to 0.99.0 (or lower) since that's
-## the version number you will need to use with Bioconductor.
-
-## You will also want to add the biocViews field, for example:
-## biocViews: Software
-
-## Many Bioconductor packages use the following license:
-## license: Artistic-2.0
-
-## You might want to add the Date field as well, which is used when creating
-## the package citation information. Use the YYYY-MM-DD format. For example:
-## Date: 2020-04-29
-
-## This function sets all these defaults for you
-biocthis::use_bioc_description()
-## However, you still need to edit parts of it manually
-rstudioapi::navigateToFile(usethis::proj_path("DESCRIPTION"))
-
-## Create your README.Rmd file
-biocthis::use_bioc_readme_rmd()
-devtools::build_readme()
-
-## Edit accordingly. You might want to install your package also using
-## devtools::build() or the RStudio keyboard shortcut:
-## macoS: command + shift + B
-## Windows: control + shift + B
-
-## Click on the `knit` button on your README.Rmd file to create the README.md
-## file.
-
-## Add a NEWS.md file
-## See http://bioconductor.org/developers/package-guidelines/#news for more
-## details about Bioconductor news files.
-biocthis::use_bioc_news_md()
-
-## Add information for users and contributors
-biocthis::use_bioc_coc()
-usethis::use_tidy_contributing()
-biocthis::use_bioc_support()
-biocthis::use_bioc_issue_template()
-biocthis::use_bioc_citation()
-
-## Add badges to the README.Rmd file
-usethis::use_lifecycle_badge("Experimental")
-usethis::use_bioc_badge()
-## NOTE: If your Bioconductor package is an experiment, annotation or workflow
-## package, you will need to edit the resulting badge!
-usethis::use_github_actions_badge("R-CMD-check-bioc")
-
-## Enable using tests
-usethis::use_testthat()
-usethis::use_test("duplication_classification") 
-usethis::use_test("ka_ks_analyses") 
-usethis::use_coverage()
-
-## Re-knit your README.Rmd file to update your README.md file
-devtools::build_readme()
-
-## Add a vignette template
-pkg <- basename(usethis::proj_get())
-biocthis::use_bioc_vignette(pkg, paste("Introduction to", pkg))
-
-## Add a Bioconductor-friendly GitHub actions workflow to check your package
-biocthis::use_bioc_github_action()
-## If:
-## * your package doesn't have testthat tests, change to: has_testthat = 'false'
-## * you don't want to run the covr step, change to: run_covr = 'false'
-## * you don't want to use pkgdown, change to: run_pkgdown = 'false
-rstudioapi::navigateToFile(usethis::proj_path(".github", "workflows", "check-bioc.yml"))
-
-## Setup up your global git config
-usethis::edit_git_config()
-## Use the information that matches your GitHub account
-## Example contents (4 space indentation):
-# [user]
-#     name = Your Full Name
-#     email = your.email@somewhere.com
-#
-
-## ************************* WARNING START *********************************
-## WARNING: git commit before running this next function!
-## Otherwise you can lose your work!!!
-## ************************* WARNING END ***********************************
-##
-## Deploy with pkgdown at least once locally such that the automatic updates
-## from GitHub actions will work. This creates the gh-pages branch in your
-## GitHub repository in such a way that pkgdown will recognize it and be
-## able to use it later.
-pkgdown::deploy_to_branch() ## Check the WARNING above before running this!
-
-## Move to the next step: updating your package code before a "git commit"
-rstudioapi::navigateToFile(usethis::proj_path("dev", "04_update.R"))
-
-## This template was made using https://lcolladotor.github.io/biocthis/
diff --git a/dev/04_update.R b/dev/04_update.R
deleted file mode 100644
index 2dbaf25..0000000
--- a/dev/04_update.R
+++ /dev/null
@@ -1,31 +0,0 @@
-## Did you miss the previous step? The one about setting up the R package core
-## files.
-rstudioapi::navigateToFile(usethis::proj_path("dev", "03_core_files.R"))
-
-## ********************************************
-## Update your package code before a git commit
-## ********************************************
-
-## Automatically re-style the code in your package to a Bioconductor-friendly
-## format
-## Note that you can pair this function with the RStudio "Reformat code"
-## button on the magic wand drop down menu. The keyboard shortcut is
-## macOS: shift + command + A
-## Windows: shift + control + A
-styler::style_pkg(transformers = biocthis::bioc_style())
-styler::style_dir(usethis::proj_path("dev"), transformers = biocthis::bioc_style())
-styler::style_dir(
-    usethis::proj_path("vignettes"),
-    transformers = biocthis::bioc_style(),
-    filetype = "Rmd"
-)
-styler::style_file(usethis::proj_path("README.Rmd"), transformers = biocthis::bioc_style())
-
-## Re-make the documentation files
-devtools::document()
-
-## You might also need to update the README.md by re-rendering the README.Rmd
-## file.
-devtools::build_readme()
-
-## This template was made using https://lcolladotor.github.io/biocthis/
diff --git a/inst/script/data_acquisition.md b/inst/script/data_acquisition.md
index 0e7d89e..c0002ef 100644
--- a/inst/script/data_acquisition.md
+++ b/inst/script/data_acquisition.md
@@ -180,16 +180,25 @@ in the S. cerevisiae genome.
 ``` r
 library(Biostrings)
 
-# Get duplicated genes
-data(scerevisiae_kaks)
-c_full <- scerevisiae_kaks[, c("dup1", "dup2", "type")]
+# Load and process data
+data("yeast_seq")
+data("yeast_annot")
+pdata <- syntenet::process_input(yeast_seq, yeast_annot)
+
+data(diamond_intra)
 
-dup_genes <- unique(c(c_full$dup1, c_full$dup2))
-dup_genes <- gsub(".*_", "", dup_genes)
+# Classify gene pairs
+c_standard <- classify_gene_pairs(
+    annotation = pdata$annotation,
+    blast_list = diamond_intra,
+    scheme = "standard"
+)
 
-dup_sd <- c_full[c_full$type == "SD", ]
-dup_sd <- unique(c(dup_sd$dup1, dup_sd$dup2))
-dup_sd <- gsub(".*_", "", dup_sd)
+# Get TD-derived pairs
+td_pairs <- c_standard$Scerevisiae |>
+    dplyr::filter(type == "TD")
+td_pairs <- unique(c(td_pairs$dup1, td_pairs$dup2))
+td_pairs <- gsub(".*_", "", td_pairs)
 
 # Get CDS and keep only longest isoform
 cds_scerevisiae_full <- readDNAStringSet(
@@ -197,7 +206,14 @@ cds_scerevisiae_full <- readDNAStringSet(
 ) |> ensembl_longest_isoform()
 
 # Keep only duplicated genes
-cds_scerevisiae <- cds_scerevisiae_full[names(cds_scerevisiae_full) %in% dup_wgd]
+cds_scerevisiae <- cds_scerevisiae_full[names(cds_scerevisiae_full) %in% 
+                                            td_pairs]
+
+# Write, read, and export file
+out <- tempfile(fileext = ".fa")
+writeXStringSet(cds_scerevisiae, filepath = out)
+
+cds_scerevisiae <- Biostrings::readDNAStringSet(out)
 
 usethis::use_data(cds_scerevisiae, compress = "xz", overwrite = TRUE)
 ```
@@ -231,6 +247,16 @@ cds <- list(Scerevisiae = cds_scerevisiae_all)
 scerevisiae_kaks_list <- pairs2kaks(c_extended, cds)
 scerevisiae_kaks <- scerevisiae_kaks_list$Scerevisiae
 
+fungi_kaks2 <- fungi_kaks
+fungi_kaks2 <- lapply(fungi_kaks2, function(x) {
+    
+    x$Ka <- signif(x$Ka, 3)
+    x$Ks <- signif(x$Ks, 3)
+    x$Ka_Ks <- signif(x$Ka_Ks, 3)
+    
+    return(x)
+})
+
 usethis::use_data(scerevisiae_kaks, compress = "xz", overwrite = TRUE)
 ```
 
@@ -278,10 +304,12 @@ cds <- list(Gmax = cds)
 # Calculate Ks values
 gmax_kaks_list <- pairs2kaks(c_binary, cds)
 gmax_ks <- gmax_kaks_list$Gmax
-gmax_ks <- gmax_ks[, c("dup1", "dup2", "Ks")]
+gmax_ks <- gmax_ks[, c("dup1", "dup2", "Ks", "type")]
 
 gmax_ks <- gmax_ks[gmax_ks$Ks <= 2, ]
 gmax_ks <- gmax_ks[!is.na(gmax_ks$Ks), ]
 
+gmax_ks$Ks <- signif(gmax_ks$Ks, 3) # to reduce object size
+
 usethis::use_data(gmax_ks, compress = "xz", overwrite = TRUE)
 ```
diff --git a/man/gmax_ks.Rd b/man/gmax_ks.Rd
index 7f02492..96009d6 100644
--- a/man/gmax_ks.Rd
+++ b/man/gmax_ks.Rd
@@ -10,6 +10,7 @@ A data frame with the following variables:
 \item{dup1}{Character, duplicated gene 1.}
 \item{dup2}{Character, duplicated gene 2.}
 \item{Ks}{Numeric, Ks values.}
+\item{type}{Factor, duplication mode.}
 }
 }
 \usage{
diff --git a/man/pairs2kaks.Rd b/man/pairs2kaks.Rd
index d9e66ef..c2b8d47 100644
--- a/man/pairs2kaks.Rd
+++ b/man/pairs2kaks.Rd
@@ -4,7 +4,7 @@
 \alias{pairs2kaks}
 \title{Calculate Ka, Ks, and Ka/Ks from duplicate gene pairs}
 \usage{
-pairs2kaks(gene_pairs_list, cds, model = "MYN", threads = 1)
+pairs2kaks(gene_pairs_list, cds, model = "MYN", threads = 1, verbose = FALSE)
 }
 \arguments{
 \item{gene_pairs_list}{List of data frames containing duplicated gene pairs
@@ -19,6 +19,9 @@ Possible values are "Li", "NG86", "NG", "LWL", "LPB", "MLWL", "MLPB", "GY",
 and "GMYN". Default: "MYN".}
 
 \item{threads}{Numeric indicating the number of threads to use. Default: 1.}
+
+\item{verbose}{Logical indicating whether progress messages should be
+printed on screen. Default: FALSE.}
 }
 \value{
 A list of data frames containing gene pairs and their Ka, Ks,
diff --git a/tests/testthat/test-data_validation.R b/tests/testthat/test-data_validation.R
new file mode 100644
index 0000000..4f4f2e2
--- /dev/null
+++ b/tests/testthat/test-data_validation.R
@@ -0,0 +1,9 @@
+
+# Start tests ----
+test_that("check_geneid_match() flags mismatches between gene sets", {
+    set1 <- c("gene1", "gene2A", "gene3", "gene4A")
+    set2 <- c("gene1", "gene2", "gene3", "gene4")
+    # Mismatches (2 of 4 IDs) must raise an error; identical sets must pass
+    expect_error(check_geneid_match(set1, set2), "N=2")
+    expect_true(check_geneid_match(set2, set2))
+})
diff --git a/tests/testthat/test-ka_ks_analyses.R b/tests/testthat/test-ka_ks_analyses.R
index 10a2ea6..b458c5b 100644
--- a/tests/testthat/test-ka_ks_analyses.R
+++ b/tests/testthat/test-ka_ks_analyses.R
@@ -33,7 +33,7 @@ cds2$Scerevisiae$Q0055 <- Biostrings::subseq(
 #----Start tests----------------------------------------------------------------
 test_that("pairs2kaks() returns a data frame with Ka, Ks, and Ka/Ks", {
     
-    kaks <- pairs2kaks(gene_pairs_list, cds)
+    kaks <- pairs2kaks(gene_pairs_list, cds, verbose = TRUE)
     kaks2 <- pairs2kaks(gene_pairs_list, cds2)
     
     expect_equal(class(kaks), "list")
diff --git a/vignettes/doubletrouble_vignette.Rmd b/vignettes/doubletrouble_vignette.Rmd
index 7bf4004..8189641 100644
--- a/vignettes/doubletrouble_vignette.Rmd
+++ b/vignettes/doubletrouble_vignette.Rmd
@@ -352,14 +352,14 @@ package (see `?get_intron_counts()` for a summary of all functions).
 
 
 Here, we will create a list of `TxDb` objects from a list of `GRanges` objects
-using the function `makeTxDbFromGRanges` 
+using the function `makeTxDbFromGRanges()` 
 from `r BiocStyle::Biocpkg("txdbmaker")`. Importantly, to create
 a `TxDb` from a `GRanges`, the `GRanges` object must contain genomic coordinates
 for all features, including transcripts, exons, etc. Because of that, we
 will use annotation from the example data set `yeast_annot`,
 which was not processed with `syntenet::process_input()`.
 
-```{r}
+```{r message=FALSE}
 library(txdbmaker)
 # Create a list of `TxDb` objects from a list of `GRanges` objects
 txdb_list <- lapply(yeast_annot, txdbmaker::makeTxDbFromGRanges)
@@ -469,6 +469,10 @@ kaks <- pairs2kaks(gene_pairs, cds_list)
 head(kaks)
 ```
 
+Importantly, `pairs2kaks()` expects all genes in the gene pairs to be present
+in the CDS, with matching names. Species abbreviations in gene pairs (added
+by `r BiocStyle::Biocpkg("syntenet")`) are automatically removed, so you should
+not add them to the sequence names of your CDS.
 
 # Identifying and visualizing $K_s$ peaks
 
@@ -587,7 +591,7 @@ age boundaries highlighted in the histogram of $K_s$ values.
 head(gmax_ks)
 
 # Classify gene pairs by age group
-pairs_age_group <- split_pairs_by_peak(gmax_ks, peaks)
+pairs_age_group <- split_pairs_by_peak(gmax_ks[, c(1,2,3)], peaks)
 
 # Inspecting the output
 names(pairs_age_group)
@@ -599,6 +603,31 @@ head(pairs_age_group$pairs)
 pairs_age_group$plot
 ```
 
+Age groups can also be used to identify SD gene pairs that likely originated
+from whole-genome duplications. The rationale here is that segmental duplicates
+with $K_s$ values near $K_s$ peaks (indicating WGD events) were likely
+created by such WGDs. By the same logic, SD pairs with $K_s$ values that
+are too distant from $K_s$ peaks (e.g., >2 standard deviations away from
+the mean) were likely created by duplications of large genomic segments, but 
+not duplications of the entire genome. 
+
+As an example, to find gene pairs in the soybean genome that likely originated 
+from the WGD event shared by all legumes (at ~58 million years ago), 
+you'd need to extract SD pairs in age group 2 using the following code:
+
+```{r}
+# Get all pairs in age group 2
+pairs_ag2 <- pairs_age_group$pairs[pairs_age_group$pairs$peak == 2, c(1,2)]
+
+# Get all SD pairs
+sd_pairs <- gmax_ks[gmax_ks$type == "SD", c(1,2)]
+
+# Merge tables
+pairs_wgd_legumes <- merge(pairs_ag2, sd_pairs)
+
+head(pairs_wgd_legumes)
+```
+
 # Data visualization
 
 Last but not least, `r BiocStyle::Biocpkg("doubletrouble")` provides users