diff --git a/DESCRIPTION b/DESCRIPTION
index 0fa9a949..3424dc57 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -82,4 +82,7 @@ Imports:
     XVector,
     yaml
 Suggests:
-    knitr
+    knitr,
+    testthat,
+    mockery,
+    readr
diff --git a/R/cleanup.R b/R/cleanup.R
index 4fe074ee..9bc646b6 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -160,9 +160,10 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
     prot <- prot %>%
         as_tibble() %>%
         # filter(grepl("\\*", {{by_column}})) %>%		  # Keep only rows with Query (*) for GenContext
-        filter(!grepl("^-$", {{ by_column }})) %>% # remove "-"
-        filter(!grepl("^NA$", {{ by_column }})) %>% # remove "NA"
-        filter(!grepl("^$", {{ by_column }})) # remove empty rows
+        filter(!grepl("^-$", .[[by_column]])) %>%   # remove "-"
+        filter(!grepl("^NA$", .[[by_column]])) %>%  # remove "NA"
+        filter(!grepl("^$", .[[by_column]])) %>%    # remove empty rows
+        filter(!grepl("^\\s*$", .[[by_column]]))     # remove rows with only spaces
 
     return(prot)
 }
diff --git a/R/fa2domain.R b/R/fa2domain.R
index 6dc6f622..55517c85 100644
--- a/R/fa2domain.R
+++ b/R/fa2domain.R
@@ -22,6 +22,16 @@ runIPRScan <- function(
         # destPartition = "LocalQ",
         # destQoS = "shortjobs"
     ) {
+    # Validate inputs
+    if (is.null(filepath_fasta) || filepath_fasta == "") {
+        stop("filepath_fasta cannot be NULL or empty")
+    }
+    if (is.null(filepath_out) || filepath_out == "") {
+        stop("filepath_out cannot be NULL or empty")
+    }
+    if (!all(appl %in% c("Pfam", "Gene3D"))) {
+        stop("Invalid application specified")
+    }
     # construct interproscan command
     cmd_iprscan <- stringr::str_glue(
         "iprscan -i {filepath_fasta} -b {filepath_out} --cpu 4 -f TSV ",
@@ -283,7 +293,7 @@ getDomainsFromFA <- function(
                 if (verbose) {
                     msg <- stringr::str_glue(
                         "accession number: {header} had no domains for the ",
-                        "selected analyes: {paste(analysis, collapse = ',')}\n"
+                        "selected analyses: {paste(analysis, collapse = ',')}\n"
                     )
                     warning(msg)
                 }
diff --git a/man/acc2FA.Rd b/man/acc2FA.Rd
new file mode 100644
index 00000000..1a7a27e9
--- /dev/null
+++ b/man/acc2FA.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CHANGED-pre-msa-tree.R
+\name{acc2FA}
+\alias{acc2FA}
+\title{acc2FA converts protein accession numbers to a fasta format.}
+\usage{
+acc2FA(accessions, outpath, plan = "sequential")
+}
+\arguments{
+\item{accessions}{Character vector containing protein accession numbers to generate fasta sequences for.
+Function may not work for vectors of length > 10,000}
+
+\item{outpath}{Character string. Location where the fasta file should be written.}
+
+\item{plan}{Execution plan to use; defaults to \code{"sequential"}.}
+}
+\description{
+Resulting fasta file is written to the outpath.
+}
+\examples{
+\dontrun{
+acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
+accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa") # Entrez
+accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa") # EBI
+}
+}
+\author{
+Samuel Chen, Janani Ravi
+}
+\keyword{accnum}
+\keyword{fasta}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 00000000..2a5dc27b
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(MolEvolvR)
+
+test_check("MolEvolvR")
\ No newline at end of file
diff --git a/tests/testthat/test-cleanup.R b/tests/testthat/test-cleanup.R
new file mode 100644
index 00000000..c465ed57
--- /dev/null
+++ b/tests/testthat/test-cleanup.R
@@ -0,0 +1,588 @@
+context("cleanup")
+test_that("cleanup", {
+    # cleanup
+    # Test with normal string
+    expect_equal(cleanString("Hello World"), "Hello_World")
+    
+    # Test with multiple spaces
+    expect_equal(cleanString("Hello     World"), "Hello_World")
+    
+    # Test with special characters
+    expect_equal(cleanString("Hello@World!"), "HelloWorld")
+    
+    # Test with alphanumeric characters and underscore
+    expect_equal(cleanString("Test_String 123"), "Test_String_123")
+    
+    # Test with dots
+    expect_equal(cleanString("Version 1.0.0"), "Version_1.0.0")
+    
+    # Test with empty string
+    expect_equal(cleanString(""), "")
+    
+    # Test with only spaces
+    expect_equal(cleanString("    "), "_")
+    
+    # Test with non-alphanumeric characters
+    expect_equal(cleanString("~!@#$%^&*()"), "")
+    
+    # Test with mixed characters
+    expect_equal(cleanString("Hello !@#$% World"), "Hello__World")
+    
+    # Test with trailing and leading spaces
+    expect_equal(cleanString("  Test  "), "_Test_")
+    
+    # Test with numbers and underscores
+    expect_equal(cleanString("Name_123 Test"), "Name_123_Test")
+    
+    # extractAccNum
+    # Test with a string containing a pipe character
+    expect_equal(extractAccNum("ID|ABC1234 Some Description"), "ABC1234")
+    
+    # Test with a string containing multiple spaces
+    expect_equal(extractAccNum("ID|DEF5678    More Info"), "DEF5678")
+    
+    # Test with a string without a pipe character
+    expect_equal(extractAccNum("ABC9876 Some Description"), "ABC9876")
+    
+    # Test with a string that has leading spaces
+    expect_equal(extractAccNum("   ID|GHI1357 Description"), "GHI1357")
+    
+    # Test with a string that has trailing spaces
+    expect_equal(extractAccNum("ID|JKL2468 Description   "), "JKL2468")
+    
+    # Test with only an accession number
+    expect_equal(extractAccNum("XYZ1234"), "XYZ1234")
+    
+    # Test with a string with only spaces
+    expect_equal(extractAccNum("    "), "")
+    
+    # Test with a string that contains special characters
+    expect_equal(extractAccNum("ID|MNO5678_Extra Info"), "MNO5678_Extra")
+    
+    # ensureUniqAccNum
+    # Test with unique accession numbers
+    accnums1 <- c("ABC1234", "DEF5678", "GHI9012")
+    expect_equal(ensureUniqAccNum(accnums1), c("ABC1234_1", "DEF5678_1", 
+                                               "GHI9012_1"))
+    
+    # Test with duplicate accession numbers
+    accnums2 <- c("ABC1234", "ABC1234", "DEF5678", "DEF5678", "GHI9012")
+    expect_equal(ensureUniqAccNum(accnums2), 
+                 c("ABC1234_1", "ABC1234_2", "DEF5678_1", 
+                   "DEF5678_2", "GHI9012_1"))
+    
+    # Test with all identical accession numbers
+    accnums3 <- c("XYZ9999", "XYZ9999", "XYZ9999")
+    expect_equal(ensureUniqAccNum(accnums3), 
+                 c("XYZ9999_1", "XYZ9999_2", "XYZ9999_3"))
+    
+    # Test with empty input
+    accnums4 <- character(0)
+    expect_equal(ensureUniqAccNum(accnums4), character(0))
+    
+    # Test with a single accession number
+    accnums5 <- c("SINGLE_ACC")
+    expect_equal(ensureUniqAccNum(accnums5), c("SINGLE_ACC_1"))
+    
+    # Test with mixed duplicate and unique accession numbers
+    accnums6 <- c("A", "B", "A", "C", "B", "B")
+    expect_equal(ensureUniqAccNum(accnums6), 
+                 c("A_1", "A_2", "B_1", "B_2", "B_3", "C_1"))
+    
+    # cleanFAHeaders
+    fasta_sample <- c(
+        ">sp|P12345|ProteinA Description 1",
+        ">sp|P67890|ProteinB Description 2",
+        ">sp|P12345|ProteinA Description 3",
+        ">sp|P67890|ProteinB Description 4"
+    )
+    names(fasta_sample) <- fasta_sample  # Set names to headers
+    
+    # Run the function
+    cleaned_fasta <- cleanFAHeaders(fasta_sample)
+    
+    # Expected headers after processing
+    expected_headers <- c("P12345_1", "P12345_2", "P67890_1", "P67890_2")
+    
+    # Check if the names of cleaned_fasta match expected_headers
+    expect_equal(names(cleaned_fasta), expected_headers)
+    
+    # Check that the contents of cleaned_fasta remain unchanged
+    expect_equal(as.vector(cleaned_fasta), as.vector(fasta_sample))
+    
+    fasta_unique <- c(
+        ">sp|P12345|UniqueProteinA",
+        ">sp|P67890|UniqueProteinB"
+    )
+    names(fasta_unique) <- fasta_unique
+    
+    cleaned_unique_fasta <- cleanFAHeaders(fasta_unique)
+    
+    expected_unique_headers <- c("P12345_1", "P67890_1")
+    expect_equal(names(cleaned_unique_fasta), expected_unique_headers)
+    
+    # Sample input data
+    prot_data <- tibble::tibble(
+        DomArch = c("ABC123", "-", "NA", "", "XYZ789", " "),
+        other_col = c(1, 2, 3, 4, 5, 6)
+    )
+    
+    # Expected output after removing rows
+    expected_output <- tibble::tibble(
+        DomArch = c("ABC123", "XYZ789"),
+        other_col = c(1, 5)
+    )
+    
+    # Run the function
+    result <- removeEmptyRows(prot_data, by_column = "DomArch")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Case 1: No rows removed
+    prot_data_no_removal <- tibble::tibble(
+        DomArch = c("ABC123", "XYZ789"),
+        other_col = c(1, 2)
+    )
+    expect_equal(removeEmptyRows(prot_data_no_removal), prot_data_no_removal)
+    
+    # Case 2: All rows removed
+    prot_data_all_removed <- tibble::tibble(
+        DomArch = c("-", "NA", "", " "),
+        other_col = c(1, 2, 3, 4)
+    )
+    expect_equal(removeEmptyRows(prot_data_all_removed), 
+                 tibble::tibble(DomArch = character(0), other_col = numeric(0)))
+    
+    # Case 3: Empty data frame
+    prot_data_empty <- tibble::tibble(DomArch = character(0), 
+                                      other_col = numeric(0))
+    expect_equal(removeEmptyRows(prot_data_empty), prot_data_empty)
+    
+    # NOTE(review): unused fixture (repeated domains) - overwritten below before any call reads it
+    prot_data <- tibble::tibble(
+        DomArch = c("A B B C", "X X Y", "P P P Q", "R R R S"),
+        other_col = c(1, 2, 3, 4)
+    )
+    
+    # NOTE(review): unused fixture (repeated domains) - overwritten below before any call reads it
+    prot_data <- tibble::tibble(
+        DomArch = c("A A A", "B B", "C C C D D"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # NOTE(review): unused fixture (repeated and single question marks) - overwritten below before any call reads it
+    prot_data <- tibble::tibble(
+        GenContext = c("A ? ? B", "? ?", "C ?? C", "D ? > ? D"),
+        other_col = c(1, 2, 3, 4)
+    )
+    
+    # Input data with single question marks only
+    prot_data <- tibble::tibble(
+        GenContext = c("?", "? ? ?", "A ? B"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # Expected output after replacing single question marks
+    expected_output <- tibble::tibble(
+        GenContext = c("X", "X(s)", "A X B"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # Run the function
+    result <- replaceQuestionMarks(prot_data, by_column = "GenContext")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Input data containing asterisks
+    query_data <- tibble::tibble(
+        GenContext = c("A * B", "*C*D*", "E*F*"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # Expected output after removing asterisks
+    expected_output <- tibble::tibble(
+        GenContext = c("A  B", "CD", "EF"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # Run the function
+    result <- removeAsterisks(query_data, colname = "GenContext")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Input data with no asterisks
+    query_data <- tibble::tibble(
+        GenContext = c("A B", "C D", "E F"),
+        other_col = c(1, 2, 3)
+    )
+    
+    # Expected output (no changes)
+    expected_output <- query_data
+    
+    # Run the function
+    result <- removeAsterisks(query_data, colname = "GenContext")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data
+    prot <- tibble::tibble(
+        DomArch = c("A", "B", "A", "C", "D"),
+        value = c(1, 2, 3, 4, 5)
+    )
+    
+    # Expected output after removing rows where `DomArch` appears only once
+    expected_output <- tibble::tibble(
+        DomArch = c("A", "A"),
+        value = c(1, 3)
+    )
+    
+    # Run the function
+    result <- removeTails(prot, by_column = "DomArch", keep_domains = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Input data with no single occurrence rows
+    prot <- tibble::tibble(
+        DomArch = c("A", "A", "B", "B"),
+        value = c(1, 2, 3, 4)
+    )
+    
+    # Expected output (should remain unchanged)
+    expected_output <- prot
+    
+    # Run the function
+    result <- removeTails(prot, by_column = "DomArch", keep_domains = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with special characters and extra spaces in the species names
+    prot <- tibble::tibble(
+        Species.orig = c("Escherichia coli sp.", 
+                         "Bacillus str. subtilis", 
+                         "Lactobacillus = plantarum", 
+                         "Staphylococcus aureus"),
+        value = c(1, 2, 3, 4)
+    )
+    
+    # Expected output after cleaning species names
+    expected_output <- tibble::tibble(
+        Species.orig = c("Escherichia coli sp.", 
+                         "Bacillus str. subtilis", 
+                         "Lactobacillus = plantarum", 
+                         "Staphylococcus aureus"),
+        value = c(1, 2, 3, 4),
+        Species = c("Escherichia coli sp", 
+                    "Bacillus str subtilis", 
+                    "Lactobacillus plantarum", 
+                    "Staphylococcus aureus")
+    )
+    
+    # Run the function
+    result <- cleanSpecies(prot, removeEmptyRows = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with an empty Species entry
+    prot <- tibble::tibble(
+        Species.orig = c("Escherichia coli sp.", "", 
+                         "Lactobacillus = plantarum", 
+                         "Staphylococcus aureus"),
+        value = c(1, 2, 3, 4)
+    )
+    
+    # Expected output after cleaning and removing empty rows
+    expected_output <- tibble::tibble(
+        Species.orig = c("Escherichia coli sp.", 
+                         "Lactobacillus = plantarum", 
+                         "Staphylococcus aureus"),
+        value = c(1, 3, 4),
+        Species = c("Escherichia coli sp", 
+                    "Lactobacillus plantarum", 
+                    "Staphylococcus aureus")
+    )
+    
+    # Run the function with removeEmptyRows = TRUE
+    result <- cleanSpecies(prot, removeEmptyRows = TRUE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with original ClustName
+    prot <- tibble::tibble(
+        ClustName.orig = c("SIG+TM+TM", "ABC+DEF", "XYZ+SIG", "TM+TM"),
+        value = c(1, 2, 3, 4)
+    )
+    
+    # Domains to rename
+    domains_rename <- tibble::tibble(
+        old = c("SIG", "ABC"),
+        new = c("Signal", "ABC_Transporter")
+    )
+    
+    # Domains to keep
+    domains_keep <- tibble::tibble(
+        domains = c("Signal", "ABC_Transporter")
+    )
+    
+    # Expected output after renaming and filtering
+    expected_output <- tibble::tibble(
+        ClustName.orig = c("SIG+TM+TM", "ABC+DEF", "XYZ+SIG"),
+        value = c(1, 2, 3),
+        ClustName = c("Signal+TM+TM", "ABC_Transporter+DEF", "XYZ+Signal")
+    )
+    
+    # Run the function
+    result <- cleanClusters(prot, domains_rename, domains_keep, 
+                            condenseRepeatedDomains = FALSE, 
+                            removeTails = FALSE, 
+                            removeEmptyRows = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # NOTE(review): duplicate of the assertion directly above
+    expect_equal(result, expected_output)
+    
+    # NOTE(review): incomplete tail-removal case - the fixtures below are overwritten before any call or assertion uses them
+    prot <- tibble::tibble(
+        ClustName.orig = c("SIG+TM+1", "ABC+DEF", "XYZ+SIG+2"),
+        value = c(1, 2, 3)
+    )
+    
+    # Domains to rename (empty for this test)
+    domains_rename <- tibble::tibble(
+        old = character(0),
+        new = character(0)
+    )
+    
+    # Domains to keep (empty for this test)
+    domains_keep <- tibble::tibble(
+        domains = character(0)
+    )
+    
+    # Expected output after removing tails (never asserted - see note above)
+    expected_output <- tibble::tibble(
+        ClustName.orig = c("ABC+DEF"),
+        value = c(2),
+        ClustName = c("ABC+DEF")
+    )
+    
+    # Sample input data
+    prot <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM", "ABC+DEF", "XYZ+SIG", "TM+TM"),
+        value = c(1, 2, 3, 4)
+    )
+    
+    # Domains to rename
+    domains_rename <- tibble::tibble(
+        old = c("SIG", "ABC"),
+        new = c("Signal", "ABC_Transporter")
+    )
+    
+    # Domains to keep
+    domains_keep <- tibble::tibble(
+        domains = c("Signal", "ABC_Transporter")
+    )
+    
+    # Expected output after renaming and filtering
+    expected_output <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM", "ABC+DEF", "XYZ+SIG"),
+        value = c(1, 2, 3),
+        DomArch = c("Signal+TM+TM", "ABC_Transporter+DEF", "XYZ+Signal")
+    )
+    
+    # Run the function
+    result <- cleanDomainArchitecture(prot, old = "DomArch.orig", new = "DomArch",
+                                      domains_keep = domains_keep,
+                                      domains_rename = domains_rename,
+                                      condenseRepeatedDomains = FALSE,
+                                      removeTails = FALSE,
+                                      removeEmptyRows = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with repeated domains
+    prot <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM+TM", "ABC+ABC+DEF", "XYZ+SIG+SIG"),
+        value = c(1, 2, 3)
+    )
+    
+    # Domains to rename (empty for this test)
+    domains_rename <- tibble::tibble(
+        old = character(0),
+        new = character(0)
+    )
+    
+    # Domains to keep
+    domains_keep <- tibble::tibble(
+        domains = c("SIG", "ABC")
+    )
+    
+    # Expected output after condensing repeated domains
+    expected_output <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM+TM", "ABC+ABC+DEF", "XYZ+SIG+SIG"),
+        value = c(1, 2, 3),
+        DomArch = c("SIG+TM(s)", "ABC(s)+DEF", "XYZ+SIG(s)")
+    )
+    
+    # Run the function with condenseRepeatedDomains = TRUE
+    result <- cleanDomainArchitecture(prot, old = "DomArch.orig", new = "DomArch",
+                                      domains_keep = domains_keep,
+                                      domains_rename = domains_rename,
+                                      condenseRepeatedDomains = TRUE,
+                                      removeTails = FALSE,
+                                      removeEmptyRows = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with an empty DomArch entry
+    prot <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM", "", "ABC+DEF"),
+        value = c(1, 2, 3)
+    )
+    
+    # Domains to rename (empty for this test)
+    domains_rename <- tibble::tibble(
+        old = character(0),
+        new = character(0)
+    )
+    
+    # Domains to keep
+    domains_keep <- tibble::tibble(
+        domains = c("SIG", "ABC")
+    )
+    
+    # Expected output after removing empty rows
+    expected_output <- tibble::tibble(
+        DomArch.orig = c("SIG+TM+TM", "ABC+DEF"),
+        value = c(1, 3),
+        DomArch = c("SIG+TM+TM", "ABC+DEF")
+    )
+    
+    # Run the function with removeEmptyRows = TRUE
+    result <- cleanDomainArchitecture(prot, old = "DomArch.orig", new = "DomArch",
+                                      domains_keep = domains_keep,
+                                      domains_rename = domains_rename,
+                                      condenseRepeatedDomains = FALSE,
+                                      removeTails = FALSE,
+                                      removeEmptyRows = TRUE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data with question marks
+    prot <- tibble::tibble(
+        DomArch.orig = c("SIG+TM???", "ABC+???DEF", "XYZ+SIG"),
+        value = c(1, 2, 3)
+    )
+    
+    # Domains to rename (empty for this test)
+    domains_rename <- tibble::tibble(
+        old = character(0),
+        new = character(0)
+    )
+    
+    # Domains to keep
+    domains_keep <- tibble::tibble(
+        domains = c("SIG", "ABC")
+    )
+    
+    # Expected output after replacing question marks
+    expected_output <- tibble::tibble(
+        DomArch.orig = c("SIG+TM???", "ABC+???DEF", "XYZ+SIG"),
+        value = c(1, 2, 3),
+        DomArch = c("SIG+TMXXX", "ABC+XXXDEF", "XYZ+SIG")
+    )
+    
+    # Run the function with question mark replacement
+    result <- cleanDomainArchitecture(prot, old = "DomArch.orig", new = "DomArch",
+                                      domains_keep = domains_keep,
+                                      domains_rename = domains_rename,
+                                      condenseRepeatedDomains = FALSE,
+                                      removeTails = FALSE,
+                                      removeEmptyRows = FALSE)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data
+    prot <- tibble::tibble(
+        GeneDescription = c("Gene A.", 
+                            "Protein B%2C protein C.", 
+                            "Enzyme%2C catalytic."),
+        value = c(1, 2, 3)
+    )
+    
+    # Expected output after cleaning
+    expected_output <- tibble::tibble(
+        GeneDescription = c("Gene A.", 
+                            "Protein B%2C protein C.", 
+                            "Enzyme%2C catalytic."),
+        value = c(1, 2, 3),
+        GeneDesc = c("Gene A.", 
+                     "Protein B, protein C.", 
+                     "Enzyme, catalytic.")
+    )
+    
+    # Run the function
+    result <- cleanGeneDescription(prot, "GeneDescription")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data
+    prot <- tibble::tibble(
+        AccNum = c("A1", "A1", "B1", "B1", "C1"),
+        Description = c("Short", 
+                        "Longer Description", 
+                        "Medium", "Shortest", "Unique")
+    )
+    
+    # Expected output after selecting longest duplicates
+    expected_output <- tibble::tibble(
+        AccNum = c("A1", "B1", "C1"),
+        Description = c("Longer Description", "Shortest", "Unique")
+    )
+    
+    # Run the function
+    result <- selectLongestDuplicate(prot, "Description")
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+    # Sample input data
+    prot <- tibble::tibble(
+        Lineage = c("Bacteria; Firmicutes; Bacilli; Lactobacillales",
+                    "Bacteria; Proteobacteria; Gammaproteobacteria",
+                    "Archaea; Euryarchaeota; Methanobacteria")
+    )
+    
+    # Rename mapping
+    lins_rename <- tibble::tibble(
+        old = c("Bacteria", "Firmicutes", "Archaea"),
+        new = c("Bacterium", "Firmicute", "Archaean")
+    )
+    
+    # Expected output after renaming
+    expected_output <- tibble::tibble(
+        Lineage = c("Bacterium; Firmicute; Bacilli; Lactobacillales",
+                    "Bacterium; ProteoBacterium; GammaproteoBacterium",
+                    "Archaean; Euryarchaeota; MethanoBacterium")
+    )
+    
+    # Run the function
+    result <- cleanLineage(prot, lins_rename)
+    
+    # Check if the result matches the expected output
+    expect_equal(result, expected_output)
+    
+})
\ No newline at end of file
diff --git a/tests/testthat/test-fa2domain.R b/tests/testthat/test-fa2domain.R
new file mode 100644
index 00000000..2f508c3a
--- /dev/null
+++ b/tests/testthat/test-fa2domain.R
@@ -0,0 +1,225 @@
+context("fa2domain")
+test_that("fa2domain", {
+    library(mockery)
+    library(readr)
+    library(glue)
+    # runIPRScan
+    # Locate fixtures with system.file(); NOTE(review): it returns "" when the file is not in the installed package - confirm these ship under inst/
+    filepath_fasta <- system.file("tests", "example_fasta.fa", package = "MolEvolvR")
+    filepath_out <- tempfile()  # Temporary file for output
+    
+    # Set application options
+    mock_appl_single <- "Pfam"
+    mock_appl_multiple <- c("Pfam", "Gene3D")
+    
+    # Path to the sample InterProScan TSV fixture (looked up in the package, not created here)
+    sample_tsv_path <- system.file("tests", "example_iprscan_valid.tsv", package = "MolEvolvR")
+    
+    # Read the TSV file into a dataframe
+    sample_tsv <- read.csv(sample_tsv_path, sep = "\t", header = TRUE) 
+    
+    # Mock the system function to avoid running the real command
+    mock_system <- mock(0L)  # Simulate successful system call
+    
+    # Patch the system and readIPRScanTSV functions
+    stub(runIPRScan, "system", mock_system)
+    stub(runIPRScan, "readIPRScanTSV", function(x) read.csv(sample_tsv_path, sep = "\t"))
+    
+    ## TEST 1: Command construction for single application
+    result_single <- runIPRScan(filepath_fasta, filepath_out, appl = mock_appl_single)
+    expected_cmd_single <- glue("iprscan -i {filepath_fasta} -b {filepath_out} --cpu 4 -f TSV ",
+                                "--appl {mock_appl_single}")
+    
+    # Capture the actual command from the mock
+    actual_cmd_single <- mock_args(mock_system)[[1]]
+    
+    # Verify that the expected command matches the actual command
+    expect_equal(as.character(unlist(actual_cmd_single)), as.character(expected_cmd_single))
+    
+    # NOTE(review): this fresh mock is replaced by mock_system_fail before runIPRScan is called again, so it is never exercised
+    mock_system <- mock(0L)
+    stub(runIPRScan, "system", mock_system)
+    
+    ## TEST 3: Result equals read.csv() of the sample TSV, i.e. the output of the stubbed readIPRScanTSV (there is no TEST 2)
+    expect_equal(result_single, sample_tsv)
+    
+    ## TEST 4: Error handling when system command fails
+    mock_system_fail <- mock(1L)  # Simulate non-zero exit code
+    stub(runIPRScan, "system", mock_system_fail)
+    
+    # Expect a warning and return NULL on failure
+    expect_warning(result_fail <- runIPRScan(filepath_fasta, filepath_out, appl = mock_appl_single),
+                   regexp = "interproscan exited with non-zero code")
+    expect_null(result_fail)
+    
+    ## TEST 5: Error handling for missing or invalid inputs
+    # Invalid `filepath_fasta`
+    expect_error(runIPRScan(NULL, filepath_out, appl = mock_appl_single), 
+                 "filepath_fasta cannot be NULL or empty")
+    
+    # Invalid `filepath_out`
+    expect_error(runIPRScan(filepath_fasta, NULL, appl = mock_appl_single), 
+                 "filepath_out cannot be NULL or empty")
+    
+    # Invalid `appl`
+    expect_error(runIPRScan(filepath_fasta, filepath_out, appl = "InvalidApp"), 
+                 "Invalid application specified")
+    
+    # readIPRScanTSV
+    # Read the TSV file using the function
+    df_ipr <- readIPRScanTSV(sample_tsv_path)
+    
+    # Check that the returned object is a data frame
+    expect_s3_class(df_ipr, "data.frame")
+    
+    # getIPRScanColNames
+    # Call the function to get the column names
+    col_names <- getIPRScanColNames()
+    
+    # Check that the result is a character vector
+    expect_type(col_names, "character")
+    
+    # Define the expected column names
+    expected_col_names <- c(
+        "AccNum", "SeqMD5Digest", "SLength", "Analysis",
+        "DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score",
+        "Status", "RunDate", "IPRAcc", "IPRDesc"
+    )
+    
+    # Check that the column names match exactly
+    expect_equal(col_names, expected_col_names)
+    expect_type(col_names, "character")
+    
+    # Ensure there are exactly 13 columns
+    expect_length(col_names, 13)
+    
+    # getIPRScanColTypes
+    col_types <- getIPRScanColTypes()
+    
+    # Check that col_types is of the expected class
+    # readr::cols() returns col_spec object
+    expect_s3_class(col_types, "col_spec")  
+    
+    # Verify that each column has the correct type
+    expect_equal(col_types$cols$AccNum, col_character())
+    expect_equal(col_types$cols$SeqMD5Digest, col_character())
+    expect_equal(col_types$cols$SLength, col_integer())
+    expect_equal(col_types$cols$Analysis, col_character())
+    expect_equal(col_types$cols$DB.ID, col_character())
+    expect_equal(col_types$cols$SignDesc, col_character())
+    expect_equal(col_types$cols$StartLoc, col_integer())
+    expect_equal(col_types$cols$StopLoc, col_integer())
+    expect_equal(col_types$cols$Score, col_double())
+    expect_equal(col_types$cols$Status, col_character())
+    expect_equal(col_types$cols$RunDate, col_character())
+    expect_equal(col_types$cols$IPRAcc, col_character())
+    expect_equal(col_types$cols$IPRDesc, col_character())
+    
+    # Optionally, check that there are no additional columns defined
+    expect_length(col_types$cols, 13)
+    
+    # createIPRScanDomainTable
+    
+    # Load the sample FASTA file
+    fasta <- Biostrings::readAAStringSet(filepath_fasta)
+    
+    # Read the sample InterProScan TSV file
+    df_iprscan <- readIPRScanTSV(sample_tsv_path)
+    
+    # Example accession number for testing
+    accnum <- df_iprscan$AccNum[1]
+    
+    # Test case 1: Valid inputs
+    df_iprscan_domains <- createIPRScanDomainTable(accnum, fasta, df_iprscan)
+    
+    # Check that the output is a data frame
+    expect_s3_class(df_iprscan_domains, "data.frame")
+    
+    # Validate the structure of the output
+    expect_true(all(c("AccNum", "DB.ID", "StartLoc", "StopLoc", "seq_domain", 
+                      "id_domain") %in% names(df_iprscan_domains)))
+    
+    # Validate the content of the seq_domain column
+    # Ensure no empty sequences
+    expect_true(all(nchar(df_iprscan_domains$seq_domain) > 0))  
+    
+    # Validate the id_domain structure
+    expect_true(all(grepl("^(~*\\w+(-\\w+-\\d+_\\d+)?)+$", df_iprscan_domains$id_domain)))
+    
+    # Test case 2: No matching accession number
+    empty_df <- createIPRScanDomainTable("non_existent_accnum", fasta, df_iprscan)
+    expect_s3_class(empty_df, "data.frame")
+    expect_equal(nrow(empty_df), 0)
+    
+    # Test case 3: No domains in input data frame
+    empty_iprscan <- df_iprscan[0, ]  # Create an empty df_iprscan
+    empty_domains_df <- createIPRScanDomainTable(accnum, fasta, empty_iprscan)
+    expect_s3_class(empty_domains_df, "data.frame")
+    expect_equal(nrow(empty_domains_df), 0)
+    
+    # convertIPRScanDomainTable2FA
+
+    # Test case 1: Valid domain data
+    fasta_domains <- convertIPRScanDomainTable2FA(df_iprscan_domains)
+    
+    # Check that the output is an AAStringSet
+    expect_s4_class(fasta_domains, "AAStringSet")
+    
+    # Check that the correct number of sequences are returned
+    expect_equal(length(fasta_domains), nrow(df_iprscan_domains))
+
+    # Check that the names of the sequences match the id_domain column
+    expect_equal(names(fasta_domains), as.character(df_iprscan_domains$id_domain))
+    
+    # Test case 2: Empty input data frame
+    empty_domains <- convertIPRScanDomainTable2FA(data.frame())
+    expect_s4_class(empty_domains, "AAStringSet")
+    expect_equal(length(empty_domains), 0)
+    
+    # Test case 3: Data frame with no domains
+    empty_df_iprscan <- df_iprscan[0, ]  # Create an empty df_iprscan
+    empty_domains_df <- convertIPRScanDomainTable2FA(empty_df_iprscan)
+    expect_s4_class(empty_domains_df, "AAStringSet")
+    expect_equal(length(empty_domains_df), 0)
+    
+    # getDomainsFromFA
+    # Test case 1: Valid input
+    fasta_domains <- getDomainsFromFA(fasta, df_iprscan)
+    
+    # Check that the output is an AAStringSet
+    expect_s4_class(fasta_domains, "AAStringSet")
+    
+    # Check that the output contains the expected sequences
+    expect_true(length(fasta_domains) > 0)  # Ensure there are some domains extracted
+    
+    # Test case 2: Empty input FASTA
+    empty_fasta <- Biostrings::AAStringSet()
+    empty_fasta_domains <- getDomainsFromFA(empty_fasta, df_iprscan)
+
+    expect_s4_class(empty_fasta_domains, "AAStringSet")
+    expect_equal(length(empty_fasta_domains), 0)
+    
+    # Test case 3: Empty input df_iprscan
+    empty_iprscan <- data.frame()  # Create an empty df_iprscan
+    empty_domains_iprscan <- getDomainsFromFA(fasta, empty_iprscan)
+    
+    expect_s4_class(empty_domains_iprscan, "AAStringSet")
+    expect_equal(length(empty_domains_iprscan), 0)
+    
+    # Test case 4: Verbose output
+    analysis <- c("Pfam", "Gene3D")
+    expect_warning(
+        getDomainsFromFA(fasta, empty_iprscan, verbose = TRUE),
+        regexp = stringr::str_glue(
+            "accession number: aaeB_6~~~aaeB_4 had no domains for the selected analyses: ",
+            "{paste(unique(analysis), collapse = ',')}\n"
+        )
+    )
+    
+    # Test case 5: Verbose output for some valid accession numbers
+    fasta_domains_verbose <- getDomainsFromFA(fasta, df_iprscan, verbose = TRUE)
+    
+    # Check that the output is still an AAStringSet
+    expect_s4_class(fasta_domains_verbose, "AAStringSet")
+    
+})
\ No newline at end of file