diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
new file mode 100644
index 0000000..b69bca1
--- /dev/null
+++ b/.github/workflows/tests.yaml
@@ -0,0 +1,176 @@
+name: Static analysis and tests
+
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches:
+      - main
+      - dev
+
+env:
+  VENV_PATH: ~/.venv
+  REPORT_PATH: tests/data/reports/
+
+
+jobs:
+  setup-env:
+    name: Setup and cache environment
+    permissions:
+      contents: read
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4  # same major version as the other jobs in this workflow
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"  # quoted: a bare 3.10 is a YAML float and resolves to 3.1
+
+      - name: Cache dependencies
+        uses: actions/cache@v4  # v1/v2 of actions/cache are retired and no longer run
+        with:
+          path: ~/.venv
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
+
+      - name: Install dependencies
+        run: |
+          python -m venv ~/.venv
+          source ~/.venv/bin/activate
+          python -m pip install --upgrade pip
+          python -m pip install .
+          python -m pip install -r test-requirements.txt
+
+  formatting:
+    name: Formatting
+    permissions:
+      contents: read
+    needs: setup-env
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Restore pip dependencies
+      uses: actions/cache@v4  # v1/v2 of actions/cache are retired and no longer run
+      with:
+        path: ~/.venv
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
+
+    - name: Check imports and formatting
+      run: |
+        source ~/.venv/bin/activate
+        isort --check-only --diff --profile black .
+        black --check --diff .
+
+  Linting:
+    name: Linting
+    permissions:
+      contents: read
+    needs: setup-env
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Restore pip dependencies
+      uses: actions/cache@v4  # v1/v2 of actions/cache are retired and no longer run
+      with:
+        path: ~/.venv
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
+
+    # C0301: Line too long
+    - name: Run pylint
+      run: |
+        source ~/.venv/bin/activate
+        pylint --fail-under=9 --disable=C0301 AmpliGone/ tests/ > ${{ env.REPORT_PATH }}pylint-report.txt
+
+    # E501: Line too long, W503: Line break before binary operator, E203: Whitespace before ':'
+    # The last two make it non-PEP8 compliant, but are automatically done by black.
+    - name: Run flake8
+      run: |
+        source ~/.venv/bin/activate
+        flake8 --ignore=E501,W503,E203 AmpliGone/ tests/ --output-file=${{ env.REPORT_PATH }}flake8-report.txt
+
+  static-checking:
+    name: Static checking
+    permissions:
+      contents: read
+    needs: setup-env
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Restore pip dependencies
+      uses: actions/cache@v4  # v1/v2 of actions/cache are retired and no longer run
+      with:
+        path: ~/.venv
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
+
+    - name: Run mypy
+      run: |
+        source ~/.venv/bin/activate
+        mypy --disallow-untyped-defs --disallow-incomplete-defs --ignore-missing-imports --disallow-untyped-decorators --strict-equality \
+        --warn-redundant-casts --warn-unused-ignores --warn-return-any --warn-unreachable AmpliGone/ tests/ > ${{ env.REPORT_PATH }}mypy-report.txt
+
+    - name: Run bandit
+      run: |
+        source ~/.venv/bin/activate
+        bandit -r AmpliGone/ -f json -o ${{ env.REPORT_PATH }}bandit-report.json
+
+  run-tests:
+    name: Run Tests
+    permissions:
+      contents: read
+    needs: setup-env
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Restore pip dependencies
+      uses: actions/cache@v4  # v1/v2 of actions/cache are retired and no longer run
+      with:
+        path: ~/.venv
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
+
+    - name: Run tests
+      run: |
+        source ~/.venv/bin/activate
+        pytest -sv --cov=AmpliGone/ --cov-report=xml:tests/data/reports/coverage.xml tests/
+
+    - name: Upload coverage report
+      if: always()
+      uses: actions/upload-artifact@v4  # must stay on the same major version as download-artifact below
+      with:
+        name: coverage-report
+        path: tests/data/reports/coverage.xml
+
+  sonarcloud:
+    name: SonarCloud Scan
+    needs: run-tests
+    runs-on: ubuntu-latest
+    if: always()
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0 # Shallow clones should be disabled for SonarCloud analysis
+
+    - name: Download coverage report
+      uses: actions/download-artifact@v4  # must stay on the same major version as upload-artifact above
+      with:
+        name: coverage-report
+        path: tests/data/coverage_reports/  # destination directory; coverage.xml is extracted into it
+
+    - name: SonarCloud Scan
+      if: always()
+      uses: sonarsource/sonarcloud-github-action@master  # NOTE(review): tracks a moving branch; consider pinning a release tag
+      env:
+        SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }}
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # needed to get PR info
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 2789de3..3002f83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,7 +45,6 @@ htmlcov/
 .coverage.*
 .cache
 nosetests.xml
-coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
@@ -132,4 +131,6 @@ dmypy.json
 # IDE
 .vscode/
 test/
-notes.txt
\ No newline at end of file
+notes.txt
+
+*:Zone.Identifier
\ No newline at end of file
diff --git a/AmpliGone/__init__.py b/AmpliGone/__init__.py
index 955210f..d35a1ee 100644
--- a/AmpliGone/__init__.py
+++ b/AmpliGone/__init__.py
@@ -1,2 +1,16 @@
+"""
+AmpliGone: A tool which accurately finds and removes primer sequences from NGS reads in an amplicon experiment.
+
+Attributes
+----------
+__version__ : str
+    The current version of the AmpliGone package.
+
+__prog__ : str
+    The name of the AmpliGone program.
+"""
+
+# pylint: disable=C0103
+# AmpliGone package name is not following snake_case naming convention
 __version__ = "1.3.0"
 __prog__ = "AmpliGone"
diff --git a/AmpliGone/__main__.py b/AmpliGone/__main__.py
index 28678b5..fa421b8 100644
--- a/AmpliGone/__main__.py
+++ b/AmpliGone/__main__.py
@@ -9,11 +9,11 @@
 from collections import defaultdict
 from concurrent.futures import ProcessPoolExecutor
 from itertools import chain
-from typing import Callable, List, Set, Tuple
+from typing import Callable
 
 import pandas as pd
 import parmap
-from rich import print
+from rich import print as pprint
 from rich.console import Console
 from rich.progress import Progress, SpinnerColumn
 
@@ -21,8 +21,8 @@
 import AmpliGone.alignmentpreset as AlignmentPreset
 from AmpliGone import __prog__, __version__
 from AmpliGone.args import get_args
-from AmpliGone.cut_reads import CutReads
-from AmpliGone.fasta2bed import CoordinateListsToBed, find_or_read_primers
+from AmpliGone.cut_reads import cut_reads
+from AmpliGone.fasta2bed import coord_lists_to_bed, find_or_read_primers
 from AmpliGone.io_ops import SequenceReads, write_output
 from AmpliGone.log import log
 
@@ -80,7 +80,7 @@ def check_loaded_index(
 
 def primer_df_to_primer_index(
     primer_df: pd.DataFrame, bind_virtual_primer: bool = True
-) -> Tuple[defaultdict, defaultdict]:
+) -> tuple[defaultdict, defaultdict]:
     """
     Convert primer DataFrame to primer index dictionaries.
 
@@ -93,7 +93,7 @@ def primer_df_to_primer_index(
 
     Returns
     -------
-    Tuple[defaultdict, defaultdict]
+    tuple[defaultdict, defaultdict]
         A tuple of two defaultdicts representing the forward and reverse primer indices.
 
     Raises
@@ -122,10 +122,10 @@ def primer_df_to_primer_index(
         )
         sys.exit(1)
 
-    forward_dict = defaultdict(set)
-    reverse_dict = defaultdict(set)
+    forward_dict: defaultdict[str, set[None | int]] = defaultdict(set)
+    reverse_dict: defaultdict[str, set[None | int]] = defaultdict(set)
 
-    reference_set: Set[str] = set(primer_df["ref"].unique())
+    reference_set: set[str] = set(primer_df["ref"].unique())
     for refid in reference_set:
         forward_dict[refid] = set()
         reverse_dict[refid] = set()
@@ -181,18 +181,12 @@ def coordinates_to_index(
     for _, refid, start, end in forward_primers_df[
         ["ref", "start", "end"]
     ].itertuples():
-        refid: str
-        start: int
-        end: int
         forward_dict[refid].update(
             coordinates_to_index(forward_primers_df, refid, start, end)
         )
 
     # iterate over reverse_primers_df and add the coordinates between "start" and "end" to the reverse_dict
     for _, refid, start, end in reverse_primer_df[["ref", "start", "end"]].itertuples():
-        refid: str
-        start: int
-        end: int
         reverse_dict[refid].update(
             coordinates_to_index(reverse_primer_df, refid, start, end)
         )
@@ -284,9 +278,9 @@ def correct_fragment_lookaround_size(args: argparse.Namespace) -> argparse.Names
 def parallel_dispatcher(
     indexed_reads: SequenceReads,
     args: argparse.Namespace,
-    primer_sets: Tuple[defaultdict, defaultdict],
+    primer_sets: tuple[defaultdict, defaultdict],
     preset: str,
-    matrix: List[int],
+    matrix: list[int],
 ) -> pd.DataFrame:
     """
     Wrapping function that actually calls the parallelization function to process the primer removal process of the reads.
@@ -297,11 +291,11 @@ def parallel_dispatcher(
         The indexed reads to be processed.
     args : argparse.Namespace
         The command-line arguments.
-    primer_sets : Tuple[defaultdict, defaultdict]
+    primer_sets : tuple[defaultdict, defaultdict]
         The primer sequences to be removed.
     preset : str
         The preset configuration for processing.
-    matrix : List[int]
+    matrix : list[int]
         The matrix for processing.
 
     Returns
@@ -316,12 +310,12 @@ def parallel_dispatcher(
             record=False,
         ),
         transient=True,
-        disable=True if args.verbose is True or args.quiet is True else False,
+        disable=args.quiet or args.verbose,
     ) as progress:
         progress.add_task("[yellow]Removing primer sequences...", total=None)
         processed_reads = parallel(
             indexed_reads.frame,
-            CutReads,
+            cut_reads,
             args.threads,
             primer_sets,
             args.reference,
@@ -340,10 +334,10 @@ def parallel(
     frame: pd.DataFrame,
     function: Callable[..., pd.DataFrame],
     workers: int,
-    primer_sets: Tuple[defaultdict, defaultdict],
+    primer_sets: tuple[defaultdict, defaultdict],
     reference: str,
     preset: str,
-    scoring: List[int],
+    scoring: list[int],
     fragment_lookaround_size: int,
     amplicon_type: str,
 ) -> pd.DataFrame:
@@ -358,13 +352,13 @@ def parallel(
         The function to apply to the DataFrame.
     workers : int
         The number of workers to use for parallel processing.
-    primer_df : Tuple[defaultdict, defaultdict]
+    primer_sets : tuple[defaultdict, defaultdict]
         A tuple containing the indexes of the primer coordinates to remove.
     reference : str
         The reference sequence to use for alignment.
     preset : str
         The preset to use for alignment.
-    scoring : List[int]
+    scoring : list[int]
         The scoring matrix to use for alignment.
         The size of the fragment lookaround.
     fragment_lookaround_size : int
@@ -378,7 +372,7 @@ def parallel(
     """
     frame_split = [frame.iloc[i::workers] for i in range(workers)]
     tr = [*range(workers)]
-    return pd.concat(
+    df = pd.concat(
         parmap.map(
             function,
             zip(frame_split, tr),
@@ -388,19 +382,56 @@ def parallel(
             scoring,
             fragment_lookaround_size,
             amplicon_type,
-            workers,
             pm_processes=workers,
         )
     )
+    # parmap.map sometimes returns Any, but we know it's a DataFrame
+    if not isinstance(df, pd.DataFrame):
+        raise TypeError(f"{df} should be a DataFrame")
+    return df
 
 
-def main():
+def main(provided_args: list[str] | None = None) -> None:
+    """
+    Main function to process command-line arguments and execute the AmpliGone tool.
+
+    Parameters
+    ----------
+    provided_args : list of str, optional
+        A list of command-line arguments to parse. If None, the arguments will be taken from sys.argv.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    This function orchestrates the entire process of reading input files, processing reads, and writing output files.
+    It performs the following steps:
+    1. Parses the command-line arguments using the `get_args` function.
+    2. Validates the provided arguments and sets the logging level.
+    3. Loads the input reads and primers using concurrent futures for parallel execution.
+    4. Checks the loaded reads and primers, and adjusts thread count if necessary.
+    5. Processes the reads to remove primer sequences using parallel processing.
+    6. Logs the results and writes the output files.
+
+    Examples
+    --------
+    >>> import sys
+    >>> sys.argv = ['script.py', '--input', 'input.fasta', '--primers', 'primers.fasta', '--reference', 'reference.fasta', '--output', 'output.bed']
+    >>> main()
+    """
+    # Resolve the argument list once so the emptiness check and the parser see the same input.
+    cli_args = provided_args if provided_args else sys.argv[1:]
+
-    if len(sys.argv[1:]) < 1:
-        print(
+    if len(cli_args) < 1:
+        pprint(
             f"{__prog__} was called but no arguments were given, please try again.\nUse '{__prog__} -h' to see the help document"
         )
         sys.exit(1)
-    args = get_args(sys.argv[1:])
+
+    args = get_args(cli_args)
+
     # check if verbose and quiet aren't both set
     if args.verbose is True and args.quiet is True:
         log.error(
@@ -485,7 +516,7 @@ def main():
                 axis=1,
             )
         ]
-        CoordinateListsToBed(filtered_primer_df, args.export_primers)
+        coord_lists_to_bed(filtered_primer_df, args.export_primers)
 
     processed_reads = processed_reads.drop(columns=["Removed_coordinates"])
 
diff --git a/AmpliGone/alignmentmatrix.py b/AmpliGone/alignmentmatrix.py
index 45d9806..412864d 100644
--- a/AmpliGone/alignmentmatrix.py
+++ b/AmpliGone/alignmentmatrix.py
@@ -1,11 +1,63 @@
+"""
+This module provides functions to calculate and validate scoring matrices for sequence alignment.
+
+Functions
+---------
+get_scoring_matrix(input_matrix: Optional[List[str]]) -> List[int]
+    Calculate the scoring matrix based on the input matrix.
+
+_input_to_dict(input_matrix: Optional[List[str]]) -> Optional[Dict[str, int]]
+    Convert the input matrix to a dictionary.
+
+_valid_scoring_list_length(input_list: List[str]) -> bool
+    Check if the length of the input list is either 4, 6, or 7.
+
+_scoring_has_negative_values(input_list: List[int]) -> bool
+    Check if the input list contains any negative values.
+
+_sort_matrix_dict(matrix_dict: Dict[str, int], required_4: List[str], required_6: List[str], required_7: List[str]) -> Dict[str, int]
+    Sort the given matrix dictionary based on the provided order of keys.
+
+_get_ordered_values(matrix_dict: Dict[str, int], required_4: List[str], required_6: List[str], required_7: List[str]) -> List[int]
+    Get the ordered values from the matrix dictionary based on the number of keys present.
+
+_log_invalid_combination_error(matrix_keys: List[str], required_keys: List[str]) -> None
+    Log an error message and exit the program when an invalid combination of scoring matrix keys is encountered.
+
+_validate_matrix_combinations(matrix_dict: Dict[str, int]) -> List[int]
+    Validate the combinations of matrix values in the given matrix dictionary.
+
+Notes
+-----
+This module is designed to handle the calculation and validation of scoring matrices used in sequence alignment. It ensures that the input matrices are in the correct format, contain valid values, and have the appropriate length. The module also provides functions to sort and order the matrix values based on predefined requirements.
+
+Examples
+--------
+>>> input_matrix = ['match=1', 'mismatch=2', 'gap_o1=3', 'gap_e1=4']
+>>> get_scoring_matrix(input_matrix)
+[1, 2, 3, 4]
+
+>>> input_matrix = ['match=1', 'mismatch=2', 'gap_o1=3', 'gap_e1=4', 'gap_o2=5', 'gap_e2=6']
+>>> get_scoring_matrix(input_matrix)
+[1, 2, 3, 4, 5, 6]
+
+>>> input_matrix = ['match=1', 'mismatch=2', 'gap_o1=3', 'gap_e1=4', 'gap_o2=5', 'gap_e2=6', 'mma=7']
+>>> get_scoring_matrix(input_matrix)
+[1, 2, 3, 4, 5, 6, 7]
+
+>>> input_matrix = ['match=1', 'mismatch=2', 'gap_o1=3', 'gap_e1=4', 'gap_o2=5', 'gap_e2=6', 'mma=-7']
+>>> get_scoring_matrix(input_matrix)
+SystemExit: Given scoring matrix contains a negative value. The scoring matrix may only contain non-negative integers. Please check your input and try again.
+"""
+
 import os
 import sys
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from AmpliGone.log import log
 
 
-def get_scoring_matrix(input_matrix: List[str] | None) -> List[int]:
+def get_scoring_matrix(input_matrix: Optional[List[str]]) -> List[int]:
     """
     Calculate the scoring matrix based on the input matrix.
 
@@ -74,7 +126,7 @@ def get_scoring_matrix(input_matrix: List[str] | None) -> List[int]:
     return _validate_matrix_combinations(matrix_dict)
 
 
-def _input_to_dict(input_matrix: List[str] | None) -> Dict[str, int] | None:
+def _input_to_dict(input_matrix: Optional[List[str]]) -> Optional[Dict[str, int]]:
     """
     Convert the input matrix to a dictionary.
 
diff --git a/AmpliGone/alignmentpreset.py b/AmpliGone/alignmentpreset.py
index afb3ed8..c100969 100644
--- a/AmpliGone/alignmentpreset.py
+++ b/AmpliGone/alignmentpreset.py
@@ -1,3 +1,50 @@
+"""
+This module provides functions and classes for determining the optimal alignment preset for sequencing reads.
+
+Functions
+---------
+get_alignment_preset(input_args: argparse.Namespace, indexed_reads: SequenceReads) -> str
+    Get the alignment preset for the given reads.
+
+find_preset(threads: int, data: pd.DataFrame) -> str
+    Find the preset based on the statistics calculated from the input data.
+
+_qual_to_ord_dispatcher(qdata: str, threads: int) -> List[int]
+    Convert a string of characters to a list of ASCII values minus 33 using multiple threads.
+
+_process_chunk(chunk: str) -> List[int]
+    Converts each character in the given chunk to its corresponding ASCII value minus 33.
+
+_determine_preset(avg_len: float, avg_qual: float, quality_range: int, length_range: int) -> str
+    Determine the preset based on the calculated statistics.
+
+_calc_avg_read_length(sequence_list: List[str]) -> float
+    Calculate the average length of a list of sequences.
+
+_calc_avg_read_qual(quality_list: List[int]) -> float
+    Calculate the average quality score of a list of quality scores.
+
+_get_unique_quality_scores(quality_list: List[int]) -> int
+    Return the number of unique quality scores in a list.
+
+_get_unique_read_lengths(sequence_list: List[str]) -> int
+    Return the number of unique lengths of strings in a list.
+
+_sequence_statistics_dispatcher(reads_list: List[str], ordinal_qualities_list: List[int], threads: int) -> Tuple[float, float, int, int]
+    Calculate sequence statistics using multiple threads.
+
+Notes
+-----
+This module is designed to handle sequencing reads and determine the optimal alignment preset based on the quality and length of the reads. It uses parallel processing to efficiently handle large datasets and calculate necessary statistics.
+
+Examples
+--------
+>>> import pandas as pd
+>>> data = pd.DataFrame({'Sequence': ['ATCG', 'GCTA'], 'Qualities': ['!@#$%', '&*()']})
+>>> find_preset(4, data)
+'sr'
+"""
+
 import argparse
 from concurrent.futures import ProcessPoolExecutor
 from typing import Generator, List, Tuple
@@ -42,6 +89,9 @@ def get_alignment_preset(
 
     """
     if input_args.alignment_preset is not None:
+        # this check is mostly for mypy to understand that alignment_preset is not Any
+        if not isinstance(input_args.alignment_preset, str):
+            raise TypeError("alignment_preset should be a string")
         return input_args.alignment_preset
     log.info("Finding optimal alignment-preset for the given reads")
     sample_size = min(len(indexed_reads.tuples), 15000)
@@ -120,14 +170,14 @@ def _extract_read_data(data: pd.DataFrame) -> Tuple[List[str], str]:
         return list_of_reads, qualities_str
 
     reads_list, quality_data = _extract_read_data(data)
-    ord_quality_list: List[int] = _qual_to_ord_dispatcher(quality_data, threads)
+    ord_quality_list: list[int] = _qual_to_ord_dispatcher(quality_data, threads)
     avg_len, avg_qual, quality_range, length_range = _sequence_statistics_dispatcher(
         reads_list, ord_quality_list, threads
     )
     return _determine_preset(avg_len, avg_qual, quality_range, length_range)
 
 
-def _qual_to_ord_dispatcher(qdata: str, threads) -> List[int]:
+def _qual_to_ord_dispatcher(qdata: str, threads: int) -> list[int]:
     """
     Convert a string of characters to a list of ASCII values minus 33 using multiple threads.
 
@@ -183,8 +233,8 @@ def _create_chunks(lst: str, n: int) -> Generator:
             yield lst[start:end]
             start = end
 
-    qdata_chunks: List[str] = list(_create_chunks(qdata, threads))
-    ordinal_quality_list: List[int] = []
+    qdata_chunks: list[str] = list(_create_chunks(qdata, threads))
+    ordinal_quality_list: list[int] = []
     with ProcessPoolExecutor(max_workers=threads) as pool:
         results = pool.map(_process_chunk, qdata_chunks)
         for result in results:
@@ -192,7 +242,7 @@ def _create_chunks(lst: str, n: int) -> Generator:
     return ordinal_quality_list
 
 
-def _process_chunk(chunk: str):
+def _process_chunk(chunk: str) -> list[int]:
     """
     Converts each character in the given chunk to its corresponding ASCII value minus 33.
 
@@ -332,7 +382,7 @@ def _is_long_read(avg_len: float) -> bool:
             # this is probably 'short read' illumina NextSeq data
             # --> set the 'SR' preset
             return "sr"
-        ##! previous if-statement is not False.
+        # ! previous if-statement is not False.
         # this is probably 'long read' illumina MiSeq data
         # --> the 'SR' preset still applies but we keep it split
         # in case a custom set of parameters is necessary in the future
@@ -341,7 +391,7 @@ def _is_long_read(avg_len: float) -> bool:
         # this is probably oxford nanopore data
         # --> set the preset to 'map-ont'
         return "map-ont"
-    ##! previous if-statement is not True.
+    # ! previous if-statement is not True.
     # this might be very 'unstable' nextseq data,
     # or from a platform we currently dont really support officially.
     # fallback to 'sr' preset
diff --git a/AmpliGone/args.py b/AmpliGone/args.py
index b2b0657..3b6984e 100644
--- a/AmpliGone/args.py
+++ b/AmpliGone/args.py
@@ -1,3 +1,23 @@
+"""
+This module provides functionality for parsing command-line arguments for the AmpliGone tool using the argparse library.
+It includes custom argument validation functions, a flexible argument formatter, and a rich argument parser for enhanced
+command-line interface (CLI) experience.
+
+Functions
+---------
+get_args(givenargs: List[str]) -> argparse.Namespace
+    Parses the given command-line arguments and returns them as an argparse namespace.
+
+Classes
+-------
+FlexibleArgFormatter(argparse.HelpFormatter)
+    A subclass of argparse.HelpFormatter that improves the formatting of help text.
+
+RichParser(argparse.ArgumentParser)
+    A subclass of argparse.ArgumentParser that uses rich.print for displaying messages.
+
+"""
+
 import argparse
 import multiprocessing
 import os
@@ -52,7 +72,7 @@ def check_file_extensions(allowed_extensions: Iterable[str], fname: str) -> str:
             parser.error(f"File {fname} doesn't end with one of {allowed_extensions}")
         return os.path.abspath(fname)
 
-    def check_file_exists(fname: str) -> str | None:
+    def check_file_exists(fname: str) -> str:
         """Check if the given file `fname` exists and return the absolute path.
 
         Parameters
@@ -74,10 +94,11 @@ def check_file_exists(fname: str) -> str | None:
         if os.path.isfile(fname):
             return fname
         parser.error(f'Error: File "{fname}" does not exist.')
+        raise argparse.ArgumentTypeError(f'Error: File "{fname}" does not exist.')
 
     parser = RichParser(
         prog=f"[bold]{__prog__}[/bold]",
-        usage=f"[bold]{__prog__}[/bold] \[required options] \[optional arguments]",
+        usage=f"[bold]{__prog__}[/bold] \\[required options] \\[optional arguments]",
         description=f"[bold underline]{__prog__}[/bold underline]: An accurate and efficient tool to remove primers from NGS reads in reference-based experiments",
         formatter_class=FlexibleArgFormatter,
         add_help=False,
@@ -271,12 +292,15 @@ class FlexibleArgFormatter(argparse.HelpFormatter):
     * Changes the behaviour of the metavar to be only printed once per long AND shorthand argument, instead of printing the metavar multiple times for every possible flag.
     """
 
-    def __init__(self, prog):
+    def __init__(self, prog: str) -> None:
         term_width = shutil.get_terminal_size().columns
         max_help_position = min(max(24, term_width // 2), 80)
         super().__init__(prog, max_help_position=max_help_position)
 
-    def _get_help_string(self, action):
+    # action is actually an argparse._StoreAction object, which is a subclass of argparse.Action
+    # _StoreAction violates the Liskov Substitution Principle
+    # see: https://mypy.readthedocs.io/en/stable/common_issues.html#incompatible-overrides
+    def _get_help_string(self, action: argparse.Action) -> str:
         """ """
         help_text = action.help
         if (
@@ -286,9 +310,12 @@ def _get_help_string(self, action):
             and action.default is not None
         ):
             help_text += f"\n  ([underline]default: {str(action.default)}[/underline])"
+        if not help_text:
+            raise AssertionError("Help text should always be present")
         return help_text
 
-    def _format_action_invocation(self, action):
+    # see comment above
+    def _format_action_invocation(self, action: argparse.Action) -> str:
         """ """
         if not action.option_strings or action.nargs == 0:
             return super()._format_action_invocation(action)
@@ -296,17 +323,19 @@ def _format_action_invocation(self, action):
         args_string = self._format_args(action, default)
         return ", ".join(action.option_strings) + " " + args_string
 
-    def _split_lines(self, text, width):
+    def _split_lines(self, text: str, width: int) -> list[str]:
         return self._para_reformat(text, width)
 
-    def _fill_text(self, text, width, indent):
+    def _fill_text(self, text: str, width: int, _: str) -> str:
         lines = self._para_reformat(text, width)
         return "\n".join(lines)
 
-    def _indents(self, line):
+    def _indents(self, line: str) -> tuple[int, int]:
         """Return line indent level and "sub_indent" for bullet list text."""
-
-        indent = len(re.match(r"( *)", line).group(1))
+        matched_line = re.match(r"( *)", line)
+        if not matched_line:
+            raise AssertionError("Line should always match this regex pattern: ( *)")
+        indent = len(matched_line.group(1))
         if list_match := re.match(r"( *)(([*\-+>]+|\w+\)|\w+\.) +)", line):
             sub_indent = indent + len(list_match.group(2))
         else:
@@ -314,14 +343,14 @@ def _indents(self, line):
 
         return (indent, sub_indent)
 
-    def _split_paragraphs(self, text):
+    def _split_paragraphs(self, text: str) -> list[str]:
         """Split text in to paragraphs of like-indented lines."""
 
         text = textwrap.dedent(text).strip()
         text = re.sub("\n\n[\n]+", "\n\n", text)
 
         last_sub_indent = None
-        paragraphs = []
+        paragraphs: list[str] = []
         for line in text.splitlines():
             (indent, sub_indent) = self._indents(line)
             is_text = re.search(r"[^\s]", line) is not None
@@ -334,10 +363,11 @@ def _split_paragraphs(self, text):
             last_sub_indent = sub_indent if is_text else None
         return paragraphs
 
-    def _para_reformat(self, text, width):
+    def _para_reformat(self, text: str, width: int) -> list[str]:
         """Reformat text, by paragraph."""
 
-        paragraphs = []
+        paragraphs: list[str] = []
+
         for paragraph in self._split_paragraphs(text):
             (indent, sub_indent) = self._indents(paragraph)
 
diff --git a/AmpliGone/cut_reads.py b/AmpliGone/cut_reads.py
index 8ca0682..6230f4d 100644
--- a/AmpliGone/cut_reads.py
+++ b/AmpliGone/cut_reads.py
@@ -1,27 +1,105 @@
+"""
+This module provides functions to cut read sequences based on primer locations and reference mapping. It includes
+functions to handle sequence reads, quality strings, and CIGAR information to accurately cut and process reads.
+
+Functions
+---------
+cut_read(seq: str, qual: str, position_needs_cutting: Callable[..., bool], primer_list: Tuple[int, ...], position_on_reference: int, cut_direction: int, read_direction: int, cigar: List[List[int]], query_start: int, query_end: int, fragment_lookaround_size: int) -> Tuple[str, str, List[int], int, int]
+    Cut a read sequence and quality string based on read-direction, CIGAR information, orientation, and fragment position.
+
+cut_reads(data: Tuple[pd.DataFrame, int], primer_sets: Tuple[defaultdict, defaultdict], reference: str, preset: str, scoring: List[int], fragment_lookaround_size: int, amplicon_type: str) -> pd.DataFrame
+    Cut reads based on primer locations and reference mapping.
+
+Notes
+-----
+- The `cut_read` function processes individual read sequences and quality strings, cutting them based on the provided
+  parameters.
+- The `cut_reads` function processes a DataFrame of reads, cutting them based on primer locations and reference mapping.
+- The module uses the `mappy` library for sequence alignment and the `pandas` library for data manipulation.
+- The `position_in_or_before_primer` and `position_in_or_after_primer` functions are used to determine if a position
+  needs cutting based on primer locations.
+"""
+
+# pylint: disable=E1120
+# pylint doesn't understand that the position_in_or_after_primer function is not being called on the lines around 174
 import os
 from collections import defaultdict
+from dataclasses import dataclass
 from typing import Callable, List, Tuple
 
+# mappy is a C extension, so it is added to the pylint extension allow list
 import mappy as mp
 import pandas as pd
 
 from AmpliGone.log import log
 
-from .cutlery import PositionInOrAfterPrimer, PositionInOrBeforePrimer
+from .cutlery import position_in_or_after_primer, position_in_or_before_primer
+
+
+@dataclass
+class Read:
+    """
+    A class to represent a sequencing read.
+
+    Attributes
+    ----------
+    name : str
+        The name or identifier of the read.
+    seq : str
+        The nucleotide sequence of the read.
+    qual : str
+        The quality scores of the read, encoded as a string.
+    """
+
+    name: str
+    seq: str
+    qual: str
+
+
+@dataclass
+class CuttingParameters:
+    """
+    Bundle of the alignment-derived values that `cut_read` needs to trim a single read.
+
+    Attributes
+    ----------
+    position_needs_cutting : Callable[..., bool]
+        Predicate that `cut_read` calls with the reference position and primer list to decide whether to keep cutting.
+
+    primer_list : Tuple[int, ...]
+        A tuple containing the list of primer positions.
+
+    position_on_reference : int
+        The position on the reference sequence.
+
+    cut_direction : int
+        The direction of the cut (e.g., 1 for forward, -1 for reverse).
+
+    read_direction : int
+        The direction of the read (e.g., 1 for forward, -1 for reverse).
+
+    cigar : List[List[int]]
+        The alignment CIGAR as a list of pairs; `cut_read` unpacks each pair as (length, operation type).
+
+    query_range : dict[str, int]
+        A dictionary with the keys "start" and "end" holding the start and end positions of the query sequence.
+
+    fragment_lookaround_size : int
+        The size of the fragment lookaround region.
+    """
+
+    position_needs_cutting: Callable[..., bool]
+    primer_list: Tuple[int, ...]
+    position_on_reference: int
+    cut_direction: int
+    read_direction: int
+    cigar: List[List[int]]
+    query_range: dict[str, int]
+    fragment_lookaround_size: int
 
 
 def cut_read(
-    seq: str,
-    qual: str,
-    PositionNeedsCutting: Callable[..., bool],
-    primer_list: Tuple[int, ...],
-    position_on_reference: int,
-    cut_direction: int,
-    read_direction: int,
-    cigar: List[List[int]],
-    query_start: int,
-    query_end: int,
-    fragment_lookaround_size: int,
+    read: Read, params: CuttingParameters
 ) -> Tuple[str, str, List[int], int, int]:
     """
     Cut a read sequence and quality string based read-direction, cigar-information, orientation, and fragment position.
@@ -30,24 +108,34 @@ def cut_read(
     ----------
     seq : str
         The read sequence.
+
     qual : str
         The quality string.
+
     PositionNeedsCutting : Callable[..., bool]
         A function that returns True if the position on the reference needs to be cut.
+
     primer_list : Tuple[int, ...]
         A tuple of integers representing the positions of primers on the reference.
+
     position_on_reference : int
         The position on the reference where the read sequence starts.
+
     cut_direction : int
         The direction in which the read sequence needs to be cut.
+
     read_direction : int
         The direction in which the read sequence is read.
+
     cigar : List[List[int]]
         A list of lists representing the CIGAR string.
+
     query_start : int
         The start position of the read sequence on the query.
+
     query_end : int
         The end position of the read sequence on the query.
+
     fragment_lookaround_size : int
         The size of the fragment lookaround.
 
@@ -60,46 +148,126 @@ def cut_read(
     removed_coords = []
 
     # Whether to start at the end or at the start of the read sequence
-    if read_direction == cut_direction:
+    if params.read_direction == params.cut_direction:
         # Start at the position that first matches the reference (skip soft clipped regions)
-        position_on_sequence = query_start
+        position_on_sequence = params.query_range["start"]
     else:
         # End at the position that last matches the reference (skip soft clipped regions)
-        position_on_sequence = query_end
+        position_on_sequence = params.query_range["end"]
 
-    for cigar_len, cigar_type in cigar:
+    for cigar_len, cigar_type in params.cigar:
         while cigar_len > 0 and (
-            PositionNeedsCutting(
-                position_on_reference, primer_list, fragment_lookaround_size
+            params.position_needs_cutting(
+                params.position_on_reference,
+                params.primer_list,
+                params.fragment_lookaround_size,
             )
             or cigar_type not in (0, 7)  # always end with a match
         ):
             cigar_len -= 1
-            removed_coords.append(position_on_reference)
+            removed_coords.append(params.position_on_reference)
 
             # Increment position on sequence if match/insert (in seq)/match(seq)/mismatch(seq)
             if cigar_type in (0, 1, 7, 8):
-                position_on_sequence += read_direction * cut_direction
+                position_on_sequence += params.read_direction * params.cut_direction
 
             # Increment position on reference if match/deletion (in seq)/match(seq)/mismatch(seq)
             if cigar_type in (0, 2, 7, 8):
-                position_on_reference += cut_direction
-        if not PositionNeedsCutting(
-            position_on_reference, primer_list, fragment_lookaround_size
+                params.position_on_reference += params.cut_direction
+        if not params.position_needs_cutting(
+            params.position_on_reference,
+            params.primer_list,
+            params.fragment_lookaround_size,
         ) and cigar_type in (0, 7):
             break
 
-    if read_direction == cut_direction:
-        seq = seq[position_on_sequence:]
-        qual = qual[position_on_sequence:]
-        query_end -= position_on_sequence
-        return seq, qual, removed_coords, query_start, query_end
-    seq = seq[:position_on_sequence]
-    qual = qual[:position_on_sequence]
-    return seq, qual, removed_coords, query_start, query_end
+    if params.read_direction == params.cut_direction:
+        read.seq = read.seq[position_on_sequence:]
+        read.qual = read.qual[position_on_sequence:]
+        params.query_range["end"] -= position_on_sequence
+        return (
+            read.seq,
+            read.qual,
+            removed_coords,
+            params.query_range["start"],
+            params.query_range["end"],
+        )
+    read.seq = read.seq[:position_on_sequence]
+    read.qual = read.qual[:position_on_sequence]
+    return (
+        read.seq,
+        read.qual,
+        removed_coords,
+        params.query_range["start"],
+        params.query_range["end"],
+    )
+
+
+def log_cache_info(index: int, total_reads: int, _threadnumber: int) -> None:
+    """
+    Logs cache information for the primer position functions.
+
+    Parameters
+    ----------
+    index : int
+        The current index of the read being processed.
+
+    total_reads : int
+        The total number of reads to be processed.
+
+    _threadnumber : int
+        The thread number of the current process.
+
+    Returns
+    -------
+    None
+        This function does not return any value. It logs the cache information.
+
+    Notes
+    -----
+    This function logs the completion percentage of read processing and the cache usage and hit ratio
+    for the `position_in_or_before_primer` and `position_in_or_after_primer` functions. It also handles
+    potential division by zero errors when calculating cache hit ratios.
+    """
+    completion_percentage = round(index / total_reads * 100)
+    maxsize = position_in_or_before_primer.cache_info().maxsize
+    currsize = position_in_or_before_primer.cache_info().currsize
+    cache_usage_before = (
+        currsize / maxsize * 100 if maxsize is not None and currsize is not None else 0
+    )
+    maxsize = position_in_or_after_primer.cache_info().maxsize
+    currsize = position_in_or_after_primer.cache_info().currsize
+    cache_usage_after = (
+        currsize / maxsize * 100 if maxsize is not None and currsize is not None else 0
+    )
+    # TODO: clean up this section of safely dividing by zero
+    cache_misses = position_in_or_before_primer.cache_info().misses
+    cache_hit_ratio_before = (
+        (position_in_or_before_primer.cache_info().hits / cache_misses)
+        if cache_misses != 0
+        else 0
+    )
+    cache_misses = position_in_or_after_primer.cache_info().misses
+    cache_hit_ratio_after = (
+        (position_in_or_after_primer.cache_info().hits / cache_misses)
+        if cache_misses != 0
+        else 0
+    )
+    log.debug(
+        # mypy doesn't understand that position_in_or_before_primer has a __qualname__ attribute,
+        # because it thinks it's the wrapper (lru_cache) function, which does not have a __qualname__ attribute
+        f"Thread {_threadnumber} @ processID {os.getpid()}\t::\t"
+        f"Reads processing {completion_percentage}% complete.\n\t"
+        f"MODULE {position_in_or_before_primer.__module__}.{position_in_or_before_primer.__qualname__} "  # type: ignore[attr-defined]
+        f"CACHE INFORMATION\n\t\tCache size usage = {cache_usage_before:.2f}%\n\t\t"
+        f"Cache hit ratio = {cache_hit_ratio_before:.2f}%\n\t"
+        f"MODULE {position_in_or_after_primer.__module__}.{position_in_or_after_primer.__qualname__} "
+        f"CACHE INFORMATION\n\t\tCache size usage = {cache_usage_after:.2f}%\n\t\t"
+        f"Cache hit ratio = {cache_hit_ratio_after:.2f}%"
+    )
 
 
-def CutReads(
+def cut_reads(
     data: Tuple[pd.DataFrame, int],
     primer_sets: Tuple[defaultdict, defaultdict],
     reference: str,
@@ -107,7 +275,6 @@ def CutReads(
     scoring: List[int],
     fragment_lookaround_size: int,
     amplicon_type: str,
-    workers: int,
 ) -> pd.DataFrame:
     """
     Cut reads based on primer locations and reference mapping.
@@ -117,18 +284,25 @@ def CutReads(
     data : Tuple[pd.DataFrame, int]
         A tuple containing a pandas DataFrame with columns "Readname", "Sequence", and "Qualities",
         and an integer representing the thread number.
+
     primer_sets : Tuple[defaultdict, defaultdict]
         A tuple containing two defaultdicts, one for forward primers and one for reverse primers. These defaultdicts contain the primer coordinates to remove.
+
     reference : str
         The reference genome sequence.
+
     preset : str
         The preset used for minimap2 alignment.
+
     scoring : List[int]
         The scoring matrix used for minimap2 alignment.
+
     fragment_lookaround_size : int
         The number of bases to look around a fragment when cutting reads.
+
     amplicon_type : str
         The type of amplicon, either "end-to-end", "end-to-mid", or "fragmented".
+
     workers : int
         The number of workers to use for parallel processing.
 
@@ -138,21 +312,20 @@ def CutReads(
         A pandas DataFrame with columns "Readname", "Sequence", "Qualities", and "Removed_coordinates",
         representing the processed reads and the coordinates that were removed.
     """
-    Frame, _threadnumber = data
+    frame, _threadnumber = data
     log.debug(
-        f"Initiated thread {_threadnumber} @ process ID {os.getpid()} :: Processing {len(Frame)} reads."
+        f"Initiated thread {_threadnumber} @ process ID {os.getpid()} :: Processing {len(frame)} reads."
     )
 
-    FWDict, RVDict = primer_sets
+    fw_dict, rv_dict = primer_sets
 
-    Aln = mp.Aligner(
+    aligner = mp.Aligner(
         reference,
         preset=preset,
         best_n=1,
         scoring=scoring,
         extra_flags=0x4000000,  # Distinguish between match and mismatch: MM_F_EQX flag in minimap2
     )
-
     processed_readnames = []
     processed_sequences = []
     processed_qualities = []
@@ -161,48 +334,24 @@ def CutReads(
     max_iter = (
         10  # If more iterations are needed, the sequence is discarded (not recorded)
     )
-    total_reads = len(Frame)
+    total_reads = len(frame)
     for index, (_, name, seq, qual) in enumerate(
-        Frame[["Readname", "Sequence", "Qualities"]].itertuples(), 1
+        frame[["Readname", "Sequence", "Qualities"]].itertuples(), 1
     ):
-        name: str
-        seq: str
-        qual: str
-        if total_reads >= 10 and index % (total_reads // 10) == 0 and log.level == 10:
-            completion_percentage = round(index / total_reads * 100)
-            maxsize = PositionInOrBeforePrimer.cache_info().maxsize
-            currsize = PositionInOrBeforePrimer.cache_info().currsize
-            cache_usage_before = (
-                currsize / maxsize * 100
-                if maxsize is not None and currsize is not None
-                else 0
-            )
-            maxsize = PositionInOrAfterPrimer.cache_info().maxsize
-            currsize = PositionInOrAfterPrimer.cache_info().currsize
-            cache_usage_after = (
-                currsize / maxsize * 100
-                if maxsize is not None and currsize is not None
-                else 0
-            )
-            # todo: clean up this section of safely dividing by zero
-            cache_misses = PositionInOrBeforePrimer.cache_info().misses
-            cache_hit_ratio_before = (
-                (PositionInOrBeforePrimer.cache_info().hits / cache_misses)
-                if cache_misses != 0
-                else 0
-            )
-            cache_misses = PositionInOrAfterPrimer.cache_info().misses
-            cache_hit_ratio_after = (
-                (PositionInOrAfterPrimer.cache_info().hits / cache_misses)
-                if cache_misses != 0
-                else 0
-            )
-            log.debug(
-                f"Thread {_threadnumber} @ processID {os.getpid()}\t::\tReads processing {completion_percentage}% complete.\n\tMODULE {PositionInOrBeforePrimer.__module__}.{PositionInOrBeforePrimer.__qualname__} CACHE INFORMATION\n\t\tCache size usage = {cache_usage_before:.2f}%\n\t\tCache hit ratio = {cache_hit_ratio_before:.2f}%\n\tMODULE {PositionInOrAfterPrimer.__module__}.{PositionInOrAfterPrimer.__qualname__} CACHE INFORMATION\n\t\tCache size usage = {cache_usage_after:.2f}%\n\t\tCache hit ratio = {cache_hit_ratio_after:.2f}%"
-            )
+        if (
+            total_reads >= 10 and index % (total_reads // 10) == 0 and log.level == 10
+        ):  # TODO: explain this
+            log_cache_info(index, total_reads, _threadnumber)
+
+        if len(seq) < 42:
+            # Length of the read has to be at least ~42bp because the default k-mer size for the short reads preset (sr) is 21.
+
+            log.debug(f"Read with name '{name}' is too short to be processed.")
+            continue
 
-        removed_coords_fw = []
-        removed_coords_rv = []
+        read = Read(name, seq, qual)
+        removed_coords_fw: list[int | None] = []
+        removed_coords_rv: list[int | None] = []
         previous_seq: str = "impossible"
         cutting_is_done = False
 
@@ -210,32 +359,32 @@ def CutReads(
             if cutting_is_done:
                 break
 
-            for hit in Aln.map(
-                seq
+            for hit in aligner.map(
+                read.seq
             ):  # Yields only one (or no) hit, as the aligner object was initiated with best_n=1
-                if len(seq) < 5 and len(qual) < 5:
+                if len(read.seq) < 5 and len(read.qual) < 5:
                     cutting_is_done = True
                     break
 
-                if seq == previous_seq:
-                    processed_readnames.append(name)
-                    processed_sequences.append(seq)
-                    processed_qualities.append(qual)
+                if read.seq == previous_seq:
+                    processed_readnames.append(read.name)
+                    processed_sequences.append(read.seq)
+                    processed_qualities.append(read.qual)
                     removed_coords_per_read.append(
                         removed_coords_fw + removed_coords_rv
                     )
                     cutting_is_done = True
                     break
 
-                previous_seq = seq
+                previous_seq = read.seq
 
                 # Fetch the primer coordinates that correspond to the reference that the read maps to
                 # we're using tuples here because they are hashable
 
-                FWTuple: Tuple[int, ...] = tuple(FWDict[hit.ctg])
-                RVTuple: Tuple[int, ...] = tuple(RVDict[hit.ctg])
+                fw_tuple: Tuple[int, ...] = tuple(fw_dict[hit.ctg])
+                rv_tuple: Tuple[int, ...] = tuple(rv_dict[hit.ctg])
 
-                if not FWTuple or not RVTuple:
+                if not fw_tuple or not rv_tuple:
                     log.debug(
                         f"Thread {_threadnumber} @ processID {os.getpid()}\t::\tRead with name '{name}' aligns to '{hit.ctg}', but there are no primers affiliated with '{hit.ctg}'."
                     )
@@ -249,19 +398,19 @@ def CutReads(
                     or (amplicon_type == "end-to-mid" and hit.strand == 1)
                     or amplicon_type == "fragmented"
                 ):
-                    seq, qual, removed_fw, qstart, qend = cut_read(
-                        seq,
-                        qual,
-                        PositionNeedsCutting=PositionInOrBeforePrimer,
-                        primer_list=FWTuple,
+                    params = CuttingParameters(
+                        position_needs_cutting=position_in_or_before_primer,
+                        primer_list=fw_tuple,
                         position_on_reference=hit.r_st,
                         cut_direction=1,
                         read_direction=hit.strand,
                         cigar=hit.cigar,
-                        query_start=qstart,
-                        query_end=qend,
+                        query_range={"start": qstart, "end": qend},
                         fragment_lookaround_size=fragment_lookaround_size,
                     )
+                    read.seq, read.qual, removed_fw, qstart, qend = cut_read(
+                        read, params
+                    )
                     removed_coords_fw.extend(removed_fw)
 
                 if (
@@ -269,19 +418,19 @@ def CutReads(
                     or (amplicon_type == "end-to-mid" and hit.strand == -1)
                     or amplicon_type == "fragmented"
                 ):
-                    seq, qual, removed_rv, qstart, qend = cut_read(
-                        seq,
-                        qual,
-                        PositionNeedsCutting=PositionInOrAfterPrimer,
-                        primer_list=RVTuple,
+                    params = CuttingParameters(
+                        position_needs_cutting=position_in_or_after_primer,
+                        primer_list=rv_tuple,
                         position_on_reference=hit.r_en,
                         cut_direction=-1,
                         read_direction=hit.strand,
                         cigar=list(reversed(hit.cigar)),
-                        query_start=qstart,
-                        query_end=qend,
+                        query_range={"start": qstart, "end": qend},
                         fragment_lookaround_size=fragment_lookaround_size,
                     )
+                    read.seq, read.qual, removed_rv, qstart, qend = cut_read(
+                        read, params
+                    )
                     removed_coords_rv.extend(removed_rv)
 
     return pd.DataFrame(
diff --git a/AmpliGone/cutlery.py b/AmpliGone/cutlery.py
index 53d430d..85f674e 100644
--- a/AmpliGone/cutlery.py
+++ b/AmpliGone/cutlery.py
@@ -1,9 +1,38 @@
+"""
+This module provides functions to determine if a position is within a specified distance of primer positions.
+
+Functions
+---------
+position_in_or_before_primer(pos: int, clist: List[int], max_lookaround: int) -> bool
+    Determine if a position is within the maximum distance of the closest position in the list of primer positions
+    and the position is less than or equal to the closest position in the list.
+
+position_in_or_after_primer(pos: int, clist: List[int], max_lookaround: int) -> bool
+    Determine if a position is within the maximum distance of the closest position in the list of primer positions
+    and the position is greater than or equal to the closest position in the list.
+
+Notes
+-----
+These functions use caching to improve performance for repeated calls with the same arguments. The cache size is set to a maximum of 2,000,000 entries.
+
+Examples
+--------
+>>> from AmpliGone.cutlery import position_in_or_before_primer, position_in_or_after_primer
+>>> primer_positions = (100, 200, 300)
+>>> position_in_or_before_primer(190, primer_positions, 50)
+True
+>>> position_in_or_after_primer(210, primer_positions, 50)
+True
+"""
+
 from functools import lru_cache
 from typing import List
 
 
 @lru_cache(maxsize=2000000)
-def PositionInOrBeforePrimer(pos: int, clist: List[int], max_lookaround: int) -> bool:
+def position_in_or_before_primer(
+    pos: int, clist: List[int], max_lookaround: int
+) -> bool:
     """
     Determine if a position is within the maximum distance of the closest position in the list of primer positions
     and the position is less than or equal to the closest position in the list.
@@ -24,13 +53,18 @@ def PositionInOrBeforePrimer(pos: int, clist: List[int], max_lookaround: int) ->
         less than or equal to the closest position in the list, False otherwise.
 
     """
-    d = lambda x: abs(x - pos)
-    near = min(clist, key=d, default=0)
+
+    def _default(x: int) -> int:
+        return abs(x - pos)
+
+    near = min(clist, key=_default, default=0)
     return abs(pos - near) < max_lookaround and pos <= near
 
 
 @lru_cache(maxsize=2000000)
-def PositionInOrAfterPrimer(pos: int, clist: List[int], max_lookaround: int) -> bool:
+def position_in_or_after_primer(
+    pos: int, clist: List[int], max_lookaround: int
+) -> bool:
     """
     Determine if a position is within the maximum distance of the closest position in the list of primer positions
     and the position is greater than or equal to the closest position in the list.
@@ -51,6 +85,9 @@ def PositionInOrAfterPrimer(pos: int, clist: List[int], max_lookaround: int) ->
         greater than or equal to the closest position in the list, False otherwise.
 
     """
-    d = lambda x: abs(x - pos)
-    near = min(clist, key=d, default=0)
+
+    def _default(x: int) -> int:
+        return abs(x - pos)
+
+    near = min(clist, key=_default, default=0)
     return abs(pos - near) < max_lookaround and pos >= near
diff --git a/AmpliGone/fasta2bed.py b/AmpliGone/fasta2bed.py
index 917b274..e147598 100644
--- a/AmpliGone/fasta2bed.py
+++ b/AmpliGone/fasta2bed.py
@@ -1,3 +1,68 @@
+"""
+This module provides functions and classes for processing sequencing reads and primers, including finding ambiguous options, parsing CIGAR strings, counting CIGAR errors, and generating coordinates for primers.
+
+Functions
+---------
+find_ambiguous_options(seq: str) -> List[str]
+    Find all possible unambiguous sequences from a sequence containing ambiguous nucleotides.
+
+parse_cigar_obj(cig_obj: Cigar) -> Tuple[str, str]
+    Parse a Cigar object and return the original cigar string and a cleaned cigar string.
+
+count_cigar_errors(cigar: str) -> int
+    Count the number of errors (insertions, deletions, and mismatches) in a CIGAR string.
+
+get_coords(seq: str, ref_seq: str, err_rate: float = 0.1) -> Tuple[str, int, int, int]
+    Get the coordinates of the best primer option for a given sequence.
+
+find_or_read_primers(primerfile: str, referencefile: str, err_rate: float) -> pd.DataFrame
+    Find or read primers from a given file.
+
+choose_best_fitting_coordinates(fw_coords: Tuple[str, int, int, int], rv_coords: Tuple[str, int, int, int]) -> Tuple[str, int, int, int] | None
+    Compares the forward and reverse coordinates and returns the best fitting coordinates based on their scores.
+
+coord_list_gen(primerfile: str, referencefile: str, err_rate: float = 0.1) -> Generator[Dict[str, Union[str, int]], None, None]
+    Generate a list of coordinates for primers found in a reference sequence.
+
+coord_lists_to_bed(df: pd.DataFrame, outfile: str) -> None
+    Write the coordinates in BED format to a file.
+
+parse_args(args: list[str] | None = None) -> argparse.Namespace
+    Parse command-line arguments.
+
+main(args: list[str] | None = None) -> None
+    Main function to process the command-line arguments and generate the BED file with primer coordinates.
+
+Notes
+-----
+This module is designed to handle the processing of sequencing reads and primers, including finding ambiguous options, parsing CIGAR strings, counting CIGAR errors, and generating coordinates for primers. It includes functions to read primers from files, generate coordinates, and write the results to a BED file. The main function orchestrates the entire process based on command-line arguments.
+
+Examples
+--------
+>>> from fasta2bed import find_ambiguous_options, parse_cigar_obj, count_cigar_errors, get_coords, find_or_read_primers, choose_best_fitting_coordinates, coord_list_gen, coord_lists_to_bed
+>>> seq = "ATGCR"
+>>> find_ambiguous_options(seq)
+['ATGCA', 'ATGCG']
+
+>>> cig_obj = Cigar("10M2D5M")
+>>> parse_cigar_obj(cig_obj)
+('10M2D5M', '10M2D5M')
+
+>>> count_cigar_errors("10=2I1=5D3=")
+7
+
+>>> seq = "ATCG"
+>>> ref_seq = "ATCGATCG"
+>>> get_coords(seq, ref_seq, err_rate=0.1)
+('ATCG', 0, 4, 100)
+
+>>> primerfile = "primers.fasta"
+>>> referencefile = "reference.fasta"
+>>> err_rate = 0.1
+>>> df = find_or_read_primers(primerfile, referencefile, err_rate)
+>>> coord_lists_to_bed(df, "output.bed")
+"""
+
 import argparse
 import os
 import re
@@ -302,7 +367,7 @@ def find_or_read_primers(
         log.info("Primer coordinates are given in BED format, skipping primer search")
         return read_bed(primerfile)
     return pd.DataFrame(
-        CoordListGen(
+        coord_list_gen(
             primerfile=primerfile,
             referencefile=referencefile,
             err_rate=err_rate,
@@ -372,7 +437,7 @@ def choose_best_fitting_coordinates(
     return best_fitting or None
 
 
-def CoordListGen(
+def coord_list_gen(
     primerfile: str,
     referencefile: str,
     err_rate: float = 0.1,
@@ -434,15 +499,15 @@ def CoordListGen(
 
     primers = list(SeqIO.parse(primerfile, "fasta"))
 
-    ref_file = list(SeqIO.parse(referencefile, "fasta"))
-    ref_seq = [str(ref.seq) for ref in ref_file]
-    ref_id = [ref.id for ref in ref_file]
+    ref_files = list(SeqIO.parse(referencefile, "fasta"))
+    ref_seqs = [str(ref.seq) for ref in ref_files]
+    ref_ids = [ref.id for ref in ref_files]
 
     # The loop in a loop here is not a particularly efficient way of doing this.
     # But this is the easiest implementation for now, and it's not like this is a particularly
     # cpu or time intensive process anyway.
     # Might come back to this when there's more time to create a better solution.
-    for ref_seq, ref_id in zip(ref_seq, ref_id):
+    for ref_seq, ref_id in zip(ref_seqs, ref_ids):
         log.info(f"Searching for primers in reference-id: [yellow]{ref_id}[/yellow]")
         for primer in primers:
             seq = str(primer.seq)
@@ -477,19 +542,19 @@ def CoordListGen(
             if not represent_as_score:
                 score = percentage
 
-            yield dict(
-                ref=ref_id,
-                start=start,
-                end=end,
-                name=primer.id,
-                score=score,
-                strand=strand,
-                seq=seq,
-                revcomp=revcomp,
-            )
+            yield {
+                "ref": ref_id,
+                "start": start,
+                "end": end,
+                "name": primer.id,
+                "score": score,
+                "strand": strand,
+                "seq": seq,
+                "revcomp": revcomp,
+            }
 
 
-def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
+def coord_lists_to_bed(df: pd.DataFrame, outfile: str) -> None:
     """
     Write the coordinates in BED format to a file.
 
@@ -517,17 +582,53 @@ def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
     >>> CoordinateListsToBed(df, 'regions.bed')
 
     """
-    return df[["ref", "start", "end", "name", "score", "strand"]].to_csv(
+    df[["ref", "start", "end", "name", "score", "strand"]].to_csv(
         outfile, sep="\t", na_rep=".", header=False, index=False
     )
 
 
-if __name__ == "__main__":
-    import argparse
+def parse_args(args: list[str] | None = None) -> argparse.Namespace:
+    """
+    Parse command-line arguments.
 
-    args = argparse.ArgumentParser()
+    Parameters
+    ----------
+    args : list of str, optional
+        A list of command-line arguments to parse. If None, the arguments will be taken from sys.argv.
 
-    args.add_argument(
+    Returns
+    -------
+    argparse.Namespace
+        An argparse.Namespace object containing the parsed command-line arguments.
+
+    Notes
+    -----
+    This function defines and parses the command-line arguments for the script. The following arguments are supported:
+    - --primers: The path to the FASTA file containing primers. This argument is required.
+    - --reference: The path to the FASTA file with the reference sequence. This argument is required.
+    - --output: The path to the output BED file with coordinates of the primers. This argument is required.
+    - --primer-mismatch-rate: The fraction of mismatches a primer can have with respect to the reference. Defaults to 0.1.
+    - --verbose: A flag to enable verbose output for debugging purposes.
+
+    Examples
+    --------
+    >>> import sys
+    >>> sys.argv = ['script.py', '--primers', 'primers.fasta', '--reference', 'reference.fasta', '--output', 'output.bed']
+    >>> args = parse_args()
+    >>> args.primers
+    'primers.fasta'
+    >>> args.reference
+    'reference.fasta'
+    >>> args.output
+    'output.bed'
+    >>> args.primer_mismatch_rate
+    0.1
+    >>> args.verbose
+    False
+    """
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
         "--primers",
         metavar="File",
         type=str,
@@ -535,7 +636,7 @@ def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
         required=True,
     )
 
-    args.add_argument(
+    parser.add_argument(
         "--reference",
         metavar="File",
         type=str,
@@ -543,14 +644,14 @@ def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
         required=True,
     )
 
-    args.add_argument(
+    parser.add_argument(
         "--output",
         metavar="File",
         type=str,
         help="The output BED file with coordinates of the primers.",
         required=True,
     )
-    args.add_argument(
+    parser.add_argument(
         "--primer-mismatch-rate",
         metavar="File",
         type=float,
@@ -558,20 +659,50 @@ def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
         default=0.1,
     )
 
-    args.add_argument(
+    parser.add_argument(
         "--score-representation",
         action="store_true",
         help="Present the alignment score in the bed file instead of the match-percentage for each primer option (default).",
     )
 
-    args.add_argument(
+    parser.add_argument(
         "--verbose",
         action="store_true",
         help="Print debug information",
     )
 
-    flags = args.parse_args()
+    return parser.parse_args(args)
 
+
+def main(args: list[str] | None = None) -> None:
+    """
+    Main function to process the command-line arguments and generate the BED file with primer coordinates.
+
+    Parameters
+    ----------
+    args : list of str, optional
+        A list of command-line arguments to parse. If None, the arguments will be taken from sys.argv.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    This function orchestrates the entire process of reading primers, aligning them to the reference sequence,
+    and writing the coordinates to a BED file. It performs the following steps:
+    1. Parses the command-line arguments using the `parse_args` function.
+    2. Sets the logging level to DEBUG if the verbose flag is set.
+    3. Reads or finds the primers using the `find_or_read_primers` function.
+    4. Writes the coordinates of the primers to the output BED file using the `coord_lists_to_bed` function.
+
+    Examples
+    --------
+    >>> import sys
+    >>> sys.argv = ['script.py', '--primers', 'primers.fasta', '--reference', 'reference.fasta', '--output', 'output.bed']
+    >>> main()
+    """
+    flags = parse_args(args)
     if flags.verbose:
         log.setLevel("DEBUG")
 
@@ -582,4 +713,8 @@ def CoordinateListsToBed(df: pd.DataFrame, outfile: str) -> None:
         represent_as_score=flags.score_representation,
     )
 
-    CoordinateListsToBed(df, flags.output)
+    coord_lists_to_bed(df, flags.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/AmpliGone/io_ops.py b/AmpliGone/io_ops.py
index eb2f83e..6b36e3f 100644
--- a/AmpliGone/io_ops.py
+++ b/AmpliGone/io_ops.py
@@ -1,3 +1,58 @@
+"""
+This module provides various input/output operations for the AmpliGone package.
+
+Functions
+---------
+read_bed(filename: str) -> pd.DataFrame
+    Reads a BED file and returns a pandas DataFrame.
+
+output_file_opener(output_file: str, threads: int) -> TextIO | PgzipFile
+    Opens an output file for writing, with optional gzip compression.
+
+write_output(output: str, read_records: List[Dict[Hashable, Any]], threads: int) -> None
+    Writes the reads to the output file.
+
+Classes
+-------
+SequenceReads
+    A class for reading and indexing sequence reads from FASTQ or BAM files.
+
+    Methods
+    -------
+    __init__(self, inputfile: str)
+        Initializes the SequenceReads object and reads the input file.
+    _read_fastq(self, inputfile: str) -> None
+        Reads a FASTQ file and stores the reads.
+    _read_bam(self, inputfile: str) -> None
+        Reads a BAM file and stores the reads.
+    _is_fastq(self, filename: str) -> bool
+        Checks if the given file is a FASTQ file.
+    _is_zipped(self, filename: str) -> bool
+        Checks if the given file is a gzipped file.
+    _is_bam(self, filename: str) -> bool
+        Checks if the given file is a BAM file.
+    _load_bam(self, inputfile: str) -> AlignmentFile
+        Loads a BAM file and returns an AlignmentFile object.
+    _open_gzip_fastq_file(self, filename: str) -> TextIO
+        Opens a gzip file for reading and returns an opened file object.
+    _open_fastq_file(self, filename: str) -> TextIO
+        Opens a FASTQ file for reading and returns an opened file object.
+    _fastq_opener(self, inputfile: str) -> TextIO
+        Opens a FASTQ file for reading, with optional gzip decompression.
+    _flip_strand(self, seq: str, qual: str) -> Tuple[str, str]
+        Returns the reverse complement of a DNA sequence and its quality score.
+
+Examples
+--------
+>>> bed_df = read_bed('path/to/file.bed')
+>>> print(bed_df.head())
+
+>>> seq_reads = SequenceReads('path/to/file.fastq')
+>>> print(seq_reads.frame.head())
+
+>>> write_output("output.txt", [{"Readname": "read1", "Sequence": "ATCG", "Qualities": "20"}], 4)
+"""
+
 import gzip
 import os
 import pathlib
@@ -6,8 +61,8 @@
 
 import pandas as pd
 import pgzip
-import pysam
 from pgzip import PgzipFile
+from pysam.libcalignmentfile import AlignmentFile
 
 from AmpliGone.log import log
 
@@ -47,14 +102,14 @@ def read_bed(filename: str) -> pd.DataFrame:
         usecols=range(6),
         header=None,
         names=["ref", "start", "end", "name", "score", "strand"],
-        dtype=dict(
-            ref=str,
-            start="Int64",
-            end="Int64",
-            name=str,
-            score=str,
-            strand=str,
-        ),
+        dtype={
+            "ref": str,
+            "start": "Int64",
+            "end": "Int64",
+            "name": str,
+            "score": str,
+            "strand": str,
+        },
     )
     primer_df = primer_df[
         ~(
@@ -67,9 +122,51 @@ def read_bed(filename: str) -> pd.DataFrame:
 
 
 class SequenceReads:
+    """
+    A class for reading and indexing sequence reads from FASTQ or BAM files.
+
+    Attributes
+    ----------
+    tuples : list[tuple[str, str, str] | None]
+        A list to store the read name, sequence, and quality score tuples.
+    frame : pd.DataFrame
+        A DataFrame to store the indexed reads.
+
+    Methods
+    -------
+    __init__(self, inputfile: str)
+        Initializes the SequenceReads object and reads the input file.
+    _read_fastq(self, inputfile: str) -> None
+        Reads a FASTQ file and stores the reads.
+    _read_bam(self, inputfile: str) -> None
+        Reads a BAM file and stores the reads.
+    _is_fastq(self, filename: str) -> bool
+        Checks if the given file is a FASTQ file.
+    _is_zipped(self, filename: str) -> bool
+        Checks if the given file is a gzipped file.
+    _is_bam(self, filename: str) -> bool
+        Checks if the given file is a BAM file.
+    _load_bam(self, inputfile: str) -> AlignmentFile
+        Loads a BAM file and returns an AlignmentFile object.
+    _open_gzip_fastq_file(self, filename: str) -> TextIO
+        Opens a gzip file for reading and returns an opened file object.
+    _open_fastq_file(self, filename: str) -> TextIO
+        Opens a FASTQ file for reading and returns an opened file object.
+    _fastq_opener(self, inputfile: str) -> TextIO
+        Opens a FASTQ file for reading, with optional gzip decompression.
+    _flip_strand(self, seq: str, qual: str) -> Tuple[str, str]
+        Returns the reverse complement of a DNA sequence and its quality score.
+
+    Examples
+    --------
+    >>> seq_reads = SequenceReads('path/to/file.fastq')
+    >>> print(seq_reads.frame.head())
+
+    """
+
     def __init__(self, inputfile: str):
         log.debug(f"Starting INDEXREADS process\t@ ProcessID {os.getpid()}")
-        self.tuples = []
+        self.tuples: list[tuple[str, str, str] | None] = []
         if self._is_fastq(inputfile):
             log.debug("INDEXREADS :: Parsing reads from FASTQ file")
             self._read_fastq(inputfile)
@@ -202,9 +299,9 @@ def _is_bam(self, filename: str) -> bool:
         """
         return ".bam" in pathlib.Path(filename).suffixes
 
-    def _load_bam(self, inputfile: str) -> pysam.AlignmentFile:
+    def _load_bam(self, inputfile: str) -> AlignmentFile:
         """
-        Load a BAM file and return a pysam.AlignmentFile object.
+        Load a BAM file and return an AlignmentFile object.
 
         Parameters
         ----------
@@ -213,7 +310,7 @@ def _load_bam(self, inputfile: str) -> pysam.AlignmentFile:
 
         Returns
         -------
-        pysam.AlignmentFile
+        AlignmentFile
             A file object for reading the BAM file.
 
         Examples
@@ -223,7 +320,7 @@ def _load_bam(self, inputfile: str) -> pysam.AlignmentFile:
         ...     print(read)
 
         """
-        return pysam.AlignmentFile(inputfile, "rb")
+        return AlignmentFile(inputfile, "rb")
 
     def _open_gzip_fastq_file(self, filename: str) -> TextIO:
         """
@@ -274,7 +371,7 @@ def _open_fastq_file(self, filename: str) -> TextIO:
         !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
 
         """
-        return open(filename, "rt")
+        return open(filename, "rt", encoding="utf-8")
 
     def _fastq_opener(self, inputfile: str) -> TextIO:
         """
@@ -348,9 +445,32 @@ def _flip_strand(self, seq: str, qual: str) -> Tuple[str, str]:
 
 
 def output_file_opener(output_file: str, threads: int) -> TextIO | PgzipFile:
+    """
+    Open an output file for writing, with optional gzip compression.
+
+    Parameters
+    ----------
+    output_file : str
+        The path to the output file. If the path contains '.gz', the file will be opened with gzip compression.
+    threads : int
+        The number of threads to use for writing the output file when using gzip compression.
+
+    Returns
+    -------
+    TextIO | PgzipFile
+        An opened file object for writing. If the file is gzipped, a PgzipFile object is returned; otherwise, a standard TextIO object is returned.
+
+    Examples
+    --------
+    >>> with output_file_opener("output.txt", 4) as f:
+    ...     f.write("This is a test.")
+    ...
+    >>> with output_file_opener("output.txt.gz", 4) as f:
+    ...     f.write("This is a gzipped test.")
+    """
     if ".gz" in output_file:
         return pgzip.open(output_file, "wt", compresslevel=6, thread=threads)
-    return open(output_file, "w")
+    return open(output_file, "w", encoding="utf-8")
 
 
 def write_output(
@@ -382,11 +502,11 @@ def write_output(
     >>> write_output("output.txt", [{"Readname": "read1", "Sequence": "ATCG", "Qualities": "20"}], 4)
     """
     with output_file_opener(output, threads) as fileout:
-        for index, k in enumerate(read_records):
-            for key in read_records[index]:
+        for read_record in read_records:
+            for key in read_record:
                 if key == "Readname":
-                    fileout.write("@" + read_records[index][key] + "\n")
+                    fileout.write("@" + read_record[key] + "\n")
                 elif key == "Sequence":
-                    fileout.write(read_records[index][key] + "\n" + "+" + "\n")
+                    fileout.write(read_record[key] + "\n" + "+" + "\n")
                 elif key == "Qualities":
-                    fileout.write(read_records[index][key] + "\n")
+                    fileout.write(read_record[key] + "\n")
diff --git a/AmpliGone/log.py b/AmpliGone/log.py
index 3684487..e1becbb 100644
--- a/AmpliGone/log.py
+++ b/AmpliGone/log.py
@@ -1,3 +1,36 @@
+"""
+This module sets up a central logging object using the Rich library for enhanced logging output.
+
+Imports
+--------
+import logging
+    Standard Python logging module.
+from rich.highlighter import NullHighlighter
+    Import NullHighlighter from the Rich library to disable highlighting.
+from rich.logging import RichHandler
+    Import RichHandler from the Rich library to handle logging with Rich's features.
+
+Variables
+---------
+FORMAT : str
+    The format string for log messages.
+log : logging.Logger
+    The central logging object configured to use RichHandler.
+
+Notes
+-----
+This module configures the logging system to use RichHandler from the Rich library, which provides enhanced logging output with features like rich text formatting and better readability. The logging level is set to DEBUG, and the log messages are formatted to include the date and time.
+
+Examples
+--------
+>>> from AmpliGone.log import log
+>>> log.debug("This is a debug message.")
+>>> log.info("This is an info message.")
+>>> log.warning("This is a warning message.")
+>>> log.error("This is an error message.")
+>>> log.critical("This is a critical message.")
+"""
+
 import logging
 
 from rich.highlighter import NullHighlighter
@@ -6,7 +39,7 @@
 # Central logging object using Rich's logging library
 FORMAT = "%(message)s"
 logging.basicConfig(
-    level="INFO",
+    level="DEBUG",
     format=FORMAT,
     datefmt="[%d/%m/%y %H:%M:%S]",
     handlers=[
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cad335d..4ecb81c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [1.3.1](https://github.com/RIVM-bioinformatics/AmpliGone/compare/v1.3.0...v1.3.1) (2024-04-03)
+
+
+### Bug Fixes
+
+* don't replace NA name values when reading BED file ([6d0c192](https://github.com/RIVM-bioinformatics/AmpliGone/commit/6d0c192a0b997d98778c2a2f0281e19208aedcac))
+
 ## [1.3.0](https://github.com/RIVM-bioinformatics/AmpliGone/compare/v1.2.1...v1.3.0) (2023-08-08)
 
 
diff --git a/CITATION.cff b/CITATION.cff
index 93df80c..a509d1c 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -15,7 +15,7 @@ authors:
       Environment (RIVM)
     orcid: 'https://orcid.org/0009-0002-6384-3446'
   - name: "The RIVM-IDS Bioinformatics team"
-version: 1.3.0 #x-release-please-version
+version: 1.3.1 #x-release-please-version
 doi: 10.5281/zenodo.7684307
 identifiers:
   - type: doi
diff --git a/sonar-project.properties b/sonar-project.properties
new file mode 100644
index 0000000..7c311ec
--- /dev/null
+++ b/sonar-project.properties
@@ -0,0 +1,15 @@
+sonar.sources=AmpliGone
+sonar.tests=tests
+
+sonar.projectKey=RIVM-bioinformatics_AmpliGone
+sonar.organization=rivm-bioinformatics
+sonar.host.url=https://sonarcloud.io
+
+sonar.language=python
+sonar.sourceEncoding=UTF-8
+sonar.python.version=3.10
+sonar.python.coverage.reportPaths=tests/data/reports/coverage.xml
+sonar.python.pylint.reportPaths=tests/data/reports/pylint-report.txt
+sonar.python.bandit.reportPaths=tests/data/reports/bandit-report.json
+sonar.python.flake8.reportPaths=tests/data/reports/flake8-report.txt
+sonar.python.mypy.reportPaths=tests/data/reports/mypy-report.txt
\ No newline at end of file
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000..c98654f
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1,8 @@
+pytest==8.3.2
+pytest-cov==6.0.0
+pylint==3.3.1
+black==24.8.0
+isort==5.13.2
+mypy==1.13.0
+flake8==7.1.1
+bandit==1.7.10
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/config.yaml b/tests/config.yaml
new file mode 100644
index 0000000..6c14bcb
--- /dev/null
+++ b/tests/config.yaml
@@ -0,0 +1,115 @@
+# Synthetic data:
+# The reference is 200 nucleotides long.
+# The primers are 10 nucleotides long and match the first and last 10 nucleotides of the reads.
+# There are 2 reads: raw reads are 120 nucleotides long, 100 nucleotides once the primers are removed.
+# The processed reads have a 20 nucleotide overlap with each other.
+
+
+happy_sars_cov_2:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/sars-cov-2.fastq"
+    --output: "tests/data/inputs_outputs/sars-cov-2-output.fastq"
+    --reference: "tests/data/references/SARS-CoV-2-reference.fasta"
+    --primers: "tests/data/primers/ARTIC-V5.3.2.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    comparison_file: "tests/data/expected_outputs/sars-cov-2-eo.fastq"
+    fails: False
+    expected_log_message: "writing output files"
+
+happy_synthetic:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/synthetic.fastq"
+    --output: "tests/data/inputs_outputs/synthetic-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    comparison_file: "tests/data/expected_outputs/synthetic-eo.fastq"
+    fails: False
+    expected_log_message: "removed a total of [bold cyan]40[/bold cyan] nucleotides"
+
+happy_synthetic_bed:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/synthetic.fastq"
+    --output: "tests/data/inputs_outputs/synthetic-bed-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.bed"
+    --amplicon-type: "end-to-end"
+  test_args:
+    comparison_file: "tests/data/expected_outputs/synthetic-eo.fastq"
+    fails: False
+    expected_log_message: "removed a total of [bold cyan]40[/bold cyan] nucleotides"
+
+synthetic_mismatched_primers:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/synthetic.fastq"
+    --output: "tests/data/inputs_outputs/synthetic-mismatched-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic-no-match.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+    expected_log_message: "ampligone was unable to match any primers to the reference"
+
+empty_input:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/empty.fastq"
+    --output: "tests/data/inputs_outputs/empty-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+    expected_log_message: "Empty input file"
+
+empty_reference:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/synthetic.fastq"
+    --output: "tests/data/inputs_outputs/synthetic-output.fastq"
+    --reference: "tests/data/references/empty.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+
+empty_primers:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/synthetic.fastq"
+    --output: "tests/data/inputs_outputs/synthetic-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/empty.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+
+corrupted_input:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/corrupted.fastq"
+    --output: "tests/data/inputs_outputs/corrupted-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+
+wrong_format_input:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/wrong-format.fastq"
+    --output: "tests/data/inputs_outputs/wrong-format-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+
+too_short_sequences:
+  pipeline_args:
+    --input: "tests/data/inputs_outputs/too-short.fastq"
+    --output: "tests/data/inputs_outputs/too-short-output.fastq"
+    --reference: "tests/data/references/synthetic.fasta"
+    --primers: "tests/data/primers/synthetic.fasta"
+    --amplicon-type: "end-to-end"
+  test_args:
+    fails: True
+
diff --git a/tests/data/expected_outputs/sars-cov-2-eo.fastq b/tests/data/expected_outputs/sars-cov-2-eo.fastq
new file mode 100644
index 0000000..8ca2e48
--- /dev/null
+++ b/tests/data/expected_outputs/sars-cov-2-eo.fastq
@@ -0,0 +1,20 @@
+@SRR30635841.4
+ATATTCTGAGCCCTGTGATGAATCAACAGTTTGAGTTGGTAGTCCCAAAATCTTTGAGGCTACAGCATTCTGTGAATTATAAGGTGAAATAAAGACAGCTTTTCTCCAAGCAGGGTTACGTGTAAGGAATTCTCTTACCACGCCTATTTGTGGCCTGTTAATTGCAGATGAAACATCATGCGTGATAACACCCTTATAAAACATTTTAAAGCATTGAGCTGATTTGTCTTTATGTGCTTTAAGCTTATTATCATAAACCAAAGCACTCACAGTG
++
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCC*CCCCCC*CCCCCCC5CCCCCCCCC
+@SRR30635841.5
+TACTCTGCAAGAAGTAGACTAAAGCATAAAGATAGAGAAAAGGGGCTTCAAGGCCAGCAGCAACGAGCAAAAGGTGTGAGTAAACTGTTACAAACAACAACAGCAAGTTGCAAACAAAGTGAACACCCTTGGAGAGTGCTAGTTGCCATCTCTTTTTGAGAGTTATGATTTTGGAAGCGCTCTGAAAAACAGCAAGAAGTGCAACGCCAACAATAAGCCATCCGAAAGGGAGTGAGGCTTGTATCGGTATCGTTGCAGTAGCGCGAAC
++
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCC5CCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5
+@SRR30635841.2
+TTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGGTAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCA
++
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCC5CCCCCCCCCCCCCCCCCC5CCCCC*C5CCCCC*
+@SRR30635841.3
+GAGTGTTGGGTATAAGCCAGTAATTCTAACATAGTGCTCTTGTGGCACTAGTGTAGGTGCACTTAATGGCATTACTGTATGTGATGTCAGCACAAAATAATCACCAACATTTAATTTGTAAGTTGTTGTACCTCGGTAAACAACAGCATCACCATAGTCACCTTTTTCAAAGGTGTACTCTCCTATTTGTACTTTACTGTTTTTAGTTACACGATAACCAGTAAAGACATAATTTCGGTTAAGTGGTGGTCTAGGTTTACCAACTTCCCATG
++
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCC**CCCCCC5C
+@SRR30635841.1
+TTATTAACAATAAGTAGGGACTGGGTCTTCGAATCTAAAGTAGTACCAAAAATCCAGCCTCTTATTATGTTAGACTTCTCAATGGAAGCAAAATAAACACCATCATTAAATGGTAGGACAGGGTTATCAAACCTCTTAGTACCATTGGTCCCAGATATAACATGGAACCAAGTAACATTGGAAAAGAAAGGTAAGAACAAGTCCTGAGTTGAATGTAAAACTGAGGATCTGAAAACTTTGTCAGGGTAATAAACACCACGTGTGAAAG
++
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCC*C5C*CCCCCCCCCC5CC
diff --git a/tests/data/expected_outputs/synthetic-eo.fastq b/tests/data/expected_outputs/synthetic-eo.fastq
new file mode 100644
index 0000000..105fae3
--- /dev/null
+++ b/tests/data/expected_outputs/synthetic-eo.fastq
@@ -0,0 +1,8 @@
+@read_number_2_last_120_of_ref
+CCTCGTGGGGCCTACACCTGACCAGGAGCCGCACTGACAGGACCACGCTTCATCATAACTTTGGCGGCTGGGCAACGGATTTAATGGTACATAACTCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_number_1_first_120_of_ref
+TCTAGGGAGTGACGTGGACCCCGGATTGATACAGGATCACATGTAGAAAAGGTAGTCGGACAAGTTACCGCTACCCTCGACCTCGTGGGGCCTACACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/tests/data/inputs_outputs/corrupted.fastq b/tests/data/inputs_outputs/corrupted.fastq
new file mode 100644
index 0000000..d3dc897
Binary files /dev/null and b/tests/data/inputs_outputs/corrupted.fastq differ
diff --git a/tests/data/inputs_outputs/empty.fastq b/tests/data/inputs_outputs/empty.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/inputs_outputs/influenza.fastq b/tests/data/inputs_outputs/influenza.fastq
new file mode 100644
index 0000000..618ed73
--- /dev/null
+++ b/tests/data/inputs_outputs/influenza.fastq
@@ -0,0 +1,20 @@
+@b253b01d-3c76-4251-bbd1-9652881ceb2f
+ACGCGTGATCAGCAAAAGCGGGGGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCCAAAGAAATACCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCTATACGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCATCCCAATAGGTAAAATATGCGACGGTCCCCATCAGATCCTTGATGGAGGGAACTGCCACACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCGTTCAAAATAAGGAATGGGACCTTTTTGTTGAACGAAGCAGAGCCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGAATTCACTAGTTGCACTCATCCGGCACACTGGAGTTTTAAAAATGAAAGCTTCAATTGGACTGGAGTCAAGCAAAACGGAACAAGTTCTGCGTGCAAAAGGGGATCTAGTAGTAGTTTTTTTAGTAGATTAAATTGGTTGACCCACTTAAGCAACATATATCCAGCACAGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTTGGGGGGTTCACCACCCGGATACGGACAAGAACCAAATCTCCTGTTCGCCCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAAGGACATTCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACACTTTTGATTAACAGCACAGGGAATCTAATTGCTCTAAGAGGGTTACTTCAAAATACGAAATGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGGATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACAGGGCCTGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGC
++
+=<;999::<=;:;::744*)(),4<899AA??>==>@>??>>??<<<<<??3322245679<=?@@@??=>>>=?====<>>>@=;:;+++,(,,,2=>>>>;<<?>?@@?BFC3222267?@B98832*))*+1000176..**)*-//2;<;:;<<??:6778;=C?C@?>??@ABADB@B<9:::9:;=>@??@B@A?==<<=<<;;;==??@5400078888,**+/1.-.45;??=;99210&&%&),7566.---88<=;;77669<9300/..((()0/77>?>===<=<;;=>5422566=;;;;;<=>>?<=99999>555631232889@<>=<99:<BABA>:::999;:=<776::;.....<<<<<<<===<<==A78:@?;::::>??A@><;<;;::<84(%%&&%'()*++,-*+++/344...-,,,,43/.../*199<?B;===@==8<932243498850001132:>>AA>=;;;;=?<<<;<?<9999:>.*,,.534<>><<<;<>=:;<><A<-----::;:65667711105786-(((('))++1668{{>9988::;:;9644447<<<;:::;<<<=;:;;:<>?AC@?@@@ABDA@::9=?@-12.++++-0))))('*)*.68:4...-+++)()+*,,.21)((/;633335:;8=;;87773007:8<>=;;<<;=<;::::6>?=:8998:;;:;::9::;<===>???>833335;?<=<99:556<<:+*4,,9;<<87556600099=:9:99<<=>=<;;;<??=<<=<996/////7899;;0//4:20,-../A@<;:::<65567;===>@=:98,,&%%&&&(+.07898:7;:9610013=;99:<>B444355AAABA:999444464)((((.0123==>=<;;967752*****,77----/3579><;:66667<:889;=?><==:643446?;C=:888832///788<998))&&'*+69:;;:::;<?=:98887)(((*456877678:=>>;::;9;;98999::
+@dbdcfc8a-6609-4268-9867-fc4bec4bbaac
+ACGCGTGATCAGCAACCAGAGACTAATTCTATTAACCATGAAGACTATCATTTAGCACGAGCAACCATTCTATGTATCTTGTTTTCGCTCAAAAAATACCTGGAAATGACAATAACACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTAAAATATGCGACAGTCCCCATCAGATCCTTGATGGAGGGAACTGCACACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCGTTCAAAATAAGGAATGGGACCTTTTTGTTGAACGAAGCGAGCCAACAACAACTGCCACCCTTATGATGTACGGATTAACTTCCCTTAGGTCACTAGTTATATTAACTGCTGGAGTTTAAAAATGAAAGCTCAATTGGACTGGAGTCCGTAAAACGGAACAAGTTCTGCGTGCAAAAGGATCTAGTAGTAGTTTTTTTAGTAGATTAAATTGGTTGACCCACTTAAACAACATATCAGCACAGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGGGTTCACCACCGGATACGGACAGAGCCCGATCTCCCTGTTCGCCCAATCATCAGGAAGAATCACATTATCTACCAAAAGAAGCCAAGCAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGACATTCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACAGAATGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGAATCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACCAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAATCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGAAGCTGAATCGATTGATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAGGTAGAAGGAAGAGTTCAAGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCCAACATACGTTGACCCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAAGAAGCAACTGGGGGAAAATGCTGAGGATACTAGGAAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCGGGGTACAAGATTGGATCCTATGGATTTCCTTTGCCATGTCATGTTTTTTTGCTTTGTATTGCTTTGTTGGGGTTTCATCATGTGGGCCTGCCAAAGGGCAACATTAGATTTTAACATTTGCATTTGAGTGCATTAATTAAACACCCTTGTTTCTACTGATCACGCGT
++
+++,--..003.****''''&&%%%(()09977888==<;5511111126666-++++++++,6..***+8643341111178<<796;:;:<>CF=??>===?A>DDE@@><77)((()5567888730..033/..--01;<;;22223;==@AB===;9;;99:9:==>>DDB?@???A@?>=<;;;<<765566535434677999988656889910044;@@??55576677:==>87)))))43234;9:;?0//000::;>?@;;;:51786454337;>55444;96660)))))((,-/<999...8::;:66666<=@BGBI>88777=>A>=<::;<@>BBA<787-,,,./22254,+)''(()0/02..1114567::><))'&&+-)(****/,+-+*****)(%&&''((&&&&&&''(*+22222275397.+*(')+'''*,0001971//23***+345=:8889<;87:>;77778?>>>04=859<9888778;;??=970.011244>>@>=?=:77778:;0,,,,,)))))11133+**77/+*).)''(&&&&(*2458999:>???@BBDDFJC97,++++3433400-(''''/888,+*&&+-('&&&''&&&%%$$&'(199*)+9=<;;:1019::<=,++++:876635,(((()@887775.520--21,)(*,,15::<==??==>@AB@=76679;=;;<;:9::;;>:&&'&(89:;0///998677<(''+::666?>;;::;=>=<<<=BC=(((():8;:<;<==A@@??@B@>;:99/..+*25///<=>;;:5+((((+(-/00013349<?<(''':><;7752+''''**.//>?>=>@?>>>?>><<;<=?ABA@?877<<10,+,,'&&(())69:=;;;:;<<;<;:;==>=@A??>>>>>=566/8:;;=999<C>//////,,,-.;7666466666655552.---.0**+;<>>99<;63354,.49:<=@>>>>==>-,++-965)))*,:<331-...,,.46<=9(&&&(///557779:<44445<9978899@;@@AAADC=86556211))+:<??>??@.-.-245@999:;A++++,0489=@AACCBD?>1-3::9989:?AB9+****;<::89*'(.578<::9;<;;@AA?=<<<?AB@A=;<;;439<=>>>?=<<<<9210/1349@?1+'&(&&%%*,,176;<<5224890//014.*,//0...0852*)))35546778:;@>=;;7----*)).78;<>?>::966888>ACCDB@??>????@?>>?@@AA>;:989:<=??@FDEE>==426773,,,,)(*134,,+''''**))176778<<>973349;<<<<<><<<==>>A@?CCA=354*5**(*....8.--.0:=C?@>==876--3,,''(((/7;?AAAACB:>96/1*)))(((((?>@@?>>??@<<<7677757777<?@@@0000823...26?>??AA98888AAA223344478<>??<<<<=A=76777755679::9:=?:<**01,---9-,*,,,+.3)'''',-01026;99;==<<;;<<<<==97779+;<==:<<;;>>>>>7924:9;<>A@BA?<<==?BB;9989<535@;788***+;:<<<<32.+./.57777721111112557>@A==><=@=>:99978788<7442'000118;>A::666:9;=@=:8777
+@70880d96-c688-4852-afd4-07330abdc8ff
+ACGCGTGATCAGCGAAACCAGGGGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCAAAATGCCTGGAAATGACAATAGCGGCGACGCTGTACGCTTAGGCACCATGCAGTACCAAACGGAACGATAAGGCAAAAACAATCACAAATGACCGAATTGGAGAGTTACTTAATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTAAAATATGCGACGGTCCCCATCAGATCCTTGATGGAGGGAACTGCACACTAATAGATGCTCTATTGGGGGACCCTCAGGTGTGACGGCATTCAAAATAAGGAATGGGACCTTTTGTGAACGAAGCAAGAACCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTGGGTCACTGGATGCCTCATCCGGCACACGCGAGTTTTAAAAATGAAAACTTCAATTGGGCTGGAGTCAAGCAAAACGGAACAAATTCTGCGTGCAAAAGGGGATCTAGTAGTAGTTTTTTAGTAGATTAAATTGGTTGACTTACTTAAACAACATATATCCAGCACAAGACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGATACGGACAAGAACCAAATCTCTGTTCGCCCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATACCAAGGACATTCCTAGCAGAATAGCGCATCTATTGGACAATAGTAAAGCCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAATGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACGTTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCAAAATGTACCAGAGAAACAAACCAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCCAAAATTCTGAGGAAGAGGACAATCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGGAAGCTGAATCGATTGATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAGAATTCTCTTGAGGTAGAAGGAAGAGTTCAAGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACGATTGACCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCGGATCAATAAGAAATGAAACTTATGACCACAATGTGTACAGGGATGAAGCATTCAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATGTGTTTTTTTTGCTTTTGCTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAGGAGCAGCATTAGATGCAACATTTGCATTTGAGTGCATTAATTAAAAACACCCTTGTTTCTACTGATCACG
++
+:9:;::<<=>=77688=>=322+++29<>BA::9=>?===>>@?<<;;<@@@A@><=>=?@;;<::;;;=;;;;<=<<>===AE::::66666.....67665343.,,+**&&&+,//.../*)&&%%%%&&&/3>>99(((((;;<=>=<<<<:::8)(((((((+2334A@@@@BEA@@?=====>3221000223++*,48>333326667:?@?983236;=>>>??44444;@CECA>=<<:)))))0,+24433599<=>><=9887,+/4322*)*59=>?888887<<3/-.--,,99:>876(&%$%&&&&&&&&&*,/;;AAEB977799<<<<434490+&&&+,.2264----))))*,++++,=;;=<;<<<=?A@AB@B87778=<==<)(''')003++++,66::90///-//07>=77744('&&&'((*-)0.2106<:;5(((()><<98555570.19698--/025;=====@BE<D?>=;9:::<=,(,*+./3:;;<=;33344789;008::;>@@CBCBCB?>>=21'''''):<<=?>>>===>{>2112788((((((./28887:::<;:9878888?699:=CAA@A?A88442125,..-34458:@?>>?<;=>>><;::9:;:;=<<><;1<+))))699>>>>AABCA@87778;????A@?<==><=>===?@?BDHA=?>>?B=+++'&&&'+B@?AB3>==767;=?@>>>>><;8+').//4//248<=;1002<=99871))(((+,-':<<.--(&&31*)21+((()979=;>>2222311:9??AB;9978111,*,---1:;;?==;9844444354666410021/42224:;?:8=?DB961114:::D@<<<<<@??=<<;9989:77<?=B?9862/*+))*8888=<90.-.3;>=>>;88999:==>>======<<<9211:999<<;9;::;=44444=<::;:8++++,89:;;;;:9;:;<,.55222570+4767:5<==>?><;:<<>892...*('&&'(47662226<??86777;<::::99::<=>?<8732232,,,,,--->;=?>>>AACB?>?>>>A<A?<>?>><;;::==9875003=999<=A?=<999:''',7988,,(((()35:88889:?A;;999;::;;;==A9>:;;;:88755112=>>?@?>==>??C;:5,4558:<<=?>===<=DC?<;99;=:;9;;<:99::;:=?>===?AB@?><:;80(((/;><.(''),./178:;;;21777==+++-8888811249::=@?>><<<=>?>??@BD8=<<=>>BB=<<<;<<=@>;88732221/03322,+,,,==<;22-+++++..27:;<?@><<<==>>>=??989437=??>>>@;>??>@CFDDKFM@?>;-:752:9862222++/<F@1////889:<@A@==@?<=<3333@;00002;,*''%%%')-5;@AA????=<<<;:,*))*+,03.-''(.277789@>=87,,,,<=ABBAA;;32213720-+%%(++++-/,,../=>=...0>?>?))/0299<?>==>??>>=0////2562+*(((()*<=32--..,,,,/488<;<<9977+..41/00.2;899--)'&%$$$%&''''%%%$&((100121.--.:<;;;;=?//3,,,,****+-*)&&&&)*047224>=@BA@<<;;:<<>>@?@ABCDBA>=<,;<@BC??=<;;<<>==;
+@405024e8-3a9e-48a9-82d4-37d40bf98f8c
+ACGCGTGATCAGCGAAAGCAGGGGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCCAAAAAATACCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTAAAATATGCGACAGTCCCCATCAGATCCTTGATGGAGGGAACTGCACGCACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCGTTCAAAATAAGGAATGGGACCTTTTTGTTGGGCAAAACGAAACCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACTGGAGTTTAAAAATGAAAGCTTCAATTGGACTGGAGTCAAGCAAAACGGAACAAGTTCTGCGTGCAAGGATCTAGTAGTAGTTTTTTAGTAGATTAAATTGGTTGACCCGCCAAACAACATATATCACTTTAGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGATACGGACAAGAATAAATCTCCTGTTCGCCCAATCATCAGGGAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGACATTCCTAGCAGAATAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGAATCTAATTGCCCCTAGGGGTTACTTCAAAATACGAAATGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACCAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGAAGAGGACAATCAGCAGATCTCAAAAGCACTCAAACAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAGGTAGAAGGAAGAGTTCAAGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACGATTGACCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAACAAAGCCAACTGAGGGAAAATGCTGAGGATATGGGAAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATGTCATGTTTTTTGCTTTGTATTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGATGCAACATTTGCATTTGAGTGCATTAATTAAAAACACCCTTGTTTCTACTGATCACGCGT
++
+,,,,112::;633233472/0+*(*-6;@A@?65555;?,+++*+4488<+'&&&&**('&&'''534444;;;==>>==?@@C8777-..3<;6745422888:;5@?AA>=<<99:::<=>><<:;:=333311126<<;<<<;;;:9::;?@<;;;;??;99889;=====A@?@@@BB@>=<<=>>AB><<;::88;;::9;;<8,++//:=>>6612221+******,,,-<>@CC><;;<<:9989:<;>>>==<;:<<<>A436881039400-,)++,-10564461111178;9333599:8994,+)()++99987:?>@BDCB?>=<=>??<<;99;=B7777(''&()'&&('&&&'''0(((((,)&&&&'*,2:;<<@AAA985448@@A@765448878788;:;:<:::;:;;;;5564344***,,0011259964/,--,-.0118:<>><<;9999::99;:=>>@DD?=<<;;543459;;;====>))('*>?9997799;??>903322447?@<555544444;93'''''&'1.,+++.11/&&&)*((((-../11===<<;;;<6/+(((()+,:@C>><;<>AB@?0///07///.1+))+5::;=<;889:;::99:<<:10''&&'-19<('3334=<<<=>?@?@.,,&&(*,00---56::;A>====<-3>;9<<<<<=><;<::;==><<<<=AA>;8889;??===<<><=>==>43:56<<@5444242--.3.('',134=??>===>?=668=@B@=882+6888899=>>AA>?A@BC9550///**+.;<<==;:::8<;45475:<>>?>=>@CFB:7777;A?>>>?@@<;>=?>?>?>A?>?@??@==;:9:78:>:5543+****)),--+---..///<:;;;=???>===;;;;;===>=A@@>=@@@<;:;;<=>@DCCAAA@??>=?>?@?>><:8864556:===><<<>>@@?@>>>?BCB776646-7:<=?@==<>==1111;=>?//..02610''')***04679>>=<<71255=???>>:9899<+++*+../;==@>.----(''(,99:5=>;79;@@@?>???=>===>?><;<<<5<<<<==?@?@AAB?=9993((34789;;=?;323:;<?>=@>>><>==;;;76)((((449<<>>?@@@?@@A@@<;7<;<<==?AA;;;;;AA=;:::9996666888779;;BC????@AABAABABFFCBBBABBA?@A?>==<=<<9899;:::>9:56>>==>?A@BA@AA@?>?>====>?BBCCDDC??>>>???>=<;<=>>?922229;:<=988-+000:;:<<>=<=<;==>@??9889:>==;8798888688,,,46=??98=@C@@===??A@>BD@89,(((&&(/000:-,2:=@C<98111((::8889:@;==889:AC?@=?=?A?3300023=5<=643222380--+++,.488<=ABC7666<;;;;:;<=ABDB??>>>@@@@><===<:999=BA====>>?@>?==<77888==<<>?<=,+1'')*+===><84223-,,,,479@;,000017=?>????<;;;::<<<>:;?==?@?<<<<>BB@8:>>>?>DCBEC@?@?>@@=99889==BA=<<<:=<=<;;;;<>?/9;6567:;;<>>>=>>@@@BABB=?@AACC@=<;;;>?AAABBEDFKCC@>224?ADD@>=;;;<=?544///
+@63c385b5-aad4-4178-a973-b5dd8a909558
+ACGCGTGATCAGCGAAAGCAGGGGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCAAAAAATACCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTAATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTAAAATATGCGACAGTCCCCATCAGATCCTTGATGGAGGAACTGCCACTAATAGATGCTCTATTGGGGGACCCTCAGTGCATGACGGCGTTCAAAATAAGGAATGGGACCTTTTTGTGAACGAAGCAGAGCCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTGGTTGCCTCATCCGGCACACTGGAGTTTAAAAATGAAAGCTTCAATTGGACTAGGTCAAAACAAAACGGAACAAGTTCTGCGTGCAAGGGGATCTGGTAATAGTTTTTTAGTAGATTAAATTGGTTGACCCACTTAAACAACATATATCCAGCACAAGAACGTGACTATGCCAAACAAGGAACAATTTGACAAATTGTACATTTGGGAGTTCACCACCCGGATACGGACAAGAACCAAATCTCCCTGTTCGCCCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGACATTTCTAGCAGAATAAGCATCTATT
++
+<<:::::<==;00001:542++++,8:;A@==<;:;;<<===?><==<:;558976:;<A@;;;;<@@@=>===?<====ABCC28;<889DFCD6=9666667:==<<<=>===<889:=?><;;;<?<<</....488911111;99:;<@@<<=<<?>><<<<<?C=AAA@@??@AABA@0003577;;::>?==<>>?::9735---56566=844525>???777?>=<<=<<;<@=<:::;;=<<;;;<=222222:::;<>><;<.-2911.--/8899::<;;<;:6666677=<:99::0000)(((()10001;;559==A?=99988=<<;99:891,+'&'/0001<=999==>;;;;:=<<;:::::<<<<=>>@@??<;:99:;;<322::980+0..../666)((((****.5;:;:::>?>=>=777791635CDA>A>=;<===<77;::91)(())+-*''''(?@>=<<<>@=<>==?<::::98((**,365/...**(((+,56019<<;<=@@=<<;=;;<;<>?@5;=9<?@5@:A@>?;69E,(()***('(.=<@?????8864,,,--/=<==<B>>@DA>@---777::>99:634/()((,565ACB>>CBB76588=>D;:><;:;;7=998335;;::?>>;?B@DCC54445@DEC>22*))*6<?666553/357==>4('''(48=>AAA>>==>?<ADC?>?ABC;=?=:77;<987533888ABC33334B@?>===GAAABA;9::
\ No newline at end of file
diff --git a/tests/data/inputs_outputs/sars-cov-2.fastq b/tests/data/inputs_outputs/sars-cov-2.fastq
new file mode 100644
index 0000000..896087c
--- /dev/null
+++ b/tests/data/inputs_outputs/sars-cov-2.fastq
@@ -0,0 +1,20 @@
+@SRR30635841.1 VL00553:3:AACM5FTM5:1:1101:26525:1000 length=301
+AATTCACAGACTTTAATAACAACATTAGTAGCGTTATTAACAATAAGTAGGGACTGGGTCTTCGAATCTAAAGTAGTACCAAAAATCCAGCCTCTTATTATGTTAGACTTCTCAATGGAAGCAAAATAAACACCATCATTAAATGGTAGGACAGGGTTATCAAACCTCTTAGTACCATTGGTCCCAGATATAACATGGAACCAAGTAACATTGGAAAAGAAAGGTAAGAACAAGTCCTGAGTTGAATGTAAAACTGAGGATCTGAAAACTTTGTCAGGGTAATAAACACCACGTGTGAAAG
++SRR30635841.1 VL00553:3:AACM5FTM5:1:1101:26525:1000 length=301
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCC*C5C*CCCCCCCCCC5CC
+@SRR30635841.2 VL00553:3:AACM5FTM5:1:1101:34648:1133 length=301
+GAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGGTAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCA
++SRR30635841.2 VL00553:3:AACM5FTM5:1:1101:34648:1133 length=301
+CCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCC5CCCCCCCCCCCCCCCCCC5CCCCC*C5CCCCC*
+@SRR30635841.3 VL00553:3:AACM5FTM5:1:1101:54190:1133 length=301
+TCATTGCTAGAAAACTCATCTGAGATATTGAGTGTTGGGTATAAGCCAGTAATTCTAACATAGTGCTCTTGTGGCACTAGTGTAGGTGCACTTAATGGCATTACTGTATGTGATGTCAGCACAAAATAATCACCAACATTTAATTTGTAAGTTGTTGTACCTCGGTAAACAACAGCATCACCATAGTCACCTTTTTCAAAGGTGTACTCTCCTATTTGTACTTTACTGTTTTTAGTTACACGATAACCAGTAAAGACATAATTTCGGTTAAGTGGTGGTCTAGGTTTACCAACTTCCCATG
++SRR30635841.3 VL00553:3:AACM5FTM5:1:1101:54190:1133 length=301
+5CCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCC**CCCCCC5C
+@SRR30635841.4 VL00553:3:AACM5FTM5:1:1101:54606:1133 length=301
+TGTGGTTTGAGTGAATATGACATAGTCATATTCTGAGCCCTGTGATGAATCAACAGTTTGAGTTGGTAGTCCCAAAATCTTTGAGGCTACAGCATTCTGTGAATTATAAGGTGAAATAAAGACAGCTTTTCTCCAAGCAGGGTTACGTGTAAGGAATTCTCTTACCACGCCTATTTGTGGCCTGTTAATTGCAGATGAAACATCATGCGTGATAACACCCTTATAAAACATTTTAAAGCATTGAGCTGATTTGTCTTTATGTGCTTTAAGCTTATTATCATAAACCAAAGCACTCACAGTG
++SRR30635841.4 VL00553:3:AACM5FTM5:1:1101:54606:1133 length=301
+5C5CCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCCCCCCCCCCCCCCCC*CCCCCC*CCCCCCC5CCCCCCCCC
+@SRR30635841.5 VL00553:3:AACM5FTM5:1:1101:51596:1151 length=301
+GCCAAAGCCTCATTATTATTCTTACAAAGTTTATACTCTGCAAGAAGTAGACTAAAGCATAAAGATAGAGAAAAGGGGCTTCAAGGCCAGCAGCAACGAGCAAAAGGTGTGAGTAAACTGTTACAAACAACAACAGCAAGTTGCAAACAAAGTGAACACCCTTGGAGAGTGCTAGTTGCCATCTCTTTTTGAGAGTTATGATTTTGGAAGCGCTCTGAAAAACAGCAAGAAGTGCAACGCCAACAATAAGCCATCCGAAAGGGAGTGAGGCTTGTATCGGTATCGTTGCAGTAGCGCGAAC
++SRR30635841.5 VL00553:3:AACM5FTM5:1:1101:51596:1151 length=301
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5CCCCC5CCCCCCCCCCCCCCCCCCC*CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC*5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC5
\ No newline at end of file
diff --git a/tests/data/inputs_outputs/synthetic.fastq b/tests/data/inputs_outputs/synthetic.fastq
new file mode 100644
index 0000000..6e72dfb
--- /dev/null
+++ b/tests/data/inputs_outputs/synthetic.fastq
@@ -0,0 +1,8 @@
+@read_number_1_first_120_of_ref
+GGAAATTCATTCTAGGGAGTGACGTGGACCCCGGATTGATACAGGATCACATGTAGAAAAGGTAGTCGGACAAGTTACCGCTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_number_2_last_120_of_ref
+CTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCCGCACTGACAGGACCACGCTTCATCATAACTTTGGCGGCTGGGCAACGGATTTAATGGTACATAACTCATCATTCTACGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
\ No newline at end of file
diff --git a/tests/data/inputs_outputs/too-short.fastq b/tests/data/inputs_outputs/too-short.fastq
new file mode 100644
index 0000000..3ddd8fa
--- /dev/null
+++ b/tests/data/inputs_outputs/too-short.fastq
@@ -0,0 +1,8 @@
+@read_number_1_first_120_of_ref
+GGAAATTCATTCTAGGGAGTGACGTGGACCCCGGATTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_number_2_last_120_of_ref
+CTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
\ No newline at end of file
diff --git a/tests/data/inputs_outputs/wrong_format.fastq b/tests/data/inputs_outputs/wrong_format.fastq
new file mode 100644
index 0000000..98588fe
--- /dev/null
+++ b/tests/data/inputs_outputs/wrong_format.fastq
@@ -0,0 +1,6 @@
++read_number_1_first_120_of_ref
+GGAAATTCATTCTAGGGAGTGACGTGGACCCCGGATTGATACAGGATCACATGTAGAAAAGGTAGTCGGACAAGTTACCGCTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCC
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
++read_number_2_last_120_of_ref
+CTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCCGCACTGACAGGACCACGCTTCATCATAACTTTGGCGGCTGGGCAACGGATTTAATGGTACATAACTCATCATTCTACGTA
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
\ No newline at end of file
diff --git a/tests/data/primers/ARTIC-V5.3.2.fasta b/tests/data/primers/ARTIC-V5.3.2.fasta
new file mode 100644
index 0000000..4153564
--- /dev/null
+++ b/tests/data/primers/ARTIC-V5.3.2.fasta
@@ -0,0 +1,384 @@
+>MN908947.3:47-78_LEFT
+CTCTTGTAGATCTGTTCTCTAAACGAACTTT
+>MN908947.3:419-447_RIGHT
+GCTTAGTAGAAGTTGAAAAAGGCGTTTT
+>MN908947.3:344-366_LEFT
+TCGTACGTGGCTTTGGAGACTC
+>MN908947.3:707-732_RIGHT
+AGCTTGGCACTGATCCTTATGAAGA
+>MN908947.3:638-661_LEFT
+AGAACGGTAATAAAGGAGCTGGT
+>MN908947.3:1018-1047_RIGHT
+CAGACACCTTTTGAAATTAAATTGGCAAA
+>MN908947.3:970-995_LEFT
+CATGAAATTGCTTGGTACACGGAAC
+>MN908947.3:1340-1370_RIGHT
+ACTTACCCCAAAATGCTGTTGTTAAAATTT
+>MN908947.3:1292-1320_LEFT
+TTTGTGGCACTGAGAATTTGACTAAAGA
+>MN908947.3:1660-1692_RIGHT
+GACTTTAAACTTAATGAAGAGATCGCCATTAT
+>MN908947.3:1574-1596_LEFT
+GTGTTGTTGGAGAAGGTTCCGA
+>MN908947.3:1945-1972_RIGHT
+GTTTTACAGAAGGCCGCTATAACAATA
+>MN908947.3:1882-1905_LEFT
+GCTGCTCGTGTTGTACGATCAAT
+>MN908947.3:2259-2284_RIGHT
+CACCTGTGCAAAGGAAATTAAGGAG
+>MN908947.3:2229-2252_LEFT
+TGCTTGTGAAATTGTCGGTGGAC
+>MN908947.3:2603-2629_RIGHT
+GTACACCAGTTTGTATTAACGGGCTT
+>MN908947.3:2533-2563_LEFT
+GTCTTGAAAACTGGTGATTTACAACCATTA
+>MN908947.3:2900-2933_RIGHT
+TCATAAAAACTTTGCAACCAGTATCTGAATTAC
+>MN908947.3:2854-2880_LEFT
+CTCGGTACAGAAGTAAATGAGTTCGC
+>MN908947.3:3233-3254_RIGHT
+AACAAGACGGCAGTGAGGACA
+>MN908947.3:3184-3213_LEFT
+GAGCAAGAAGAAGATTGGTTAGATGATGA
+>MN908947.3:3560-3584_RIGHT
+TGGGTGGTAGTTGTGTTTTAAGCG
+>MN908947.3:3510-3540_LEFT
+CATGCAAGTTGAATCTGATGATTACATAGC
+>MN908947.3:3883-3913_RIGHT
+CCTAAAGAGGAAGTTAAGCCATTTATAACT
+>MN908947.3:3791-3824_LEFT
+CTGTCTTTGATAAAAATCTCTATGACAAACTTG
+>MN908947.3:4147-4180_RIGHT
+GTTTTAACTGCTGTGGTTATACCTACTAAAAAG
+>MN908947.3:4079-4108_LEFT
+GTGACATTGACATCACTTTCTTAAAGAAA
+>MN908947.3:4457-4488_RIGHT
+TAGTTTCAACTATACAGCGTAAATATAAGGG
+>MN908947.3:4403-4425_LEFT
+CACATGCAGAAGAAACACGCAA
+>MN908947.3:4776-4803_RIGHT
+CATCTCACTTGCTGGTTCCTATAAAGA
+>MN908947.3:4723-4756_LEFT
+AATGGTTATCTTACTTCTTCTTCTAAAACACCT
+>MN908947.3:5089-5119_RIGHT
+CCTCATAATTCACATGAAGGTAAAACATTT
+>MN908947.3:5036-5063_LEFT
+GACAACAGTTTGGTCCAACTTATTTGG
+>MN908947.3:5398-5429_RIGHT
+GCACTTATCTTAGCCTACTGTAATAAGACAG
+>MN908947.3:5344-5370_LEFT
+GCTCTACAAGATGCTTATTACAGAGC
+>MN908947.3:5716-5744_RIGHT
+CATGGTACATTTACTTGTGCTAGTGAGT
+>MN908947.3:5671-5696_LEFT
+CCTTTTGTTATGATGTCAGCACCAC
+>MN908947.3:6031-6062_RIGHT
+GCAAGCTTCGATAATTTTAAGTTTGTATGTG
+>MN908947.3:5891-5923_LEFT
+CCATAAAACCAGTTACTTATAAATTGGATGGT
+>MN908947.3:6257-6288_RIGHT
+CTAATAAAGCCACGTATAAACCAAATACCTG
+>MN908947.3:6204-6237_LEFT
+GAAAGGAGCTAAATTGTTACATAAACCTATTGT
+>MN908947.3:6562-6595_RIGHT
+GTAGACAATTCTAGTCTTACTATTAAGAAACCT
+>MN908947.3:6515-6542_LEFT
+TAAAAATTACAGAAGAGGTTGGCCACA
+>MN908947.3:6882-6915_RIGHT
+CGGTAAATTTTGTCTAGAGGCTTCATTTAATTA
+>MN908947.3:6823-6854_LEFT
+AATTCTAGAATTAAAGCATCTATGCCGACTA
+>MN908947.3:7199-7229_RIGHT
+CATCTTTTAAATGGGATTTAACTGCTTTTG
+>MN908947.3:7145-7179_LEFT
+GTTTAGATTCTTTAGACACCTATCCTTCTTTAGA
+>MN908947.3:7518-7545_RIGHT
+AACAAGAGTCGAATGTACAACTATTGT
+>MN908947.3:7456-7482_LEFT
+GTGCATGTTGTAGACGGTTGTAATTC
+>MN908947.3:7819-7850_RIGHT
+TCTCATTTTGTTAACTTAGACAACCTGAGAG
+>MN908947.3:7768-7797_LEFT
+CATCTTTACTTTGATAAAGCTGGTCAAAA
+>MN908947.3:8136-8169_RIGHT
+GTCCTTAGACAATGTCTTATCTACTTTTATTTC
+>MN908947.3:8085-8112_LEFT
+AAAACTCAAAACACTAGTTGCAACTGC
+>MN908947.3:8468-8498_RIGHT
+AGAATAACTTACCTTTTAAGTTGACATGTG
+>MN908947.3:8406-8436_LEFT
+CGTTAAAGATTTCATGTCATTGTCTGAACA
+>MN908947.3:8781-8806_RIGHT
+CCAGCGTGGTGGTAGTTATACTAAT
+>MN908947.3:8732-8761_LEFT
+CAGATACTTGTTTTGCTAACAAACATGCT
+>MN908947.3:9107-9129_RIGHT
+TACGCCCTGACACACGTTATGT
+>MN908947.3:9023-9052_LEFT
+CAATTTTTAAAGATGCTTCTGGTAAGCCA
+>MN908947.3:9397-9423_RIGHT
+GCATCTATAGTAGCTGGTGGTATTGT
+>MN908947.3:9299-9324_LEFT
+GATCTTTACCAGGAGTTTTCTGTGG
+>MN908947.3:9673-9706_RIGHT
+CCTTTCTGGATAACAATTGCTTATATCATTTGT
+>MN908947.3:9571-9604_LEFT
+GGTGTTTATTCTGTTATTTACTTGTACTTGACA
+>MN908947.3:9949-9971_RIGHT
+GCTGCTTGTTGTCATCTCGCAA
+>MN908947.3:9896-9929_LEFT
+ATAATAAGTACAAGTATTTTAGTGGAGCAATGG
+>MN908947.3:10266-10295_RIGHT
+TAATGTTCAACTCAGGGTTATTGGACATT
+>MN908947.3:10215-10245_LEFT
+TGAAGATTTACTCATTCGTAAGTCTAATCA
+>MN908947.3:10587-10615_RIGHT
+AGGTAACTTTTATGGACCTTTTGTTGAC
+>MN908947.3:10527-10557_LEFT
+TTTTTGTTACATGCACCATATGGAATTACC
+>MN908947.3:10897-10927_RIGHT
+TTGGGTAGTGCTTTATTAGAAGATGAATTT
+>MN908947.3:10832-10865_LEFT
+CCGTTTTAGATATGTGTGCTTCATTAAAAGAAT
+>MN908947.3:11201-11232_RIGHT
+CTGTAGCTTATTTTAATATGGTCTATATGCC
+>MN908947.3:11152-11181_LEFT
+AAACATAAGCATGCATTTCTCTGTTTGTT
+>MN908947.3:11514-11536_RIGHT
+TGTCATGTTTTTGGCCAGAGGT
+>MN908947.3:11463-11494_LEFT
+GTGGGCTCTTATAATCTCTGTTACTTCTAAC
+>MN908947.3:11832-11863_RIGHT
+CACTGTACAGTCTAAAATGTCAGATGTAAAG
+>MN908947.3:11785-11811_LEFT
+AACATTAAATTGTTGGGTGTTGGTGG
+>MN908947.3:12161-12185_RIGHT
+AGGCTGTTGCTAATGGTGATTCTG
+>MN908947.3:12112-12137_LEFT
+TCCCTTCCATCATATGCAGCTTTTG
+>MN908947.3:12477-12510_RIGHT
+GGTTGTCATACCAGACTATAACACATATAAAAA
+>MN908947.3:12419-12444_LEFT
+CAAGAGATGGTTGTGTTCCCTTGAA
+>MN908947.3:12794-12819_RIGHT
+GAGGTAGGTTTGTACTTGCACTGTT
+>MN908947.3:12752-12774_LEFT
+GCACTGATGACAATGCGTTAGC
+>MN908947.3:13121-13146_RIGHT
+GTGGGGGACAACCAATCACTAATTG
+>MN908947.3:13075-13099_LEFT
+GCTTTTGCTGTAGATGCTGCTAAA
+>MN908947.3:13458-13480_RIGHT
+GTTTTTAAACGGGTTTGCGGTG
+>MN908947.3:13415-13435_LEFT
+ATCAACTCCGCGAACCCATG
+>MN908947.3:13787-13815_RIGHT
+TCAACGTCTTACTAAATACACAATGGCA
+>MN908947.3:13738-13767_LEFT
+ACTTCTTTAAGTTTAGAATAGACGGTGAC
+>MN908947.3:14120-14144_RIGHT
+AGGTAGTGGAGTTCCTGTTGTAGA
+>MN908947.3:14073-14100_LEFT
+CTCAATGGTAACTGGTATGATTTCGGT
+>MN908947.3:14427-14457_RIGHT
+GTGAGAAAAATATTTGTTGATGGTGTTCCA
+>MN908947.3:14375-14407_LEFT
+CTTTAATGTTTTATTCTCTACAGTGTTCCCAC
+>MN908947.3:14745-14775_RIGHT
+GAATTAAAACACTTCTTCTTTGCTCAGGAT
+>MN908947.3:14700-14725_LEFT
+GACTTTGCTGTGTCTAAGGGTTTCT
+>MN908947.3:15065-15095_RIGHT
+GAATCTTAAGTATGCCATTAGTGCAAAGAA
+>MN908947.3:15016-15045_LEFT
+CACTTTTCGCATATACAAAACGTAATGTC
+>MN908947.3:15386-15416_RIGHT
+ACACCGTTTCTATAGATTAGCTAATGAGTG
+>MN908947.3:15342-15366_LEFT
+TCACTTGTTCTTGCTCGCAAACAT
+>MN908947.3:15716-15742_RIGHT
+TGACGATGCTGTTGTGTGTTTCAATA
+>MN908947.3:15659-15688_LEFT
+CTTTGTGAATGAGTTTTACGCATATTTGC
+>MN908947.3:16028-16059_RIGHT
+TATAGATGCTTACCCACTTACTAAACATCCT
+>MN908947.3:15992-16018_LEFT
+TGGTACACTTATGATTGAACGGTTCG
+>MN908947.3:16386-16409_RIGHT
+AATGCTCCAGGTTGTGATGTCAC
+>MN908947.3:16285-16311_LEFT
+GTGCTTGCATACGTAGACCATTCTTA
+>MN908947.3:16650-16679_RIGHT
+AAAGCTACTGAGGAGACATTTAAACTGTC
+>MN908947.3:16624-16647_LEFT
+TCAAGCTTTTTGCAGCAGAAACG
+>MN908947.3:17004-17033_RIGHT
+AATATCTCAGATGAGTTTTCTAGCAATGT
+>MN908947.3:16962-16994_LEFT
+CAAGAGCACTATGTTAGAATTACTGGCTTATA
+>MN908947.3:17333-17362_RIGHT
+GACAGCAGATATAGTTGTCTTTGATGAAA
+>MN908947.3:17182-17212_LEFT
+CACTATGTGAGAAGGCATTAAAATATTTGC
+>MN908947.3:17560-17582_RIGHT
+GGCGTTGTCCTGCTGAAATTGT
+>MN908947.3:17478-17507_LEFT
+GGCACACTAGAACCAGAATATTTCAATTC
+>MN908947.3:17859-17886_RIGHT
+GACTATGTCATATTCACTCAAACCACT
+>MN908947.3:17813-17839_LEFT
+GGGACTACCAACTCAAACTGTTGATT
+>MN908947.3:18181-18212_RIGHT
+ACATGACCTATAGAAGACTCATCTCTATGAT
+>MN908947.3:18121-18153_LEFT
+GTGTTGACACTAAATTCAAAACTGAAGGTTTA
+>MN908947.3:18504-18527_RIGHT
+GGACTTCCTTGGAATGTAGTGCG
+>MN908947.3:18460-18484_LEFT
+CGCCTGGAGATCAATTTAAACACC
+>MN908947.3:18835-18860_RIGHT
+ATGCACATGTAGCTAGTTGTGATGC
+>MN908947.3:18789-18815_LEFT
+GGTAACCTACAAAGCAACCATGATCT
+>MN908947.3:19170-19195_RIGHT
+ACAGATGGTGTATGCCTATTTTGGA
+>MN908947.3:19087-19112_LEFT
+TCTATGATGCACAGCCTTGTAGTGA
+>MN908947.3:19469-19495_RIGHT
+CAATTTAGGTGGTGCTGTCTGTAGAC
+>MN908947.3:19415-19449_LEFT
+AGTGTCAGATATAGATTATGTACCACTAAAGTCT
+>MN908947.3:19770-19796_RIGHT
+GTTAATGTAGCATTTGAGCTTTGGGC
+>MN908947.3:19721-19750_LEFT
+AGTTGATGGTGTTGATGTAGAATTGTTTG
+>MN908947.3:20091-20121_RIGHT
+AAACAAGCTAGTCTTAATGGAGTCACATTA
+>MN908947.3:20028-20054_LEFT
+GCCCGTAATGGTGTTCTTATTACAGA
+>MN908947.3:20408-20441_RIGHT
+TGAATTAGAAGATTTTATTCCTATGGACAGTAC
+>MN908947.3:20358-20388_LEFT
+GGTTTACATCTACTGATTGGACTAGCTAAA
+>MN908947.3:20729-20758_RIGHT
+GTGTGACCTTCAAAATTATGGTGATAGTG
+>MN908947.3:20650-20676_LEFT
+AATTACAATCTAGTCAAGCGTGGCAA
+>MN908947.3:21018-21051_RIGHT
+GCTAATAAATGGGATCTCATTATTAGTGATATG
+>MN908947.3:20991-21018_LEFT
+ATTGGTGATTGTGCAACTGTACATACA
+>MN908947.3:21372-21402_RIGHT
+TTGTCTTCCTATTCTTTATTTGACATGAGT
+>MN908947.3:21322-21352_LEFT
+ATGTCATGCATGCAAATTACATATTTTGGA
+>MN908947.3:21696-21722_RIGHT
+CTCAGTTTTACATTCAACTCAGGACT
+>MN908947.3:21579-21607_LEFT
+TTTATTGCCACTAGTCTCTAGTCAGTGT
+>MN908947.3:21927-21960_RIGHT
+CGCTACTAATGTTGTTATTAAAGTCTGTGAATT
+>MN908947.3:21866-21894_LEFT
+GAGGCTGGATTTTTGGTACTACTTTAGA
+>MN908947.3:22238-22266_RIGHT
+TGGTAGATTTGCCAATAGGTATTAACAT
+>MN908947.3:22156-22189_LEFT
+GGTTATTTTAAAATATATTCTAAGCACACGCCT
+>MN908947.3:22517-22547_RIGHT
+GAGTCCAACCAACAGAATCTATTGTTAGAT
+>MN908947.3:22466-22494_LEFT
+CGTTGAAATCCTTCACTGTAGAAAAAGG
+>MN908947.3:22839-22866_RIGHT
+AGATGATTTTACAGGCTGCGTTATAGC
+>MN908947.3:22742-22774_LEFT
+ATGTCTATGCAGATTCATTTGTAATTAGAGGT
+>MN908947.3:23119-23140_RIGHT
+GCACCAGCAACTGTTTGTGGA
+>MN908947.3:23078-23109_LEFT
+AACCATACAGAGTAGTAGTACTTTCTTTTGA
+>MN908947.3:23452-23478_RIGHT
+CCTACTTGGCGTGTTTATTCTACAGG
+>MN908947.3:23229-23258_LEFT
+CAAAAAGTTTCTGCCTTTCCAACAATTTG
+>MN908947.3:23609-23631_RIGHT
+GGGCACGTAGTGTAGCTAGTCA
+>MN908947.3:23563-23589_LEFT
+GCAGGTATATGCGCTAGTTATCAGAC
+>MN908947.3:23914-23944_RIGHT
+GTCAAACAAATTTACAAAACACCACCAATT
+>MN908947.3:23823-23853_LEFT
+GCAATATGGCAGTTTTTGTACACAATTAAA
+>MN908947.3:24209-24231_RIGHT
+CTTCTGGTTGGACCTTTGGTGC
+>MN908947.3:24160-24189_LEFT
+GATGAAATGATTGCTCAATACACTTCTGC
+>MN908947.3:24535-24560_RIGHT
+CAAATTGATAGGTTGATCACAGGCA
+>MN908947.3:24442-24468_LEFT
+ACGCTTGTTAAACAACTTAGCTCCAA
+>MN908947.3:24815-24839_RIGHT
+GAAAAGCACACTTTCCTCGTGAAG
+>MN908947.3:24751-24774_LEFT
+CATGTGACTTATGTCCCTGCACA
+>MN908947.3:25120-25151_RIGHT
+AATGAGGTTGCCAAGAATTTAAATGAATCTC
+>MN908947.3:25053-25082_LEFT
+TGATTTAGGTGACATCTCTGGCATTAATG
+>MN908947.3:25423-25452_RIGHT
+GAACTGTAACTTTGAAGCAAGGTGAAATC
+>MN908947.3:25372-25402_LEFT
+CATTACACATAAACGAACTTATGGATTTGT
+>MN908947.3:25744-25777_RIGHT
+TAAACTTTGTAAGAATAATAATGAGGCTTTGGC
+>MN908947.3:25653-25680_LEFT
+GTAACAGTTTACTCACACCTTTTGCTC
+>MN908947.3:26048-26072_RIGHT
+GAGTACAGACACTGGTGTTGAACA
+>MN908947.3:26011-26039_LEFT
+TCACTTCAGACTATTACCAGCTGTACTC
+>MN908947.3:26382-26411_RIGHT
+GTTAACGTGAGTCTTGTAAAACCTTCTTT
+>MN908947.3:26339-26362_LEFT
+CATCCTTACTGCGCTTCGATTGT
+>MN908947.3:26730-26756_RIGHT
+TTTACAGAATAAATTGGATCACCGGT
+>MN908947.3:26593-26621_LEFT
+AGGTTTCCTATTCCTTACATGGATTTGT
+>MN908947.3:26989-27009_RIGHT
+AGGACGCTGTGACATCAAGG
+>MN908947.3:26958-26981_LEFT
+GTGGACATCTTCGTATTGCTGGA
+>MN908947.3:27349-27376_RIGHT
+CTCAATTAGATGAAGAGCAACCAATGG
+>MN908947.3:27200-27226_LEFT
+GATGTTTCATCTCGTTGACTTTCAGG
+>MN908947.3:27583-27603_RIGHT
+CTTTTGCTTGTCCTGACGGC
+>MN908947.3:27530-27558_LEFT
+TCATCCTCTAGCTGATAACAAATTTGCA
+>MN908947.3:27927-27950_RIGHT
+CTGTAGCTGCATTTCACCAAGAA
+>MN908947.3:27832-27860_LEFT
+TATCTTTTGGTTCTCACTTGAACTGCAA
+>MN908947.3:28209-28237_RIGHT
+AAGACTTTTTAGAGTATCATGACGTTCG
+>MN908947.3:28135-28166_LEFT
+TTCCTGTTTACCTTTTACAATTAATTGCCAG
+>MN908947.3:28513-28539_RIGHT
+GATGACCAAATTGGCTACTACCGAAG
+>MN908947.3:28473-28493_LEFT
+TCGAGGACAAGGCGTTCCAA
+>MN908947.3:28849-28873_RIGHT
+AGTTCAAGAAATTCAACTCCAGGC
+>MN908947.3:28808-28829_LEFT
+GCAGTCAAGCCTCTTCTCGTT
+>MN908947.3:29203-29224_RIGHT
+GCTTCAGCGTTCTTCGGAATG
+>MN908947.3:29159-29183_LEFT
+CTGATTACAAACATTGGCCGCAAA
+>MN908947.3:29538-29559_RIGHT
+TGCAGACCACACAAGGCAGAT
+>MN908947.3:29462-29486_LEFT
+CTGCAGATTTGGATGATTTCTCCA
+>MN908947.3:29840-29873_RIGHT
+GTGATTTTAATAGCTTCTTAGGAGAATGACAAA
diff --git a/tests/data/primers/SARS-CoV-2-ARTIC-V5.3.2.scheme.bed b/tests/data/primers/SARS-CoV-2-ARTIC-V5.3.2.scheme.bed
new file mode 100644
index 0000000..2da0b06
--- /dev/null
+++ b/tests/data/primers/SARS-CoV-2-ARTIC-V5.3.2.scheme.bed
@@ -0,0 +1,192 @@
+MN908947.3	47	78	SARS-CoV-2_400_1_LEFT_1	1	+
+MN908947.3	419	447	SARS-CoV-2_400_1_RIGHT_1	1	-
+MN908947.3	344	366	SARS-CoV-2_400_2_LEFT_0	2	+
+MN908947.3	707	732	SARS-CoV-2_400_2_RIGHT_0	2	-
+MN908947.3	638	661	SARS-CoV-2_400_3_LEFT_1	1	+
+MN908947.3	1018	1047	SARS-CoV-2_400_3_RIGHT_0	1	-
+MN908947.3	970	995	SARS-CoV-2_400_4_LEFT_0	2	+
+MN908947.3	1340	1370	SARS-CoV-2_400_4_RIGHT_0	2	-
+MN908947.3	1292	1320	SARS-CoV-2_400_5_LEFT_0	1	+
+MN908947.3	1660	1692	SARS-CoV-2_400_5_RIGHT_0	1	-
+MN908947.3	1574	1596	SARS-CoV-2_400_6_LEFT_1	2	+
+MN908947.3	1945	1972	SARS-CoV-2_400_6_RIGHT_1	2	-
+MN908947.3	1882	1905	SARS-CoV-2_400_7_LEFT_2	1	+
+MN908947.3	2259	2284	SARS-CoV-2_400_7_RIGHT_2	1	-
+MN908947.3	2229	2252	SARS-CoV-2_400_8_LEFT_0	2	+
+MN908947.3	2603	2629	SARS-CoV-2_400_8_RIGHT_0	2	-
+MN908947.3	2533	2563	SARS-CoV-2_400_9_LEFT_0	1	+
+MN908947.3	2900	2933	SARS-CoV-2_400_9_RIGHT_0	1	-
+MN908947.3	2854	2880	SARS-CoV-2_400_10_LEFT_0	2	+
+MN908947.3	3233	3254	SARS-CoV-2_400_10_RIGHT_0	2	-
+MN908947.3	3184	3213	SARS-CoV-2_400_11_LEFT_0	1	+
+MN908947.3	3560	3584	SARS-CoV-2_400_11_RIGHT_0	1	-
+MN908947.3	3510	3540	SARS-CoV-2_400_12_LEFT_0	2	+
+MN908947.3	3883	3913	SARS-CoV-2_400_12_RIGHT_0	2	-
+MN908947.3	3791	3824	SARS-CoV-2_400_13_LEFT_0	1	+
+MN908947.3	4147	4180	SARS-CoV-2_400_13_RIGHT_0	1	-
+MN908947.3	4079	4108	SARS-CoV-2_400_14_LEFT_0	2	+
+MN908947.3	4457	4488	SARS-CoV-2_400_14_RIGHT_0	2	-
+MN908947.3	4403	4425	SARS-CoV-2_400_15_LEFT_0	1	+
+MN908947.3	4776	4803	SARS-CoV-2_400_15_RIGHT_0	1	-
+MN908947.3	4723	4756	SARS-CoV-2_400_16_LEFT_0	2	+
+MN908947.3	5089	5119	SARS-CoV-2_400_16_RIGHT_0	2	-
+MN908947.3	5036	5063	SARS-CoV-2_400_17_LEFT_0	1	+
+MN908947.3	5398	5429	SARS-CoV-2_400_17_RIGHT_0	1	-
+MN908947.3	5344	5370	SARS-CoV-2_400_18_LEFT_0	2	+
+MN908947.3	5716	5744	SARS-CoV-2_400_18_RIGHT_0	2	-
+MN908947.3	5671	5696	SARS-CoV-2_400_19_LEFT_0	1	+
+MN908947.3	6031	6062	SARS-CoV-2_400_19_RIGHT_0	1	-
+MN908947.3	5891	5923	SARS-CoV-2_400_20_LEFT_0	2	+
+MN908947.3	6257	6288	SARS-CoV-2_400_20_RIGHT_0	2	-
+MN908947.3	6204	6237	SARS-CoV-2_400_21_LEFT_0	1	+
+MN908947.3	6562	6595	SARS-CoV-2_400_21_RIGHT_0	1	-
+MN908947.3	6515	6542	SARS-CoV-2_400_22_LEFT_0	2	+
+MN908947.3	6882	6915	SARS-CoV-2_400_22_RIGHT_0	2	-
+MN908947.3	6823	6854	SARS-CoV-2_400_23_LEFT_0	1	+
+MN908947.3	7199	7229	SARS-CoV-2_400_23_RIGHT_0	1	-
+MN908947.3	7145	7179	SARS-CoV-2_400_24_LEFT_0	2	+
+MN908947.3	7518	7545	SARS-CoV-2_400_24_RIGHT_0	2	-
+MN908947.3	7456	7482	SARS-CoV-2_400_25_LEFT_0	1	+
+MN908947.3	7819	7850	SARS-CoV-2_400_25_RIGHT_0	1	-
+MN908947.3	7768	7797	SARS-CoV-2_400_26_LEFT_0	2	+
+MN908947.3	8136	8169	SARS-CoV-2_400_26_RIGHT_0	2	-
+MN908947.3	8085	8112	SARS-CoV-2_400_27_LEFT_0	1	+
+MN908947.3	8468	8498	SARS-CoV-2_400_27_RIGHT_0	1	-
+MN908947.3	8406	8436	SARS-CoV-2_400_28_LEFT_0	2	+
+MN908947.3	8781	8806	SARS-CoV-2_400_28_RIGHT_0	2	-
+MN908947.3	8732	8761	SARS-CoV-2_400_29_LEFT_0	1	+
+MN908947.3	9107	9129	SARS-CoV-2_400_29_RIGHT_0	1	-
+MN908947.3	9023	9052	SARS-CoV-2_400_30_LEFT_0	2	+
+MN908947.3	9397	9423	SARS-CoV-2_400_30_RIGHT_0	2	-
+MN908947.3	9299	9324	SARS-CoV-2_400_31_LEFT_1	1	+
+MN908947.3	9673	9706	SARS-CoV-2_400_31_RIGHT_0	1	-
+MN908947.3	9571	9604	SARS-CoV-2_400_32_LEFT_0	2	+
+MN908947.3	9949	9971	SARS-CoV-2_400_32_RIGHT_0	2	-
+MN908947.3	9896	9929	SARS-CoV-2_400_33_LEFT_0	1	+
+MN908947.3	10266	10295	SARS-CoV-2_400_33_RIGHT_0	1	-
+MN908947.3	10215	10245	SARS-CoV-2_400_34_LEFT_0	2	+
+MN908947.3	10587	10615	SARS-CoV-2_400_34_RIGHT_0	2	-
+MN908947.3	10527	10557	SARS-CoV-2_400_35_LEFT_0	1	+
+MN908947.3	10897	10927	SARS-CoV-2_400_35_RIGHT_0	1	-
+MN908947.3	10832	10865	SARS-CoV-2_400_36_LEFT_0	2	+
+MN908947.3	11201	11232	SARS-CoV-2_400_36_RIGHT_0	2	-
+MN908947.3	11152	11181	SARS-CoV-2_400_37_LEFT_0	1	+
+MN908947.3	11514	11536	SARS-CoV-2_400_37_RIGHT_0	1	-
+MN908947.3	11463	11494	SARS-CoV-2_400_38_LEFT_0	2	+
+MN908947.3	11832	11863	SARS-CoV-2_400_38_RIGHT_0	2	-
+MN908947.3	11785	11811	SARS-CoV-2_400_39_LEFT_0	1	+
+MN908947.3	12161	12185	SARS-CoV-2_400_39_RIGHT_0	1	-
+MN908947.3	12112	12137	SARS-CoV-2_400_40_LEFT_0	2	+
+MN908947.3	12477	12510	SARS-CoV-2_400_40_RIGHT_0	2	-
+MN908947.3	12419	12444	SARS-CoV-2_400_41_LEFT_0	1	+
+MN908947.3	12794	12819	SARS-CoV-2_400_41_RIGHT_0	1	-
+MN908947.3	12752	12774	SARS-CoV-2_400_42_LEFT_0	2	+
+MN908947.3	13121	13146	SARS-CoV-2_400_42_RIGHT_0	2	-
+MN908947.3	13075	13099	SARS-CoV-2_400_43_LEFT_0	1	+
+MN908947.3	13458	13480	SARS-CoV-2_400_43_RIGHT_0	1	-
+MN908947.3	13415	13435	SARS-CoV-2_400_44_LEFT_0	2	+
+MN908947.3	13787	13815	SARS-CoV-2_400_44_RIGHT_0	2	-
+MN908947.3	13738	13767	SARS-CoV-2_400_45_LEFT_0	1	+
+MN908947.3	14120	14144	SARS-CoV-2_400_45_RIGHT_0	1	-
+MN908947.3	14073	14100	SARS-CoV-2_400_46_LEFT_0	2	+
+MN908947.3	14427	14457	SARS-CoV-2_400_46_RIGHT_0	2	-
+MN908947.3	14375	14407	SARS-CoV-2_400_47_LEFT_0	1	+
+MN908947.3	14745	14775	SARS-CoV-2_400_47_RIGHT_0	1	-
+MN908947.3	14700	14725	SARS-CoV-2_400_48_LEFT_0	2	+
+MN908947.3	15065	15095	SARS-CoV-2_400_48_RIGHT_0	2	-
+MN908947.3	15016	15045	SARS-CoV-2_400_49_LEFT_0	1	+
+MN908947.3	15386	15416	SARS-CoV-2_400_49_RIGHT_0	1	-
+MN908947.3	15342	15366	SARS-CoV-2_400_50_LEFT_0	2	+
+MN908947.3	15716	15742	SARS-CoV-2_400_50_RIGHT_0	2	-
+MN908947.3	15659	15688	SARS-CoV-2_400_51_LEFT_0	1	+
+MN908947.3	16028	16059	SARS-CoV-2_400_51_RIGHT_0	1	-
+MN908947.3	15992	16018	SARS-CoV-2_400_52_LEFT_2	2	+
+MN908947.3	16386	16409	SARS-CoV-2_400_52_RIGHT_2	2	-
+MN908947.3	16285	16311	SARS-CoV-2_400_53_LEFT_0	1	+
+MN908947.3	16650	16679	SARS-CoV-2_400_53_RIGHT_0	1	-
+MN908947.3	16624	16647	SARS-CoV-2_400_54_LEFT_1	2	+
+MN908947.3	17004	17033	SARS-CoV-2_400_54_RIGHT_1	2	-
+MN908947.3	16962	16994	SARS-CoV-2_400_55_LEFT_1	1	+
+MN908947.3	17333	17362	SARS-CoV-2_400_55_RIGHT_1	1	-
+MN908947.3	17182	17212	SARS-CoV-2_400_56_LEFT_0	2	+
+MN908947.3	17560	17582	SARS-CoV-2_400_56_RIGHT_0	2	-
+MN908947.3	17478	17507	SARS-CoV-2_400_57_LEFT_0	1	+
+MN908947.3	17859	17886	SARS-CoV-2_400_57_RIGHT_0	1	-
+MN908947.3	17813	17839	SARS-CoV-2_400_58_LEFT_0	2	+
+MN908947.3	18181	18212	SARS-CoV-2_400_58_RIGHT_0	2	-
+MN908947.3	18121	18153	SARS-CoV-2_400_59_LEFT_0	1	+
+MN908947.3	18504	18527	SARS-CoV-2_400_59_RIGHT_0	1	-
+MN908947.3	18460	18484	SARS-CoV-2_400_60_LEFT_0	2	+
+MN908947.3	18835	18860	SARS-CoV-2_400_60_RIGHT_0	2	-
+MN908947.3	18789	18815	SARS-CoV-2_400_61_LEFT_0	1	+
+MN908947.3	19170	19195	SARS-CoV-2_400_61_RIGHT_0	1	-
+MN908947.3	19087	19112	SARS-CoV-2_400_62_LEFT_2	2	+
+MN908947.3	19469	19495	SARS-CoV-2_400_62_RIGHT_0	2	-
+MN908947.3	19415	19449	SARS-CoV-2_400_63_LEFT_0	1	+
+MN908947.3	19770	19796	SARS-CoV-2_400_63_RIGHT_0	1	-
+MN908947.3	19721	19750	SARS-CoV-2_400_64_LEFT_0	2	+
+MN908947.3	20091	20121	SARS-CoV-2_400_64_RIGHT_0	2	-
+MN908947.3	20028	20054	SARS-CoV-2_400_65_LEFT_0	1	+
+MN908947.3	20408	20441	SARS-CoV-2_400_65_RIGHT_0	1	-
+MN908947.3	20358	20388	SARS-CoV-2_400_66_LEFT_0	2	+
+MN908947.3	20729	20758	SARS-CoV-2_400_66_RIGHT_0	2	-
+MN908947.3	20650	20676	SARS-CoV-2_400_67_LEFT_1	1	+
+MN908947.3	21018	21051	SARS-CoV-2_400_67_RIGHT_1	1	-
+MN908947.3	20991	21018	SARS-CoV-2_400_68_LEFT_0	2	+
+MN908947.3	21372	21402	SARS-CoV-2_400_68_RIGHT_0	2	-
+MN908947.3	21322	21352	SARS-CoV-2_400_69_LEFT_0	1	+
+MN908947.3	21696	21722	SARS-CoV-2_400_69_RIGHT_0	1	-
+MN908947.3	21579	21607	SARS-CoV-2_400_70_LEFT_0	2	+
+MN908947.3	21927	21960	SARS-CoV-2_400_70_RIGHT_0	2	-
+MN908947.3	21866	21894	SARS-CoV-2_400_71_LEFT_0	1	+
+MN908947.3	22238	22266	SARS-CoV-2_400_71_RIGHT_0	1	-
+MN908947.3	22156	22189	SARS-CoV-2_400_72_LEFT_0	2	+
+MN908947.3	22517	22547	SARS-CoV-2_400_72_RIGHT_0	2	-
+MN908947.3	22466	22494	SARS-CoV-2_400_73_LEFT_0	1	+
+MN908947.3	22839	22866	SARS-CoV-2_400_73_RIGHT_0	1	-
+MN908947.3	22742	22774	SARS-CoV-2_400_74_LEFT_0	2	+
+MN908947.3	23119	23140	SARS-CoV-2_400_74_RIGHT_0	2	-
+MN908947.3	23078	23109	SARS-CoV-2_400_75_LEFT_1	1	+
+MN908947.3	23452	23478	SARS-CoV-2_400_75_RIGHT_1	1	-
+MN908947.3	23229	23258	SARS-CoV-2_400_76_LEFT_0	2	+
+MN908947.3	23609	23631	SARS-CoV-2_400_76_RIGHT_0	2	-
+MN908947.3	23563	23589	SARS-CoV-2_400_77_LEFT_0	1	+
+MN908947.3	23914	23944	SARS-CoV-2_400_77_RIGHT_0	1	-
+MN908947.3	23823	23853	SARS-CoV-2_400_78_LEFT_0	2	+
+MN908947.3	24209	24231	SARS-CoV-2_400_78_RIGHT_0	2	-
+MN908947.3	24160	24189	SARS-CoV-2_400_79_LEFT_0	1	+
+MN908947.3	24535	24560	SARS-CoV-2_400_79_RIGHT_0	1	-
+MN908947.3	24442	24468	SARS-CoV-2_400_80_LEFT_0	2	+
+MN908947.3	24815	24839	SARS-CoV-2_400_80_RIGHT_0	2	-
+MN908947.3	24751	24774	SARS-CoV-2_400_81_LEFT_0	1	+
+MN908947.3	25120	25151	SARS-CoV-2_400_81_RIGHT_0	1	-
+MN908947.3	25053	25082	SARS-CoV-2_400_82_LEFT_0	2	+
+MN908947.3	25423	25452	SARS-CoV-2_400_82_RIGHT_0	2	-
+MN908947.3	25372	25402	SARS-CoV-2_400_83_LEFT_0	1	+
+MN908947.3	25744	25777	SARS-CoV-2_400_83_RIGHT_0	1	-
+MN908947.3	25653	25680	SARS-CoV-2_400_84_LEFT_2	2	+
+MN908947.3	26048	26072	SARS-CoV-2_400_84_RIGHT_2	2	-
+MN908947.3	26011	26039	SARS-CoV-2_400_85_LEFT_0	1	+
+MN908947.3	26382	26411	SARS-CoV-2_400_85_RIGHT_0	1	-
+MN908947.3	26339	26362	SARS-CoV-2_400_86_LEFT_0	2	+
+MN908947.3	26730	26756	SARS-CoV-2_400_86_RIGHT_0	2	-
+MN908947.3	26593	26621	SARS-CoV-2_400_87_LEFT_1	1	+
+MN908947.3	26989	27009	SARS-CoV-2_400_87_RIGHT_1	1	-
+MN908947.3	26958	26981	SARS-CoV-2_400_88_LEFT_2	2	+
+MN908947.3	27349	27376	SARS-CoV-2_400_88_RIGHT_2	2	-
+MN908947.3	27200	27226	SARS-CoV-2_400_89_LEFT_2	1	+
+MN908947.3	27583	27603	SARS-CoV-2_400_89_RIGHT_0	1	-
+MN908947.3	27530	27558	SARS-CoV-2_400_90_LEFT_0	2	+
+MN908947.3	27927	27950	SARS-CoV-2_400_90_RIGHT_0	2	-
+MN908947.3	27832	27860	SARS-CoV-2_400_91_LEFT_0	1	+
+MN908947.3	28209	28237	SARS-CoV-2_400_91_RIGHT_0	1	-
+MN908947.3	28135	28166	SARS-CoV-2_400_92_LEFT_0	2	+
+MN908947.3	28513	28539	SARS-CoV-2_400_92_RIGHT_0	2	-
+MN908947.3	28473	28493	SARS-CoV-2_400_93_LEFT_0	1	+
+MN908947.3	28849	28873	SARS-CoV-2_400_93_RIGHT_0	1	-
+MN908947.3	28808	28829	SARS-CoV-2_400_94_LEFT_0	2	+
+MN908947.3	29203	29224	SARS-CoV-2_400_94_RIGHT_0	2	-
+MN908947.3	29159	29183	SARS-CoV-2_400_95_LEFT_0	1	+
+MN908947.3	29538	29559	SARS-CoV-2_400_95_RIGHT_0	1	-
+MN908947.3	29462	29486	SARS-CoV-2_400_96_LEFT_1	2	+
+MN908947.3	29840	29873	SARS-CoV-2_400_96_RIGHT_0	2	-
diff --git a/tests/data/primers/empty.fasta b/tests/data/primers/empty.fasta
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/primers/synthetic-no-match.fasta b/tests/data/primers/synthetic-no-match.fasta
new file mode 100644
index 0000000..1dece2a
--- /dev/null
+++ b/tests/data/primers/synthetic-no-match.fasta
@@ -0,0 +1,8 @@
+>read_number_1_LEFT
+AGCTTAGCTA
+>read_number_1_RIGHT
+TGCATGCAAT
+>read_number_2_LEFT
+CGTACGTAGC
+>read_number_2_RIGHT
+TACGATCGTA
\ No newline at end of file
diff --git a/tests/data/primers/synthetic.bed b/tests/data/primers/synthetic.bed
new file mode 100644
index 0000000..5066d6c
--- /dev/null
+++ b/tests/data/primers/synthetic.bed
@@ -0,0 +1,4 @@
+MN908947.3	0	10	SARS-CoV-2_400_1_LEFT_1	1	+
+MN908947.3	110	120	SARS-CoV-2_400_1_RIGHT_1	1	-
+MN908947.3	80	90	SARS-CoV-2_400_2_LEFT_1	1	+
+MN908947.3	190	200	SARS-CoV-2_400_2_RIGHT_1	1	-
\ No newline at end of file
diff --git a/tests/data/primers/synthetic.fasta b/tests/data/primers/synthetic.fasta
new file mode 100644
index 0000000..54bd943
--- /dev/null
+++ b/tests/data/primers/synthetic.fasta
@@ -0,0 +1,8 @@
+>read_number_1_LEFT
+GGAAATTCAT
+>read_number_1_RIGHT
+ACCAGGAGCC
+>read_number_2_LEFT
+CTACCCTCGA
+>read_number_2_RIGHT
+ATTCTACGTA
\ No newline at end of file
diff --git a/tests/data/references/SARS-CoV-2-reference.fasta b/tests/data/references/SARS-CoV-2-reference.fasta
new file mode 100644
index 0000000..a1da7b7
--- /dev/null
+++ b/tests/data/references/SARS-CoV-2-reference.fasta
@@ -0,0 +1,429 @@
+>MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome
+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA
+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC
+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG
+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC
+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC
+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG
+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT
+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC
+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT
+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA
+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG
+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG
+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG
+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG
+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA
+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA
+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC
+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA
+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT
+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG
+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG
+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA
+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA
+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA
+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT
+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAG
+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG
+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC
+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG
+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG
+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT
+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA
+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT
+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA
+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC
+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT
+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA
+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT
+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA
+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT
+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA
+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA
+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG
+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT
+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA
+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC
+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT
+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA
+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA
+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG
+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT
+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT
+TATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA
+TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT
+GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTA
+AACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAAC
+TCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCA
+GATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTG
+ATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT
+GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAAT
+GGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTA
+TTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGC
+AGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAA
+TATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA
+CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA
+TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTT
+TCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAG
+AACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACA
+ACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC
+CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTA
+AGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA
+ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGT
+AAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTG
+ATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA
+TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAA
+ATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTA
+ACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT
+GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGT
+GGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT
+TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTC
+ACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGT
+GAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAG
+ACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG
+TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG
+TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAAC
+CATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAA
+CCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGT
+GATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAAC
+CTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG
+TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGA
+ATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGA
+AAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
+TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
+ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
+AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
+TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
+CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
+TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
+TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
+GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
+TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
+TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
+ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
+TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
+TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
+GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
+GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
+GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
+GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
+TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
+AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
+GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
+AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
+AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
+AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGT
+TGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTT
+ACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTG
+GTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT
+ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAG
+AATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAG
+CACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTT
+TGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAA
+ATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA
+ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC
+ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGC
+ACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACAC
+CATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTT
+TAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT
+GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACC
+TTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC
+AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCA
+GGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTG
+GTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA
+CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTC
+CTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTT
+ACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT
+CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGG
+TTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG
+CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTAC
+GCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGC
+TACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTC
+TTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC
+ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT
+GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAG
+ATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGG
+ACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAG
+TTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTT
+ACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG
+TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCAT
+GCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTA
+CGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTT
+TCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTA
+ACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG
+CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA
+TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACA
+ATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTC
+AATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTC
+TGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC
+ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATA
+TGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT
+AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTG
+ACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCT
+CTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG
+TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTC
+TTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTG
+GTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA
+GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTA
+GCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC
+TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAA
+AGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTA
+GACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTA
+GTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA
+TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA
+GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTG
+AGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAA
+TGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACA
+ACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACAT
+TTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG
+TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCT
+GCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTA
+CACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACT
+TGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATC
+TATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT
+ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT
+ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGAT
+GCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGT
+GTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGG
+TGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA
+AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAG
+TCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA
+GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCA
+CAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAA
+ATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT
+GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTC
+CAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCA
+ACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC
+ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATG
+ATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT
+AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAA
+GATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTG
+TAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGT
+TGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA
+AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG
+ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGG
+ACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAG
+CTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGT
+ATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTC
+AGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT
+GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTC
+AGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAG
+ACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCT
+AACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGAC
+TTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC
+TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC
+TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAG
+GAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAG
+TGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTT
+AGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA
+GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACC
+AGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC
+ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTAC
+AACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGC
+ATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT
+GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTA
+TGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATAC
+AATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC
+GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTA
+TAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA
+CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGAT
+AACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTG
+TTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTT
+ATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT
+GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT
+GTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAA
+TACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGT
+GATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTG
+AGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCT
+TTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT
+AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACC
+GAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATT
+AAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATC
+TCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGG
+GACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT
+GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT
+AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACAT
+TAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGA
+AATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTAC
+ATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT
+TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCC
+TGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA
+GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCAC
+AAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTA
+TAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC
+TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTA
+ATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTT
+GCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC
+TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACA
+CTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT
+CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAA
+GAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTG
+TTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTA
+TGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA
+CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA
+GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATC
+TATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTT
+TCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTA
+TGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCA
+TGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT
+AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAA
+AGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAA
+CCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGT
+GACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTG
+TATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAG
+AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC
+ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTC
+CATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTAT
+AACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCT
+TATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA
+ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGG
+ACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA
+GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTA
+AACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGA
+CTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA
+CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTAT
+TTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCC
+CAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG
+AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTA
+AACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT
+AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTA
+CTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTA
+CAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTT
+ATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG
+ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA
+AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCT
+ATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTC
+GCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTA
+TACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTAC
+GGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT
+TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAA
+ATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCT
+AGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATG
+GGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTG
+GATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG
+GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA
+AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAG
+GTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAA
+CAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCA
+ATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA
+GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATG
+TCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC
+TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCC
+CTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCAT
+TTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGC
+GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTC
+AAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTA
+TTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT
+TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCA
+GGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA
+ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTT
+GAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATT
+GTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTG
+TTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC
+ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTAT
+GCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTG
+ATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTC
+TAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGA
+GATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACT
+TTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT
+TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAAC
+AAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTC
+TGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGA
+GATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAAC
+CAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTA
+CTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC
+TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACT
+CAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTG
+GTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTAC
+CACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCA
+ACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAA
+TAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC
+AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCA
+TTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATT
+GCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACC
+TTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGG
+ACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTG
+GAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA
+AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCA
+CAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATA
+TCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAG
+TTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCT
+ACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTA
+TGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA
+GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTT
+TCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACA
+CATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACC
+TGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTA
+GGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTG
+CCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC
+ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGT
+ATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACG
+ACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGA
+ATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTC
+GCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCT
+TGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT
+GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTG
+GCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAAT
+AATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTT
+CTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTA
+CTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATG
+GGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA
+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC
+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT
+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC
+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT
+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT
+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT
+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA
+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC
+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA
+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT
+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC
+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC
+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT
+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA
+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG
+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG
+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC
+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA
+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC
+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA
+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA
+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT
+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT
+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT
+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG
+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA
+GATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG
+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC
+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA
+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT
+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA
+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG
+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG
+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT
+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC
+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG
+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT
+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA
+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC
+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT
+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA
+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG
+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG
+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC
+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA
+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG
+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC
+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC
+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT
+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG
+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT
+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT
+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC
+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT
+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAA
diff --git a/tests/data/references/empty.fasta b/tests/data/references/empty.fasta
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/references/indexed_ref.mmi b/tests/data/references/indexed_ref.mmi
new file mode 100644
index 0000000..58bdbaa
Binary files /dev/null and b/tests/data/references/indexed_ref.mmi differ
diff --git a/tests/data/references/synthetic.fasta b/tests/data/references/synthetic.fasta
new file mode 100644
index 0000000..05cae80
--- /dev/null
+++ b/tests/data/references/synthetic.fasta
@@ -0,0 +1,2 @@
+>homemade_reference
+GGAAATTCATTCTAGGGAGTGACGTGGACCCCGGATTGATACAGGATCACATGTAGAAAAGGTAGTCGGACAAGTTACCGCTACCCTCGACCTCGTGGGGCCTACACCTGACCAGGAGCCGCACTGACAGGACCACGCTTCATCATAACTTTGGCGGCTGGGCAACGGATTTAATGGTACATAACTCATCATTCTACGTA
\ No newline at end of file
diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/e2e/config_parser.py b/tests/e2e/config_parser.py
new file mode 100644
index 0000000..0bd1921
--- /dev/null
+++ b/tests/e2e/config_parser.py
@@ -0,0 +1,58 @@
+"""
+This module provides a configuration parser for reading and parsing YAML configuration files.
+
+Classes
+-------
+ConfigParser
+    A class to parse YAML configuration files and return the configuration as a dictionary.
+"""
+
+import yaml
+
+from AmpliGone.log import log as logger
+
+
+class ConfigParser:
+    """
+    Reader for the YAML files that describe the test cases.
+
+    Attributes
+    ----------
+    logger : logging.Logger
+        The logger imported from `AmpliGone.log`.
+
+    Methods
+    -------
+    parse_config(config_file: str) -> dict[str, dict[str, dict[str, str]]]
+        Loads a YAML file with `yaml.safe_load` and returns the result.
+    """
+
+    def __init__(self) -> None:
+        """
+        Stores the module-level AmpliGone logger on the instance.
+        """
+        self.logger = logger
+
+    def parse_config(self, config_file: str) -> dict[str, dict[str, dict[str, str]]]:
+        """
+        Loads the given YAML configuration file and returns its contents.
+
+        Parameters
+        ----------
+        config_file : str
+            Path of the YAML file; it is opened as UTF-8 text.
+
+        Returns
+        -------
+        dict[str, dict[str, dict[str, str]]]
+            Whatever `yaml.safe_load` produced for the file.
+        """
+        with open(config_file, "r", encoding="utf-8") as handle:
+            parsed: dict[str, dict[str, dict[str, str]]] = yaml.safe_load(handle)
+        return parsed
+
+    def edit_config(self, config_file: str) -> None:
+        """
+        Not implemented; always raises NotImplementedError.
+        """
+        raise NotImplementedError("Editing configuration files is not yet supported.")
diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py
new file mode 100644
index 0000000..14d1116
--- /dev/null
+++ b/tests/e2e/test_e2e.py
@@ -0,0 +1,181 @@
+"""
+Module for end-to-end testing of the AmpliGone CLI program.
+
+This module contains the TestE2e class which provides methods to run end-to-end tests
+on the AmpliGone CLI program. The tests are configured using a YAML file and include
+various scenarios such as real-world data, synthetic data, and edge cases.
+"""
+
+import logging
+import os
+from typing import Generator
+
+import pytest
+
+from AmpliGone.__main__ import main
+from tests.e2e.config_parser import ConfigParser
+
+
+class TestE2e:  # pylint: disable=too-few-public-methods
+    """
+    Class for end-to-end testing of the AmpliGone CLI program.
+
+    This class provides methods to run end-to-end tests on the AmpliGone CLI program.
+    The tests are configured using a YAML file and include various scenarios such as
+    real-world data, synthetic data, and edge cases.
+
+    Attributes
+    ----------
+    config_parser : ConfigParser
+        An instance of the ConfigParser class to parse the configuration file.
+    config : dict
+        A dictionary containing the parsed configuration data.
+
+    Methods
+    -------
+    _cleanup()
+        Fixture to clean up output files after tests are run.
+    _order_fastq_by_name(fastq_lines)
+        Orders the lines of a FASTQ file by read name.
+    _compare_outputs(output_file, expected_output_file)
+        Compares the output file with the expected output file.
+    _args_to_list(args)
+        Converts a dictionary of arguments to a list.
+    test_ampligone(test_case, caplog)
+        Runs the AmpliGone CLI program with the specified test case and captures logs.
+    """
+
+    config_parser = ConfigParser()
+    config: dict[str, dict[str, dict[str, str]]] = config_parser.parse_config(
+        "tests/config.yaml"
+    )
+
+    @pytest.fixture(
+        scope="session", autouse=True
+    )  # session scope is used so that the clean-up runs only once
+    def _cleanup(self) -> Generator[None, None, None]:
+        """
+        Session-wide fixture that deletes the generated output files.
+
+        After the `yield`, each configured `--output` path that exists is removed.
+
+        Yields
+        ------
+        None
+        """
+        yield  # everything below is teardown code
+        for spec in self.config.values():
+            generated = spec["pipeline_args"]["--output"]
+            if not os.path.exists(generated):
+                continue
+            os.remove(generated)
+
+    def _order_fastq_by_name(self, fastq_lines: list[str]) -> list[str]:
+        """
+        Sorts FASTQ records by their first line (the read name line).
+        Used because the order of reads in a FASTQ file is not guaranteed.
+
+        Parameters
+        ----------
+        fastq_lines : list of str
+            The lines of the FASTQ file.
+
+        Returns
+        -------
+        list of str
+            The same lines, regrouped per four-line record in sorted order.
+        """
+        # every record is taken as four consecutive lines, keyed by its first line
+        records = [fastq_lines[pos : pos + 4] for pos in range(0, len(fastq_lines), 4)]
+        records.sort(key=lambda record: record[0])
+        return [line for record in records for line in record]
+
+    def _compare_outputs(self, output_file: str, expected_output_file: str) -> None:
+        """
+        Compares the output file with the expected output file.
+        Both FASTQ files are sorted by read name and must then be identical line by line.
+
+        Parameters
+        ----------
+        output_file : str
+            The path to the output file generated by the AmpliGone CLI program.
+        expected_output_file : str
+            The path to the expected output file.
+
+        Raises
+        ------
+        AssertionError
+            If the files differ in their number of lines or in the content of any line.
+        """
+        with open(output_file, "r", encoding="utf-8") as output, open(
+            expected_output_file, "r", encoding="utf-8"
+        ) as expected_output:
+            # the fastq file is unordered, so we need to sort it by the read name
+            got = self._order_fastq_by_name(output.readlines())
+            want = self._order_fastq_by_name(expected_output.readlines())
+        if len(got) != len(want):  # zip() below would silently ignore extra lines
+            raise AssertionError(f"Output: {len(got)} lines, Expected: {len(want)}")
+        for line1, line2 in zip(got, want):
+            if line1 != line2:
+                raise AssertionError(f"Output: {line1}, Expected Output: {line2}")
+
+    def _args_to_list(self, args: dict[str, str]) -> list[str]:
+        """
+        Flattens the argument mapping into a command-line style list.
+
+        Parameters
+        ----------
+        args : dict of str
+            Maps each option flag to its value.
+
+        Returns
+        -------
+        list of str
+            Every flag immediately followed by its value, in dictionary order.
+        """
+        return [token for pair in args.items() for token in pair]
+
+    @pytest.mark.parametrize(
+        "test_case", list(config.values()), ids=list(config.keys())
+    )
+    def test_ampligone(
+        self, test_case: dict[str, dict[str, str]], caplog: pytest.LogCaptureFixture
+    ) -> None:
+        """
+        Runs the AmpliGone CLI program once per test case in tests/config.yaml.
+
+        Cases flagged with `fails` must exit through `SystemExit`; all other cases
+        have their output compared against the case's comparison file.
+
+        Parameters
+        ----------
+        test_case : dict
+            The test case configuration.
+        caplog : pytest.LogCaptureFixture
+            The log capture fixture.
+
+        Raises
+        ------
+        AssertionError
+            If the output file does not match the expected output file or if the
+            expected log message is not found in the logs.
+        """
+        pipeline_args = test_case["pipeline_args"]
+        cli_args = self._args_to_list(pipeline_args)
+        test_args = test_case["test_args"]
+
+        # caplog.at_level sets the "rich" logger to DEBUG inside this block
+        with caplog.at_level(logging.DEBUG, logger="rich"):
+            if not test_args["fails"]:
+                main(cli_args)
+                self._compare_outputs(
+                    pipeline_args["--output"], test_args["comparison_file"]
+                )
+            else:
+                with pytest.raises(SystemExit):
+                    main(cli_args)
+
+        # the log check is optional: only cases that configure a message get it
+        wanted_message = test_args.get("expected_log_message", None)
+        if wanted_message:
+            assert wanted_message.lower() in caplog.text.lower()
diff --git a/tests/unit/test_args.py b/tests/unit/test_args.py
new file mode 100644
index 0000000..aecd5ff
--- /dev/null
+++ b/tests/unit/test_args.py
@@ -0,0 +1,143 @@
+"""
+Unit tests for the `get_args` function in the AmpliGone module.
+
+This module contains unit tests for the `get_args` function, which parses command-line arguments for the AmpliGone pipeline.
+
+Test Scenarios
+--------------
+- Test with the `--help` flag to ensure it raises `SystemExit`.
+- Test with no arguments to ensure it raises `SystemExit`.
+- Test with invalid arguments to ensure it raises `SystemExit`.
+- Test with necessary arguments to ensure they are parsed correctly.
+
+Classes
+-------
+TestArgs
+    A class containing unit tests for the `get_args` function.
+"""
+
+import pytest
+
+from AmpliGone.args import get_args
+from tests.e2e.config_parser import ConfigParser
+
+
+class TestArgs:
+    """
+    Unit tests for the `get_args` function in the AmpliGone module.
+
+    This class contains unit tests for the `get_args` function,
+    which parses command-line arguments for the AmpliGone pipeline.
+
+    Attributes
+    ----------
+    config_parser : ConfigParser
+        An instance of the ConfigParser class to parse the configuration file.
+    config : dict
+        The parsed configuration dictionary.
+    pipeline_args : dict
+        The dictionary of pipeline arguments from the configuration.
+    happy_arg_list : list
+        The list of arguments for the happy flow scenario.
+
+    Methods
+    -------
+    test_help()
+        Test that the `get_args` function raises `SystemExit` when the `--help` flag is provided.
+    test_no_args()
+        Test that the `get_args` function raises `SystemExit` when no arguments are provided.
+    test_invalid_args()
+        Test that the `get_args` function raises `SystemExit` when invalid arguments are provided.
+    test_necessary_args()
+        Test that the `get_args` function correctly parses the necessary arguments.
+    """
+
+    config_parser = ConfigParser()
+    config = config_parser.parse_config("tests/config.yaml")
+    pipeline_args = config["happy_sars_cov_2"]["pipeline_args"]
+    happy_arg_list = [item for pain in pipeline_args.items() for item in pain]
+
+    def test_help(self) -> None:
+        """
+        Check that asking for help ends the program.
+
+        `get_args` receives only the `--help` flag and is expected to raise `SystemExit`.
+
+        Parameters
+        ----------
+        self : TestArgs
+            The instance of the test class.
+
+        Returns
+        -------
+        None
+        """
+        help_only = ["--help"]
+        with pytest.raises(SystemExit):
+            get_args(help_only)
+
+    def test_no_args(self) -> None:
+        """
+        Check that an empty argument list is rejected.
+
+        `get_args` receives an empty list and is expected to raise `SystemExit`.
+
+        Parameters
+        ----------
+        self : TestArgs
+            The instance of the test class.
+
+        Returns
+        -------
+        None
+        """
+        no_arguments: list[str] = []
+        with pytest.raises(SystemExit):
+            get_args(no_arguments)
+
+    def test_invalid_args(self) -> None:
+        """
+        Check that an unknown flag is rejected.
+
+        `get_args` receives only `--invalid` and is expected to raise `SystemExit`.
+
+        Parameters
+        ----------
+        self : TestArgs
+            The instance of the test class.
+
+        Returns
+        -------
+        None
+        """
+        unknown_flag = ["--invalid"]
+        with pytest.raises(SystemExit):
+            get_args(unknown_flag)
+
+    def test_necessary_args(self) -> None:
+        """
+        Check that the happy-flow arguments from the configuration are parsed as given.
+
+        Each parsed value whose name matches a configured flag is compared with that flag's value.
+
+        Parameters
+        ----------
+        self : TestArgs
+            The instance of the test class.
+
+        Returns
+        -------
+        None
+            Nothing is returned; a mismatch makes one of the assertions fail.
+        """
+        # get_args returns a Namespace object, so we need to convert it to a dictionary
+        parsed = vars(get_args(self.happy_arg_list))
+        # remove the '--' from the configured flags so they match the parsed names
+        expected = {flag[2:]: value for flag, value in self.pipeline_args.items()}
+        for dest, parsed_value in parsed.items():
+            if dest not in expected:
+                continue
+            if dest == "amplicon_type":
+                assert parsed_value == expected[dest]
+            else:  # parsed_value has absolute path, expected value has relative path
+                assert expected[dest] in parsed_value
diff --git a/tests/unit/test_cut_reads.py b/tests/unit/test_cut_reads.py
new file mode 100644
index 0000000..7511c42
--- /dev/null
+++ b/tests/unit/test_cut_reads.py
@@ -0,0 +1,342 @@
+"""
+Unit tests for the `cut_reads` function in the AmpliGone module.
+
+This module contains unit tests for the `cut_reads` function,
+which processes sequencing reads by cutting them based on primer locations and reference mapping.
+
+Extra tests that could be added if deemed necessary:
+- Tests with faulty inputs, like an empty DataFrame, invalid sequences or qualities.
+- Tests with different parameters, like different scoring thresholds or presets.
+
+
+Test Scenarios
+--------------
+- Test with reads that are too short to be processed.
+- Test with no primers available for the reference.
+- Test with valid data and different amplicon types.
+- Test with primers partially on the read.
+- Test with primers not on the read.
+
+Classes
+-------
+TestCutReads
+    A class containing unit tests for the `cut_reads` function.
+"""
+
+from collections import defaultdict
+
+import pandas as pd
+import pytest
+
+from AmpliGone.cut_reads import cut_reads
+
+# Length of the read should theoretically be ~42bp because the default k-mer size for the short reads preset (-sr) is 21.
+# However, in practice this does not work because the -sr preset includes a `-s 40` parameter, which means that
+# the scoring threshold is set to 40. This score is very hard to achieve with only 42bp reads.
+# This means that for testing purposes reads should be around 50-100bp long.
+# To make sure everything works as expected, the read length that will be used for testing is 100bp.
+
+AMPLICON_TYPES = (
+    "end-to-end",
+    "end-to-mid",
+    "fragmented",
+)  # cannot be in the class because the pytest.mark.parametrize decorator needs to access it before the class is created
+
+
+class TestCutReads:
+    """
+    Unit tests for the `cut_reads` function in the AmpliGone module.
+
+    This class contains unit tests for the `cut_reads` function,
+    which processes sequencing reads by cutting them based on primer locations and reference mapping.
+
+    Attributes
+    ----------
+    HAPPY_SEQ : str
+        A sample sequence used for testing.
+    HAPPY_QUAL : str
+        A sample quality string corresponding to the HAPPY_SEQ.
+    reference : str
+        Repository-relative path to the synthetic reference FASTA used for testing.
+    preset : str
+        The preset used for minimap2 alignment.
+    scoring : list of int
+        The scoring matrix used for minimap2 alignment.
+    fragment_lookaround_size : int
+        The number of bases to look around a fragment when cutting reads.
+
+    Methods
+    -------
+    test_cut_reads_too_short()
+        Test that the `cut_reads` function skips reads that are too short to be processed.
+    test_cut_reads_no_primers()
+        Test that the `cut_reads` function handles cases where no primers are available for the reference.
+    test_cut_reads_happy(amplicon_type)
+        Test a happy path scenario for the `cut_reads` function with different amplicon types.
+    test_cut_reads_primer_half_on_read(amplicon_type)
+        Test a scenario where the FW and RV primers are only partially on the read.
+    test_cut_reads_wrong_primers(amplicon_type)
+        Test a scenario where the primers are not on the read.
+    """
+
+    HAPPY_SEQ = "GGAAATTCATTCTAGGGAGTGACGTGGACCCCGGATTGATACAGGATCACATGTAGAAAAGGTAGTCGGACAAGTTACCGCTACCCTCGACCTCGTGGGG"
+    HAPPY_QUAL = "EE10%-1#-@7F&@?7(13;-$)A7.7/3(I(.9)&//,$G9?HA'DG=/;3C)2:@C!2/#;8.#7'98AC;FG>E>;E'>'$100G&44763?0,@7I"
+
+    reference = "tests/data/references/synthetic.fasta"
+    preset = "sr"
+    scoring: list[int] = []
+    fragment_lookaround_size = 10000
+
+    def test_cut_reads_too_short(self) -> None:
+        """
+        Test that the `cut_reads` function skips reads that are too short to be processed.
+
+        This test ensures that reads shorter than the minimum required length (42bp) are not processed
+        and are skipped by the `cut_reads` function. It might be useful to add a warning message in the future,
+        for reads under 100bp.
+
+        This does not raise an error, but returns an empty DataFrame.
+
+        Parameters
+        ----------
+        self : TestCutReads
+            The instance of the test class containing the test data and parameters.
+
+        Returns
+        -------
+        None
+            This function does not return any value. It asserts that the result DataFrame is empty.
+        """
+
+        short_seq = self.HAPPY_SEQ[:20] + self.HAPPY_SEQ[-20:]
+        short_qual = self.HAPPY_QUAL[:20] + self.HAPPY_QUAL[-20:]
+
+        data: tuple[pd.DataFrame, int] = (
+            pd.DataFrame(
+                {
+                    "Readname": ["read_number_1"],
+                    "Sequence": [short_seq],
+                    "Qualities": [short_qual],
+                }
+            ),
+            0,
+        )
+        primer_1 = set(range(0, 5))
+        primer_2 = set(range(15, 20))
+        primer_sets = (
+            defaultdict(set, {"synthetic_reference": primer_1}),
+            defaultdict(set, {"synthetic_reference": primer_2}),
+        )
+        result = cut_reads(
+            data,
+            primer_sets,
+            self.reference,
+            self.preset,
+            self.scoring,
+            self.fragment_lookaround_size,
+            "end-to-end",
+        )
+        assert result.empty
+
+    def test_cut_reads_no_primers(self) -> None:
+        """
+        Test that the `cut_reads` function handles cases where no primers are available for the reference.
+
+        This test ensures that the `cut_reads` function can process reads even when no primers are available for the reference.
+        It verifies that the function does not remove any coordinates in such cases.
+
+        Parameters
+        ----------
+        self : TestCutReads
+            The instance of the test class containing the test data and parameters.
+
+        Returns
+        -------
+        None
+            This function does not return any value. It asserts that the "Removed_coordinates" column in the result DataFrame is empty.
+        """
+        data: tuple[pd.DataFrame, int] = (
+            pd.DataFrame(
+                {
+                    "Readname": ["read_number_1"],
+                    "Sequence": [self.HAPPY_SEQ],
+                    "Qualities": [self.HAPPY_QUAL],
+                }
+            ),
+            0,
+        )
+        empty_primer_1: set = set()
+        empty_primer_2: set = set()
+        primer_sets = (
+            defaultdict(set, {"synthetic_reference": empty_primer_1}),
+            defaultdict(set, {"synthetic_reference": empty_primer_2}),
+        )
+        result = cut_reads(
+            data,
+            primer_sets,
+            self.reference,
+            self.preset,
+            self.scoring,
+            self.fragment_lookaround_size,
+            "end-to-end",
+        )
+        assert not result["Removed_coordinates"].iloc[0]  # empty list
+
+    @pytest.mark.parametrize("amplicon_type", AMPLICON_TYPES)
+    def test_cut_reads_happy(self, amplicon_type: str) -> None:
+        """
+        Test a happy path scenario for the `cut_reads` function with different amplicon types.
+
+        NOTE(review): synthetic.fasta names its sequence `homemade_reference`, but the primer sets here
+        are keyed by `synthetic_reference`; confirm that `cut_reads` still finds these primers.
+
+        Parameters
+        ----------
+        self : TestCutReads
+            The instance of the test class containing the test data and parameters.
+        amplicon_type : str
+            The type of amplicon, either "end-to-end", "end-to-mid", or "fragmented".
+
+        Returns
+        -------
+        None
+            This function does not return any value. It asserts that the removed coordinates match the expected coordinates.
+        """
+        data: tuple[pd.DataFrame, int] = (
+            pd.DataFrame(
+                {
+                    "Readname": ["read_number_1"],
+                    "Sequence": [self.HAPPY_SEQ],
+                    "Qualities": [self.HAPPY_QUAL],
+                }
+            ),
+            0,
+        )
+        primer_1 = set(range(0, 10))
+        primer_2 = set(range(91, 101))
+        primer_sets = (
+            defaultdict(set, {"synthetic_reference": primer_1}),
+            defaultdict(set, {"synthetic_reference": primer_2}),
+        )
+        result = cut_reads(
+            data,
+            primer_sets,
+            self.reference,
+            self.preset,
+            self.scoring,
+            self.fragment_lookaround_size,
+            amplicon_type,
+        )
+        if amplicon_type in ("end-to-end", "fragmented"):
+            ete_coords: list[int] = result["Removed_coordinates"].iloc[0]
+            ete_expected_coords = list(primer_1) + list(primer_2)
+            assert sorted(ete_coords) == sorted(ete_expected_coords)
+        else:
+            assert amplicon_type == "end-to-mid"
+            etm_coords: list[int] = result["Removed_coordinates"].iloc[0]
+            etm_expected_coords = list(primer_1)
+            assert sorted(etm_coords) == sorted(etm_expected_coords)
+
+    @pytest.mark.parametrize("amplicon_type", AMPLICON_TYPES)
+    def test_cut_reads_primer_half_on_read(self, amplicon_type: str) -> None:
+        """
+        Test a scenario where the FW and RV primers are only partially on the read.
+
+        This test ensures that the `cut_reads` function can handle cases where the forward (FW) and reverse (RV) primers
+        are only partially present on the read sequence.
+
+        Parameters
+        ----------
+        self : TestCutReads
+            The instance of the test class containing the test data and parameters.
+        amplicon_type : str
+            The type of amplicon, either "end-to-end", "end-to-mid", or "fragmented".
+
+        Returns
+        -------
+        None
+            This function does not return any value. It asserts that the removed coordinates match the expected coordinates.
+        """
+        seq = "ATCGC" + self.HAPPY_SEQ[5:-5] + "ATCGC"
+        data: tuple[pd.DataFrame, int] = (
+            pd.DataFrame(
+                {
+                    "Readname": ["read_number_1"],
+                    "Sequence": [seq],
+                    "Qualities": [self.HAPPY_QUAL],
+                }
+            ),
+            0,
+        )
+        primer_1 = set(range(0, 10))
+        primer_2 = set(range(91, 101))
+        primer_sets = (
+            defaultdict(set, {"synthetic_reference": primer_1}),
+            defaultdict(set, {"synthetic_reference": primer_2}),
+        )
+        result = cut_reads(
+            data,
+            primer_sets,
+            self.reference,
+            self.preset,
+            self.scoring,
+            self.fragment_lookaround_size,
+            amplicon_type,
+        )
+        if amplicon_type in ("end-to-end", "fragmented"):
+            ete_coords: list[int] = result["Removed_coordinates"].iloc[0]
+            ete_expected_coords = list(primer_1) + list(primer_2)
+            assert sorted(ete_coords) == sorted(ete_expected_coords)
+        else:
+            assert amplicon_type == "end-to-mid"
+            etm_coords: list[int] = result["Removed_coordinates"].iloc[0]
+            etm_expected_coords = list(primer_1)
+            assert sorted(etm_coords) == sorted(etm_expected_coords)
+
+    @pytest.mark.parametrize("amplicon_type", AMPLICON_TYPES)
+    def test_cut_reads_wrong_primers(self, amplicon_type: str) -> None:
+        """
+        Test a scenario where the primers are not on the read.
+
+        This test ensures that the `cut_reads` function can handle cases where the forward (FW) and reverse (RV) primers
+        are not present on the read sequence.
+
+        Parameters
+        ----------
+        self : TestCutReads
+            The instance of the test class containing the test data and parameters.
+        amplicon_type : str
+            The type of amplicon, either "end-to-end", "end-to-mid", or "fragmented".
+
+        Returns
+        -------
+        None
+            This function does not return any value. It asserts that no coordinates are removed.
+        """
+        seq = "ATCGCATCGC" + self.HAPPY_SEQ[10:-10] + "ATCGCATCGC"
+        data: tuple[pd.DataFrame, int] = (
+            pd.DataFrame(
+                {
+                    "Readname": ["read_number_1"],
+                    "Sequence": [seq],
+                    "Qualities": [self.HAPPY_QUAL],
+                }
+            ),
+            0,
+        )
+        primer_1 = set(range(0, 10))
+        primer_2 = set(range(91, 101))
+        primer_sets = (
+            defaultdict(set, {"synthetic_reference": primer_1}),
+            defaultdict(set, {"synthetic_reference": primer_2}),
+        )
+        result = cut_reads(
+            data,
+            primer_sets,
+            self.reference,
+            self.preset,
+            self.scoring,
+            self.fragment_lookaround_size,
+            amplicon_type,
+        )
+        assert not result["Removed_coordinates"].iloc[0]
diff --git a/tests/unit/test_cutlery.py b/tests/unit/test_cutlery.py
new file mode 100644
index 0000000..fdd3d94
--- /dev/null
+++ b/tests/unit/test_cutlery.py
@@ -0,0 +1,105 @@
+"""
+Unit tests for the `cutlery` module in the AmpliGone package.
+
+This module contains unit tests for the functions in the `cutlery` module, which provide utility functions for processing sequencing reads.
+
+Test Scenarios
+--------------
+- Test the `position_in_or_before_primer` function with various read positions.
+- Test the `position_in_or_after_primer` function with various read positions.
+
+Functions
+---------
+test_position_in_or_before_primer(read, result)
+    Tests the `position_in_or_before_primer` function with various read positions and expected results.
+test_position_in_or_after_primer(read, result)
+    Tests the `position_in_or_after_primer` function with various read positions and expected results.
+"""
+
+import pytest
+
+from AmpliGone import cutlery
+
+
+@pytest.mark.parametrize(
+    "read, result",
+    [
+        (22, True),
+        (25, True),
+        (10, False),
+        (26, False),
+    ],
+    ids=[
+        "before_within_lookaround",
+        "same_as_primerstart",
+        "before_outside_lookaround",
+        "higher_than_primerstart",
+    ],
+)
+def test_position_in_or_before_primer(read: int, result: bool) -> None:
+    """
+    Checks `cutlery.position_in_or_before_primer` for one read position.
+
+    The primer positions (25, 35) and a maximum lookaround of 10 are fixed for every case.
+
+    Parameters
+    ----------
+    read : int
+        The read position to test.
+    result : bool
+        The value the function is expected to return for this position.
+
+    Returns
+    -------
+    None
+    """
+    primer_span = (25, 35)
+    lookaround = 10
+    observed = cutlery.position_in_or_before_primer(read, primer_span, lookaround)
+    # a matching outcome means the case passed; anything else is reported below
+    if observed == result:
+        return
+    raise AssertionError(
+        f"Expected {result} but got {observed} while running cutlery.position_in_or_before_primer({read}, {primer_span}, {lookaround})"
+    )
+
+
+@pytest.mark.parametrize(
+    "read, result",
+    [
+        (22, False),
+        (10, False),
+        (25, True),
+        (26, True),
+    ],
+    ids=[
+        "before_within_lookaround",
+        "before_outside_lookaround",
+        "same_as_primerstart",
+        "higher_than_primerstart",
+    ],
+)
+def test_postition_in_or_after_primer(read: int, result: bool) -> None:
+    """
+    Tests the `position_in_or_after_primer` function with various read positions and expected results.
+
+    Parameters
+    ----------
+    read : int
+        The read position to test.
+    result : bool
+        The expected result indicating whether the read position is in or after the primer.
+
+    Returns
+    -------
+    None
+        This function does not return any value. It raises an AssertionError naming the tested call on a mismatch.
+    """
+    primer_positions = (25, 35)
+    max_lookaround = 10
+    outcome = cutlery.position_in_or_after_primer(
+        read, primer_positions, max_lookaround
+    )
+    if outcome != result:
+        raise AssertionError(
+            f"Expected {result} but got {outcome} while running cutlery.position_in_or_after_primer({read}, {primer_positions}, {max_lookaround})"
+        )
diff --git a/tests/unit/test_fasta2bed.py b/tests/unit/test_fasta2bed.py
new file mode 100644
index 0000000..88aa01d
--- /dev/null
+++ b/tests/unit/test_fasta2bed.py
@@ -0,0 +1,53 @@
+import os
+from typing import Generator
+
+import pandas as pd
+import pytest
+
+from AmpliGone.fasta2bed import main
+
+
+@pytest.fixture()
+def setup() -> Generator[tuple[str, str, str, str], None, None]:
+    """Yield (primer FASTA, reference FASTA, output BED, example BED) paths; remove the output afterwards."""
+    primers = "tests/data/primers/ARTIC-V5.3.2.fasta"
+    reference = "tests/data/references/SARS-CoV-2-reference.fasta"
+    example = "tests/data/primers/SARS-CoV-2-ARTIC-V5.3.2.scheme.bed"
+    output = "tests/data/primers/new.bed"
+    yield primers, reference, output, example
+    if not os.path.exists(output):
+        return
+    os.remove(output)
+
+
+class TestFasta2Bed:
+    """Tests for the `main` entry point imported from `AmpliGone.fasta2bed`."""
+
+    def compare_bed_files(self, result: str, example: str) -> None:
+        """
+        Compare two tab-separated BED files, ignoring columns 3 and 4.
+
+        Raises AssertionError when the remaining columns differ.
+        """
+        loaded = [
+            pd.read_csv(path, sep="\t", header=None) for path in (result, example)
+        ]
+        # Column 3 holds the names, which are not important.
+        # Column 4 is the score column: AmpliGone stores the alignment score there,
+        # while ARTIC (files used for testing) stores the primer pool.
+        generated, expected = (frame.drop(columns=[3, 4]) for frame in loaded)
+        if generated.equals(expected):
+            return
+        raise AssertionError(f"{result} and {example} are not equal")
+
+    def test_fasta2bed(self, setup: tuple[str, str, str, str]) -> None:
+        """
+        Run `main` on the fixture paths and check the written BED file.
+        """
+        fasta, reference, output, example = setup
+        main(["--primers", fasta, "--reference", reference, "--output", output])
+        if not os.path.exists(output):
+            raise AssertionError(f"{output} was not created")
+        if not os.path.getsize(output):
+            raise AssertionError(f"{output} is empty")
+        self.compare_bed_files(output, example)