diff --git a/src/hipscat_import/verification/arguments.py b/src/hipscat_import/verification/arguments.py index 86c139b1..fb7bee27 100644 --- a/src/hipscat_import/verification/arguments.py +++ b/src/hipscat_import/verification/arguments.py @@ -2,50 +2,51 @@ from __future__ import annotations -from dataclasses import dataclass, field from pathlib import Path -from typing import List, Optional -from hipscat.catalog import Catalog -from hipscat.io.validation import is_valid_catalog +import attrs from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments - - -@dataclass -class VerificationArguments(RuntimeArguments): - """Data class for holding verification arguments""" - - ## Input - input_catalog_path: str | Path | UPath | None = None - """Path to an existing catalog that will be inspected.""" - input_catalog: Optional[Catalog] = None - """In-memory representation of a catalog. If not provided, it will be loaded - from the input_catalog_path.""" - - ## Verification options - field_distribution_cols: List[str] = field(default_factory=list) - """List of fields to get the overall distribution for. e.g. ["ra", "dec"]. - Should be valid columns in the parquet files.""" - - def __post_init__(self): - self._check_arguments() - - def _check_arguments(self): - super()._check_arguments() - if not self.input_catalog_path and not self.input_catalog: - raise ValueError("input catalog is required (either input_catalog_path or input_catalog)") - if not self.input_catalog: - if not is_valid_catalog(self.input_catalog_path): - raise ValueError("input_catalog_path not a valid catalog") - self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path) - if not self.input_catalog_path: - self.input_catalog_path = self.input_catalog.catalog_path - +# from hipscat_import.runtime_arguments import RuntimeArguments + + +def _dir_exists(instance: VerificationArguments, attribute: attrs.Attribute, value: UPath): + """This function will be used as a validator for attributes of VerificationArguments.""" + if not value.is_dir(): + raise ValueError(f"{attribute.name} must be an existing directory") + + +def _path_exists(instance: VerificationArguments, attribute: attrs.Attribute, value: UPath): + """This function will be used as a validator for attributes of VerificationArguments.""" + if not value.exists(): + raise ValueError(f"{attribute.name} must be an existing file or directory") + + +@attrs.define(kw_only=True) +class VerificationArguments: + """Container for verification arguments.""" + + input_catalog_path: str | Path | UPath = attrs.field(converter=UPath, validator=_dir_exists) + """Path to an existing catalog that will be inspected. 
This must be a directory + containing the Parquet dataset and metadata sidecars.""" + output_path: str | Path | UPath = attrs.field(converter=UPath) + """Base path where output files should be written.""" + output_report_filename: str = attrs.field(factory=lambda: "verifier_results.csv") + """Filename for the verification report that will be generated.""" + output_distributions_filename: str = attrs.field(factory=lambda: "field_distributions.csv") + """Filename for the field distributions that will be calculated.""" + truth_total_rows: int | None = attrs.field(default=None) + """Total number of rows expected in this catalog.""" + truth_schema: str | Path | UPath | None = attrs.field( + default=None, + converter=attrs.converters.optional(UPath), + validator=attrs.validators.optional(_path_exists), + ) + """Path to a Parquet file or dataset containing the expected schema. + If you provided the 'use_schema_file' argument when importing the catalog, use the same value here. + If not provided, the catalog's _common_metadata file will be used as the source of truth. + """ + + # [FIXME] Connect this with RuntimeArguments.provenance_info. Even then, does this ever get written to file? def additional_runtime_provenance_info(self) -> dict: - return { - "pipeline": "verification pipeline", - "input_catalog_path": self.input_catalog_path, - "field_distribution_cols": self.field_distribution_cols, - } + return {"pipeline": "verification pipeline", **{k: str(v) for k, v in vars(self).items()}} diff --git a/src/hipscat_import/verification/run_verification.py b/src/hipscat_import/verification/run_verification.py index 2b7d5954..e8e33ea8 100644 --- a/src/hipscat_import/verification/run_verification.py +++ b/src/hipscat_import/verification/run_verification.py @@ -1,14 +1,539 @@ -"""Run pass/fail checks and generate verification report of existing hipscat table.""" +"""Run pass/fail tests and generate verification report of existing hipscat table.""" + +import collections +import datetime +from pathlib import Path + +import attrs +import hipscat.io.validation +import pandas as pd +import pyarrow.dataset from hipscat_import.verification.arguments import VerificationArguments -def run(args): - """Run verification pipeline.""" +def run(args: VerificationArguments, write_mode: str = "a"): + """Create a `Verifier` using `args`, run all tests, and write reports. + + Parameters + ---------- + args : VerificationArguments + Arguments to construct the Verifier. + write_mode : str, optional + Mode to be used when writing output files. + + Returns + ------- + Verifier + The `Verifier` instance used to perform the tests. The `results_df` and + `distributions_df` properties contain the same information as written reports. + + Raises + ------ + TypeError + If `args` is not provided or is not an instance of `VerificationArguments`. + """ if not args: raise TypeError("args is required and should be type VerificationArguments") if not isinstance(args, VerificationArguments): raise TypeError("args must be type VerificationArguments") - # implement everything else. 
- raise NotImplementedError("Verification not yet implemented.") + verifier = Verifier.from_args(args) + verifier.run(write_mode=write_mode) + + return verifier + + +Result = collections.namedtuple( + "Result", ["passed", "test", "target", "description", "affected_files", "datetime"] +) +"""Verification test result.""" + + +def now(): + """Return the current time as a string.""" + return datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S %Z") + + +@attrs.define +class Verifier: + """Class for verification tests. Instantiate using the `from_args` method.""" + + args: VerificationArguments = attrs.field() + """Arguments to use during verification.""" + files_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the actual files on disk.""" + metadata_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the _metadata file.""" + common_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the _common_metadata file.""" + truth_schema: pyarrow.Schema = attrs.field() + """Pyarrow schema to be used as truth. This will be loaded from args.truth_schema + if provided, and then hipscat columns and metadata will be added if not already present. + If args.truth_schema not provided, the catalog's _common_metadata file will be used.""" + truth_src: str = attrs.field() + """'truth_schema' if args.truth_schema was provided, else '_common_metadata'.""" + results: list[Result] = attrs.field(factory=list) + """List of results, one for each test that has been done.""" + _distributions_df: pd.DataFrame | None = attrs.field(default=None) + + @classmethod + def from_args(cls, args: VerificationArguments) -> "Verifier": + """Create a `Verifier` instance from the provided arguments. + + This method initializes the `Verifier` by setting up the necessary datasets + and schemas based on the input arguments. + + Parameters + ---------- + args : VerificationArguments + Arguments for the Verifier. + + Returns + ------- + Verifier : An instance of the `Verifier` class. 
+ """ + # make sure the output directory exists + args.output_path.mkdir(exist_ok=True, parents=True) + + # load a dataset from the actual files on disk + files_ds = pyarrow.dataset.dataset( + args.input_catalog_path, + ignore_prefixes=[ + ".", + "_", + "catalog_info.json", + "partition_info.csv", + "point_map.fits", + "provenance_info.json", + ], + ) + + # load a dataset from the _metadata file + metadata_ds = pyarrow.dataset.parquet_dataset(f"{args.input_catalog_path}/_metadata") + + # load a dataset from the _common_metadata file + common_ds = pyarrow.dataset.parquet_dataset(f"{args.input_catalog_path}/_common_metadata") + + # load the input schema if provided, else use the _common_metadata schema + if args.truth_schema is not None: + truth_schema = pyarrow.dataset.parquet_dataset(args.truth_schema).schema + truth_src = "truth_schema" + else: + truth_schema = common_ds.schema + truth_src = "_common_metadata" + + return cls( + args=args, + files_ds=files_ds, + metadata_ds=metadata_ds, + common_ds=common_ds, + truth_schema=truth_schema, + truth_src=truth_src, + ) + + def run(self, write_mode: str = "a") -> None: + """Run all tests and write reports.""" + self.test_is_valid_catalog() + self.test_file_sets() + self.test_num_rows() + self.test_rowgroup_stats(write_mode=write_mode) + self.test_schemas() + + self.write_results(write_mode=write_mode) + + @property + def results_df(self) -> pd.DataFrame: + """Verifier results as a dataframe.""" + return pd.DataFrame(self.results) + + def truth_schema_plus_common_metadata(self) -> pyarrow.Schema: + """Copy of `truth_schema` with hipscat fields and metadata added from `common_ds.schema`.""" + hipscat_cols = ["Norder", "Dir", "Npix", "_hipscat_index"] + new_fields = [ + self.common_ds.schema.field(fld) for fld in hipscat_cols if fld not in self.truth_schema.names + ] + + # use pandas metadata from common_ds but keep all other metadata from truth_schema + metadata = self.truth_schema.metadata or {} + metadata[b"pandas"] = self.common_ds.schema.metadata[b"pandas"] + + return pyarrow.schema(list(self.truth_schema) + new_fields).with_metadata(metadata) + + def test_file_sets(self) -> bool: + """Test that files in _metadata match the parquet files on disk. Add one `Result` to `results`. + + This is a simple test that can be especially useful to run after copying or moving + the catalog to a different local or cloud-based destination. + + Returns + ------- + bool: True if the file sets match, else False. + """ + test = "file sets" + description = "Test that files in _metadata match files on disk." + test_info = dict(test=test, description=description) + print(f"\nStarting: {description}") + + targets = "_metadata vs files on disk" + base_dir = str(self.args.input_catalog_path) + files_ds_files = [f.removeprefix(base_dir).strip("/") for f in self.files_ds.files] + metadata_ds_files = [f.removeprefix(base_dir).strip("/") for f in self.metadata_ds.files] + failed_files = list(set(files_ds_files).symmetric_difference(metadata_ds_files)) + passed = len(failed_files) == 0 + self._append_result(passed=passed, target=targets, affected_files=failed_files, **test_info) + + print(f"Result: {'PASSED' if passed else 'FAILED'}") + return passed + + def test_is_valid_catalog(self) -> bool: + """Test if the provided catalog is a valid HiPSCat catalog. Add one `Result` to `results`. + + Returns + ------- + bool: True if the catalog is valid, else False. + """ + test = "is valid catalog" + target = self.args.input_catalog_path + # [FIXME] How to get the hipscat version? 
+ description = "Test that this is a valid HiPSCat catalog using hipscat version ." + print(f"\nStarting: {description}") + + passed = hipscat.io.validation.is_valid_catalog(target, strict=True) + self._append_result(test=test, description=description, passed=passed, target=target.name) + print(f"Result: {'PASSED' if passed else 'FAILED'}") + return passed + + def test_num_rows(self) -> bool: + """Test the number of rows in the dataset. Add two `Results` to `results`. + + File footers are compared with _metadata and the user-supplied truth (if provided). + + Returns + ------- + bool: True if all checks pass, else False. + """ + test = "num rows" + description = "Test that number of rows are equal." + test_info = dict(test=test, description=description) + print(f"\nStarting: {description}") + + # get the number of rows in each file, indexed by file path. we treat this as truth. + files_df = self._load_nrows(self.files_ds, explicit_count=True) + + # check _metadata + targets = "_metadata vs file footers" + print(f"\t{targets}") + metadata_df = self._load_nrows(self.metadata_ds) + row_diff = files_df - metadata_df + failed_frags = row_diff.loc[row_diff.num_rows != 0].index.to_list() + passed = len(failed_frags) == 0 + self._append_result(passed=passed, target=targets, affected_files=failed_frags, **test_info) + + # check user-supplied total + if self.args.truth_total_rows is not None: + targets = "user total vs file footers" + print(f"\t{targets}") + _passed = self.args.truth_total_rows == files_df.num_rows.sum() + self._append_result(passed=_passed, target=targets, **test_info) + else: + _passed = True # this test did not fail. this is only needed for the return value. + + all_passed = all([passed, _passed]) + print(f"Result: {'PASSED' if all_passed else 'FAILED'}") + return all_passed + + def _load_nrows(self, dataset: pyarrow.dataset.Dataset, explicit_count: bool = False) -> pd.DataFrame: + """Load the number of rows in each file in the dataset. + + Parameters + ---------- + dataset : pyarrow.dataset.Dataset + The dataset from which to load the number of rows. + explicit_count : bool + If True, explicitly count the rows in each fragment. + + Returns + ------- + pd.DataFrame: A DataFrame with the number of rows per file, indexed by file path. + """ + nrows_df = pd.DataFrame( + columns=["num_rows", "frag_path"], + data=[ + ( + # [TODO] check cpu/ram usage to try to determine if there is a difference here + frag.count_rows() if explicit_count else frag.metadata.num_rows, + frag.path.removeprefix(str(self.args.input_catalog_path)).strip("/"), + ) + for frag in dataset.get_fragments() + ], + ) + nrows_df = nrows_df.set_index("frag_path").sort_index() + return nrows_df + + def test_rowgroup_stats(self, *, write_mode: str = "a") -> bool: + """Test that statistics were recorded for all row groups. Add a `Result` to `results`. + + If the test passes, `distributions_df` is written to file. + + Parameters + ---------- + write_mode : str + Mode to be used when writing the output file. + + Returns + ------- + bool: True if the test passes, else False. + """ + test = "rowgroup stats" + description = "Test that statstistics were recorded for all row groups." 
+ target = "_metadata" + test_info = dict(test=test, description=description, target=target) + print(f"\nStarting: {description}") + + common_truth_schema = self.truth_schema_plus_common_metadata() + self._distributions_df = None # start fresh + try: + assert set(self.distributions_df.index) == set(common_truth_schema.names) + except AssertionError: + passed = False + else: + passed = True + self._append_result(passed=passed, **test_info) + print(f"Result: {'PASSED' if passed else 'FAILED'}") + + if passed: + fout = self.args.output_path / self.args.output_distributions_filename + fout.parent.mkdir(exist_ok=True, parents=True) + header = False if (write_mode == "a" and fout.is_file()) else True + self.distributions_df.to_csv(fout, mode=write_mode, header=header, index=True) + print(f"Distributions written to {fout}") + + return passed + + @property + def distributions_df(self) -> pd.DataFrame: + """Distributions (min and max) for each column in the catalog. + + Returns + ------- + pd.DataFrame: A DataFrame with 'minimum' and 'maximum' indexed by column name. + + Raises + ------ + pyarrow.ArrowTypeError: If a schema mismatch is encountered while gathering statistics. + AssertionError: If the gathered statistics do not contain all expected columns. + """ + if self._distributions_df is not None: + return self._distributions_df + + print("Gathering distributions (min/max) for fields.") + common_truth_schema = self.truth_schema_plus_common_metadata() + + try: + rowgrp_stats = [ + rg.statistics for frag in self.metadata_ds.get_fragments() for rg in frag.row_groups + ] + except pyarrow.ArrowTypeError as exc: + msg = "Distributions failed due to mismatched schemas. Run 'test_schemas' to find problematic files." + raise pyarrow.ArrowTypeError(msg) from exc + + dist = pd.json_normalize(rowgrp_stats) + + # if dist doesn't contain all expected columns, fail now + msg = "Statistics not found" + assert set([c.split(".")[0] for c in dist.columns]) == set(common_truth_schema.names), msg + + min_ = dist[[f"{c}.min" for c in common_truth_schema.names]].min() + min_ = min_.rename(index={name: name.removesuffix(".min") for name in min_.index}) + + max_ = dist[[f"{c}.max" for c in common_truth_schema.names]].max() + max_ = max_.rename(index={name: name.removesuffix(".max") for name in max_.index}) + + self._distributions_df = pd.DataFrame({"minimum": min_, "maximum": max_}).rename_axis(index="field") + return self._distributions_df + + def test_schemas(self) -> bool: + """Test the equality of schemas and their metadata. Add `Result`s to `results`. + + This method performs up to four tests: + 1. Schema metadata includes a correct pandas schema. + 2. _common_metadata matches user-supplied `args.truth_schema` (schema and metadata), if provided. + 3. _metadata matches Verifier `truth_schema` (schema and metadata). + 4. File footers match Verifier `truth_schema` (schema and metadata). + + Returns + ------- + bool: True if all tests pass, else False. 
+ """ + test, testmd = "schema", "schema metadata" + test_info = dict(test=test, description="Test that schemas are equal.") + testmd_info = dict(test=testmd, description="Test that schema metadata is equal.") + print(f"\nStarting: {test_info['description']}") + + passed_cm = self._test_schema__common_metadata(test_info, testmd_info) + passed_md = self._test_schema__metadata(test_info, testmd_info) + passed_ff = self._test_schema_file_footers(test_info, testmd_info) + + all_passed = all([passed_cm, passed_md, passed_ff]) + print(f"Result: {'PASSED' if all_passed else 'FAILED'}") + return all_passed + + def _test_schema__common_metadata(self, test_info: dict, testmd_info: dict) -> bool: + """Test _common_metadata schema and metadata against the truth schema. + + This method performs up to two tests: + 1. Schema metadata includes a correct pandas schema. + 2. _common_metadata matches user-supplied `args.truth_schema` (schema and metadata), if provided. + + Parameters + ---------- + test_info : dict + Information related to the schema test. + testmd_info : dict + Information related to the metadata test. + + Returns + ------- + bool: True if all tests pass, else False. + """ + pandas_passed = self._test_schema__common_metadata_pandas() + + if self.truth_src == "_common_metadata": + # no input schema provided => _common_metadata is being used as truth, so skip the rest + return pandas_passed + + # an input schema was provided as truth, so we need to test _common_metadata against it + targets = f"_common_metadata vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + # check schema and metadata separately because we want to report the results separately + passed = self.common_ds.schema.equals(common_truth_schema, check_metadata=False) + self._append_result(passed=passed, target=targets, **test_info) + passedmd = self.common_ds.schema.metadata == common_truth_schema.metadata + self._append_result(passed=passedmd, target=targets, **testmd_info) + + return all([pandas_passed, passed, passedmd]) + + def _test_schema__common_metadata_pandas(self) -> bool: + """Test that the pandas metadata in _common_metadata matches the actual field names + and types in `truth_schema`. + + Returns + ------- + bool: True if the pandas metadata matches the expected schema and index columns, else False. + """ + test = "schema metadata" + description = "Test that pandas metadata contains correct field names and types." + target = "b'pandas' in _common_metadata" + test_info = dict(test=test, description=description, target=target) + print(f"\t{target}") + + common_truth_schema = self.truth_schema_plus_common_metadata() + base_schema = pyarrow.schema([pyarrow.field(fld.name, fld.type) for fld in common_truth_schema]) + pandas_md = common_truth_schema.pandas_metadata + pfields = [ + pyarrow.field(pcol["name"], pyarrow.from_numpy_dtype(pcol["pandas_type"])) + for pcol in pandas_md["columns"] + ] + pandas_schema = pyarrow.schema(pfields) + + passed = base_schema.equals(pandas_schema) and (pandas_md["index_columns"] == ["_hipscat_index"]) + self._append_result(passed=passed, **test_info) + return passed + + def _test_schema__metadata(self, test_info: dict, testmd_info: dict) -> bool: + """Test _metadata schema and metadata against the truth schema. + + Parameters + ---------- + test_info : dict + Information related to the schema test. + testmd_info : dict + Information related to the metadata test. 
+ + Returns + ------- + bool: True if both schema and metadata match the truth source, else False. + """ + targets = f"_metadata vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + # check schema and metadata separately because we want to report the results separately + passed = self.metadata_ds.schema.equals(common_truth_schema, check_metadata=False) + self._append_result(passed=passed, target=targets, **test_info) + passedmd = self.metadata_ds.schema.metadata == common_truth_schema.metadata + self._append_result(passed=passedmd, target=targets, **testmd_info) + + return all([passed, passedmd]) + + def _test_schema_file_footers(self, test_info: dict, testmd_info: dict) -> bool: + """Test the file footers schema and metadata against the truth schema. + + Parameters + ---------- + test_info : dict + Information related to the test results for schema comparison. + testmd_info : dict + Information related to the test results for metadata comparison. + + Returns + ------- + bool: True if all schema and metadata tests pass, else False. + """ + targets = f"file footers vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + affected_files, affectedmd_files = [], [] + for frag in self.files_ds.get_fragments(): + frag_path = str(Path(frag.path).relative_to(self.args.input_catalog_path)) + # check schema and metadata separately because we want to report the results separately + if not frag.physical_schema.equals(common_truth_schema, check_metadata=False): + affected_files.append(frag_path) + if not frag.physical_schema.metadata == common_truth_schema.metadata: + affectedmd_files.append(frag_path) + + passed = len(affected_files) == 0 + self._append_result(passed=passed, target=targets, affected_files=affected_files, **test_info) + passedmd = len(affectedmd_files) == 0 + self._append_result(passed=passedmd, target=targets, affected_files=affectedmd_files, **testmd_info) + + return all([passed, passedmd]) + + def _append_result( + self, + *, + test: str, + target: str, + description: str, + passed: bool, + affected_files: list[str] | None = None, + ): + """Create a `Result` and append it to `self.results`.""" + self.results.append( + Result( + datetime=now(), + passed=passed, + test=test, + target=target, + description=description, + affected_files=affected_files or [], + ) + ) + + def write_results(self, *, write_mode: str = "a") -> None: + """Write the verification results to a file. + + Parameters + ---------- + write_mode : str + Mode to be used when writing output file. 
+ """ + fout = self.args.output_path / self.args.output_report_filename + fout.parent.mkdir(exist_ok=True, parents=True) + header = False if (write_mode == "a" and fout.is_file()) else True + self.results_df.to_csv(fout, mode=write_mode, header=header, index=False) + print(f"\nVerifier results written to {fout}") diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index 1cd8cbf2..6747b874 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -10,6 +10,8 @@ import pytest from hipscat import pixel_math +from tests.hipscat_import.verification.fixture import VerifierFixture + # pylint: disable=missing-function-docstring, redefined-outer-name @@ -300,3 +302,52 @@ def assert_parquet_file_index(file_name, expected_values): npt.assert_array_equal(values, expected_values) return assert_parquet_file_index + + +@pytest.fixture +def malformed_catalog_dirs(test_data_dir): + base_dir = test_data_dir / "malformed_catalogs" + catalog_dirs = {dr.name: dr for dr in base_dir.iterdir() if dr.is_dir()} + # valid_truth dir contains a README pointing to the valid catalog used to generate malformed ones + # resolve the path + catalog_dirs["valid_truth"] = test_data_dir / (catalog_dirs["valid_truth"] / "README").read_text() + return catalog_dirs + + +@pytest.fixture(params=["valid_truth", "wrong_files"]) +def verifier_for_file_sets(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) + + +@pytest.fixture(params=["valid_truth", "no_rowgroup_stats"]) +def verifier_for_is_valid_catalog(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) + + +@pytest.fixture(params=["valid_truth", "wrong_rows"]) +def verifier_for_num_rows(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) + + +@pytest.fixture(params=["valid_truth", "no_rowgroup_stats"]) +def verifier_for_rowgroup_stats(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) + + +@pytest.fixture(params=["valid_truth", "no_rowgroup_stats"]) +def verifier_for_runner(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) + + +@pytest.fixture( + params=[ + "valid_truth", + "schema", + "schema_with_md_truth", + "schema_with_cmd_truth", + "schema_with_import_truth", + "schema_with_no_truth", + ] +) +def verifier_for_schemas(request, malformed_catalog_dirs, tmp_path): + return VerifierFixture.from_param(request.param, malformed_catalog_dirs, tmp_path) diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet new file mode 100644 index 00000000..f30ed667 Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet differ diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.missing_column.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.missing_column.parquet new file mode 100644 index 00000000..d793a2cd Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.missing_column.parquet differ 
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet
new file mode 100644
index 00000000..08b1a375
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.parquet
new file mode 100644
index 00000000..e0cb8d94
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.parquet differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet
new file mode 100644
index 00000000..2237419d
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata
new file mode 100644
index 00000000..a72be7f8
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import
new file mode 100644
index 00000000..dfc011fa
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_metadata b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_metadata
new file mode 100644
index 00000000..fce59f1a
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_metadata differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet
new file mode 100644
index 00000000..11e599de
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_common_metadata b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_common_metadata
new file mode 100644
index 00000000..4cf7a744
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_common_metadata differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata
new file mode 100644
index 00000000..925d3f67
Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata differ
diff --git a/tests/hipscat_import/data/malformed_catalogs/valid_truth/README b/tests/hipscat_import/data/malformed_catalogs/valid_truth/README
new file mode 100644
index 00000000..8dd3ea47
--- /dev/null
+++ b/tests/hipscat_import/data/malformed_catalogs/valid_truth/README
@@ -0,0 +1 @@
+small_sky_object_catalog
\ No newline at end of file
diff --git 
a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_file.parquet b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_file.parquet new file mode 100644 index 00000000..94585fd9 Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_file.parquet differ diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet new file mode 100644 index 00000000..a51234ca Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet differ diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet new file mode 100644 index 00000000..e0cb8d94 Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet differ diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata new file mode 100644 index 00000000..4cf7a744 Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata differ diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata new file mode 100644 index 00000000..42c25a75 Binary files /dev/null and b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata differ diff --git a/tests/hipscat_import/verification/fixture.py b/tests/hipscat_import/verification/fixture.py new file mode 100644 index 00000000..3a8415de --- /dev/null +++ b/tests/hipscat_import/verification/fixture.py @@ -0,0 +1,115 @@ +"""Run pass/fail tests and generate verification report of existing hipscat table.""" + +from pathlib import Path + +import attrs +import yaml + +from hipscat_import.verification.arguments import VerificationArguments +from hipscat_import.verification.run_verification import Verifier + + +@attrs.define +class VerifierFixture: + """Class to generate pytest fixtures for verification tests. Instantiate using the 'from_param' method.""" + + test_targets: dict[str, list | dict] = attrs.field(validator=attrs.validators.instance_of(dict)) + """Dictionary mapping test names to targets.""" + verifier: Verifier = attrs.field(validator=attrs.validators.instance_of(Verifier)) + """Verifier instance that the fixture will use to run verification tests.""" + assert_passed: bool | dict = attrs.field(validator=attrs.validators.instance_of((bool, dict))) + """Expected result(s) of the test(s) this verifier will run.""" + + @classmethod + def from_param( + cls, fixture_param: str, malformed_catalog_dirs: dict[str, Path], tmp_path: Path + ) -> "VerifierFixture": + """Create a VerifierFixture from the given fixture parameter. + + Fixture definitions, including the expected test outcomes, are defined in fixture_defs.yaml. + + Parameters + ---------- + fixture_param : str + The fixture parameter key to look up fixture definitions. 
+ malformed_catalog_dirs : dict[str, Path] + A mapping of malformed test dataset names to their directories. + tmp_path : Path + A temporary path for output. + + Returns: + VerifierFixture: An instance of VerifierFixture configured with the specified parameters. + """ + with open(Path(__file__).parent / "fixture_defs.yaml", "r") as fin: + fixture_defs = yaml.safe_load(fin) + fixture_def = fixture_defs[fixture_param] + + truth_schema = fixture_def.get("truth_schema") + if truth_schema is not None: + truth_schema = malformed_catalog_dirs[truth_schema.split("/")[0]] / truth_schema.split("/")[1] + + args = VerificationArguments( + input_catalog_path=malformed_catalog_dirs[fixture_def["input_dir"]], + output_path=tmp_path, + truth_schema=truth_schema, + truth_total_rows=fixture_def.get("truth_total_rows"), + ) + + fixture = cls( + test_targets=fixture_defs["test_targets"], + verifier=Verifier.from_args(args), + assert_passed=fixture_def["assert_passed"], + ) + return fixture + + @staticmethod + def unpack_assert_passed( + assert_passed: bool | dict, *, targets: list | None = None + ) -> tuple[bool, list] | dict: + """Unpack assert_passed and return a tuple or dictionary based on the provided targets. + + Parameters + ---------- + assert_passed : bool, or dict + A boolean indicating pass/fail status or a dictionary with target-specific statuses. + targets list, or None + A list of targets that assert_passed should apply to. If None, the return type is a + tuple with a bool indicating whether the test is expected to pass and a list of + parquet file suffixes that are expected to fail. Otherwise, the return type is a dict + with a key for each target and values indicating pass/fail for the given target. + + Returns + ------- + tuple[bool, list] | dict: + - If assert_passed is a boolean: + - If targets is None, returns a tuple (assert_passed, []). + - Else, returns a dict of {target: assert_passed}. + - If assert_passed is a dictionary: + - If targets is None, assert_passed is expected to contain a single item with + key=False and value=list of file suffixes that should have failed. The item + is returned as a tuple. + - Else, assert_passed is expected to have a key for every target. The + assert_passed dict is returned. + + Raises + ------ + AssertionError: If assert_passed is a dict but it does not have the expected key(s). + """ + + if isinstance(assert_passed, bool): + if targets is None: + return assert_passed, [] + return {target: assert_passed for target in targets} + + # assert_passed is a dict + + if targets is None: + # Expecting a single item with key=False, value=list of file suffixes that should have failed. + msg = "Unexpected key. There is probably a bug in the fixture definition." + assert set(assert_passed) == {False}, msg + return False, assert_passed[False] + + # Expecting one key per target + msg = "Unexpected set of targets. There is probably a bug in the fixture definition." + assert set(assert_passed) == set(targets), msg + return assert_passed diff --git a/tests/hipscat_import/verification/fixture_defs.yaml b/tests/hipscat_import/verification/fixture_defs.yaml new file mode 100644 index 00000000..c4333e91 --- /dev/null +++ b/tests/hipscat_import/verification/fixture_defs.yaml @@ -0,0 +1,156 @@ +# region ---- Tests and their targets +# fixture's 'assert_passed' will be coerced to a dict indexed by test and/or target. 
+test_targets: + num_rows: + - _metadata + - user total + schema: + schema: + - _common_metadata + - _metadata + - file footers + 'schema metadata': + - "b'pandas' in _common_metadata" + - _common_metadata + - _metadata + - file footers +# endregion +# region ---- Fixture params and their definitions +# valid_truth should pass all tests +valid_truth: + input_dir: valid_truth + truth_schema: valid_truth/_common_metadata + truth_total_rows: 131 + assert_passed: True +# no_rowgroup_stats is used for test_rowgroup_stats, test_is_valid_catalog, and verification_runner +no_rowgroup_stats: + input_dir: no_rowgroup_stats + assert_passed: False +# schema* is used for test_schemas +schema: + # Case: test bad_schemas catalog given valid_truth schema as truth_schema + input_dir: bad_schemas + truth_schema: valid_truth/_common_metadata + assert_passed: + schema: + _common_metadata: False # _common_metadata has wrong dtypes + _metadata: True + file footers: + False: + - .extra_column.parquet + - .missing_column.parquet + - .wrong_dtypes.parquet + 'schema metadata': + "b'pandas' in _common_metadata": True + _common_metadata: True + _metadata: False # _metadata is missing b'pandas' metadata + file footers: + False: + - .no_metadata.parquet +schema_with_cmd_truth: + # Case: test bad_schemas catalog given a truth_schema that has the wrong dtypes + input_dir: bad_schemas + truth_schema: bad_schemas/_common_metadata + assert_passed: + schema: + _common_metadata: True + _metadata: False # truth_schema has wrong dtypes + file footers: + False: + - .extra_column.parquet + - .missing_column.parquet + - .no_metadata.parquet + - .parquet + 'schema metadata': + "b'pandas' in _common_metadata": False # b'pandas' dtypes != truth_schema dtypes + _common_metadata: True + _metadata: False # _metadata is missing b'pandas' metadata + file footers: + False: + - .no_metadata.parquet +schema_with_import_truth: + # Case: Test bad_schemas catalog given a truth_schema with custom metadata that should be preserved, but + # missing hipscat fields and b'pandas' metadata. This schema could have been used during catalog import. + input_dir: bad_schemas + truth_schema: bad_schemas/_common_metadata.import + assert_passed: + schema: + _common_metadata: False # _common_metadata has wrong dtypes + _metadata: True + file footers: + False: + - .extra_column.parquet + - .missing_column.parquet + - .wrong_dtypes.parquet + 'schema metadata': + "b'pandas' in _common_metadata": True + _common_metadata: False # _common_metadata is missing the custom metadata + _metadata: False # _metadata is missing all metadata + file footers: + False: + # Every files fails because the custom metadata is missing. 
+ - .extra_column.parquet + - .missing_column.parquet + - .no_metadata.parquet + - .parquet + - .wrong_dtypes.parquet +schema_with_no_truth: + # Case: Test bad_schemas catalog given no truth_schema + input_dir: bad_schemas + truth_schema: null # _common_metadata will be used as the source of truth + assert_passed: + schema: + _common_metadata: null # this test should not run + _metadata: False # truth_schema has wrong dtypes + file footers: + False: + - .extra_column.parquet + - .missing_column.parquet + - .no_metadata.parquet + - .parquet + 'schema metadata': + "b'pandas' in _common_metadata": False # b'pandas' dtypes != truth_schema dtypes + _common_metadata: null # this test should not run + _metadata: False # _metadata is missing b'pandas' metadata + file footers: + False: + - .no_metadata.parquet +schema_with_md_truth: + # Case: Test bad_schemas catalog given a truth_schema with no metadata + input_dir: bad_schemas + truth_schema: bad_schemas/_metadata + assert_passed: + schema: + _common_metadata: False # _common_metadata has wrong dtypes + _metadata: True + file footers: + False: + - .extra_column.parquet + - .missing_column.parquet + - .wrong_dtypes.parquet + 'schema metadata': + "b'pandas' in _common_metadata": True + _common_metadata: True + _metadata: False # _metadata is missing b'pandas' metadata + file footers: + False: + - .no_metadata.parquet +# wrong_files is used for test_file_sets +wrong_files: + input_dir: wrong_files_and_rows + assert_passed: + False: + - .missing_file.parquet + - .extra_file.parquet +# wrong_rows is used for test_num_rows +wrong_rows: + input_dir: wrong_files_and_rows + truth_total_rows: 131 + assert_passed: + _metadata: + False: + - .missing_file.parquet + - .extra_file.parquet + - .extra_rows.parquet + 'user total': False +# endregion diff --git a/tests/hipscat_import/verification/generate_malformed_catalogs.py b/tests/hipscat_import/verification/generate_malformed_catalogs.py new file mode 100644 index 00000000..5809fd89 --- /dev/null +++ b/tests/hipscat_import/verification/generate_malformed_catalogs.py @@ -0,0 +1,229 @@ +import random +import shutil +from pathlib import Path + +import attrs +import pyarrow +import pyarrow.dataset +import pyarrow.parquet + +DATA_DIR = Path(__file__).parent.parent.parent.parent / "tests/hipscat_import/data" +VALID_CATALOG_DIR = DATA_DIR / "small_sky_object_catalog" +MALFORMED_CATALOGS_DIR = DATA_DIR / "malformed_catalogs" + + +def run( + valid_catalog_dir: Path = VALID_CATALOG_DIR, malformed_catalogs_dir: Path = MALFORMED_CATALOGS_DIR +) -> None: + """Generate malformed catalogs to be used as test data for verification. + This only needs to be run once unless/until it is desirable to regenerate the dataset. 
+ """ + Generate.run(valid_catalog_dir=valid_catalog_dir, malformed_catalogs_dir=malformed_catalogs_dir) + + +@attrs.define +class ValidBase: + dataset: pyarrow.dataset.Dataset = attrs.field() + frag: pyarrow.dataset.FileFragment = attrs.field() + tbl: pyarrow.Table = attrs.field() + schema: pyarrow.Schema = attrs.field() + valid_catalog_dir: Path = attrs.field() + malformed_catalogs_dir: Path = attrs.field() + insert_dir: str = attrs.field(factory=str) + + @classmethod + def from_dirs(cls, valid_catalog_dir: Path, malformed_catalogs_dir: Path) -> "ValidBase": + valid_ds = pyarrow.dataset.parquet_dataset(valid_catalog_dir / "_metadata") + valid_frag = next(valid_ds.get_fragments()) + valid_tbl = valid_frag.to_table() + return cls( + dataset=valid_ds, + frag=valid_frag, + tbl=valid_tbl, + schema=valid_tbl.schema, + valid_catalog_dir=valid_catalog_dir, + malformed_catalogs_dir=malformed_catalogs_dir, + ) + + @property + def fmeta(self) -> Path: + return self.malformed_catalogs_dir / self.insert_dir / "_metadata" + + @property + def fcmeta(self) -> Path: + return self.malformed_catalogs_dir / self.insert_dir / "_common_metadata" + + @property + def fdata(self) -> Path: + frag_key = Path(self.frag.path).relative_to(self.valid_catalog_dir) + return self.malformed_catalogs_dir / self.insert_dir / frag_key + + +@attrs.define +class Generate: + def run( + self, + valid_catalog_dir: Path = VALID_CATALOG_DIR, + malformed_catalogs_dir: Path = MALFORMED_CATALOGS_DIR, + ) -> None: + """Generate malformed catalogs to be used as test data for verification. + This only needs to be run once unless/until it is desirable to regenerate the dataset. + """ + if malformed_catalogs_dir.is_dir(): + print(f"Output directory exists. Remove it and try again.\n{malformed_catalogs_dir}") + return + print(f"Generating malformed catalogs from valid catalog at {valid_catalog_dir}...") + + valid = ValidBase.from_dirs( + valid_catalog_dir=valid_catalog_dir, malformed_catalogs_dir=malformed_catalogs_dir + ) + generate = Generate() + generate.valid_truth(valid) + generate.bad_schemas(valid) + generate.no_rowgroup_stats(valid) + generate.wrong_files_and_rows(valid) + + def malformed(self, valid: ValidBase) -> None: + """Case: