From c1dd12b492f65d05b0339433a954424c7e9ff35a Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Sun, 15 Sep 2024 01:40:33 -0700 Subject: [PATCH 1/6] fixup verifier arguments --- src/hipscat_import/verification/arguments.py | 85 ++++++++++---------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/src/hipscat_import/verification/arguments.py b/src/hipscat_import/verification/arguments.py index 86c139b1..fb7bee27 100644 --- a/src/hipscat_import/verification/arguments.py +++ b/src/hipscat_import/verification/arguments.py @@ -2,50 +2,51 @@ from __future__ import annotations -from dataclasses import dataclass, field from pathlib import Path -from typing import List, Optional -from hipscat.catalog import Catalog -from hipscat.io.validation import is_valid_catalog +import attrs from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments - - -@dataclass -class VerificationArguments(RuntimeArguments): - """Data class for holding verification arguments""" - - ## Input - input_catalog_path: str | Path | UPath | None = None - """Path to an existing catalog that will be inspected.""" - input_catalog: Optional[Catalog] = None - """In-memory representation of a catalog. If not provided, it will be loaded - from the input_catalog_path.""" - - ## Verification options - field_distribution_cols: List[str] = field(default_factory=list) - """List of fields to get the overall distribution for. e.g. ["ra", "dec"]. - Should be valid columns in the parquet files.""" - - def __post_init__(self): - self._check_arguments() - - def _check_arguments(self): - super()._check_arguments() - if not self.input_catalog_path and not self.input_catalog: - raise ValueError("input catalog is required (either input_catalog_path or input_catalog)") - if not self.input_catalog: - if not is_valid_catalog(self.input_catalog_path): - raise ValueError("input_catalog_path not a valid catalog") - self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path) - if not self.input_catalog_path: - self.input_catalog_path = self.input_catalog.catalog_path - +# from hipscat_import.runtime_arguments import RuntimeArguments + + +def _dir_exists(instance: VerificationArguments, attribute: attrs.Attribute, value: UPath): + """This function will be used as a validator for attributes of VerificationArguments.""" + if not value.is_dir(): + raise ValueError(f"{attribute.name} must be an existing directory") + + +def _path_exists(instance: VerificationArguments, attribute: attrs.Attribute, value: UPath): + """This function will be used as a validator for attributes of VerificationArguments.""" + if not value.exists(): + raise ValueError(f"{attribute.name} must be an existing file or directory") + + +@attrs.define(kw_only=True) +class VerificationArguments: + """Container for verification arguments.""" + + input_catalog_path: str | Path | UPath = attrs.field(converter=UPath, validator=_dir_exists) + """Path to an existing catalog that will be inspected. This must be a directory + containing the Parquet dataset and metadata sidecars.""" + output_path: str | Path | UPath = attrs.field(converter=UPath) + """Base path where output files should be written.""" + output_report_filename: str = attrs.field(factory=lambda: "verifier_results.csv") + """Filename for the verification report that will be generated.""" + output_distributions_filename: str = attrs.field(factory=lambda: "field_distributions.csv") + """Filename for the field distributions that will be calculated.""" + truth_total_rows: int | None = attrs.field(default=None) + """Total number of rows expected in this catalog.""" + truth_schema: str | Path | UPath | None = attrs.field( + default=None, + converter=attrs.converters.optional(UPath), + validator=attrs.validators.optional(_path_exists), + ) + """Path to a Parquet file or dataset containing the expected schema. + If you provided the 'use_schema_file' argument when importing the catalog, use the same value here. + If not provided, the catalog's _common_metadata file will be used as the source of truth. + """ + + # [FIXME] Connect this with RuntimeArguments.provenance_info. Even then, does this ever get written to file? def additional_runtime_provenance_info(self) -> dict: - return { - "pipeline": "verification pipeline", - "input_catalog_path": self.input_catalog_path, - "field_distribution_cols": self.field_distribution_cols, - } + return {"pipeline": "verification pipeline", **{k: str(v) for k, v in vars(self).items()}} From 6a217bf92025af2fde22bf8091cd28333cfec07b Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Wed, 14 Aug 2024 01:41:28 -0600 Subject: [PATCH 2/6] add Verifier class --- .../verification/run_verification.py | 530 +++++++++++++++++- 1 file changed, 525 insertions(+), 5 deletions(-) diff --git a/src/hipscat_import/verification/run_verification.py b/src/hipscat_import/verification/run_verification.py index 2b7d5954..670a95f4 100644 --- a/src/hipscat_import/verification/run_verification.py +++ b/src/hipscat_import/verification/run_verification.py @@ -1,14 +1,534 @@ -"""Run pass/fail checks and generate verification report of existing hipscat table.""" +"""Run pass/fail tests and generate verification report of existing hipscat table.""" + +import collections +import datetime +from pathlib import Path + +import attrs +import hipscat.io.validation +import pandas as pd +import pyarrow.dataset from hipscat_import.verification.arguments import VerificationArguments -def run(args): - """Run verification pipeline.""" +def run(args: VerificationArguments, write_mode: str = "a"): + """Create a Verifier using args, run all tests, and write reports. + + Parameters + ---------- + args : VerificationArguments + Arguments to construct the Verifier. + write_mode : str, optional + Mode to be used when writing output files. + + Returns + ------- + Verifier + An instance of the Verifier class after running the verification process. + + Raises + ------ + TypeError + If 'args' is not provided or is not an instance of VerificationArguments. + """ if not args: raise TypeError("args is required and should be type VerificationArguments") if not isinstance(args, VerificationArguments): raise TypeError("args must be type VerificationArguments") - # implement everything else. - raise NotImplementedError("Verification not yet implemented.") + verifier = Verifier.from_args(args) + verifier.run(write_mode=write_mode) + + return verifier + + +Result = collections.namedtuple( + "Result", ["passed", "test", "target", "description", "affected_files", "datetime"] +) +"""Verification test result.""" + + +def now(): + """Return the current time as a string.""" + return datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S %Z") + + +@attrs.define +class Verifier: + """Class for verification tests. Instantiate using the 'from_args' method.""" + + args: VerificationArguments = attrs.field() + """Arguments to use during verification.""" + files_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the actual files on disk.""" + metadata_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the _metadata file.""" + common_ds: pyarrow.dataset.Dataset = attrs.field() + """Pyarrow dataset, loaded from the _common_metadata file.""" + truth_schema: pyarrow.Schema = attrs.field() + """Pyarrow schema to be used as truth. This will be loaded from args.truth_schema + if provided, and then hipscat columns and metadata will be added if not already present. + If args.truth_schema not provided, the catalog's _common_metadata file will be used.""" + truth_src: str = attrs.field() + """'truth_schema' if args.truth_schema was provided, else '_common_metadata'.""" + results: list[Result] = attrs.field(factory=list) + """List of results, one for each test that has been done.""" + _distributions_df: pd.DataFrame | None = attrs.field(default=None) + + @classmethod + def from_args(cls, args: VerificationArguments) -> "Verifier": + """Create a Verifier instance from the provided arguments. + + This method initializes the Verifier by setting up the necessary datasets + and schemas based on the input arguments. + + Parameters + ---------- + args : VerificationArguments: + Arguments for the Verifier. + + Returns + ------- + Verifier: An instance of the Verifier class. + """ + # make sure the output directory exists + args.output_path.mkdir(exist_ok=True, parents=True) + + # load a dataset from the actual files on disk + files_ds = pyarrow.dataset.dataset( + args.input_catalog_path, + ignore_prefixes=[ + ".", + "_", + "catalog_info.json", + "partition_info.csv", + "point_map.fits", + "provenance_info.json", + ], + ) + + # load a dataset from the _metadata file + metadata_ds = pyarrow.dataset.parquet_dataset(f"{args.input_catalog_path}/_metadata") + + # load a dataset from the _common_metadata file + common_ds = pyarrow.dataset.parquet_dataset(f"{args.input_catalog_path}/_common_metadata") + + # load the input schema if provided, else use the _common_metadata schema + if args.truth_schema is not None: + truth_schema = pyarrow.dataset.parquet_dataset(args.truth_schema).schema + truth_src = "truth_schema" + else: + truth_schema = common_ds.schema + truth_src = "_common_metadata" + + return cls( + args=args, + files_ds=files_ds, + metadata_ds=metadata_ds, + common_ds=common_ds, + truth_schema=truth_schema, + truth_src=truth_src, + ) + + def run(self, write_mode: str = "a"): + """Run all tests and write reports.""" + self.test_file_sets() + self.test_is_valid_catalog() + self.test_num_rows() + self.test_rowgroup_stats(write_mode=write_mode) + self.test_schemas() + + self.write_results(write_mode=write_mode) + + @property + def results_df(self) -> pd.DataFrame: + """Verifier results as a dataframe.""" + return pd.DataFrame(self.results) + + def truth_schema_plus_common_metadata(self) -> pyarrow.Schema: + """Copy of truth_schema with hipscat fields and metadata added from common_ds.schema.""" + hipscat_cols = ["Norder", "Dir", "Npix", "_hipscat_index"] + new_fields = [ + self.common_ds.schema.field(fld) for fld in hipscat_cols if fld not in self.truth_schema.names + ] + + # use pandas metadata from common_ds but keep all other metadata from truth_schema + metadata = self.truth_schema.metadata or {} + metadata[b"pandas"] = self.common_ds.schema.metadata[b"pandas"] + + return pyarrow.schema(list(self.truth_schema) + new_fields).with_metadata(metadata) + + def test_file_sets(self) -> bool: + """Test that files in _metadata match files on disk. Add one Result to results. + + Returns + ------- + bool: True if the file sets match, else False. + """ + test = "file sets" + description = "Test that files in _metadata match files on disk." + test_info = dict(test=test, description=description) + print(f"\nStarting: {description}") + + targets = "_metadata vs files on disk" + base_dir = str(self.args.input_catalog_path) + files_ds_files = [f.removeprefix(base_dir).strip("/") for f in self.files_ds.files] + metadata_ds_files = [f.removeprefix(base_dir).strip("/") for f in self.metadata_ds.files] + failed_files = list(set(files_ds_files).symmetric_difference(metadata_ds_files)) + passed = len(failed_files) == 0 + self._append_result(passed=passed, target=targets, affected_files=failed_files, **test_info) + + print(f"Result: {'PASSED' if passed else 'FAILED'}") + return passed + + def test_is_valid_catalog(self) -> bool: + """Test if the provided catalog is a valid HiPSCat catalog. Add one Result to results. + + Returns + ------- + bool: True if the catalog is valid, else False. + """ + test = "is valid catalog" + target = self.args.input_catalog_path + # [FIXME] How to get the hipscat version? + description = "Test that this is a valid HiPSCat catalog using hipscat version ." + print(f"\nStarting: {description}") + + passed = hipscat.io.validation.is_valid_catalog(target, strict=True) + self._append_result(test=test, description=description, passed=passed, target=target.name) + print(f"Result: {'PASSED' if passed else 'FAILED'}") + return passed + + def test_num_rows(self) -> bool: + """Test the number of rows in the dataset. Add two Results to results. + + File footers are compared with _metadata and the user-supplied truth (if provided). + + Returns + ------- + bool: True if all checks pass, else False. + """ + test = "num rows" + description = "Test that number of rows are equal." + test_info = dict(test=test, description=description) + print(f"\nStarting: {description}") + + # get the number of rows in each file, indexed by file path. we treat this as truth. + files_df = self._load_nrows(self.files_ds, explicit_count=True) + + # check _metadata + targets = "_metadata vs file footers" + print(f"\t{targets}") + metadata_df = self._load_nrows(self.metadata_ds) + row_diff = files_df - metadata_df + failed_frags = row_diff.loc[row_diff.num_rows != 0].index.to_list() + passed = len(failed_frags) == 0 + self._append_result(passed=passed, target=targets, affected_files=failed_frags, **test_info) + + # check user-supplied total + if self.args.truth_total_rows is not None: + targets = "user total vs file footers" + print(f"\t{targets}") + _passed = self.args.truth_total_rows == files_df.num_rows.sum() + self._append_result(passed=_passed, target=targets, **test_info) + else: + _passed = True # this test did not fail. this is only needed for the return value. + + all_passed = all([passed, _passed]) + print(f"Result: {'PASSED' if all_passed else 'FAILED'}") + return all_passed + + def _load_nrows(self, dataset: pyarrow.dataset.Dataset, explicit_count: bool = False) -> pd.DataFrame: + """Load the number of rows in each file in the dataset. + + Parameters + ---------- + dataset : pyarrow.dataset.Dataset + The dataset from which to load the number of rows. + explicit_count : bool + If True, explicitly count the rows in each fragment. + + Returns + ------- + pd.DataFrame: A DataFrame with the number of rows per file, indexed by file path. + """ + nrows_df = pd.DataFrame( + columns=["num_rows", "frag_path"], + data=[ + ( + # [TODO] check cpu/ram usage to try to determine if there is a difference here + frag.count_rows() if explicit_count else frag.metadata.num_rows, + frag.path.removeprefix(str(self.args.input_catalog_path)).strip("/"), + ) + for frag in dataset.get_fragments() + ], + ) + nrows_df = nrows_df.set_index("frag_path").sort_index() + return nrows_df + + def test_rowgroup_stats(self, *, write_mode: str = "a") -> bool: + """Test that statistics were recorded for all row groups. Add a Result to results. + + If the test passes, distributions_df is written to file. + + Parameters + ---------- + write_mode : str + Mode to be used when writing the output file. + + Returns + ------- + bool: True if the test passes, else False. + """ + test = "rowgroup stats" + description = "Test that statstistics were recorded for all row groups." + target = "_metadata" + test_info = dict(test=test, description=description, target=target) + print(f"\nStarting: {description}") + + common_truth_schema = self.truth_schema_plus_common_metadata() + self._distributions_df = None # start fresh + try: + assert set(self.distributions_df.index) == set(common_truth_schema.names) + except AssertionError: + passed = False + else: + passed = True + self._append_result(passed=passed, **test_info) + print(f"Result: {'PASSED' if passed else 'FAILED'}") + + if passed: + fout = self.args.output_path / self.args.output_distributions_filename + fout.parent.mkdir(exist_ok=True, parents=True) + header = False if (write_mode == "a" and fout.is_file()) else True + self.distributions_df.to_csv(fout, mode=write_mode, header=header, index=True) + print(f"Distributions written to {fout}") + + return passed + + @property + def distributions_df(self) -> pd.DataFrame: + """Distributions (min and max) for each column in the catalog. + + Returns + ------- + pd.DataFrame: A DataFrame with 'minimum' and 'maximum' indexed by column name. + + Raises + ------ + pyarrow.ArrowTypeError: If a schema mismatch is encountered while gathering statistics. + AssertionError: If the gathered statistics do not contain all expected columns. + """ + if self._distributions_df is not None: + return self._distributions_df + + print("Gathering distributions (min/max) for fields.") + common_truth_schema = self.truth_schema_plus_common_metadata() + + try: + rowgrp_stats = [ + rg.statistics for frag in self.metadata_ds.get_fragments() for rg in frag.row_groups + ] + except pyarrow.ArrowTypeError as exc: + msg = "Distributions failed due to mismatched schemas. Run 'test_schemas' to find problematic files." + raise pyarrow.ArrowTypeError(msg) from exc + + dist = pd.json_normalize(rowgrp_stats) + + # if dist doesn't contain all expected columns, fail now + msg = "Statistics not found" + assert set([c.split(".")[0] for c in dist.columns]) == set(common_truth_schema.names), msg + + min_ = dist[[f"{c}.min" for c in common_truth_schema.names]].min() + min_ = min_.rename(index={name: name.removesuffix(".min") for name in min_.index}) + + max_ = dist[[f"{c}.max" for c in common_truth_schema.names]].max() + max_ = max_.rename(index={name: name.removesuffix(".max") for name in max_.index}) + + self._distributions_df = pd.DataFrame({"minimum": min_, "maximum": max_}).rename_axis(index="field") + return self._distributions_df + + def test_schemas(self) -> bool: + """Test the equality of schemas and their metadata. Add Results to results. + + This method performs up to four tests: + 1. Schema metadata includes a correct pandas schema. + 2. _common_metadata matches user-supplied truth_schema (schema and metadata), if provided. + 3. _metadata matches Verifier truth_schema (schema and metadata). + 4. File footers match Verifier truth_schema (schema and metadata). + + Returns + ------- + bool: True if all tests pass, else False. + """ + test, testmd = "schema", "schema metadata" + test_info = dict(test=test, description="Test that schemas are equal.") + testmd_info = dict(test=testmd, description="Test that schema metadata is equal.") + print(f"\nStarting: {test_info['description']}") + + passed_cm = self._test_schema__common_metadata(test_info, testmd_info) + passed_md = self._test_schema__metadata(test_info, testmd_info) + passed_ff = self._test_schema_file_footers(test_info, testmd_info) + + all_passed = all([passed_cm, passed_md, passed_ff]) + print(f"Result: {'PASSED' if all_passed else 'FAILED'}") + return all_passed + + def _test_schema__common_metadata(self, test_info: dict, testmd_info: dict) -> bool: + """Test _common_metadata schema and metadata against the truth schema. + + This method performs up to two tests: + 1. Schema metadata includes a correct pandas schema. + 2. _common_metadata matches user-supplied truth_schema (schema and metadata), if provided. + + Parameters + ---------- + test_info : dict + Information related to the schema test. + testmd_info : dict + Information related to the metadata test. + + Returns + ------- + bool: True if all tests pass, else False. + """ + pandas_passed = self._test_schema__common_metadata_pandas() + + if self.truth_src == "_common_metadata": + # no input schema provided => _common_metadata is being used as truth, so skip the rest + return pandas_passed + + # an input schema was provided as truth, so we need to test _common_metadata against it + targets = f"_common_metadata vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + # check schema and metadata separately because we want to report the results separately + passed = self.common_ds.schema.equals(common_truth_schema, check_metadata=False) + self._append_result(passed=passed, target=targets, **test_info) + passedmd = self.common_ds.schema.metadata == common_truth_schema.metadata + self._append_result(passed=passedmd, target=targets, **testmd_info) + + return all([pandas_passed, passed, passedmd]) + + def _test_schema__common_metadata_pandas(self) -> bool: + """Test that the pandas schema in _common_metadata metadata matches the truth schema. + + Returns + ------- + bool: True if the pandas metadata matches the expected schema and index columns, else False. + """ + test = "schema metadata" + description = "Test that pandas metadata contains correct field names and types." + target = "b'pandas' in _common_metadata" + test_info = dict(test=test, description=description, target=target) + print(f"\t{target}") + + common_truth_schema = self.truth_schema_plus_common_metadata() + base_schema = pyarrow.schema([pyarrow.field(fld.name, fld.type) for fld in common_truth_schema]) + pandas_md = common_truth_schema.pandas_metadata + pfields = [ + pyarrow.field(pcol["name"], pyarrow.from_numpy_dtype(pcol["pandas_type"])) + for pcol in pandas_md["columns"] + ] + pandas_schema = pyarrow.schema(pfields) + + passed = base_schema.equals(pandas_schema) and (pandas_md["index_columns"] == ["_hipscat_index"]) + self._append_result(passed=passed, **test_info) + return passed + + def _test_schema__metadata(self, test_info: dict, testmd_info: dict) -> bool: + """Test _metadata schema and metadata against the truth schema. + + Parameters + ---------- + test_info : dict + Information related to the schema test. + testmd_info : dict + Information related to the metadata test. + + Returns + ------- + bool: True if both schema and metadata match the truth source, else False. + """ + targets = f"_metadata vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + # check schema and metadata separately because we want to report the results separately + passed = self.metadata_ds.schema.equals(common_truth_schema, check_metadata=False) + self._append_result(passed=passed, target=targets, **test_info) + passedmd = self.metadata_ds.schema.metadata == common_truth_schema.metadata + self._append_result(passed=passedmd, target=targets, **testmd_info) + + return all([passed, passedmd]) + + def _test_schema_file_footers(self, test_info: dict, testmd_info: dict) -> bool: + """Test the file footers schema and metadata against the truth schema. + + Parameters + ---------- + test_info : dict + Information related to the test results for schema comparison. + testmd_info : dict + Information related to the test results for metadata comparison. + + Returns + ------- + bool: True if all schema and metadata tests pass, else False. + """ + targets = f"file footers vs {self.truth_src}" + print(f"\t{targets}") + common_truth_schema = self.truth_schema_plus_common_metadata() + + affected_files, affectedmd_files = [], [] + for frag in self.files_ds.get_fragments(): + frag_path = str(Path(frag.path).relative_to(self.args.input_catalog_path)) + # check schema and metadata separately because we want to report the results separately + if not frag.physical_schema.equals(common_truth_schema, check_metadata=False): + affected_files.append(frag_path) + if not frag.physical_schema.metadata == common_truth_schema.metadata: + affectedmd_files.append(frag_path) + + passed = len(affected_files) == 0 + self._append_result(passed=passed, target=targets, affected_files=affected_files, **test_info) + passedmd = len(affectedmd_files) == 0 + self._append_result(passed=passedmd, target=targets, affected_files=affectedmd_files, **testmd_info) + + return all([passed, passedmd]) + + def _append_result( + self, + *, + test: str, + target: str, + description: str, + passed: bool, + affected_files: list[str] | None = None, + ): + """Create a Result and append it to self.results.""" + self.results.append( + Result( + datetime=now(), + passed=passed, + test=test, + target=target, + description=description, + affected_files=affected_files or [], + ) + ) + + def write_results(self, *, write_mode: str = "a") -> None: + """Write the verification results to a file. + + Parameters + ---------- + write_mode : str + Mode to be used when writing output file. + """ + fout = self.args.output_path / self.args.output_report_filename + fout.parent.mkdir(exist_ok=True, parents=True) + header = False if (write_mode == "a" and fout.is_file()) else True + self.results_df.to_csv(fout, mode=write_mode, header=header, index=False) + print(f"\nVerifier results written to {fout}") From f226d2a73cdc235a3034108f482e5ca728a4e043 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Wed, 14 Aug 2024 01:41:53 -0600 Subject: [PATCH 3/6] add malformed_catalogs test data --- .../Dir=0/Npix=11.extra_column.parquet | Bin 0 -> 9939 bytes .../Dir=0/Npix=11.missing_column.parquet | Bin 0 -> 8482 bytes .../Dir=0/Npix=11.no_metadata.parquet | Bin 0 -> 5857 bytes .../Norder=0/Dir=0/Npix=11.parquet | Bin 0 -> 8880 bytes .../Dir=0/Npix=11.wrong_dtypes.parquet | Bin 0 -> 8604 bytes .../bad_schemas/_common_metadata | Bin 0 -> 4018 bytes .../bad_schemas/_common_metadata.import | Bin 0 -> 735 bytes .../malformed_catalogs/bad_schemas/_metadata | Bin 0 -> 5773 bytes .../Norder=0/Dir=0/Npix=11.parquet | Bin 0 -> 7956 bytes .../no_rowgroup_stats/_common_metadata | Bin 0 -> 4018 bytes .../no_rowgroup_stats/_metadata | Bin 0 -> 4804 bytes .../malformed_catalogs/valid_truth/README | 1 + .../Norder=0/Dir=0/Npix=11.extra_file.parquet | Bin 0 -> 8880 bytes .../Norder=0/Dir=0/Npix=11.extra_rows.parquet | Bin 0 -> 8904 bytes .../Norder=0/Dir=0/Npix=11.parquet | Bin 0 -> 8880 bytes .../wrong_files_and_rows/_common_metadata | Bin 0 -> 4018 bytes .../wrong_files_and_rows/_metadata | Bin 0 -> 7541 bytes .../generate_malformed_catalogs.py | 229 ++++++++++++++++++ 18 files changed, 230 insertions(+) create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.missing_column.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import create mode 100644 tests/hipscat_import/data/malformed_catalogs/bad_schemas/_metadata create mode 100644 tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_common_metadata create mode 100644 tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata create mode 100644 tests/hipscat_import/data/malformed_catalogs/valid_truth/README create mode 100644 tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_file.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet create mode 100644 tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata create mode 100644 tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata create mode 100644 tests/hipscat_import/verification/generate_malformed_catalogs.py diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.extra_column.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f30ed667dbec8450a1f45f3f20d2fb05f99b2d22 GIT binary patch literal 9939 zcmd5?3wV=7*8b9l@&y`bX;V`uSPF#7tx0b*Tx`Ciw}f73p-o!EHc8VaG)dE@X`3Qz zU==H@zx=2yi|YmC(Ju0^EFywj49X&kx{DyN!Vf4Y>T(lU1s2)=oiCR*O^WQZu)BFU zedk zfKoxXK%YbY11OT8Nc9I^gUv9=GtoYU_DHlJL|X!i0ZjmX$`@0|fPV%}M!&CN^Cz_T zf)0be0ZoL>-yqKb-2fehjF0i$XkSG85GWaaqkzXD{}wm|xEuO`pzEM%piz*=f_A_* z9{hRG7*IdRB0&2f{}Gsuz5?L;z$EB*f{uXy81y}e!%wC@gKR2@2mU7PqS5{%=mYSH z;NM02E3~hGJ_P+8vLJ*N4q#q3T z7zybeUb8qVIfN8e|I4|wj94!Ye^?qsNHGCORUTFk8mR6*;+i`2|YVjKZSg zl9{Dtvu2l9s5O;Ua~_)eaCMDVr>`~C)f-Lo8q5}Jqphjg-r{I&Yj<|ce`LWg9(65T z#0D-}5}6?2#%E9wQSrhsQ>`E@ZgE)L)tICIbrkdGaOeWNjMJZ{=oTJLZAI3v@uRrO4xSL(ho)A7ItfN6<8M0%QZC)h zlX1UkLWh0mur->-;&q}WjCSEK>2nlR05W20Y4}DUO$kBC(TS9u*Ulb;F8)>)$*39- z>C=U6)CtT239$`m;8s??5B$rl-AZ^%_5=$3GS=1z+b-C3!e172M_IktH;3ddyNu0n zr(K48oZtw~Z~}G?tNg|x6Ve{DCK=f|!BR*XFIl}CCt{Q=kq$47&1*`0a+CO}E7Z)< zMTcfpe)3$+Z{p0K?Xk=~wcr#Jn=f3lX7ac*E1xpRR(>sgd0)HycW;-C-@o~(@jFX0 zc8hJhXUFazoKbWzqH)eanSF-h%>}2W8_rIi#NE1KhI$ZjCE6OBAQ<4=7R_4Ew%9Nr z?sA0aP_$?-ANwLW)IKH$`=WlHD^c<7lAT^>jQl0zV4 zVCas{LJg2bi>VwQ8HbLRVMHN3xu(N|i&T3lYu8ETA`UXJ3$@b4b!GDsxh_=T(bL@Q z{9RMseXp)d>h`7yGxH}WK#DR))+ z`Eb2s^(oV;v)|4gv8v;Nu65&+$*~k&9oD_N;k5?6XL)zgaku)3mc@&H5crliAl5!+ z5OR5=1hItz&y`9iu$jUiWGYTDH24%i<;@dXelG!1Fawjlhk&yjm z@ywwf>lcB8T!IaM9oxT*!{gx|rK!=p!8Elg4BS*6PUIt~krk+O@8ejFhq?jn4%EBv zfDZ6PJ{(A%LcS*kdDh7IHasbWR3&tyU_-9*--DkB-7}y;d>OSKvTD#&&@dcJa#izS za}2tt(4Ga_4$^|Y=YK=l_*v*v08FGBLEA7+72>D{snIVI7y;Y@`!>Fqe?OP1!LU^@ z?FV{}KayGq^~azm(Nzo?nep2gatLq%Y&S!`812<)HvoT$Ar+9FM7uv^45oDqvYY5j zUOC4>yCB~Nx`J`ZD{l$N1iAql$k!kS8ZVv#_(?=qQIs6A-vLP)i=o?(>5_5ELFup= z%%6$ZoeVD}8m}TDUXx<>bu$~&KLOcV*i@mt3~{`UF~|#XCv3=z^>MTZqmKysH_@(x z?FxPk``T*3Z!1;=9%G*y)TEZJr=HJPM~QctWIH1WT!q<$@MO`pOI&h}K9FdJPdWkUOit7|C;ulYsoKprAfY z@6KF!j>#L*3QpXMD6?sz&1obgfA<~({d{Nx;f#Dj<%#wwh5i;S1M}56xjQs&vCY)( zxdJ?~t;!UAm%+vsVx*9D&5#k#rNR`X5-`k1k}E&Z-0F4u7-NOr%D-tqpyz{A$ZG zz^>)`y@07FiarA@J70R3LtmP+bkftGlRnh4503#U>kpfc16`9Z{B)X(@ygQAeppka16@a!ihs!wnxD?eUq{R=yrUa{ z)T2k9ev6nLI-9*6vmAT%@Ihkz_3Jkg)TKGU`u^6a%`iLvu zfNopfmlp|+dudDbHuOK++%W{`l4Xq*5&mlS#Hj>r@2;6n(31Oc0nojtWm7Fs78`Ne z0Ca6m&S@gN>eaRFKx)(DgBEWa8D-Re|0H28szFPL`q4Lbt^&H}ymIL+4xPSeWY;@j zUDRsF2ivgQIZ^xS{`wIySpNj?&qVXu+WQUwsgtXC$B8^)%X23Q|Mu^9m%NPu7ulR- z11vR}Mw|fezWm$=UlI9@k}i5X`j?v)rUI!AF}gKCcl)Hw*N9yH+BU%s$glP{3JG2t z`0Qw)JN5aFF@(=j4;V}Mb9)RaKw0^!;i-f_#%)L^`0F!&p9*wk9&%(8-u1^73W8aq z_dm-Ki>oEd-xBuJ>uu(vwQW?0mj4CdPEK$*u&b&JzR43ivI zlfJRmkOXV1!(wy#6^*)i27L>WTMRAQT5XFK=JS&bR+yWt^`vk4?7WFtNedQq2NM`Y zlBt&T4~W7m>lKB`+LDrb3sNw15&uGG%2Y z>M5Q_l<-KR2nE0fCu^le0#>(?tvVM};?*il%$ zl405y^aGMXa+A7h%Qd3-4TVQTubqbY4XE9^^GPBauR*&%{I*mL- zlUA0Y&nM$DCS9tksY+ce_UI+%+Wb6>QD#)mYpgdoRc4)~xV0)D@sgMns)~x-%9av^ zN?KPD5PJu)SEt!c{Ta`jd;<9Mf751t;siRUYF%vuFsDbrol^b=IGNWE16&o~i zpUzR6Q|XN-I9}vPTs*JTL~@~1SBNXsWyae4Tz`)0X0?&HyM0+zM$g=;Y*p&^22H9j zr;JH&$z{jU=jUO~@;Z9vrOMLYTB#{bE%wX-`IodADtg9QW!GUX|EX9kW#&p}UpevX zLan1kmoGKmE-$^}ZK=W^c9ux+;yzsY_F`c?jBD3YD|6LTc7q zRjqmx8+WMKY&z@h@?`F@o`L&IXRWa73b2m~0`^S<_T$gR6=V}?4FmRywW38|VD5P~ zdd7W6`I>E!I&rq0eeI_JeW<-{tsp+rJ^h3CcCDJ^s{YRRc0-xXyDq_JtXmgq%_KjQ zn?U`YoU?|EHm$}Jdx(8hqR%&b=G}K*`aWx(y!rHZv{sr_qfK8>JP)lf^tQrzxh7KUl->DE#aZsqs-+HgjCo2&ks`fAX%&-uk6{YDwHDVH zd;gV`;@UzBcUvvF=5pG|HI_?wF}crFq?6oEnls5C_7aqrr?jS|fRQssR>hyNVH zm*tzCOmU&#Us^AJxV-V{;g8HBH=|XLwb!#VVh~b28J7)BLCobj7%z($D%eK>n^f=n zfan<#irj1{D05b_L(lfwGjf*A?cLw^-e5c9PXp=Sv#t?EItC&6_6*O+#a`Z*e?3w4 zm_Iu;y~SVGiJH;PL@(AIdD1XUlaDVgVng#tyYqo34rY$~s4YkB^`~Hp{qg5`=HEMh z;#%&VyElLJp8P?fAVIMXdIpP&{(21N`*ix1(MQNKy3b`O`6aLEwo9CNb_nFNYXTqwka}3 z?hJ!~Bjbb1#W%XNmvM2uhzRm9Xb15*gCKO&t5s0+4lj}MHOSoW{7KWMNs(C>?z&y< z|2z9}&faIAeI9YSkg1glxPlk>f?aflU`>L68cR`|Y5Mg9Y6OQXOdK(iOC9C$sTrWT zJeoSk6HjC30?yJRkX*0J_St#&4&C854E5&C@zn?Ip_ z2y`6u6=)`G{swsg=qFGwWIT))MEgA2$3SBA5`i?H>sg`*%8w0!pwoL9i_$m(&VhkR(XT6w0y;v^(;Ms zYBq4ux|B=-H=}?`;%5jGY-T}1`kI9FOXJd4r-=G!5&h3hF(Vh-i4r7G`0xP~MYh*S z!MVf=K|8yL#F~MK?(@u3=D&* z#W(}xsS>I*nT$iH$}yr4o&xjXAwW)h8EdzeDnlG(-~i6b05?#QoW%{`1fFtRkez>E zUU2x;4akDwG+}0u7*@Pw{~xvAtu{WbZNrC>L!{krqJ>ppwq965N*s5eJAK&-l$lInL*s; zFZ57$3U}pbF;+t%#Q>DVWjX@Fw7_NuWeOO&U12Xt!aR#qDl& zhv&gbKg1E8nkd>iv40*>V-4dW?mTEgO8-2#DHtVo4uzDMOB6!(i$-*uZ%xh=B!o9o zQdO(lYzasCtMoM-(U~k!PgdrTA>sydZ3)Flp?ygLinL;ax|B1Jyu?ylIHFCQ^v5}( zH94Z=*(9V$_dx^wa%iLBjQqu{H5svADU7sW8J(|fqJhx3RW4g+XiY=0?VBpvFqMrh z&PZ`%!|U$KO@m%{O%xTl>#i&3UjUBYU-dU&aFVg>@(UauT})AO{*edf{wH)f`z0kD zx?XSwHLk4@n8X z$-uzZcU&G1lppD(ZXt5_Z^p`i!E@agwFGngyhn-L(|h&>V4&xug7Y%u- zX-a}6zZL0$RO_AtONm}AZCFY0!SnZSBKq-9B>jo-XC|HLC3qmsc@7wKsXjkXaQYLw zQum<$X?NFHU_f3xO+@%hB{Sy{biKB9K0!y>yXC-Ok9UU|C{If|X#oayic8xFuYLBJ zP9U}8H)Gc9nasDEzPXpM4(*tAM1A**`?mmtOP;>)GKbDzJ-Pc;umS2R-eo_GIZMI3DfDc}L z_{}ef{HN+}dN2Cd+3v{$QrpKFw*rHmvkIRha>a9d1p6SrG}0<0_`~RjrT~L^k918X ze6eoSG{T?hvCIa_>$XhHBm76)75N1J_0-?z0Rx4{d?kbr{Be_#VDXfvcgW<=S;?(+=QF8v23{opT{IIp_KhhySOq z^)KfRoQ`9bS#2(l$>3dPbDAyTbMl>W>08H%P8ErIriIV&n7PF}Co>)`poB>tlhxud z2>5unPfZ}d1}MQO(r_C>4f0e7=^{5Sp8K!}Uu;v;E1n-mkHwO*1CX0#Y9>VTh=@JW zhscJ^V?c{vA{m((8*c1RlpFlgz0pst|C4u+=(;>&MAz}vA-bN=1ksJzRp}3vpn^qo z`lHe`ipt0pCbB*j(wIn-wH)k)+&B_j`L)+WJTCL1> z8XOkbJAHOLEShZ=yLnl(EaYXG*Y6^R**>Su)M~b5!`kU{xcm`Ct8ux-I*M$Yne>l}A}kvch0Ww%ZWvBqST+q}kqTi0fvM;%bg?yv2G73$L@Q{$DJy{swD?IE`d0dLbj?Uzl6y zH&U!e%y2+;9bJZg?Zo;UkPmU@!%T{aef0by;;YRMcTKFPurQ9Bc*RKkgydIqM1GUR zdg_;6S64%hgqWQctN&lSXLq{WV(_-Q+0(#oGrA3KKRFCsegl4EcaXWaTihO7tCL8j z`LiUmq}kZ8*14Gz1b~gFzV9FVWsR5C{1q_qVC|Vd5 z!?ZBU7FMQEMCm(-uq>ii*1(qV_+v+5^%{ohV9*ap1}QB@oA3-%T?|`Ak;Y;7HLC0L z^?6n8#`1cv(OGG$wkeDnyR6lqS9CSxsU-Tmkd4MJ@o1FV<;J{DsZm$ZY|s^~Xw+Nn zCcmaoqw->`0;^G1NBaBBx=v4plCd>9>~^ENw%O=Vc^enOPF)~1syoOy9S!K;ps#8+ zm)kp7ol+|`I_wy~)~>O2v|v1`QLiY_gismZvE&6uZcYt&UqLVBs)tX5%+TB~Mx zYm3FNwHqB(?Tu>0OJY)L>+8!Jywysrthqia_AX+to8z%X;!*}GB%T=j2*|MR$NzxFhkxp5KlGNwm z`Ky{6^;*Aij)u)cOl>K({)T#)-Q?7^n`~^{@nUlsomb0~eb9PF*O$>*?=hC6j>@Cz zW(Df;m*R@CiMNJPwc@Pzn#%2i_r~D3uPI-P9Wp=mwtu*KiqglcZD&34nIGyOTia$G z$yLj>*Y=89S9o1w_t=0g-kM2&Ave+bYuRTj3OWq>Q0#H)sM@5qhvq$eUWVUmp}h6$ zuW7CHc~+OHylOelvhcY&Y;4U2wY|fXS7kM;WG1(pF$~L;?4`;E6~14|AnbSwwoX`f zfg3$#Y*W3Ez0V{5$;CX_Kdxj9i@nfNx+vnWtQ5^b{@6(m_D80ws5Z{g;?51t2qB?S z6UrBRzbhnUJYvXL@nskj`LOprX|V4A2py4glocBL<0a1x{I&J-HI-~C;>NGW`QK5h z3;Rf>gqA@~-Xn+~$o?rwknjVe21e1rR1zu1>0h+r=EG*m zpd)52Oex#n6si|?*iht@Fw13dxA`pInI@NOCi`&93O~%Uq(yj^NisHMQdHc36d(V+ KW+DFXi2nhl-J4VZ literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.no_metadata.parquet new file mode 100644 index 0000000000000000000000000000000000000000..08b1a3757b9d0f530f14aecc93cf038655c628bf GIT binary patch literal 5857 zcmd5=30PFs9zS;&;0`i4EO#0dX$-iG8w|)QBKNY1$ZiC)7Rqj znVOnk@yL(oQ&Y^PvB)gTLQP41N{Xp37gBP|?ETK2VH5q{_r9|C?#J)`{@Xd{fBxs( zGxw4trucAjZi9;3!p3p6GLAS9vW#UvmJv%vuCcJRmXm`@6&VGLQ?lfeQbQ~tp8!rl zmqTa4{{*|k_5*Mlx*qz6kcR*#fnk6*>=$7F0_+JU;BUbr;oA@R0(XF;uwMggm0DsA zzJZvbum?bHhddne1js&sBQP2`pwyFN;IDx|wEG@0UqRjj90tw-V-WKb?Eb(lpba)9 z`ZGel4EYeCLt7R2IPB-a4&dGJ4+3ri;{i9=oq!#Pb%uTs@Bl1fvjXcqJAr>em!W+ZWLLljdMGd!@B&&BPJN^0$f3R9W{z-cWn80dG~FvI&Wf{g(XbONOhFoUhdE45boa0#>d#E)5*l4AV^}xB$SmZR zR@SV-W{|DYZm`PU!O@9RKcsOUGSp?5>%-dNkGQ#e@SeI6BS(!M=!7sq43CJ6ik=uVDRy#Pd_rPUa>|rPQy)u9&zL%G`i#eCW@csQ%*vgemtXM2 zoWi2H#U-U>ROq)%Tc}1 zmTzZy_P?2;^JqLygp(26R3Jj=<7LfVvWVj0H`n7>0JC?nYD=hmy zc=g`u@V|YKGHT!ES4Qnj^xv&7+5M=~zQO+S2d#>y9TdtZN4)>^>ENcbfnM_Frpak` zbSS>r$%V7&-WFw9rftz=qrPIrAF}87D6uaFrh1E8*R&>&LbqPk-+RBqPA1@%rOm}gO z?(>bhxKh9U2ZhCsQ3uuu`y-Q6b5jqW$(#~hF!a;ZmDd+!>ReSVL9JtBo&EpxYIMq( zJ*lrp7cKp^;<3DykJmh&SFxtOVENheGhCO?AJNb_N=F|{zH4aj>RQao%`q+SE>?ZV zl3(J?U*o!$w_mRJnZZ4mSH(HSai*VC)z+ryzV0)^|7ITbkMzz%fu$HYkuELY zqW#SiVL99G1(DmL90M04fav7Zp^@$o`WO7Ty9*~Xp9oe7#bvW{&Ez&ak$t zj*Xtju%nfaCf2|zyj5QDCagZ!v)*CY{D2QHoPw3ipAdSDwypd$K6oRdtXtPuvT@>~ zDYKy&E#9vzq}F`4stRqdzPx1#XxFPbdq5#4;=chcxtM&IVLPWS@>+b9wjoPCKL#dq zuWddKHUwV!drVWz{i*+TV@{$)((Q)R+f0 z99g7)j?ok@j-1#?!-^DbOip2y-E4afCJv|gP(sVY%^G>Z+xv8Y{>id{B|%2Xl!Vq z%rp-DlG>Lp9dnVV*wAu1oY@Nd)vz!N%EsFkwqSClcxn*k?;_g9frY)Ie;VcDE0OEL#_CJE zw}TBgPl_E>zwuqCeKY(=GLQR!4f<@)2+CnE^66kwylvME>eu;XJW2Vr%P%dZ{vmH! zeMR*Pt{2)U?{b=Z32ZEhK6RP$$hWrGZ$taDW%C`t1|i6kr+Qb|m=MY(AFU0gTom^tkK!DQnLcD37vtMYSxe2J<>v3B*;{p_Zl%fZHJ zZ+C8GSignC8$N_;AZsc<+lJlFsP@kO;R_nD{zb*t)bsAThg!kpOpBXo>afklIsl?QkM_&rTQ`X96!o`>G(MWYzR11 z5k_^xCrcwJ2f6Qiozd%O`b=6u)%N!`t=y5NVpncz!m~52HgXHY>MfqHH^M2z*4H`h z!kKjUr-6Z#W~;u#6w0uYY1mUg|aM@K;L;xzMfYHliK?WjPDq< zF#phbsN>ciQc54BBH!2y}hA?i}{jK8i_VEUx}2{ee+9lViKOnNKcuamF7QZYC6JV z0+DpE6jqT0xe8+PBq{%ilC^s(Ql7}izbcEi!&Qngy``ig=iD`ZD)sjnKa%Px6ANp^ zgt(sZB9kEY8BZ!gPwCK}4in{HnNyTfm}43eDdo|A!BYL;k3=UyG%y0yLL=HCEnN3S z>lr?0YFfhVsp+vb*<+dkt0{c@6tH_o$eaM#J(%EDqY|(EVt>OqrXM&`_e$mSmNcR^-kblT%VMCJP_7Rocqj fvhsrBxmuq|z5mjN@&y`fX%kZ@SlbdJ6i7l_8W5W=X`3eWLJMuuBGTO2q$FwDG`%4+ zvW+3k$-Hnlct;=YVLTj%h#(h(GQ=Ajg3u{b6cjzxZ(NDytMOGL|KMAT@CT1(Tf$56v~d`axE;e6_lFoBv3nkl5I zvqA|Ki}pvLPr>uS)4;y~Ga>sc=mdB<_*c;$1Ns;=4m1t&(~!RdoCu_Ve+Ev1?f^&$ zx(fOL@=Kt2VH!0Yco{ZhAoeF*#$Xa@Ry4x2xsy$^H* z^aW@NZ2ks$4(K~j4`f1&7exCU+J`|h^i2RBgZxY2Xy6{`M}ocu-44otJQ=hTwkhDx zfF^*3K^6zv5BWF1Z1fcY-vFjVzYBB}{JWs9Ks=$0dLOb`AOZL*uuDYy51==}r-6SB z?a$Eu8uS+E@1VV)??ER)HwlX=G4M9f+weOI_UA$W2R;V3-Tsdliv2qkLJ=ieA0yfjlSS4_f+$WDmnxy}h~30I#G7pq{&%RrcRS9l+&|wa%beJX3n~O_8s|ZMx!k#EYcP04RcCL%gX0g zRL+}URc$oY)GoMl;a!XB%oeN7UfXqwyLym6k4G2#)V$#|MYjrQY8$G4S(w0=`2`XzADUVVT0t;ThTjelq4?O-Vqq#5Bq$=wP=&w{RPaWK-hNw5rP;5JsiAN*6S-CB5)ae+d=nzePnwi|XU;4crl9#${!%|X`fQ?nIb;Zvg? zAMr#dc@ZmzRes@DOK8rlSxr_>v=ox2^PbSciOW!@(XkcDMa^mVZI<8vH8nT0^zgiz z1CP}Gx7hXmUiZS|OOG?jI?2lQGbWu}d%s=1_H)%!`#UuM^GfC9x3}Csc~^PP9=UhV z{N%T9$|*e**Rd1f&_lB1dBk8;;R!SRH;D156*#^fe6`Ep%%*cYCZJD0`$~DR|L|6G!Up67zPvb za0aLoy$`0M5z)Kaej-;|Fj8XWkxU^AF4l z4n4a8RWMX0WEL(?gcO7HpIr*x)8hFRlGpF0f_0h9)3UcV%SxKF4;)fIkW~2Iv+4r{ z6;<}CBcIjHD{_u~qw4W*?yZxJPuQDxaJnHS=Qj@)RerXw>fs{ynm_xi8y>%_^R5Q} z)5o3bPJOv>{JO5&x;IRgk!>lxIJR$gV{LXTH@&-Q@zr6{)hW_TqTcC^h;@hz2A$pn zQF4ihyHe>?wo>>(rNp8!(YpYuP@T{Udx?;O87T{G0^af%O8VXSzH^IiF)y;Wv^2GZ z^5BX0@uVlmNwrSPmxlmT5uM1zH0j>7pPUFSitn8eemu$%JsG)_ zj87C2_7{!Ff*;FF6~%;3gt)S%7Mnd(?w^U5^Q2cZq(?GR2MqBODvFV~1BL_?Zbb#P zd}3eqVhfz1h}Q7LPw=E`rP41ZkdVUN`wjHNp^bzy@)xye$%ws5Nw@{e$a?L(tuHiw znb+CDElw`B12X9oGB&nBMjA9WJX5dU)bE*EM^S;h@4kBO1>neCWq$(($6LCtyucIE zc@(8iIQZbq-$R$NOPgL&rcvI>!ACHcI;=`;~9-SqeU)~9+`bnSv zQAicvn4ja(j@(yIe+ntpb;s;Wq;K0BrK-)a8h-HUVRQ+zYF<6KVC+k6E+Tc%?d|CM z^{=QtIt#%;nC+8teX132c!?R`mGOvl;_fxW5B?Sv)`W}V?49! zgYOAHy8PJ@FM`)+YLbD$b>hu;5_}@=-`E`JM9y>i1t+kk-u*ZM*;^RBb3H0MHZo=MKu)JU^kCw;&Vr39YwPEFo@^9-BRMPQH^QUY{hrG*fzHQs@ki z%5AW7GG${9C5iJ{9d@@_G#Vcu6Jp3`4J8^u8ZBn7LEb!^F8p-h*uEPW-!bSBy67f| zOExCaqcO$ofaIo`kP4AHEMiai0kQ#k^=a{mm6DqB*p2=6y1`HQ8~uj-mpt&K|B~02 z^twEqq}TDSBfXxF6X}iF%i{Oc*e6n2{HZdTqEfOXabB~>X7)+W36`X@2gM?5lgsb+ z_|j*lEl7_%L(&&!rKNLf?ohF_R!h=7X15*o9>2>4i+ZQsWm^;} z)6!yY=_H4Nx6_P&qG%_%x7k~K&L$6$Dzc}_rz+C1VSATOKcy17>FG-;Njy_oS#F%k zJ!>Qzlctfsu7+ZUVJbl6Zw|v`Gnxjbh+!HSZ38RQXd?9OL|7QsYb#(&c>J-WuzEei zv@_@jWCsEE8r7hsX|FNnEUqz?GE8|M`f6tA-7bHvt~%SKENinASGQU`rOtAv#-ewr zn#?9mSBgTnSxchv03=S^0T5qkfW3fru zTT{kqbr-VZSan61v!bs4b*XiCwAGj@lx18FsK27!Ufn;=TAu}T`A@~-u5{IO4poz| zZm@ZDTXZVNwdyh;-qu>oA##$}gx5%iszgiHXaD?*>T6AgPRn#XTZgE%r8RWcRI6N8 zkD<-#WaAzzHm}8Vtvb2-&1dBLvUsX}mSU`<;)r##80+ze;)=2vYz`yVil@5OTI}k- zH~Po@qv|!^t?IunT^vO>W=}(#7%~pRGvw0b#wU%?c=J6PubpMJ&EzU)gDM~3U&Bhd`d^gSJr6_M(2 zUMoUpBk@wuh)!JjxK8s!e5ih!-uDI>i~NyJn!fXui`0isBz}geB0t)u5U*i62+v9T z9V^yqiLQY(C6vN|NR^w>10hty8RC?-ceCm==2ErE$-{l*dvjYN-FH N_wkQRbMgPG{6A|>(+~gv literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2237419d42f8048caa412c215b2d48cb8697394f GIT binary patch literal 8604 zcmd5?3wV=Nn*RSZloZ<1(k7-*u>=T}Ta(b1288BM+N24+&_bKG{E_C?HYG_z)AWXD zWM>!!Z|jA1@rLLs%kZ!)B8mtGWbn?2C@|dk)%4P#S0o=%XYB^(pW}&GKZ}>lFf8G>)PLal#?-Lq!S0h9@Q^k4Q-!nU*e=+$7By zH9GU=F}Gxmm5m!eK|V42)=876Or4gaP^zZq=H<^QP|uuo+w3`o8b+%tD%KlHjHbDz zW#tv~Dy!x%sID>B*3~a$Ew?Xfu-fbnXJb>dt7Wm<<8AdVY4f)SIy$?$dzRj@?9RJ_ zcQ5Axm#<9B6bm!*sQ8o&sleqB3nVK9l8b4Q<%#lRBDv@vnPSEi94AUFpzxIfpeXRC zY4HWRRNTRxu8#A8k63}_EIY?yvb0$mEjEkXGR!i@qOvS!d)O`PCf3WYWB0MU*caGs z?8EF)R$>`tiMPDV9%3)CEtVe3?Uou#hh>sQW=Xc_En17$(!#D_SF?@mO7>2654)dT z!){=0>;r5EdxgEge$D=wJt~r^wlHWHn zQ=Htp0Bu%&0bUbI_6g-jM#%RiWlEz1jhX4t5b-5JskoNQ^f9ELrbP{vAsS^YD@i0g zHZLKCT8q6+m{3K6lxj##p~@3dgrU*|5v9Qi(4WS1_<-G5N?U|R7<&ujJ8 z?M$x_Q1auW`?s2?&AQ0h*4Em_N4lJrtfb|aN6C+j!iI|)DP}||;`H8#M48;=@D&{w zKX0hQ89=LV-VstTk@4;s(_fnskko+z@1iC9X!*vQ<(D&iQyVeZRb0fJh)A5+I}u?b zMv0z8K9!A}V+!0i8j*oMIyO@*2(Ny8d25@)8BXh+k`=UkTbBHRtjs|}#0=#6GK!I6 zf$#cAKQuv@A=t$$rhI2)A1)QME;y^mXqLyqoF2X@bD_dKhaY6gGGi7 z#LSa$WD}6O>+plG5VMa@74E<+Ctf^skXV2H`^yOG{KEUby)teq%+CBUDHo`rUrBxw z7!>bWdl(pEe@gs`;1l^D<-831%?pKFftr;Y9(ffQZ2J2*JAhRFhV^?1(i=v9K;)}e zPd!6RFXqx+fdlk5D0Vz@PXaLb4SRVMP;=l2bqkTV{dS}p7&^QDf{Ea`?xZJ)+<)ZE zW?*pd2`&8!< z`Ut^Y>E5%zkWYX79KlJ?Y)jpa{-@e{MgoJHf{Aj%Uo4zDi=c1My4eIh#UGRaLwnn| zIDne;_!CZGaBFta62hBac)klrZTW5TitS@ln(g1-OIVL7c_mTb_0rBYz|g|y&c8y7 za+i-?{|49~^=#ms?bz*f%G-^9d7l_;{9VF_MDybFHyr>{U#v;^jL0*$J@y6RU;B^U z6|Z5y<-TsR0am$OV?GBTy71ULUlIAGiuIx$=wI!+TLq*xrPUC?1=&6@B;p{A2QM8Q~DdK*!G6bc@wDO+x zU8qTepXq#^MaicPoLzQXZBaeT#ym%Wch-UlDP^d?A3H>Qb3l0k?SxpgLFLZs;van*j1Y|tG0wfHHLk(u%2ulpP220g%k z?HAw;eZ0y4PhM8?8}clY-@tc_{6;=JYDF6E?CRK^r(ywB=&SpCv- z2}@;M5m;nzbq73NziejKLRn;ekXfc=$#^wiFr1v#QkmE4al+mkaJymA=yJLpiy~#b zmqqQ}K4K^fcwP2Zhf@Y?Z@}a0?o+hdTAcQFBKJ7ktqyCu73NE2PA|+|-X_wwdVzjw zfo$2b{$L`bkhvVBe?$~v*`O#~-uC?5=otFM141BETQ&uegr_BIvqn4NWrLhFx?8O= z-3(A%%MnUtw5PD}2D|G0(H8A*pk|QMzHCJS|C)h&pM}`v0|i z?xeRlt?jLCTs3gJY;D%IZc+?<-B$dWp_9zL!`bF{wR(wEnLAA}O)0~H?cF*9lq%?^ z%a&2nM5d~$!aS3IvPjpa<&aMgQ^GJzC5U|HGE6R`ZDRBc)5PeSIGI)(q35US*i>Cgn>nx1YR+3+U)$`qcN_DK`gV+!*K9Lall}pR zxyxUwV_fwfx7%i@YP5Ou?e+6vXUJ3944q`0&N}q3t1WMIl(;)NozA4RdE6Mk%58LY zHeo!atyY^?JH0B;ZXn|_E}P19m)QvR}vG)*r^K`$fFD{+QSFbaYxI9*KjlW*64%C^IZeoWVE#~}rwaqq9 zRkK~GRyb?5y}E#-s4g5&biBxsqP(TjMRH*?*C^`DRm~1VabJ!a=Xa90`+a#$&HUWz zef8$9#kH#5oH8!ErNsH?41mGg5z{*|51nt^fF`)ydue<&7D zmAkI{YB}lC#ac&?&7f|+R$d0h+g^_~#GDj$eS4%!U8*DdbKv~-jrFyrZrgMtmxrjm zr89Nc)u`Qeuc^cC;^K}Ko6qLGR-W7g)-!T{*}OG=TM712NyNTcjQ#jaaYfn0TEmFF z;;m`7m$(O>je&7rSH2c_)ZIAS-B;UB5&Bqr+gn3?=JNfc_qM}Ka@BPGdwX$}FT5_% zXRKcrYt59ukef*T^_;WCd7ai;KK2;7V-DAVQk7`+N| z?=ehCxYptt(~#NoX+Zr}hHEA9Qe1>ieERrKJz>7MhznZF<##~*q?5M4-g1%Z{OFXk zq|bj@O#gV2V7=g<85*|R9e;tER zNpg85-=jC5cES$GC$}rCW-@jYR-5#wokSDd{eoHUsm{G cm!+J6XPqMB$xMp6+CS>w@Q0##_1K3Wcsq$A6r?%rd&L`@B zvWGqGiT`c?z#ewawc|Q*S~`tc5biynzkBZaxO^ZTF1NS;wan!2PmH1EYPly&Ztumt z{5{)ba*uO+Iamgo^EbmVxgUZ4gJ~IS#)CjRw}N0CL~K-h2<(W)%L8%S8=&szPxV*F?A6sZm`xO{ zZKD1LikP;A!uG;9r7VVooPvNyyLt&o;$yLamUR-*ww&~xv9{AqigBAGNQSJZnE0Jt zty3}UFU{FK%ci5dtyxb5yogC? z%5tq6wuNS;FK=L9p}cZ5u@hWE)9(oq;&L@bp7iRKX;Ju9}NGeG>gxh1D@_9i;q<-fw>cAW0=rkEtWUH9nfVr6(+ zT(RAADXrpttx7@jG`=bPsKj6QTZ&YefYRY*;{SA~ ze|VC>eSv>!lH!MRQT)J=NO4OanD9|kbVZ*sjV9g*;5m!#1A+t(AUJ=Q08|Wa#Edl7 z@icIoiYLtqO+niOxNX!oFrM*;Pbxx~{DH3pms2ZzW%&Lp`tUo5A9e@aTX@zL_J4oQ RFgN{S{``qyKEVIh{{fIA=CJ?( literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import b/tests/hipscat_import/data/malformed_catalogs/bad_schemas/_common_metadata.import new file mode 100644 index 0000000000000000000000000000000000000000..dfc011fa65c46113e0681c61e6d23fc4c2837cbc GIT binary patch literal 735 zcmZ`%+fKqj5Zxd#N_fSzi4VN^CI}+&@;2KRyaa?MS|BE*H!cDeS}x&l{3n0NnJJJy z=%nq=%sD$}X1gsg%ek~VtqSEha!3Bm&SgX{eUVa9V^8EVrhKQb$f_U#pp7rt3&0J? zA=Chdpba@Ek&wVQlr#>w!6b~)n8o-Lz=jIr%D77~2~i%sPp{J~IoIWZH>*XS^~MP6 z)thCvVy>O{`>@xUV4g6Y90=(1$Rc;-ay>)GAt*F7ZbI(+BL*35m6M*;9P~QRTc?68 z;gB>sIb0rOX%|ZvaW>H}$Gpj%)si*9Y2hxG!{b6*?BjNWn~byjqeB95>*ln((XE}H z2F*kN`rrMNwA^;%GIcsn{6Dy|X>%VvOej5=e&5WlR&g!FqSbbg zcIW?n=9|&WYO^diAqRr@Vnpq)>r2IoCLbqqV8pNb%LO72V8+lyrms-cMVn-33q`}I z8lw0bG+1RnjOfo)jY82FQKs(=NyiBhz53E+VEIppeKC7H8i5JnigUy#CdP|f>wu+KS2ilE<2B7#+bFIpyMrNEUX1h~ z4dRMqzp~?3_J)+N2kGw;l|riYX&(SWQl?d$J3O5+y2<4~Y zkUU_a7@9!&en9!FpFTZCAHM-CT>r&J!pz-TOCZaCYrAIId&_3QZ51#NBxU~y{U%6X z$`;Jca}Q0J&u1PsVP2{28s?vq_8v7)mPzDO9{cF?6ZFRu77@l>gamR;qE~9WCfa@5 zCbAEAE7D$=J_>t@^xHcj*>9d6^YGRg`|sMq+#*_)e;K8BN4fIrxK>E2WaYY6{yT0fA21@p z@bi%J-4K0wo<4ieTzz6{_tx+OT>Z@(H}?Ph;SdJpNdjz#O3EK+h6w=+8*5`>m2$ma zUM+cRVh&+?Y-0{-dTnG5aXhgyhfLm>nL{Xd?98F4n}+64+Eq()sO^!door)j-aLwR zQtz>Cb&~J3Pj%Aw#F#okc;iH!DBQ80PAYDCO(!i^&83r=M{d&YuT{gS!UIxaeY;ky z|Ih#O4n^s)&o~N8uT94hj3*A{h{zi|a)jiLH#w4Y)2JK?yXsnwls&PqkUGe6xoi3J z;yZeitx}3HHUnQwR~T!sWQkp1ti)0!UYATVb`^8b@iN|_Y~7)>j4chEVUbNT z_&_tXLpcf=HieTU>zK{rg-fL-Bb$*36q0O-S8^pBXr^m;pFOiw(=TPdn48UA*YdID z`B~_8=`28>?Z94fthK>ssrZ_!UsTc=KB2X)b`?sT;(Gegz;ibkVeh26+a!DTKM@Oy>g*-4z_6V%lY1 zv^;hzG1wd!uj6?%lOPv`c$}zV^BI3kuhqshcrn}vZx)Suxw;yjI6EF4j|T6A2x<2r LUnzuS;YIYn3p>_@ literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/Norder=0/Dir=0/Npix=11.parquet new file mode 100644 index 0000000000000000000000000000000000000000..11e599de14c5ea6fddea68136a060165c096bc57 GIT binary patch literal 7956 zcmdT}d3cjmy8qIK@&y`9OH)%QwG;>y3M8Q^4U5f}v`G`X&_bIuh%{T9lq3yV+NQ`g zG7f_49^UVF z-sPP4J@0!qp3Y+`r9!T7hd{WSE)lMb6H>_(wVtM5jiZKexT5$WL%GxuzJQtxn#rfB zvwRU1kM<{^&%txS)4=})j)&}TpcCLF;9o|26zEgX7|=AxPec9=a3YWb{slM*y7xdb z(9fV_kY55N@-wNS!0%x*8uC1}JJBAC_HAfOK_fvqp!fL_>SN%aK{L?rOW6Dg?bksc zf&L1b0-L`>E(iSx>VS-o@giuSL;C|zHu?&H$07d;m<&7s{cz9^pxK~tkf(rNf^8c3 zGoT5eA&@124nh7MFc*D=z&C(d(C-5s1^+JS8xV(|O??R2ED#U;W!NR5{Tk>^@R{IW zLHi4|zXiPo`UmJB=qJ!g&`tbeDiwGu=xz8N0sHfyuffNm|3$PjK*PY#0Zj)@1sz5% zJ_FtY8wD^ObOHUvu)P`We9(S==InpQQS9$j426_%OPp|PTn z&XlRsBvRS*TzTG%e8tRJv*+Aapk!3)!Xk~fSf`&`Qd(9%ucC7Pf~snRvBtFU_C>~ zgmI}W<5D+|Ox-?Q+)0b+TXV4RDPbIij{rbXWFrg}o{cXNHnIDl19ReV=wiQ;GnA(2 z0FS2Hk)QAR0&aGYCqkiVYCUKb!K7?_TR@OD3N2Am7+=z- z15^xhU=bMk4j@g5K-o!|l%LnaF4#7HJBwsgBZ%~AL%~-8^FbmMj0U!|@iqI$j2uf;Ym*48aJNz#7dknPMJx^ zSEOi`X5PC~a^JVqyz!+U%&&Rxk=j3|x;{MUUevj~lS$EvR&SXx>E!zR?8^0DDmEW# zQT^A8m6PAzb>HNDGpW=!R_Z=Y)zL0pO3 zlhcL6x(etsM+BV2QU`r84?VTeX+Ua_ z3IZhq!$?v-_LVY8LKX7JICPQ{BZ}ZDJO>`aVr>oIyap9 zYEj09)?3@QPR=IVLVQTnQ-^q)-5M>#*F#gc4iUGfiZ_iMv=9PeN{KLfq0r=zV1JlX zs&G`_l{Gv&wy-!W<0$c#Q9Zks_L&#keZEFt)MG=cPu!U%-jVie`dg9S-K< zEnYi%P=~ZIv1@|dL^8W;Pwx1bbYWat)GVnD;tgZO7t#mybc3mEzolnt z@rBM_S-g}Xem&#bu7*d8b{!MDe{7E{ZkgBF(o?Z_GQ}G+2la(l$ImzQdUe!d+248R z&*ydkhwm%98pN+~$};LQJlZrnae0f#QhK76_iY(!c+-%iBa(^vR8 zbmFFua{?SXhrgga1gY|IfACRArQeyK=FpD3mrs8VDb;%0oJ*u{(;KCVov<2u_=zEO z3A1K?9k@vR^Gz-ybI26}A8PGe z9|I}RrCrB?;TdOtIzh&Ga?P=y2%lX3)UfBk>&B~6fRPQUJ8vhrDdFTIB7agn%T93X zu}D2Id~}WEHyn}cfkN$8V&>*`YzIc`lGeSrNFSnGEq&i;CEspkZRnse-Y7VOKX-9 zeCXW0>xh2T;|YHv{OOF-9R&BMc+LVNUd`v{2u^x@chVm8Kjmvp28NaS6UBtTSTJQ4 zLGLSD<`8rjy;}^791QHV0hK8UC+xuRuI$34gx5d)R11*W`P&gI_ly-dtl!*AShs$} zYNEdDxqTaek%dp5e}O~it{B_)GT1QnMDWc$SnV9ap}N1lLkzb4j`wGxdFH8`4+E)B zH}H-VdHU{0J|+B%|9znRMGUyY8zKu}jnkR&3HZo`N8bE`$bT$vqxYhJmGf>HklH@d zvIQ7vnVRETA8IZaenQ!j*-) zIkY7Hp4Aa3mHO4IQugEU{D1SZ{5rSiv>UP5;q>~g=D=d7$7YY6dw6ADN)|sX6JH!V z*vXf+O-_jt{8oqEZ5Afu-8mtSyih6OFw*duqYZM^Ms?lyV3h5-rto}4kJwEzA+==N zNIDrsV+SNB{)BXhl-(lsWbP;H7k`fyujRD#w2i;~8GQrqwi|h3+~7+=e1jLh_(tw- z@h`rD#lQR}75{6?G4=l0D7BbQ-840YqSA6i30||uX7-ED@s?$=YkIM@(G_%i{8=+I z7iRTc?pcd+GP9!U==!&_R?D(HX15*oo}kMGi#n&>Wn0`=7WJ|?5b_ektf0qfZM505 zVC@OIy`gSJqou)a4G_889x&U?0W-{(W!XJ2cY5kc->L8Z=0V-`Qe4du&V_-VV!~h9 zRo4T2pugUleoniy)wht{;%m$;`T`&5vHNjORKKFj=sSF%zaDu%XF<#)pV;@EU$^*b z^TVM$&{Iq}h?~I{Bk>cJU(FGDgb(!8CH+-h^*a&+? zXZD52Vc-p!@n6PfGWRCC&+lyX5UDhGs${A(3mdj;>GV>{pqrkxoDwB6m6hd&nbDhB zv~A=x@@uUxW*DXdM1FG_CYMpwGa81eXVmqqOr`3hZzjT`ZoRq!wuHxz9fj5F7^ay) zKOh;TFduEgGfa6tY*jOKZdcHxtPIC?V*BHy{Y{jl-R;SiWEp8XauXO30&Gi^hYB8$h#_5%EtCoz* zI4v^$Qj?)f64gsxHmwF@R62AGjrI1B-eqx@HJP-Cm&ByjS63I+1j^NVMO}5D*jtIc zVY=Vh9hX|~HK}zZF1OiG?Kf!@!5V|qMeLBHC9FS>%3*O=I;>KK#BNk|>4LVxnpivo z<3)}nWepWhk_(HWT2f=Ebl9{--8rh8-%R4}@#WDwqH{BOO@@{wMp;)*8K>1<#ExUt zYEUyxYwx_6+$~Kt#tKQ&Fa>&xP)_FIavj*9!Nn3vGmu=>U;o-(Qt6G=W#As`(G6BJ={0iGSARYGY>B+t zY>dV}$T}*wYF*KJUpX&V-fPjkb?L9GRz{h_Yb`Eoz*!bMSFadbomuN@w#v#JHjToI z^T=h7?)iUD-eTvb)m@EqOA(5lvwpJyd(Ehqv*)8nB}W_k(#oy{o4Yz>mARTMPI3n8 zTFTV9%_R**PIA`idh#j5UT!rT6hU^323@dJo!hGONXWg%FvYR67T1`X+-D`^+EV$+ z-B!U~bM@d0*_+Ny?lUcW-{q4gd%x)xOrAE4utjS2QdAR7?0#gMx%jFE<`UAnCfvso zVx-QUhBh%|9E4|T$cG)DG-Bf|@Tk0YmQ^n$S2-J0c|ZRuRvNvZS=&;4RYhi_j*)cx zV@gSXqKNrpZ~k8Xw4`g-z9nrE5~hZ6+0by$BR}&n2uVkUj&({N7O_VG+hX&n#rZ9b zot5m5Fz+5aVO{Q?dE&6+h{u>N#(tk^0ez#LqBQQr76 zzQ`wvgZ|Cr-4y$=^I_&tqD-&Zw=`%EOtE^sQ`kpfX6%udDV>2Qo+NE!Iz?USAO3_# KQSE&7wTCfjJ9E<=Z^HPW>y%t8G>ZUT1trD+0-Hpv-Vaw8%EP; ztsi7c{)TTX(X$rQ8bH=}G|6fyYi~*4Fg6BHj!}Nyzm$BNe^8=XO9^p{nQh64b{PFO zLsCSz(rdcf{S~@=r*obFRE&eLDa3*&#mH{RSbV4gdK3PDq*XeNrc+PlP_D z@%$Qpio{yV7uq0;Yj~&@%9jY464(ddR0(`9gu3v?PrFLxs;iz6a()bZ`ClZ@9c!Xo zR!gmk(U4=qKXc9<-jLktNLTsmZmC{SOH4*`3y~x=hel~uG?YqD2VI&vaLu{&R;tIK zt1>W@kGOwqDYNKQAdcp_t|4}MhF6cZ4aS6>p6C}jt>53d__3F5*6kU{q7+q0&K0h$L&cG~I3%1HHcw_6N zo8Zafg%}lD!?uI5Fci7aRXPJptfevPeV8Kdr7gcXV7_UArp&I?(junBF})hqV~TY+ zv-8kBgi}?WYFHUWF6{i*%aF+iHi`Q|x^Ib7XdE3s>GsNoPwzzEIjWsw! z@Tt&Eb7WRM6)-bNi|jBg=jA)m4UlOCFt@qRaf=9hcp;#^Sc% z-|4K>(jYJ!ts(TXq_6J8*3(6IYL;39t6mMDAGtQG`Tr+wN#8U*8TwXrp7hyBSD@C^ zW`*`gpRd3>)TK#t!Sdv}S#l?agFRT9wS@BYbXaq+*GbEGmY|leb)`C{I)>8tOekMV zet`%$&rWfS@e$6pHXU<)Sf9{I=iyKj>AX9}S75-zyZHI=5SKN94n+Zp$$X@*W%%T? zu>)#v0`s_lLPGfv-cgVYa7}P%GX40H=)3Uwz@|&#Gmdf!>U_gKPpND^Q(2g~gsoFEZ; z(vY+a5JmeCj}-seR{Yd-qsdML{09FaF4TXzAA$ka`*?o>*D3Dy^3ZnqgQ4RC4L(61 z91iTC;=L((s6XV_T>c=?!_Q+oer*Q9YkCPhNG^E??E1I==Zjo!*I(|hpK`hP@N4}a DYK`T} literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata b/tests/hipscat_import/data/malformed_catalogs/no_rowgroup_stats/_metadata new file mode 100644 index 0000000000000000000000000000000000000000..925d3f6717d1cbf510695837a100c9cf98b5052f GIT binary patch literal 4804 zcmcIo&2Jl35MPB5(hs4k!IneQl3J=IM4H%+-NqG_*xkhTn#5_`)L#fi`(=Bb-Su{D z$8oA$IB={uaH^^baYCp!-{g&nz=WnSzVkS+MLKv(QDw@Tw#6OQwi`B`o&!d2 zJ^?GHvJG2N?Hm5}=QGon8HP%|wW`@S=797`pEQ}ZwEYp7?5fb0Kq zVs>u!Y2X?~zZ+fxKzJj@-AH87677d=wdMcm!W?)MXQnRAq7%LeSm^&|#{a{G(97x2 z^oI++Ngv~zVBmELei-IN#21(6eCKiwVAu+BL^VK{4 z@81r+@Q0qwoB{LO_g%;PJ4Au&emnGXHuU4{8DPJdb79?`c5#2c68h@O8F2r&I_H~k zqUvM}1(p2u(37j7zvj*W`r-rESWGzA%-7dKzg#;5;^XTsBFR@5^0)cW)A=(%zKXe! zBk4}TAzNQx4t*UB{e9gF?|=L;WOQiS|7`I+hM8RmOj)9-h|R#C=N<*gnc0yWdfPCY z!8`Mx1&5DQ@biWFAZ63@r4p$}K~pqTkT=`94iXhj)s>xL8r8DX>RKomY@3?gP}Cqu zn{C7D_8ASSuF5T>8){2b#Fhx+kAkWR;+k2-wGXzFH&=p(hpsWhRs=N#*AH3Y&_=A# z%+{@#*M>fMzyx&dZ~-iF+zQ&FR|%z!D5>ZTajcpF#&L?^e0w!H#5b&}SD3x}200^2 z`_UTS$UgZ5-)QOgYV3CfWW7O^wElbbx%3fbZSZ6T<%j+2laKQcYKT{pLmZ=KTr#2_ zN+|_7ki|JX1cmSgmhG^x4t^12bp^&aXW7jakmYU*hTblx^08tx zyC)_HZ@KX^h84}atEboGF*&O89@&>1wK=kqWcl45U+@Z_-?sa)8(#kOGvf= zD_)Zd4{&{3DeO1bc~&bMx-O-170F1p%J)Go6^}@%4&ro5u)b8xR+O~fAv``4kqjO1 zbGo2)s(=@fid?+7oQumT#AP)pnt50*WW$sn(UnvZFmg4a-l(eGj4m13y>bfdMVt6c zKEGCKZSt8!B|l{Q0m>Jan_Ay4K4X=60qruxLcUo}CfcP!L`OM@(Js+vay7}w)#OMb ztQNT*udS?>9DBU>LX5)M`j&>VkP7*5sgSEFskMHLD)&2Rx2wy{)TnP|t6bRMEk=7W zWi{DYBRFy@344}27>rBV*xxG^x1w3<1H?blQS*a#mYWjn<-fAU$myl-Q8DTB#_gja zr4qH{;xb}ys|I_4K*Do3osK@t!f4H*U`&C&Xr$f0}c)8}EokYWtWx z+LTi|_50|!9DUYk+|?Nu>av*dKP4za5mS0cj%I60GGReI(p7rq|DU)y zbyGI-P`46Yr_MIT0-UvCCQj<3$;Dv|=TauQpcwhC9M$(E4QsHlpXFnn_4=BIwN7y3 z8HKZaAQlpBf>9UR8+`0QFvHmQST^m{T4-ZD_Oo?tTU--++ZJhaRhX}mPG?|0OObxJ zfV2pxn4d-kLUj!cq4&JuO zSt^9s~LqG!8Tk^3#yN1Dpt?fPV%~ zg6;rF4!R2Z0P;(q_=F5Y3lAv!Q@EGJ@0!IV)KtB@nE$DVo7UU_Q zov=*iBh>`ILqi#w}8l9XXOubo@ zHfC)4xbe4S+?tt{JwZHCa@(ZIIa8)ilgi}NbMx|N6ewoSx_$N?g-S-HE-Kb&OLY1< zrDf$6b1SRn&9AO87;Ebm+_~_sMfE1L#cFG4v^$y>J6-N(PfM$}&DY-1>F-){_tKx; z6S#L78@PC7e7caImPf@Urio%4R$)x)@|e_%$*IfY#m8we{m)D>EeG3)62?$?BmyXk zY_H+Mv$3VZc6N95VE#NFUE)>phSL2$_zTTGDXvpao=X?{a;ga zv&#<8t3B{Y{eP!A-{0$6czo$`CPgb+xqil^lWXs{Dc63kcxr#A>VICTn*8>b`zPo8}ls5m(}EDe1xyy=BpA;>u#v zh|~*l;=@VeeF>*6?y1$9wh8RX&6&A@sepDReDo!SAmh+UN{lFir@(A@2$0iW!P>2$iV+7HIDoSH-ZSF)m7>@0rGoX@%+r#$HcLudat|C*K9F4e z-m}UBMU~aI>LZ`k&(k=@zES=7H}}>{#wYGAI5=IOmiLA zt*yzzl$SVMUD_snBuX5TynSc>S0T+9&f&G^X^p@s3i3&M{_ zI-(aNpOWy2Lc~7Nh)npg>~vvF=tPJsZ*H~PLh1fl>T;g=YL@s&R{DS;enLhu5_iCm zfWob)pq5YU%U*1eBNWjZUg{G(@mjI?iwPv8aQA)#{cva_;f(yEik6Jns}zM>u#C*t z&fEGzP`J#Q|l=zaQEF;&%FQ~xvTtdz~Fdu z*OeD|33LHPDH9JqIP>?=W$ls{^5{~@!PDhngR=U0HX`1XzRb&`w6SZ**B)=c(gtL)zhCsN_E{a`x5Ef{zjQ%GpvRme0msN%B-5#04^B&QoEB# z?Q};6`hNZEt*ZgMAGGWP%=)P8eZcB7l}C8=`30+{KJWqQL#=-6Lm=h8wB;BuFyrj^ zC&(Djtoq=4!jGK5qEMSkw2rJWh1!ZgJ2^taCDXQ=RA?~ zfg9C}i1p{cyMmz3FZlIWSF^Uj?94Zla)Cg4EfiEg|P&KS7PIV)P?4{0)n5Ydu9Ta`t_>m6~9{o z4DM~)Yy~P);!fCrfi04v7Q*YFd$touZT{t`<=byfv|GNqkFYNNsFg&0&x^a(0fP&k zIsY<`&RuqE_p4w7)YHB3(hz1KDksH|Q$PEPm_`HOQ2{@2OB&jJSW z5BmxUANa!>HNk@Hw;$$7rHf>`#|V4;cP~D^)09X*{^BO=odwH_w(@9c>@QXZp;YQu zu1ML9Gil(b2zSnLYI4r?9uEJZpBxR(N03IViEEH|9!?j2b>P^(8yMd) z=n=Z;CZv{bOr}R;iP-_kO*0`KB4t>_p6~-?1LoDI#V1x;dfH<*_S@?Qzrx?>cgTOq z3!eC2^7bXZF0W4F>-erCzMd~9;v2JB|ooj#Y_n>jOML1yF`lDRM^Ba>5ehl-80T9WBDxoog^`!L^*=Veiw z-$M*DeQt-P*=oy#wcF?N_``~3bCb=|M&vGAo5^ZwGr@dGrp*m=hr5yVt)8!$QjocH zX;(j5JsV&VxB%x_#wwj`yaIyhT8l26hLEZFI zT+0y-)o4#4;dOS^`@t9OufJx1({Q#T7cwmVfw^&CX3-wQA8kZ+9bIzYSkeCawmI)cBi-6 zOl{4r>}lY1m|IP)esUOi{3iT~qJzx6-PY=LG`opZmOE8CRhEej+gm#Qlyc~%XD+2g z@k~`!g<&T5S|i$+JdOOi8cP_4sRWVV9EQndRE>;=VHz2ABP&y>BJ>?ZSRB@?D`87` z{Mb=gy^diz81w^@L5d2{COpGb6u?$BL+5h(>a;bvMtOO=xumAe>@IUuI819r$HfM#H7~O)D+jY zRjBofhMI`jyNJDEy4MkoORe|RsdXeSm&s7$tMUJH9O_dIk3$vj{T5G7XTeZdE95u}AAaVEka_jBf-0D1ahR(%C zd2db`hs9OQj$_ekux6UB{&}f$b+*?UE9K?f9FTumhpnc6oONC^*7Bc<#Z~33^$(Sk zux_w*beXjZ`?c~iAl|k*tRdzktqs>mr=nC%>a+j+H4Sw}z27`t$L1lbw$yrmZH>Zd zaqHVH4mR$=V)K~Y*UFQ#-+D&Ym)Tw8HJ6}{N+RlJG3xP$;)=2vYz-r7#a+{8DRK7S z8~x+{QTdwhQuwjA{X^ALgnqEvcGnP}xm^F~+O`@V{0&J zogEf=x!tN!cyJy$ZQP!}F?kD}n-*6M&Mk#Mbk2HB2JAJXK94;g-Kspau`exbEm&PO zev91MZg!9}Sl3yu&h02|Dt3^wPS=-DIregw$)NDDV>Ib}W$N56om)!oJ%%X>owc~e z)Z{)ZCD)e9OYXKx_L^%1U&P*YE^?n~+54`5G}-%2H-GZ9X@o6SvzMZpXhQcR+swsN z6`e~+8`^OnONo&>cN*Hnkg*evKT(AIu{VD|e_GPDci)mW2?5*DL2EE_(wj%@s|x6Kl>_EqUTnGn?Lfx&Y$g%AmP1{)Dfo6#$DOB zXhY41q)z3j!xQ<$;-G&Ad2b5+*!eKCDN%;U)Y{^+wN0^jJX6@0z>LsKUWRN2Uh$-9 UkEK)8Q2(g+@kge)_`fRu4~P8IUjP6A literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.extra_rows.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a51234cac257e34bc0ddb1acf7dc25d9c40bea10 GIT binary patch literal 8904 zcmd5?3wTpiwmxY?Ie`XKXkv;5ODQ2jfuyvh0kJtrn>3*hT4<9Nk>=GVB}vmf+NQ`D z9mc^jj$Bl(7hm}Jw3p%IdJz#sFerog91#RYy@-OKcX)}6qJz%;_eq*IO^ePa%-npe z^Y8W8d#%0p+IyeVkNHfET*wu^ClKzX%Z1%ZLTWTc_0aTdNz^b7SDZX-IF~xW7f@3{ zv-vc2mM^A~(LM_L3_KS+4g4Qq4rG4;9S1K1{|ee;L7#xefo4E{8uGV+lYtcQ&%i0r zy$i|%{RsLH@^3(?{48oX@DglpfV>dxV`xu6`xdn2pfR9a(EEHD^%3wr&@A*j37bEm zy%%%{^aW@dZ2k&)0qA?s5y*3f)CZ8w0r9|JhFu!ke+0b& zJ`4P-Xn&6O*Pu5+e*^6S{Qx=vx{hB)iGVkO-h$szus;v_U+_uje-Z8Rpb_Bbf@XrI zgZ3jAp8{`$jRKenx`6%?*j|rz5oj0qzXMP4>6?p_DfXv~qL30knIznhluK$xAWRXa zWQyrtrI>y>DPk_7q=QJI`!~+{)nusdBn~~1;v^+=houasc_T)q@<&}KNE%yHvy%$krrF=vuwvh=1YQ*)WlJ?G}Rw-hTGmAa%Saj>++myQX^QsqXlfg>e~B;PN&`GG%7~^O0<9#NCdIQ8 z1SyyH@|4^!+R$MiI&4d$v4|_t5~qdmBz=y6%0PCMn2~P>(v%n^P0ONuyiRsI^zgT_ zNJce+NS_{*(@J0wNQ~vDf!kR5KJe>VyNBRS8VMBo)vT=@wmq<034cY<9bxsd-Z@Bn zd`e`1qE`Bp$j4ER@B}BWa#-aT0i~FZn6)WM+<9~>pAM9~0KDPWAldctaKRIj4iHGjBDIYqiSii4R^`9@*Onqz1 zy;FBq73`LIb}vkS>$-x911YVG4k&%|)GsVQuGn;H)^zT+P4f(+ETd2s`_eOoBYMli zYl@V`rV*kGDUuUul0$sdMPjH!OkmKu2oDo}pHwb1vUPC`#e%wMtPXQ<_-0|fkIm-^ zsP#xMVZv3Yjm9(qRml@@!{sOfC6cd96H^sDF*iI9W_luIt3t__a+Nyh%XsLig-!!f zgH#YG85oAsif|Sv(_~Z$kBmd7DKVlLorQ>Sc>KDqn|d}(m3EEC>aNJ_Tis-<%@SGOU9{-OVUqP3lDkE{ z%NrN#5HlEbc?H7sa$)3Zr8C(~;ekwvgkuwT0aVdEp%wEICj~Q78r=k(?j%amePiFb zMSD$4Y+i4xH#!fFXfH={ahzn&xZZihjWvXaf6YABW%td4howlI$chP)gIT|LBD65I zcR{$RNJrvg6jD+yQHa?m8u1A~os%g{ik=85m91W@Et>9MiSFh|exD;bIWcp<5WgU! z7%6hVkbq*XgrL?=?#o_si6a`(T8`*3j-*E-xipD{6zkq^pq~zHB%G07LeY{DdzIo? z3zqTudi$on(72TzM`xrsBe5NoN;XN^*ajJC(Ae<0`k0{K>uMuKg>Jw7$8*mCNA9fr zD=<9X+;#am4xcWfD5YTkn%Vyi-Nc=;Vh&v{-G90gY*^m7z(&NIGVk(n=+wtP%Jp;T zT>iqUAf(F2eSwD|m49n`hC|y6Upf65q*T`}bH5>d+h4CxY=+hF{Z9;|%b8URn!tsV zUubs{sh#fVK;N%_w{zT4h*joZN8P@V<{&V6Zuo>IW~eDJ`6VlLx)$% ze$5d(?<>)6AZ9M!kxfAAj>Gr8NX!nND&B!vPQG~P0I@##hsy}+{G#7|_2a}XFgx?@ zlzgC)^Wv!2fFa?l>kk9Nw|$4fk&YhMNEWo(p<{ z6NCImh}?JN%yYodp5rRci;#a^T%1fWd^tH4NL^@cC?fc&`p9gcavxJ*AlQ1LWFs)# zd3N^>VCcI~m=lD*^y&GuZO|WX{8$bQ$;^}01dD$wF#@U9?YkBey;NSmjNtv}?paIp zV;@iX6X8#fKYfJYu5|ZVVA!Mi>>R-XUN` zx=Pb?UXP)i^Qk#D>>hA3m1a`|;_Yl^lAJt9NcRasy z9WcD;sq-&#=={4T^t=K#L_HCBV>_yyBiPsUm$!+*#^3VZBbsNQzJ5QD`eYsNVr$ zx7FkmpW`jdW)F%bmR4uL<@ROI&RUcme}-f)&dth>s3V7pjkQ{q?KZh=uy+TXPFOTK zY)R z$TwG_zdrc@X93Klh}g%^FDAay{BWZsdWs4MaWhyk5$!! z(-QT+*gd<`y*87-)ytj+PKViR@&?Ia;0c=WCyNd;_ja4t=V)~ksXTwWY`Q!f8@9J} z`YH3Eo0+|w5~ngXHC2Y$k=GjW#xXO6-y}BBBsu?p;F zLoMkauo^mj&<8;)cf4#A?$y(;@V0CJ}-0X5< z{2Hgu(b0_Y36(meyulQ13LmD%%^h zh?m5q*4NdQ*88i}dPP%RTw6oGn!+)3<^qou4r zx5{pI)z~d^h0JDD_38rFlKN;oiSZ&wvdWff2g!xmP$#Q5)Yz@s(pZj~7Icuf`+T|e z_Q>2CJPn4tX0^rl$QbV`Wvu@n3JqNRwJE?ay6;X{`1!~H5m0l^GqF^ zhlJWv>x1=m3a7=bZ?`zuxCe{PV|HIDPtJbp8DC#!cb(5%hB_*XtDB{$$DfKT!Dg^E zjH?xQo!?UC?7uhq$NjVNwa}#qVs8hBs;4;pV72Y8BR=yZ{S#~3Y9P64{`s}Nw8j%% zm&85RryFd|T}WXG;q@OvXs;gVa%#MeB^rd+59jz1Jdn>(&3PwKC?}J(jY{ z7Mx|#b9KnrnoL?}hb6DlZq+C}IFFpR$ezD8d5fN#7FQk4Ek!VT&iYIS>@}mlfIT1G zssgmJFD+~>SY35NOP;gc>>y{buCr2|-%;LD>L6#Gt}mZ?*vnlegCf9=(V`1fsPnsY zZW+1v7^WWbZfK!l^T65Vlm! zUW#g>iQbQFGapY?VlE+VYR7#nBSz}{8E6wj#!h&qo;=v`Nh3PmLbu9eV_9_@xysp~ zss{L1vC_!>%-UAqsVX%Yb&M?LkEtO2i6ZKcz4`n3(~_>e`@cFH1fPuXwVIM=~jDsDIQ4_#@MN{2!M815IVwGynhq literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/Norder=0/Dir=0/Npix=11.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e0cb8d948d9a14a7ac5937856204a9fa48684fad GIT binary patch literal 8880 zcmd5?3wV>|z5mjN@&y`fX%kZ@SlbdJ6i7l_8W5W=X`3eWLJMuuBGTO2q$FwDG`%4+ zvW+3k$-Hnlct;=YVLTj%h#(h(GQ=Ajg3u{b6cjzxZ(NDytMOGL|KMAT@CT1(Tf$56v~d`axE;e6_lFoBv3nkl5I zvqA|Ki}pvLPr>uS)4;y~Ga>sc=mdB<_*c;$1Ns;=4m1t&(~!RdoCu_Ve+Ev1?f^&$ zx(fOL@=Kt2VH!0Yco{ZhAoeF*#$Xa@Ry4x2xsy$^H* z^aW@NZ2ks$4(K~j4`f1&7exCU+J`|h^i2RBgZxY2Xy6{`M}ocu-44otJQ=hTwkhDx zfF^*3K^6zv5BWF1Z1fcY-vFjVzYBB}{JWs9Ks=$0dLOb`AOZL*uuDYy51==}r-6SB z?a$Eu8uS+E@1VV)??ER)HwlX=G4M9f+weOI_UA$W2R;V3-Tsdliv2qkLJ=ieA0yfjlSS4_f+$WDmnxy}h~30I#G7pq{&%RrcRS9l+&|wa%beJX3n~O_8s|ZMx!k#EYcP04RcCL%gX0g zRL+}URc$oY)GoMl;a!XB%oeN7UfXqwyLym6k4G2#)V$#|MYjrQY8$G4S(w0=`2`XzADUVVT0t;ThTjelq4?O-Vqq#5Bq$=wP=&w{RPaWK-hNw5rP;5JsiAN*6S-CB5)ae+d=nzePnwi|XU;4crl9#${!%|X`fQ?nIb;Zvg? zAMr#dc@ZmzRes@DOK8rlSxr_>v=ox2^PbSciOW!@(XkcDMa^mVZI<8vH8nT0^zgiz z1CP}Gx7hXmUiZS|OOG?jI?2lQGbWu}d%s=1_H)%!`#UuM^GfC9x3}Csc~^PP9=UhV z{N%T9$|*e**Rd1f&_lB1dBk8;;R!SRH;D156*#^fe6`Ep%%*cYCZJD0`$~DR|L|6G!Up67zPvb za0aLoy$`0M5z)Kaej-;|Fj8XWkxU^AF4l z4n4a8RWMX0WEL(?gcO7HpIr*x)8hFRlGpF0f_0h9)3UcV%SxKF4;)fIkW~2Iv+4r{ z6;<}CBcIjHD{_u~qw4W*?yZxJPuQDxaJnHS=Qj@)RerXw>fs{ynm_xi8y>%_^R5Q} z)5o3bPJOv>{JO5&x;IRgk!>lxIJR$gV{LXTH@&-Q@zr6{)hW_TqTcC^h;@hz2A$pn zQF4ihyHe>?wo>>(rNp8!(YpYuP@T{Udx?;O87T{G0^af%O8VXSzH^IiF)y;Wv^2GZ z^5BX0@uVlmNwrSPmxlmT5uM1zH0j>7pPUFSitn8eemu$%JsG)_ zj87C2_7{!Ff*;FF6~%;3gt)S%7Mnd(?w^U5^Q2cZq(?GR2MqBODvFV~1BL_?Zbb#P zd}3eqVhfz1h}Q7LPw=E`rP41ZkdVUN`wjHNp^bzy@)xye$%ws5Nw@{e$a?L(tuHiw znb+CDElw`B12X9oGB&nBMjA9WJX5dU)bE*EM^S;h@4kBO1>neCWq$(($6LCtyucIE zc@(8iIQZbq-$R$NOPgL&rcvI>!ACHcI;=`;~9-SqeU)~9+`bnSv zQAicvn4ja(j@(yIe+ntpb;s;Wq;K0BrK-)a8h-HUVRQ+zYF<6KVC+k6E+Tc%?d|CM z^{=QtIt#%;nC+8teX132c!?R`mGOvl;_fxW5B?Sv)`W}V?49! zgYOAHy8PJ@FM`)+YLbD$b>hu;5_}@=-`E`JM9y>i1t+kk-u*ZM*;^RBb3H0MHZo=MKu)JU^kCw;&Vr39YwPEFo@^9-BRMPQH^QUY{hrG*fzHQs@ki z%5AW7GG${9C5iJ{9d@@_G#Vcu6Jp3`4J8^u8ZBn7LEb!^F8p-h*uEPW-!bSBy67f| zOExCaqcO$ofaIo`kP4AHEMiai0kQ#k^=a{mm6DqB*p2=6y1`HQ8~uj-mpt&K|B~02 z^twEqq}TDSBfXxF6X}iF%i{Oc*e6n2{HZdTqEfOXabB~>X7)+W36`X@2gM?5lgsb+ z_|j*lEl7_%L(&&!rKNLf?ohF_R!h=7X15*o9>2>4i+ZQsWm^;} z)6!yY=_H4Nx6_P&qG%_%x7k~K&L$6$Dzc}_rz+C1VSATOKcy17>FG-;Njy_oS#F%k zJ!>Qzlctfsu7+ZUVJbl6Zw|v`Gnxjbh+!HSZ38RQXd?9OL|7QsYb#(&c>J-WuzEei zv@_@jWCsEE8r7hsX|FNnEUqz?GE8|M`f6tA-7bHvt~%SKENinASGQU`rOtAv#-ewr zn#?9mSBgTnSxchv03=S^0T5qkfW3fru zTT{kqbr-VZSan61v!bs4b*XiCwAGj@lx18FsK27!Ufn;=TAu}T`A@~-u5{IO4poz| zZm@ZDTXZVNwdyh;-qu>oA##$}gx5%iszgiHXaD?*>T6AgPRn#XTZgE%r8RWcRI6N8 zkD<-#WaAzzHm}8Vtvb2-&1dBLvUsX}mSU`<;)r##80+ze;)=2vYz`yVil@5OTI}k- zH~Po@qv|!^t?IunT^vO>W=}(#7%~pRGvw0b#wU%?c=J6PubpMJ&EzU)gDM~3U&Bhd`d^gSJr6_M(2 zUMoUpBk@wuh)!JjxK8s!e5ih!-uDI>i~NyJn!fXui`0isBz}geB0t)u5U*i62+v9T z9V^yqiLQY(C6vN|NR^w>10hty8RC?-ceCm==2ErE$-{l*dvjYN-FH N_wkQRbMgPG{6A|>(+~gv literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_common_metadata new file mode 100644 index 0000000000000000000000000000000000000000..4cf7a744a1681971dbfc1dc15ea897e169977525 GIT binary patch literal 4018 zcmcInTW{J}5JqiPwX14ZYL$veeTXdeY1;+}bVH?;_SgYqHX((kwsEzJd;?5uE&7wTCfjJ9E<=Z^HPW>y%t8G>ZUT1trD+0-Hpv-Vaw8%EP; ztsi7c{)TTX(X$rQ8bH=}G|6fyYi~*4Fg6BHj!}Nyzm$BNe^8=XO9^p{nQh64b{PFO zLsCSz(rdcf{S~@=r*obFRE&eLDa3*&#mH{RSbV4gdK3PDq*XeNrc+PlP_D z@%$Qpio{yV7uq0;Yj~&@%9jY464(ddR0(`9gu3v?PrFLxs;iz6a()bZ`ClZ@9c!Xo zR!gmk(U4=qKXc9<-jLktNLTsmZmC{SOH4*`3y~x=hel~uG?YqD2VI&vaLu{&R;tIK zt1>W@kGOwqDYNKQAdcp_t|4}MhF6cZ4aS6>p6C}jt>53d__3F5*6kU{q7+q0&K0h$L&cG~I3%1HHcw_6N zo8Zafg%}lD!?uI5Fci7aRXPJptfevPeV8Kdr7gcXV7_UArp&I?(junBF})hqV~TY+ zv-8kBgi}?WYFHUWF6{i*%aF+iHi`Q|x^Ib7XdE3s>GsNoPwzzEIjWsw! z@Tt&Eb7WRM6)-bNi|jBg=jA)m4UlOCFt@qRaf=9hcp;#^Sc% z-|4K>(jYJ!ts(TXq_6J8*3(6IYL;39t6mMDAGtQG`Tr+wN#8U*8TwXrp7hyBSD@C^ zW`*`gpRd3>)TK#t!Sdv}S#l?agFRT9wS@BYbXaq+*GbEGmY|leb)`C{I)>8tOekMV zet`%$&rWfS@e$6pHXU<)Sf9{I=iyKj>AX9}S75-zyZHI=5SKN94n+Zp$$X@*W%%T? zu>)#v0`s_lLPGfv-cgVYa7}P%GX40H=)3Uwz@|&#Gmdf!>U_gKPpND^Q(2g~gsoFEZ; z(vY+a5JmeCj}-seR{Yd-qsdML{09FaF4TXzAA$ka`*?o>*D3Dy^3ZnqgQ4RC4L(61 z91iTC;=L((s6XV_T>c=?!_Q+oer*Q9YkCPhNG^E??E1I==Zjo!*I(|hpK`hP@N4}a DYK`T} literal 0 HcmV?d00001 diff --git a/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata b/tests/hipscat_import/data/malformed_catalogs/wrong_files_and_rows/_metadata new file mode 100644 index 0000000000000000000000000000000000000000..42c25a75d9d52403b970626e604a8028c77f1a2a GIT binary patch literal 7541 zcmcgxO>7&-6<*1zz_@OaMkYa_av(7E)@f{uq)A6K>O#9Kkz7`yEJo&EQK-AiCArk@ zlID*{%5f3&U<5uDJrq3zIpk~(XkFoL27qwmcum;a&( zQCvu?nfKnz_q{i7-t4X(a0#DtY4v%KaWB`DqH0LaHyG#A^+orhmSdcku~C(1<5Esl zsLoN9j?`+kT&3ODK+)+h!ieQ~xtdd}*Im2MR~&CJ47IwntyQnDfoRJXEyXKZYe4qh zqoP)+DN_ANGjnQ>*7cj^)gP~Z#OLGbJ6v5(e+}*^{XrFKOL-N&F%QfC7=hF+wtilZMm2mBc#mgP3Mx6 zaV|1&dmaA3kSU1#>FS#Ea)(G&qC|dw+4Y}g_V3?gzxWO?A?RNu=Jcv zUVOjH1Q(Q2`OM*Z`7QR>tL!(<4wVl=pIcOZaqziKrB8ZJD*t=4yE=A8S$q~;Uo5cy zzQ+FXT89rtoQ4BhHlIG}Ir%*Eboq2sx|h(4b@tEe^CR?+AFMeSt*o*36lL>E*V6^| z`;BZY*JvxXJyT5qSE;ZUk_35xUE9PS{W1?!uCi z(2M)m8|=q72=2GhPq^HUPV1bw|GnSE9Vk1L!CzKfpRBOo++)A|k>&YT?AeD`KSR$y zc>n#EU;kkQ1J~cSpvK{Xb4&@F&x6(^^b0Ojli)A9Hci5x zOF5cEVm=jU5{uasok=LOtTU6aW?EV%fz73w+;lHjBr_+~++V-;*h60Oj+C>bawS8AUm&w<~DqX_%T)&Z!=- zHXBBzZ7a%hNmc47H`Kb6lj;(fKlZ35n8WiKj(vC-zP0Ij@}x_qpM^)u;rKolmTZ89 zX4dZphB?^c9t!B%lbgWAa?4YdhC!%o03@%MrI9c_inD;=y>J-G63!T^^Ce*zuU9hw zYDcSoA#L#l-{`lBVMO~g2C8NVNe|CqysmrztO1^EqJ6)Aw)ian;I(ZS$r6r$8I_EP zL*-`y;dhr|q>gl^Py;1lXr~S9&kav@dR3L`5K z*+i)?aNB#^=Yb8|Nu3_0A9Qy-PZ;+VE|J(z-J_q{+|O4w@H#C-IgUGkU%Y5h1(?q#r}<#|P9ms8a9&Q6{jo>cRNPDTK0Ox+!yJjCSSlCPc1)Lz_(?Vb ze36q7OD1O9vXpLQW;UnN*^fi|p7j%D>k{-d!v+R;0dVE7?@* zIPLGml+zSrhs>iy!mwuH)84pbjn+vfec+E%A0Ym|rkd>KnXSpNmQ!&t5_+aRE+)2a z)H)h+Bvd>rE(3h)Sy)5x$(ylrq!ro~Fh6_!59hP#SX;g$k~j?ImJn-ak|A9&V<(D6 zc#q;%kr1z|6e}2hkvL;6%kD`5fa}4=7iPwgMgH0(- zxsS-BeI=q(zsJXA{9dDR>*!x;tI$M)cu-DSDAgPZgAA~mSONHb@&PcWuzkVWXScAn@TnIFGOFJ6Y zI_sCXumNo}hYvwURsxB0~chScAgQ4RqS-3+yVR2x56Q7%`i}=IcXWZ-7_pVz?rE-fr f1g={TdF#G+;StZf`q3K0jQe4 None: + """Generate malformed catalogs to be used as test data for verification. + This only needs to be run once unless/until it is desirable to regenerate the dataset. + """ + Generate.run(valid_catalog_dir=valid_catalog_dir, malformed_catalogs_dir=malformed_catalogs_dir) + + +@attrs.define +class ValidBase: + dataset: pyarrow.dataset.Dataset = attrs.field() + frag: pyarrow.dataset.FileFragment = attrs.field() + tbl: pyarrow.Table = attrs.field() + schema: pyarrow.Schema = attrs.field() + valid_catalog_dir: Path = attrs.field() + malformed_catalogs_dir: Path = attrs.field() + insert_dir: str = attrs.field(factory=str) + + @classmethod + def from_dirs(cls, valid_catalog_dir: Path, malformed_catalogs_dir: Path) -> "ValidBase": + valid_ds = pyarrow.dataset.parquet_dataset(valid_catalog_dir / "_metadata") + valid_frag = next(valid_ds.get_fragments()) + valid_tbl = valid_frag.to_table() + return cls( + dataset=valid_ds, + frag=valid_frag, + tbl=valid_tbl, + schema=valid_tbl.schema, + valid_catalog_dir=valid_catalog_dir, + malformed_catalogs_dir=malformed_catalogs_dir, + ) + + @property + def fmeta(self) -> Path: + return self.malformed_catalogs_dir / self.insert_dir / "_metadata" + + @property + def fcmeta(self) -> Path: + return self.malformed_catalogs_dir / self.insert_dir / "_common_metadata" + + @property + def fdata(self) -> Path: + frag_key = Path(self.frag.path).relative_to(self.valid_catalog_dir) + return self.malformed_catalogs_dir / self.insert_dir / frag_key + + +@attrs.define +class Generate: + def run( + self, + valid_catalog_dir: Path = VALID_CATALOG_DIR, + malformed_catalogs_dir: Path = MALFORMED_CATALOGS_DIR, + ) -> None: + """Generate malformed catalogs to be used as test data for verification. + This only needs to be run once unless/until it is desirable to regenerate the dataset. + """ + if malformed_catalogs_dir.is_dir(): + print(f"Output directory exists. Remove it and try again.\n{malformed_catalogs_dir}") + return + print(f"Generating malformed catalogs from valid catalog at {valid_catalog_dir}...") + + valid = ValidBase.from_dirs( + valid_catalog_dir=valid_catalog_dir, malformed_catalogs_dir=malformed_catalogs_dir + ) + generate = Generate() + generate.valid_truth(valid) + generate.bad_schemas(valid) + generate.no_rowgroup_stats(valid) + generate.wrong_files_and_rows(valid) + + def malformed(self, valid: ValidBase) -> None: + """Case: