diff --git a/CHANGELOG.md b/CHANGELOG.md index 9163f9d..6a0f07b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# v0.4.3 + +- Fix issue where passthrough table columns could be marked for compaction + # v0.4.2 - Refactored temporary table creation to use sqlalchemy constructs diff --git a/subsetter/_version.py b/subsetter/_version.py index df12433..f6b7e26 100644 --- a/subsetter/_version.py +++ b/subsetter/_version.py @@ -1 +1 @@ -__version__ = "0.4.2" +__version__ = "0.4.3" diff --git a/subsetter/sampler.py b/subsetter/sampler.py index 9d90c48..db45393 100644 --- a/subsetter/sampler.py +++ b/subsetter/sampler.py @@ -691,6 +691,7 @@ def __init__(self, source: DatabaseConfig, config: SamplerConfig) -> None: self.source_engine = source.database_engine(env_prefix="SUBSET_SOURCE_") self.compact_columns: Dict[Tuple[str, str], Set[str]] = {} self.temp_tables = TempTableCreator() + self.passthrough_tables: Set[str] = set() def sample( self, @@ -699,6 +700,7 @@ def sample( truncate: bool = False, create: bool = False, ) -> None: + self.passthrough_tables = set(plan.passthrough) meta, _ = DatabaseMetadata.from_engine(self.source_engine, list(plan.queries)) if self.config.infer_foreign_keys != "none": meta.infer_missing_foreign_keys( @@ -740,6 +742,11 @@ def _get_compact_columns( "Table %s has columns configured for compaction but is not found", table, ) + elif table in self.passthrough_tables: + LOGGER.warning( + "Cannot compact columns on passthrough table %s", + table, + ) else: compact_columns[table_key] = set(cols) @@ -752,6 +759,8 @@ def _get_compact_columns( for table_key, table_meta in meta.tables.items(): if len(table_meta.primary_key) != 1: continue + if f"{table_key[0]}.{table_key[1]}" in self.passthrough_tables: + continue col = table_meta.table_obj.columns[table_meta.primary_key[0]] if not issubclass(col.type.python_type, int): # type: ignore