Skip to content

Commit a0ae2d5

Browse files
authored
Merge pull request #9 from lsst-dm/tickets/DM-34970
DM-34970: Add support for removed datasets in UUID migration script
2 parents 440ef46 + 1864a10 commit a0ae2d5

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

migrations/_oneshot/datasets/int_1.0.0_to_uuid_1.0.0/2101fbf51ad3.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ def _gen_refs(metadata: sa.schema.MetaData) -> Iterator[Tuple[str, str, int, Dic
286286
run = _get_table(metadata, "run")
287287
collection = _get_table(metadata, "collection")
288288

289+
dataset_ids = set()
290+
289291
tag_tables = [table for table in metadata.tables.values() if table.name.startswith("dataset_tags_")]
290292
for table in tag_tables:
291293

@@ -331,7 +333,29 @@ def _gen_refs(metadata: sa.schema.MetaData) -> Iterator[Tuple[str, str, int, Dic
331333
dstype_name = row[col_dataset_type_name]
332334
dataset_id = row[col_dataset_id]
333335
dataId = dict((col.name, row[col]) for col in dim_cols)
336+
dataset_ids.add(dataset_id)
337+
338+
yield run_name, dstype_name, dataset_id, dataId
334339

340+
# Also look at removed datasets that are only known to datastore.
341+
removed_ids = set()
342+
for table_name in ("file_datastore_records", "dataset_location_trash"):
343+
table = _get_table(metadata, table_name)
344+
col_dataset_id = table.columns["dataset_id"]
345+
sql = sa.select(col_dataset_id).select_from(table)
346+
_LOG.debug("sql: %s", sql)
347+
result = metadata.bind.execute(sql)
348+
for row in result:
349+
dataset_id = row[col_dataset_id]
350+
if dataset_id not in dataset_ids:
351+
removed_ids.add(dataset_id)
352+
if removed_ids:
353+
_LOG.debug("found %s removed datasets", len(removed_ids))
354+
# Run name and dataset type name can be anything that is non-raw.
355+
run_name = ""
356+
dstype_name = ""
357+
dataId = {}
358+
for dataset_id in removed_ids:
335359
yield run_name, dstype_name, dataset_id, dataId
336360

337361

@@ -399,13 +423,13 @@ def _fill_uuid_column(table: sa.schema.Table, map_table: sa.schema.Table) -> Non
399423
sql = table.update().values(
400424
id_uuid=sa.select([map_table.columns.uuid]).where(
401425
map_table.columns.id == table.columns.id
402-
)
426+
).scalar_subquery()
403427
)
404428
else:
405429
sql = table.update().values(
406430
dataset_id_uuid=sa.select([map_table.columns.uuid]).where(
407431
map_table.columns.id == table.columns.dataset_id
408-
)
432+
).scalar_subquery()
409433
)
410434
op.get_bind().execute(sql)
411435
_LOG.debug("Filled uuids in table %r", table.name)

0 commit comments

Comments
 (0)