Skip to content

Commit

Permalink
Rename cleanup_temp_tables to cleanup_tables in warehouse and catalog (
Browse files Browse the repository at this point in the history
  • Loading branch information
amritghimire authored Aug 2, 2024
1 parent 2ca19a4 commit 53f1dca
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 19 deletions.
12 changes: 5 additions & 7 deletions src/datachain/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,16 +1217,14 @@ def remove_dataset_version(
def get_temp_table_names(self) -> list[str]:
return self.warehouse.get_temp_table_names()

def cleanup_temp_tables(self, names: Iterable[str]) -> None:
def cleanup_tables(self, names: Iterable[str]) -> None:
"""
Drop tables created temporarily when processing datasets.
Drop tables passed.
This should be implemented even if temporary tables are used to
ensure that they are cleaned up as soon as they are no longer
needed. When running the same `DatasetQuery` multiple times we
may use the same temporary table names.
This should be implemented to ensure that the provided tables
are cleaned up as soon as they are no longer needed.
"""
self.warehouse.cleanup_temp_tables(names)
self.warehouse.cleanup_tables(names)
self.id_generator.delete_uris(names)

def create_dataset_from_sources(
Expand Down
2 changes: 1 addition & 1 deletion src/datachain/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ def garbage_collect(catalog: "Catalog"):
print("Nothing to clean up.")
else:
print(f"Garbage collecting {len(temp_tables)} tables.")
catalog.cleanup_temp_tables(temp_tables)
catalog.cleanup_tables(temp_tables)


def completion(shell: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions src/datachain/data_storage/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def init(self, uri: StorageURI) -> None:
def close(self) -> None:
"""Closes any active database or HTTP connections."""

def cleanup_temp_tables(self, temp_table_names: list[str]) -> None:
def cleanup_tables(self, temp_table_names: list[str]) -> None:
"""Cleanup temp tables."""

def cleanup_for_tests(self) -> None:
Expand Down Expand Up @@ -457,7 +457,7 @@ def close(self) -> None:
"""Closes any active database connections."""
self.db.close()

def cleanup_temp_tables(self, temp_table_names: list[str]) -> None:
def cleanup_tables(self, temp_table_names: list[str]) -> None:
"""Cleanup temp tables."""
self.id_generator.delete_uris(temp_table_names)

Expand Down
10 changes: 4 additions & 6 deletions src/datachain/data_storage/warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,14 +915,12 @@ def get_temp_table_names(self) -> list[str]:
if self.is_temp_table_name(t)
]

def cleanup_temp_tables(self, names: Iterable[str]) -> None:
def cleanup_tables(self, names: Iterable[str]) -> None:
"""
Drop tables created temporarily when processing datasets.
Drop tables passed.
This should be implemented even if temporary tables are used to
ensure that they are cleaned up as soon as they are no longer
needed. When running the same `DatasetQuery` multiple times we
may use the same temporary table names.
This should be implemented to ensure that the provided tables
are cleaned up as soon as they are no longer needed.
"""
for name in names:
self.db.drop_table(Table(name, self.db.metadata), if_exists=True)
Expand Down
4 changes: 2 additions & 2 deletions src/datachain/query/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,10 +1201,10 @@ def cleanup(self) -> None:
# implementations, as errors may close or render unusable the existing
# connections.
metastore = self.catalog.metastore.clone(use_new_connection=True)
metastore.cleanup_temp_tables(self.temp_table_names)
metastore.cleanup_tables(self.temp_table_names)
metastore.close()
warehouse = self.catalog.warehouse.clone(use_new_connection=True)
warehouse.cleanup_temp_tables(self.temp_table_names)
warehouse.cleanup_tables(self.temp_table_names)
warehouse.close()
self.temp_table_names = []

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,7 @@ def test_garbage_collect(cloud_test_catalog, from_cli, capsys):
captured = capsys.readouterr()
assert captured.out == "Garbage collecting 4 tables.\n"
else:
catalog.cleanup_temp_tables(temp_tables)
catalog.cleanup_tables(temp_tables)
assert catalog.get_temp_table_names() == []


Expand Down

0 comments on commit 53f1dca

Please sign in to comment.