From 02f62af267ae17ed75e31057378845edfa45902c Mon Sep 17 00:00:00 2001
From: Kyle Mumma
Date: Tue, 25 Jun 2024 10:20:06 -0500
Subject: [PATCH] writing complete

---
 snuba/cli/migrations.py                       | 24 +++---
 snuba/migrations/autogeneration/diff.py       | 76 +++----------------
 snuba/migrations/autogeneration/main.py       | 70 +++++++++++++++--
 ...n.py => test_generate_python_migration.py} | 31 ++++----
 tests/migrations/autogeneration/test_ui.py    | 18 ++++-
 5 files changed, 116 insertions(+), 103 deletions(-)
 rename tests/migrations/autogeneration/{test_generate_migration.py => test_generate_python_migration.py} (90%)

diff --git a/snuba/cli/migrations.py b/snuba/cli/migrations.py
index 4159d68d794..359148c189a 100644
--- a/snuba/cli/migrations.py
+++ b/snuba/cli/migrations.py
@@ -384,15 +384,19 @@ def add_node(
 
 @migrations.command()
 @click.argument("storage_path", type=str)
-@click.option(
-    "--name",
-    type=str,
-)
-def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
+@click.option("--name", type=str, help="optional name for the migration")
+def generate(storage_path: str, name: Optional[str] = None) -> None:
     """
-    Given a path to modified storage yaml definition (inside of snuba repo),
-    creates a snuba migration for the given modifications.
-    The migration will be written into the local directory. The user is responsible for making
+    Given a path to a user-modified storage.yaml definition (inside snuba/datasets/configuration/*/storages/*.yaml),
+    and an optional name for the migration,
+    generates a snuba migration based on the schema modifications to the storage.yaml.
+
+    Currently only column addition is supported.
+
+    The migration is generated from the diff between HEAD and the working dir, so the modifications to the
+    storage should be left uncommitted in the working dir.
+
+    The generated migration will be written into the local directory. The user is responsible for making
     the commit, PR, and merging.
     """
     expected_pattern = r"(.+/)?snuba/datasets/configuration/.*/storages/.*\.(yml|yaml)"
@@ -401,5 +405,5 @@ def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
         f"Storage path {storage_path} does not match expected pattern {expected_pattern}"
     )
 
-    autogeneration.generate(storage_path, migration_name=migration_name)
-    click.echo("This function is under construction.")
+    path = autogeneration.generate(storage_path, migration_name=name)
+    click.echo(f"Migration successfully generated at {path}")
diff --git a/snuba/migrations/autogeneration/diff.py b/snuba/migrations/autogeneration/diff.py
index c7f19c337ef..f3d188f9d9a 100644
--- a/snuba/migrations/autogeneration/diff.py
+++ b/snuba/migrations/autogeneration/diff.py
@@ -1,12 +1,7 @@
-import os
 from typing import Any, Sequence, cast
 
-import yaml
-from black import Mode, format_str  # type: ignore
-
 from snuba.clusters.storage_sets import StorageSetKey
 from snuba.datasets.configuration.utils import parse_columns
-from snuba.migrations import group_loader
 from snuba.migrations.columns import MigrationModifiers
 from snuba.migrations.operations import AddColumn, DropColumn, OperationTarget
 from snuba.utils.schemas import Column, ColumnType, SchemaModifiers
@@ -16,27 +11,21 @@
 """
 
 
-def generate_migration(
-    oldstorage: str, newstorage: str, migration_name: str = "generated_migration"
+def generate_python_migration(
+    oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> str:
     """
-    Given 2 storage.yaml files, representing the diff of a modified storage.yaml
-    i.e. original and modified, generates a python migration based on the changes
-    and writes it to the correct place in the repo. Returns the path where the file
-    was written.
+    Input:
+    2 storage.yaml definitions, each already parsed with yaml.safe_load.
+    These represent the diff of a modified storage.yaml, i.e. the original and the modified version.
+
+    Generates and returns a python migration based on the changes.
     """
-    old_stor_dict = yaml.safe_load(oldstorage)
-    new_stor_dict = yaml.safe_load(newstorage)
-    forwards, backwards = storage_diff_to_migration_ops(old_stor_dict, new_stor_dict)
-    migration = _migration_ops_to_migration(forwards, backwards)
-    return _write_migration(
-        migration,
-        StorageSetKey(new_stor_dict["storage"]["set_key"]),
-        migration_name,
-    )
+    forwards, backwards = _storage_diff_to_migration_ops(oldstorage, newstorage)
+    return _migration_ops_to_migration(forwards, backwards)
 
 
-def storage_diff_to_migration_ops(
+def _storage_diff_to_migration_ops(
     oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> tuple[list[AddColumn], list[DropColumn]]:
     """
@@ -119,51 +108,6 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
     """
 
 
-def _write_migration(
-    migration: str,
-    storage_set: StorageSetKey,
-    migration_name: str,
-) -> str:
-    """
-    Input:
-    migration - python migration file (see snuba/snuba_migrations/*/000x_*.py for examples)
-    storage_set - the key of the storage-set you are writing the migration for
-    Writes the given migration to a new file at the correct place in the repo
-    (which is determined by storage-set key), and adds a reference to the new migration
-    in the group loader (snuba/group_loader/migrations.py)
-    """
-    # make sure storage_set migration path exist
-    path = "snuba/snuba_migrations/" + storage_set.value
-    if not os.path.exists(path):
-        raise ValueError(
-            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
-        )
-
-    # grab the group_loader for the storage set
-    group_loader_name = (
-        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
-    )
-    loader = getattr(group_loader, group_loader_name)()
-    assert isinstance(loader, group_loader.GroupLoader)
-
-    # get the next migration number
-    existing_migrations = loader.get_migrations()
-    if not existing_migrations:
-        nextnum = 0
-    nextnum = int(existing_migrations[-1].split("_")[0]) + 1
-
-    # write migration to file
-    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
-    if os.path.exists(newpath):
-        # this should never happen, but just in case
-        raise ValueError(
-            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
-        )
-    with open(newpath, "w") as f:
-        f.write(format_str(migration, mode=Mode()))
-    return newpath
-
-
 def _is_valid_add_column(
     oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> tuple[bool, str]:
diff --git a/snuba/migrations/autogeneration/main.py b/snuba/migrations/autogeneration/main.py
index d55487ac8a8..6d0aefb952f 100644
--- a/snuba/migrations/autogeneration/main.py
+++ b/snuba/migrations/autogeneration/main.py
@@ -2,20 +2,29 @@
 import subprocess
 from typing import Optional
 
-from snuba.migrations.autogeneration.diff import generate_migration
+from black import Mode, format_str  # type: ignore
+from yaml import safe_load
+
+from snuba.clusters.storage_sets import StorageSetKey
+from snuba.migrations import group_loader
+from snuba.migrations.autogeneration.diff import generate_python_migration
 
 
 def generate(storage_path: str, migration_name: Optional[str] = None) -> str:
     # load into memory the given storage and the version of it at HEAD
-    new_storage, old_storage = get_working_and_head(storage_path)
+    tmpnew, tmpold = get_working_and_head(storage_path)
+    new_storage = safe_load(tmpnew)
+    old_storage = safe_load(tmpold)
 
     # generate the migration operations
-    if migration_name:
-        return generate_migration(
-            old_storage, new_storage, migration_name=migration_name
-        )
-    else:
-        return generate_migration(old_storage, new_storage)
+    migration = generate_python_migration(old_storage, new_storage)
+
+    # write the migration and return the path
+    return write_migration(
+        migration,
+        StorageSetKey(new_storage["storage"]["set_key"]),
+        migration_name if migration_name else "generated_migration",
+    )
 
 
 def get_working_and_head(path: str) -> tuple[str, str]:
@@ -58,3 +67,48 @@ def get_working_and_head(path: str) -> tuple[str, str]:
         working_file = f.read()
 
     return (working_file, head_file)
+
+
+def write_migration(
+    migration: str,
+    storage_set: StorageSetKey,
+    migration_name: str,
+) -> str:
+    """
+    Input:
+    migration - python migration file (see snuba/snuba_migrations/*/000x_*.py for examples)
+    storage_set - the key of the storage-set you are writing the migration for
+    Writes the given migration to a new file at the correct place in the repo
+    (which is determined by the storage-set key). The migration number is computed
+    from the existing migrations in the group loader (snuba/migrations/group_loader.py).
+    """
+    # make sure storage_set migration path exist
+    path = "snuba/snuba_migrations/" + storage_set.value
+    if not os.path.exists(path):
+        raise ValueError(
+            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
+        )
+
+    # grab the group_loader for the storage set
+    group_loader_name = (
+        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
+    )
+    loader = getattr(group_loader, group_loader_name)()
+    assert isinstance(loader, group_loader.GroupLoader)
+
+    # get the next migration number
+    existing_migrations = loader.get_migrations()
+    nextnum = (
+        int(existing_migrations[-1].split("_")[0]) + 1 if existing_migrations else 0
+    )
+
+    # write migration to file
+    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
+    if os.path.exists(newpath):
+        # this should never happen, but just in case
+        raise ValueError(
+            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
+        )
+    with open(newpath, "w") as f:
+        f.write(format_str(migration, mode=Mode()))
+    return newpath
diff --git a/tests/migrations/autogeneration/test_generate_migration.py b/tests/migrations/autogeneration/test_generate_python_migration.py
similarity index 90%
rename from tests/migrations/autogeneration/test_generate_migration.py
rename to tests/migrations/autogeneration/test_generate_python_migration.py
index 7d8fb0c8f89..f0336eaecaa 100644
--- a/tests/migrations/autogeneration/test_generate_migration.py
+++ b/tests/migrations/autogeneration/test_generate_python_migration.py
@@ -1,14 +1,15 @@
-import re
+from typing import Any
 
 import pytest
+import yaml
 from black import Mode, format_str  # type: ignore
 
-from snuba.migrations.autogeneration.diff import generate_migration
+from snuba.migrations.autogeneration.diff import generate_python_migration
 
 
-def mockstoragewithcolumns(cols: list[str]) -> str:
+def mockstoragewithcolumns(cols: list[str]) -> Any:
     colstr = ",\n    ".join([s for s in cols])
-    return f"""
+    storage = f"""
 version: v1
 kind: writable_storage
 name: errors
@@ -30,6 +31,7 @@
     local_table_name: errors_local
     dist_table_name: errors_dist
 """
+    return yaml.safe_load(storage)
 
 
 def test_add_column() -> None:
@@ -146,19 +148,12 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
             ),
         ]
     """
-    written_path = generate_migration(
-        mockstoragewithcolumns(cols),
-        mockstoragewithcolumns(new_cols),
-        migration_name="kyles_migration",
+    migration = generate_python_migration(
+        mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
     )
-    assert re.match(
-        r"snuba/snuba_migrations/events/[0-9][0-9][0-9][0-9]_kyles_migration.py",
-        written_path,
+    assert format_str(migration, mode=Mode()) == format_str(
+        expected_migration, mode=Mode()
     )
-    with open(written_path, mode="r") as f:
-        assert format_str(f.read(), mode=Mode()) == format_str(
-            expected_migration, mode=Mode()
-        )
 
 
 def test_modify_column() -> None:
@@ -172,7 +167,7 @@ def test_modify_column() -> None:
         ValueError,
         match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
     ):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
 
@@ -190,7 +185,7 @@ def test_reorder_columns() -> None:
         ValueError,
         match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
    ):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
 
@@ -207,6 +202,6 @@ def test_delete_column() -> None:
         "{ name: newcol1, type: DateTime }",
     ]
     with pytest.raises(ValueError, match="Column removal is not supported"):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
diff --git a/tests/migrations/autogeneration/test_ui.py b/tests/migrations/autogeneration/test_ui.py
index afc7645baee..bf387d9311f 100644
--- a/tests/migrations/autogeneration/test_ui.py
+++ b/tests/migrations/autogeneration/test_ui.py
@@ -1,7 +1,9 @@
 import os
 import subprocess
+from glob import glob
 
-from snuba.migrations.autogeneration.main import get_working_and_head
+from snuba.clusters.storage_sets import StorageSetKey
+from snuba.migrations.autogeneration.main import get_working_and_head, write_migration
 
 
 def test_get_working_and_head() -> None:
@@ -44,3 +46,17 @@ def test_get_working_and_head() -> None:
         new_storage, old_storage = get_working_and_head(os.path.join(dir, fname))
         assert new_storage == "hello world\ngoodbye world"
         assert old_storage == "hello world\n"
+
+
+def test_write_migration() -> None:
+    content = "bllop"
+    name = "kyles_migration"
+    write_migration(content, StorageSetKey.EVENTS, name)
+    written_migration = sorted(
+        glob(f"snuba/snuba_migrations/events/[0-9][0-9][0-9][0-9]_{name}.py")
+    )[0]
+    try:
+        with open(written_migration) as f:
+            assert f.read().strip() == content
+    finally:
+        os.remove(written_migration)
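
Usage sketch (not part of the diff): the snippet below mirrors the new tests and shows how the pieces added here fit together when driven directly from Python rather than through the `snuba migrations generate` CLI. The two yaml file names and the "add_mycol" migration name are illustrative assumptions, not paths from the repo; in the real flow get_working_and_head() supplies the HEAD and working-dir versions of the storage definition.

    import yaml

    from snuba.clusters.storage_sets import StorageSetKey
    from snuba.migrations.autogeneration.diff import generate_python_migration
    from snuba.migrations.autogeneration.main import write_migration

    # Hypothetical before/after storage definitions; the only difference is an added column.
    with open("errors_head.yaml") as f:       # storage.yaml as it exists at HEAD (assumed file)
        old_storage = yaml.safe_load(f)
    with open("errors_working.yaml") as f:    # uncommitted working-dir copy with the new column (assumed file)
        new_storage = yaml.safe_load(f)

    # Build the python migration source (AddColumn forwards ops, DropColumn backwards ops).
    migration = generate_python_migration(old_storage, new_storage)

    # Write it to snuba/snuba_migrations/<set_key>/NNNN_add_mycol.py and get the path back.
    path = write_migration(
        migration,
        StorageSetKey(new_storage["storage"]["set_key"]),
        "add_mycol",
    )
    print(path)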