From 02f62af267ae17ed75e31057378845edfa45902c Mon Sep 17 00:00:00 2001
From: Kyle Mumma
Date: Tue, 25 Jun 2024 10:20:06 -0500
Subject: [PATCH] writing complete

---
 snuba/cli/migrations.py                       | 24 +++---
 snuba/migrations/autogeneration/diff.py       | 76 +++----------------
 snuba/migrations/autogeneration/main.py       | 70 +++++++++++++++--
 ...n.py => test_generate_python_migration.py} | 31 ++++----
 tests/migrations/autogeneration/test_ui.py    | 18 ++++-
 5 files changed, 116 insertions(+), 103 deletions(-)
 rename tests/migrations/autogeneration/{test_generate_migration.py => test_generate_python_migration.py} (90%)

diff --git a/snuba/cli/migrations.py b/snuba/cli/migrations.py
index 4159d68d794..359148c189a 100644
--- a/snuba/cli/migrations.py
+++ b/snuba/cli/migrations.py
@@ -384,15 +384,19 @@ def add_node(
 
 @migrations.command()
 @click.argument("storage_path", type=str)
-@click.option(
-    "--name",
-    type=str,
-)
-def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
+@click.option("--name", type=str, help="optional name for the migration")
+def generate(storage_path: str, name: Optional[str] = None) -> None:
     """
-    Given a path to modified storage yaml definition (inside of snuba repo),
-    creates a snuba migration for the given modifications.
-    The migration will be written into the local directory. The user is responsible for making
+    Given a path to a user-modified storage.yaml definition (inside snuba/datasets/configuration/*/storages/*.yaml),
+    and an optional name for the migration,
+    generates a snuba migration based on the schema modifications to the storage.yaml.
+
+    Currently only column addition is supported.
+
+    The migration is generated from the diff between HEAD and the working dir, so the modifications to the
+    storage should be left uncommitted in the working dir.
+
+    The generated migration will be written into the local directory. The user is responsible for making
     the commit, PR, and merging.
     """
     expected_pattern = r"(.+/)?snuba/datasets/configuration/.*/storages/.*\.(yml|yaml)"
@@ -401,5 +405,5 @@ def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
         f"Storage path {storage_path} does not match expected pattern {expected_pattern}"
     )
 
-    autogeneration.generate(storage_path, migration_name=migration_name)
-    click.echo("This function is under construction.")
+    path = autogeneration.generate(storage_path, migration_name=name)
+    click.echo(f"Migration successfully generated at {path}")
diff --git a/snuba/migrations/autogeneration/diff.py b/snuba/migrations/autogeneration/diff.py
index c7f19c337ef..f3d188f9d9a 100644
--- a/snuba/migrations/autogeneration/diff.py
+++ b/snuba/migrations/autogeneration/diff.py
@@ -1,12 +1,7 @@
-import os
 from typing import Any, Sequence, cast
 
-import yaml
-from black import Mode, format_str  # type: ignore
-
 from snuba.clusters.storage_sets import StorageSetKey
 from snuba.datasets.configuration.utils import parse_columns
-from snuba.migrations import group_loader
 from snuba.migrations.columns import MigrationModifiers
 from snuba.migrations.operations import AddColumn, DropColumn, OperationTarget
 from snuba.utils.schemas import Column, ColumnType, SchemaModifiers
@@ -16,27 +11,21 @@
 """
 
 
-def generate_migration(
-    oldstorage: str, newstorage: str, migration_name: str = "generated_migration"
+def generate_python_migration(
+    oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> str:
     """
-    Given 2 storage.yaml files, representing the diff of a modified storage.yaml
-    i.e. original and modified, generates a python migration based on the changes
-    and writes it to the correct place in the repo. Returns the path where the file
-    was written.
+    Input:
+    2 storage.yaml definitions, each already parsed with yaml.safe_load.
+    These represent the diff of a modified storage.yaml, i.e. the original and the modified version.
+
+    Generates and returns a python migration based on the changes.
     """
-    old_stor_dict = yaml.safe_load(oldstorage)
-    new_stor_dict = yaml.safe_load(newstorage)
-    forwards, backwards = storage_diff_to_migration_ops(old_stor_dict, new_stor_dict)
-    migration = _migration_ops_to_migration(forwards, backwards)
-    return _write_migration(
-        migration,
-        StorageSetKey(new_stor_dict["storage"]["set_key"]),
-        migration_name,
-    )
+    forwards, backwards = _storage_diff_to_migration_ops(oldstorage, newstorage)
+    return _migration_ops_to_migration(forwards, backwards)
 
 
-def storage_diff_to_migration_ops(
+def _storage_diff_to_migration_ops(
     oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> tuple[list[AddColumn], list[DropColumn]]:
     """
@@ -119,51 +108,6 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
     """
 
 
-def _write_migration(
-    migration: str,
-    storage_set: StorageSetKey,
-    migration_name: str,
-) -> str:
-    """
-    Input:
-    migration - python migration file (see snuba/snuba_migrations/*/000x_*.py for examples)
-    storage_set - the key of the storage-set you are writing the migration for
-    Writes the given migration to a new file at the correct place in the repo
-    (which is determined by storage-set key), and adds a reference to the new migration
-    in the group loader (snuba/group_loader/migrations.py)
-    """
-    # make sure storage_set migration path exist
-    path = "snuba/snuba_migrations/" + storage_set.value
-    if not os.path.exists(path):
-        raise ValueError(
-            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
-        )
-
-    # grab the group_loader for the storage set
-    group_loader_name = (
-        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
-    )
-    loader = getattr(group_loader, group_loader_name)()
-    assert isinstance(loader, group_loader.GroupLoader)
-
-    # get the next migration number
-    existing_migrations = loader.get_migrations()
-    if not existing_migrations:
-        nextnum = 0
-    nextnum = int(existing_migrations[-1].split("_")[0]) + 1
-
-    # write migration to file
-    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
-    if os.path.exists(newpath):
-        # this should never happen, but just in case
-        raise ValueError(
-            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
-        )
-    with open(newpath, "w") as f:
-        f.write(format_str(migration, mode=Mode()))
-    return newpath
-
-
 def _is_valid_add_column(
     oldstorage: dict[str, Any], newstorage: dict[str, Any]
 ) -> tuple[bool, str]:
diff --git a/snuba/migrations/autogeneration/main.py b/snuba/migrations/autogeneration/main.py
index d55487ac8a8..6d0aefb952f 100644
--- a/snuba/migrations/autogeneration/main.py
+++ b/snuba/migrations/autogeneration/main.py
@@ -2,20 +2,29 @@
 import subprocess
 from typing import Optional
 
-from snuba.migrations.autogeneration.diff import generate_migration
+from black import Mode, format_str  # type: ignore
+from yaml import safe_load
+
+from snuba.clusters.storage_sets import StorageSetKey
+from snuba.migrations import group_loader
+from snuba.migrations.autogeneration.diff import generate_python_migration
 
 
 def generate(storage_path: str, migration_name: Optional[str] = None) -> str:
     # load into memory the given storage and the version of it at HEAD
-    new_storage, old_storage = get_working_and_head(storage_path)
+    tmpnew, tmpold = get_working_and_head(storage_path)
+    new_storage = safe_load(tmpnew)
+    old_storage = safe_load(tmpold)
 
     # generate the migration operations
-    if migration_name:
-        return generate_migration(
-            old_storage, new_storage, migration_name=migration_name
-        )
-    else:
-        return generate_migration(old_storage, new_storage)
+    migration = generate_python_migration(old_storage, new_storage)
+
+    # write the migration and return the path
+    return write_migration(
+        migration,
+        StorageSetKey(new_storage["storage"]["set_key"]),
+        migration_name if migration_name else "generated_migration",
+    )
 
 
 def get_working_and_head(path: str) -> tuple[str, str]:
@@ -58,3 +67,48 @@ def get_working_and_head(path: str) -> tuple[str, str]:
         working_file = f.read()
 
     return (working_file, head_file)
+
+
+def write_migration(
+    migration: str,
+    storage_set: StorageSetKey,
+    migration_name: str,
+) -> str:
+    """
+    Input:
+    migration - python migration file (see snuba/snuba_migrations/*/000x_*.py for examples)
+    storage_set - the key of the storage-set you are writing the migration for
+    Writes the given migration to a new file at the correct place in the repo
+    (which is determined by the storage-set key). The migration number is computed
+    from the existing migrations in the group loader (snuba/migrations/group_loader.py).
+    """
+    # make sure storage_set migration path exist
+    path = "snuba/snuba_migrations/" + storage_set.value
+    if not os.path.exists(path):
+        raise ValueError(
+            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
+        )
+
+    # grab the group_loader for the storage set
+    group_loader_name = (
+        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
+    )
+    loader = getattr(group_loader, group_loader_name)()
+    assert isinstance(loader, group_loader.GroupLoader)
+
+    # get the next migration number
+    existing_migrations = loader.get_migrations()
+    nextnum = (
+        int(existing_migrations[-1].split("_")[0]) + 1 if existing_migrations else 0
+    )
+
+    # write migration to file
+    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
+    if os.path.exists(newpath):
+        # this should never happen, but just in case
+        raise ValueError(
+            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
+        )
+    with open(newpath, "w") as f:
+        f.write(format_str(migration, mode=Mode()))
+    return newpath
diff --git a/tests/migrations/autogeneration/test_generate_migration.py b/tests/migrations/autogeneration/test_generate_python_migration.py
similarity index 90%
rename from tests/migrations/autogeneration/test_generate_migration.py
rename to tests/migrations/autogeneration/test_generate_python_migration.py
index 7d8fb0c8f89..f0336eaecaa 100644
--- a/tests/migrations/autogeneration/test_generate_migration.py
+++ b/tests/migrations/autogeneration/test_generate_python_migration.py
@@ -1,14 +1,15 @@
-import re
+from typing import Any
 
 import pytest
+import yaml
 from black import Mode, format_str  # type: ignore
 
-from snuba.migrations.autogeneration.diff import generate_migration
+from snuba.migrations.autogeneration.diff import generate_python_migration
 
 
-def mockstoragewithcolumns(cols: list[str]) -> str:
+def mockstoragewithcolumns(cols: list[str]) -> Any:
     colstr = ",\n    ".join([s for s in cols])
-    return f"""
+    storage = f"""
 version: v1
 kind: writable_storage
 name: errors
@@ -30,6 +31,7 @@
     local_table_name: errors_local
     dist_table_name: errors_dist
 """
+    return yaml.safe_load(storage)
 
 
 def test_add_column() -> None:
@@ -146,19 +148,12 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
             ),
         ]
     """
-    written_path = generate_migration(
-        mockstoragewithcolumns(cols),
-        mockstoragewithcolumns(new_cols),
-        migration_name="kyles_migration",
+    migration = generate_python_migration(
+        mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
     )
-    assert re.match(
-        r"snuba/snuba_migrations/events/[0-9][0-9][0-9][0-9]_kyles_migration.py",
-        written_path,
+    assert format_str(migration, mode=Mode()) == format_str(
+        expected_migration, mode=Mode()
     )
-    with open(written_path, mode="r") as f:
-        assert format_str(f.read(), mode=Mode()) == format_str(
-            expected_migration, mode=Mode()
-        )
 
 
 def test_modify_column() -> None:
@@ -172,7 +167,7 @@ def test_modify_column() -> None:
         ValueError,
         match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
     ):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
 
@@ -190,7 +185,7 @@ def test_reorder_columns() -> None:
         ValueError,
         match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
    ):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
 
@@ -207,6 +202,6 @@ def test_delete_column() -> None:
         "{ name: newcol1, type: DateTime }",
     ]
     with pytest.raises(ValueError, match="Column removal is not supported"):
-        generate_migration(
+        generate_python_migration(
             mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
         )
diff --git a/tests/migrations/autogeneration/test_ui.py b/tests/migrations/autogeneration/test_ui.py
index afc7645baee..bf387d9311f 100644
--- a/tests/migrations/autogeneration/test_ui.py
+++ b/tests/migrations/autogeneration/test_ui.py
@@ -1,7 +1,9 @@
 import os
 import subprocess
+from glob import glob
 
-from snuba.migrations.autogeneration.main import get_working_and_head
+from snuba.clusters.storage_sets import StorageSetKey
+from snuba.migrations.autogeneration.main import get_working_and_head, write_migration
 
 
 def test_get_working_and_head() -> None:
@@ -44,3 +46,17 @@ def test_get_working_and_head() -> None:
         new_storage, old_storage = get_working_and_head(os.path.join(dir, fname))
         assert new_storage == "hello world\ngoodbye world"
         assert old_storage == "hello world\n"
+
+
+def test_write_migration() -> None:
+    content = "bllop"
+    name = "kyles_migration"
+    write_migration(content, StorageSetKey.EVENTS, name)
+    written_migration = sorted(
+        glob(f"snuba/snuba_migrations/events/[0-9][0-9][0-9][0-9]_{name}.py")
+    )[0]
+    try:
+        with open(written_migration) as f:
+            assert f.read().strip() == content
+    finally:
+        os.remove(written_migration)
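
Usage sketch (not part of the diff): the snippet below mirrors the new tests and shows how the pieces added here fit together when driven directly from Python rather than through the `snuba migrations generate` CLI. The two yaml file names and the "add_mycol" migration name are illustrative assumptions, not paths from the repo; in the real flow get_working_and_head() supplies the HEAD and working-dir versions of the storage definition.

    import yaml

    from snuba.clusters.storage_sets import StorageSetKey
    from snuba.migrations.autogeneration.diff import generate_python_migration
    from snuba.migrations.autogeneration.main import write_migration

    # Hypothetical before/after storage definitions; the only difference is an added column.
    with open("errors_head.yaml") as f:       # storage.yaml as it exists at HEAD (assumed file)
        old_storage = yaml.safe_load(f)
    with open("errors_working.yaml") as f:    # uncommitted working-dir copy with the new column (assumed file)
        new_storage = yaml.safe_load(f)

    # Build the python migration source (AddColumn forwards ops, DropColumn backwards ops).
    migration = generate_python_migration(old_storage, new_storage)

    # Write it to snuba/snuba_migrations/<set_key>/NNNN_add_mycol.py and get the path back.
    path = write_migration(
        migration,
        StorageSetKey(new_storage["storage"]["set_key"]),
        "add_mycol",
    )
    print(path)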