Merge remote-tracking branch 'origin' into add-timeout-to-batch-writer
ayirr7 committed Jun 25, 2024
2 parents 296fbe1 + 4a28b37 commit dad7f7a
Showing 14 changed files with 511 additions and 320 deletions.
8 changes: 7 additions & 1 deletion .vscode/launch.json
@@ -1,6 +1,13 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Pytest Current File",
"type": "debugpy",
@@ -10,7 +17,6 @@
"${file}"
],
"console": "integratedTerminal",
"justMyCode": true // wasn't sure if I should keep or delete
}
]
}
7 changes: 7 additions & 0 deletions snuba/cli/migrations.py
@@ -1,4 +1,5 @@
import os
import re
from typing import Optional, Sequence

import click
@@ -392,5 +393,11 @@ def generate(
The migration will be written into the local directory. The user is responsible for making
the commit, PR, and merging.
"""
expected_pattern = r"(.+/)?snuba/datasets/configuration/.*/storages/.*\.(yml|yaml)"
if not re.fullmatch(expected_pattern, storage_path):
raise click.ClickException(
f"Storage path {storage_path} does not match expected pattern {expected_pattern}"
)

autogeneration.generate(storage_path)
click.echo("This function is under construction.")
14 changes: 7 additions & 7 deletions snuba/datasets/configuration/replays/storages/replays.yaml
@@ -190,19 +190,19 @@ allocation_policies:
- project_id
default_config_overrides:
is_enforced: 0
- name: BytesScannedWindowAllocationPolicy
- name: ReferrerGuardRailPolicy
args:
required_tenant_types:
- organization_id
- referrer
default_config_overrides:
is_enforced: 1
throttled_thread_number: 1
org_limit_bytes_scanned: 100000
- name: ReferrerGuardRailPolicy
is_enforced: 0
is_active: 0
- name: BytesScannedRejectingPolicy
args:
required_tenant_types:
- organization_id
- project_id
- referrer
default_config_overrides:
is_enforced: 0
is_active: 0
is_enforced: 0
15 changes: 8 additions & 7 deletions snuba/datasets/configuration/spans/storages/spans.yaml
@@ -115,22 +115,23 @@ allocation_policies:
- project_id
default_config_overrides:
is_enforced: 0
- name: BytesScannedWindowAllocationPolicy
- name: ReferrerGuardRailPolicy
args:
required_tenant_types:
- organization_id
- referrer
default_config_overrides:
is_enforced: 1
throttled_thread_number: 1
org_limit_bytes_scanned: 100000
- name: ReferrerGuardRailPolicy
is_enforced: 0
is_active: 0
- name: BytesScannedRejectingPolicy
args:
required_tenant_types:
- organization_id
- project_id
- referrer
default_config_overrides:
is_enforced: 0
is_active: 0
is_enforced: 0

query_processors:
- processor: UniqInSelectAndHavingProcessor
- processor: UUIDColumnProcessor
139 changes: 139 additions & 0 deletions snuba/migrations/autogeneration/diff.py
@@ -0,0 +1,139 @@
from typing import cast

import yaml

from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.configuration.utils import parse_columns
from snuba.migrations.columns import MigrationModifiers
from snuba.migrations.operations import AddColumn, DropColumn, OperationTarget
from snuba.utils.schemas import Column, ColumnType, SchemaModifiers

"""
This file is for autogenerating the migration for adding a column to your storage.
"""


def generate_migration_ops(
oldstorage: str, newstorage: str
) -> tuple[list[AddColumn], list[DropColumn]]:
"""
Input:
old_storage, the original storage yaml in str format
new_storage, the modified storage yaml in str format
Returns a tuple (forwardops, backwardsops); these are the forward and backward migration
operations required to migrate the storage as described in the given yaml files.
Only supports adding columns, throws error for anything else.
"""
valid, reason = _is_valid_add_column(oldstorage, newstorage)
if not valid:
raise ValueError(reason)

oldcol_names = set(
col["name"] for col in yaml.safe_load(oldstorage)["schema"]["columns"]
)
newstorage_dict = yaml.safe_load(newstorage)
newcols = newstorage_dict["schema"]["columns"]

forwardops: list[AddColumn] = []
for i, col in enumerate(newcols):
if col["name"] not in oldcol_names:
column = _schema_column_to_migration_column(parse_columns([col])[0])
after = newcols[i - 1]["name"]
storage_set = StorageSetKey(newstorage_dict["storage"]["set_key"])
forwardops += [
AddColumn(
storage_set=storage_set,
table_name=newstorage_dict["schema"]["local_table_name"],
column=column,
after=after,
target=OperationTarget.LOCAL,
),
AddColumn(
storage_set=storage_set,
table_name=newstorage_dict["schema"]["dist_table_name"],
column=column,
after=after,
target=OperationTarget.DISTRIBUTED,
),
]
return (forwardops, [op.get_reverse() for op in reversed(forwardops)])


def _is_valid_add_column(oldstorage: str, newstorage: str) -> tuple[bool, str]:
"""
Input:
old_storage, the old storage yaml in str format
new_storage, the modified storage yaml in str format
Returns True if the changes to the storage are a valid column addition, False otherwise,
along with a reason.
"""
oldstorage_dict = yaml.safe_load(oldstorage)
newstorage_dict = yaml.safe_load(newstorage)
if oldstorage_dict == newstorage_dict:
return True, "storages are the same"

# nothing changed but the columns
t1 = oldstorage_dict["schema"].pop("columns")
t2 = newstorage_dict["schema"].pop("columns")
if not (oldstorage_dict == newstorage_dict):
return (
False,
"Expected the only change to the storage to be the columns, but that is not true",
)
oldstorage_dict["schema"]["columns"] = t1
newstorage_dict["schema"]["columns"] = t2

# only changes to columns is additions
oldstorage_cols = oldstorage_dict["schema"]["columns"]
newstorage_cols = newstorage_dict["schema"]["columns"]

colnames_old = set(e["name"] for e in oldstorage_cols)
colnames_new = set(e["name"] for e in newstorage_cols)
if not colnames_old.issubset(colnames_new):
return (False, "Column removal is not supported")

pold, pnew = 0, 0
while pold < len(oldstorage_cols) and pnew < len(newstorage_cols):
curr_old = oldstorage_cols[pold]
curr_new = newstorage_cols[pnew]

if curr_old == curr_new:
pold += 1
pnew += 1
elif curr_new["name"] in colnames_old:
return (
False,
f"Modification to columns in unsupported, column '{curr_new['name']}' was modified or reordered",
)
else:
if pold == 0:
return (
False,
"Adding a column to the beginning is currently unsupported, please add it anywhere else.",
)
else:
pnew += 1
assert pold == len(oldstorage_cols) # should always hold
return True, ""


def _schema_column_to_migration_column(
column: Column[SchemaModifiers],
) -> Column[MigrationModifiers]:
"""
Given SchemaModifiers returns equivalent MigrationModifiers.
Only nullable is supported; throws an error if the conversion can't be made.
"""
newtype = cast(ColumnType[MigrationModifiers], column.type.get_raw())
mods = column.type.get_modifiers()
if not mods:
return Column(column.name, newtype)

# convert schema modifiers to migration modifiers
if mods.readonly:
raise ValueError("readonly modifier is not supported")
newtype = newtype.set_modifiers(MigrationModifiers(nullable=mods.nullable))
return Column(column.name, newtype)
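To show how the new API is meant to be used, here is a minimal sketch of calling generate_migration_ops; the storage YAML is a trimmed, hypothetical example carrying only the keys this module reads (storage.set_key, the schema columns and the table names), whereas real storage configs contain many more fields:

from snuba.migrations.autogeneration.diff import generate_migration_ops

old_storage = """
storage:
  key: example
  set_key: events
schema:
  columns:
    - { name: project_id, type: UInt, args: { size: 64 } }
  local_table_name: example_local
  dist_table_name: example_dist
"""

# Identical to old_storage except for one column appended after project_id.
new_storage = """
storage:
  key: example
  set_key: events
schema:
  columns:
    - { name: project_id, type: UInt, args: { size: 64 } }
    - { name: duration_ms, type: UInt, args: { size: 32 } }
  local_table_name: example_local
  dist_table_name: example_dist
"""

forward_ops, backward_ops = generate_migration_ops(old_storage, new_storage)
# forward_ops: AddColumn on the local and distributed tables, placed after project_id
# backward_ops: the matching DropColumn operations, in reverse order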
36 changes: 26 additions & 10 deletions snuba/migrations/autogeneration/main.py
@@ -1,11 +1,27 @@
import os
import subprocess

from snuba.migrations.autogeneration.diff import generate_migration_ops

def generate(storage_path: str) -> tuple[str, str]:
storage_path = os.path.realpath(os.path.abspath(os.path.expanduser(storage_path)))

# get the version of the file at HEAD
def generate(storage_path: str) -> None:
# load into memory the given storage and the version of it at HEAD
new_storage, old_storage = get_working_and_head(storage_path)

# generate the migration operations
generate_migration_ops(old_storage, new_storage)


def get_working_and_head(path: str) -> tuple[str, str]:
"""
Given a path to a file, returns the contents of the file in the working directory
and the contents of it at HEAD in the git repo, as a tuple: (working, head)
preconditions:
- path is a valid path to a file in a git repo
"""
path = os.path.realpath(os.path.abspath(os.path.expanduser(path)))
# get the version at HEAD
try:
repo_path = (
subprocess.run(
@@ -14,15 +30,15 @@ def generate(storage_path: str) -> tuple[str, str]:
"rev-parse",
"--show-toplevel",
],
cwd=os.path.dirname(storage_path),
cwd=os.path.dirname(path),
capture_output=True,
check=True,
)
.stdout.decode("utf-8")
.strip()
)
repo_rel_path = os.path.relpath(storage_path, repo_path)
old_storage = subprocess.run(
repo_rel_path = os.path.relpath(path, repo_path)
head_file = subprocess.run(
["git", "show", f"HEAD:{repo_rel_path}"],
cwd=repo_path,
capture_output=True,
@@ -31,8 +47,8 @@
except subprocess.CalledProcessError as e:
raise ValueError(e.stderr.decode("utf-8")) from e

# get the user-provided (modified) storage
with open(storage_path, "r") as f:
new_storage = f.read()
# working
with open(path, "r") as f:
working_file = f.read()

return old_storage, new_storage
return (working_file, head_file)
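A quick sketch of the helper's contract; the path is only an example, and the call has to run from inside a git checkout of the repo:

from snuba.migrations.autogeneration.main import get_working_and_head

# Returns (working, head): the file as it currently exists on disk,
# and the same file as committed at HEAD.
working, head = get_working_and_head(
    "snuba/datasets/configuration/spans/storages/spans.yaml"
)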
2 changes: 1 addition & 1 deletion snuba/migrations/clickhouse.py
@@ -1,4 +1,4 @@
CLICKHOUSE_SERVER_MIN_VERSION = "21.8.12.29"
# Note: 21.8.12.29 and 21.8.13.1 are used in self-hosted builds
# even though SaaS clusters are all on 22.8 or above
CLICKHOUSE_SERVER_MAX_VERSION = "23.3.19.33"
CLICKHOUSE_SERVER_MAX_VERSION = "23.8.11.29"
(Diffs for the remaining 7 changed files are not shown.)
