-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #191 from pacificclimate/i-183-nev_var_name-check-…
…constraint #183 Add check constraint to meta_vars.net_var_name
- Loading branch information
Showing
9 changed files
with
242 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
*.pyc | ||
venv | ||
*.egg* | ||
# don't commit poetry cache | ||
.cache | ||
# ignore ide specific folders | ||
.idea | ||
.vscode |
58 changes: 58 additions & 0 deletions
58
pycds/alembic/versions/78260d36e42b_add_net_var_name_valid_identifier_check.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
"""Add net_var_name valid identifier check constraint | ||
Revision ID: 78260d36e42b | ||
Revises: bb2a222a1d4a | ||
Create Date: 2024-01-04 23:22:47.992791 | ||
""" | ||
from alembic import op | ||
from pycds import get_schema_name | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "78260d36e42b" | ||
down_revision = "bb2a222a1d4a" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
schema_name = get_schema_name() | ||
table_name = "meta_vars" | ||
constraint_name = "ck_net_var_name_valid_identifier" | ||
|
||
|
||
# Updates existing table data to conform to the new constraint. Because this table is small it is written for | ||
# readability over pure performance doing each replacement seperately. | ||
def update_table(): | ||
op.execute( | ||
f""" | ||
CREATE EXTENSION unaccent; | ||
-- strip diacritics from characters to make what we can valid ascii | ||
UPDATE {schema_name}.{table_name} SET net_var_name=unaccent(net_var_name); | ||
-- replace first characters that aren't letters or underscores with an underscore | ||
UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '^[^a-zA-Z_]', '_', 'g'); | ||
-- replace all other whitespace & non valid characters | ||
UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '[^a-zA-Z0-9_$]', '_', 'g'); | ||
""" | ||
) | ||
|
||
|
||
def regress_table(): | ||
# kept for convention. | ||
# basically impossible to reverse as we can't know what has been removed and what was an underscore already | ||
pass | ||
|
||
|
||
def upgrade(): | ||
update_table() | ||
op.create_check_constraint( | ||
constraint_name, | ||
table_name, | ||
"net_var_name ~ '^[a-zA-Z_][a-zA-Z0-9_$]*$'", | ||
schema=schema_name, | ||
) | ||
|
||
|
||
def downgrade(): | ||
op.drop_constraint(constraint_name, table_name, schema=schema_name, type_="check") | ||
op.execute("DROP EXTENSION unaccent;") | ||
regress_table() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
6 changes: 6 additions & 0 deletions
6
...ic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/conftest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import pytest | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def target_revision(): | ||
return "78260d36e42b" |
161 changes: 161 additions & 0 deletions
161
..._migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/test_smoke.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
"""Smoke tests: | ||
- Upgrade adds check constraint to meta_vars.net_var_name such that they are valid sql identifiers | ||
(this boils down to values inserted into the column should not contain whitespace) | ||
- Check constraint can still enter valid data | ||
- Check constraint rejects bad data after migration | ||
""" | ||
|
||
# -*- coding: utf-8 -*- | ||
import logging | ||
import pytest | ||
from alembic import command | ||
from sqlalchemy import inspect | ||
from sqlalchemy.exc import IntegrityError | ||
from psycopg2.errors import CheckViolation | ||
|
||
logger = logging.getLogger("tests") | ||
|
||
# this is the revision *before* the migration is to be run. | ||
prior_revision = "bb2a222a1d4a" | ||
final_revision = "78260d36e42b" | ||
|
||
|
||
def get_insert_statement(schema_name, insertion_value, vars_id=1): | ||
return ( | ||
f"INSERT INTO {schema_name}.meta_vars(vars_id, net_var_name, standard_name, cell_method, display_name)\n" | ||
f"VALUES ({vars_id}, '{insertion_value}', 'test', 'test', 'test');\n" | ||
) | ||
|
||
|
||
@pytest.mark.usefixtures("new_db_left") | ||
@pytest.mark.parametrize( | ||
"prepared_schema_from_migrations_left", (prior_revision,), indirect=True | ||
) | ||
def test_upgrade( | ||
prepared_schema_from_migrations_left, | ||
alembic_config_left, | ||
schema_name, | ||
): | ||
"""test migration from bb2a222a1d4a to 78260d36e42b.""" | ||
|
||
# Set up database to version bb2a222a1d4a (previous migration) | ||
engine, script = prepared_schema_from_migrations_left | ||
|
||
# (insertion_id, insertion_value, expected_value) | ||
test_values = [ | ||
(1, "bad var name with whitespace", "bad_var_name_with_whitespace"), | ||
(2, "bad\nnewline", "bad_newline"), | ||
(3, "bad-dash", "bad_dash"), | ||
(4, "1badnumber", "_badnumber"), | ||
(5, "$badDollar", "_badDollar"), | ||
(6, "schön", "schon"), | ||
(7, "forêt", "foret"), | ||
(8, "tréma", "trema"), | ||
] | ||
|
||
def inserter(x): | ||
return get_insert_statement(schema_name, x[2], x[0]) | ||
|
||
statement = "\n".join(list(map(inserter, test_values))) | ||
engine.execute(statement) | ||
|
||
command.upgrade(alembic_config_left, "+1") | ||
|
||
result = engine.execute( | ||
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars ORDER BY vars_id" | ||
) | ||
|
||
for index, row in enumerate(result): | ||
test_sample = test_values[index] | ||
assert row == (test_sample[0], test_sample[2]) | ||
|
||
|
||
@pytest.mark.usefixtures("new_db_left") | ||
@pytest.mark.parametrize( | ||
"prepared_schema_from_migrations_left", (final_revision,), indirect=True | ||
) | ||
def test_downgrade( | ||
prepared_schema_from_migrations_left, | ||
alembic_config_left, | ||
schema_name, | ||
): | ||
"""test migration from 78260d36e42b to bb2a222a1d4a.""" | ||
|
||
# Set up database to version bb2a222a1d4a (previous migration) | ||
engine, script = prepared_schema_from_migrations_left | ||
|
||
statement = get_insert_statement(schema_name, "good_var_name_with_underscores") | ||
engine.execute(statement) | ||
|
||
command.downgrade(alembic_config_left, "-1") | ||
|
||
result = engine.execute( | ||
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars" | ||
) | ||
|
||
row = next(result) | ||
|
||
# After migration is run, bad strings should have whitespace replaced with underscores | ||
assert row == (1, "good_var_name_with_underscores") | ||
|
||
|
||
@pytest.mark.usefixtures("new_db_left") | ||
@pytest.mark.parametrize( | ||
"prepared_schema_from_migrations_left", (final_revision,), indirect=True | ||
) | ||
# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS | ||
# Notably numbers and $ are allowed, but not as the first character | ||
@pytest.mark.parametrize( | ||
"test_value", | ||
["allow_underscores", "allowfullwords", "allowCapitals", "allowNumbers123"], | ||
) | ||
def test_check_good_constraint_values( | ||
prepared_schema_from_migrations_left, schema_name, test_value | ||
): | ||
# Set up database to version 78260d36e42b (after migration) | ||
engine, script = prepared_schema_from_migrations_left | ||
|
||
statement = get_insert_statement(schema_name, test_value) | ||
engine.execute(statement) | ||
|
||
result = engine.execute( | ||
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars" | ||
) | ||
|
||
row = next(result) | ||
|
||
assert row == (1, test_value) | ||
|
||
|
||
@pytest.mark.usefixtures("new_db_left") | ||
@pytest.mark.parametrize( | ||
"prepared_schema_from_migrations_left", (final_revision,), indirect=True | ||
) | ||
# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS | ||
# Notably numbers and $ are allowed, but not as the first character | ||
@pytest.mark.parametrize( | ||
"test_value", | ||
[ | ||
"bad space", | ||
"bad\nnewline", | ||
"bad-dash", | ||
"1badnumber", | ||
"$badDollar", | ||
"schön", | ||
"forêt", | ||
"tréma", | ||
], | ||
) | ||
def test_check_bad_constraint_values( | ||
prepared_schema_from_migrations_left, schema_name, test_value | ||
): | ||
# Set up database to version 78260d36e42b (after migration) | ||
engine, script = prepared_schema_from_migrations_left | ||
# This test passes bad data and expects an integrity error back from SQLAlchemy when executed | ||
with pytest.raises(IntegrityError) as excinfo: | ||
statement = get_insert_statement(schema_name, test_value) | ||
engine.execute(statement) | ||
|
||
# The specific exception raised by psycopg2 is stored internally in SQLAlchemy's IntegrityError | ||
# By checking this inner exception we can know that it is specifically a Check Constraint violation | ||
assert isinstance(excinfo.value.orig, CheckViolation) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters