diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 4fb96cab..eb7c6929 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ *.pyc venv *.egg* +# don't commit poetry cache +.cache +# ignore ide specific folders +.idea +.vscode diff --git a/pycds/alembic/versions/78260d36e42b_add_net_var_name_valid_identifier_check.py b/pycds/alembic/versions/78260d36e42b_add_net_var_name_valid_identifier_check.py new file mode 100644 index 00000000..ec97b15e --- /dev/null +++ b/pycds/alembic/versions/78260d36e42b_add_net_var_name_valid_identifier_check.py @@ -0,0 +1,58 @@ +"""Add net_var_name valid identifier check constraint + +Revision ID: 78260d36e42b +Revises: bb2a222a1d4a +Create Date: 2024-01-04 23:22:47.992791 + +""" +from alembic import op +from pycds import get_schema_name + +# revision identifiers, used by Alembic. +revision = "78260d36e42b" +down_revision = "bb2a222a1d4a" +branch_labels = None +depends_on = None + +schema_name = get_schema_name() +table_name = "meta_vars" +constraint_name = "ck_net_var_name_valid_identifier" + + +# Updates existing table data to conform to the new constraint. Because this table is small it is written for +# readability over pure performance doing each replacement seperately. +def update_table(): + op.execute( + f""" + CREATE EXTENSION unaccent; + + -- strip diacritics from characters to make what we can valid ascii + UPDATE {schema_name}.{table_name} SET net_var_name=unaccent(net_var_name); + -- replace first characters that aren't letters or underscores with an underscore + UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '^[^a-zA-Z_]', '_', 'g'); + -- replace all other whitespace & non valid characters + UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '[^a-zA-Z0-9_$]', '_', 'g'); + """ + ) + + +def regress_table(): + # kept for convention. + # basically impossible to reverse as we can't know what has been removed and what was an underscore already + pass + + +def upgrade(): + update_table() + op.create_check_constraint( + constraint_name, + table_name, + "net_var_name ~ '^[a-zA-Z_][a-zA-Z0-9_$]*$'", + schema=schema_name, + ) + + +def downgrade(): + op.drop_constraint(constraint_name, table_name, schema=schema_name, type_="check") + op.execute("DROP EXTENSION unaccent;") + regress_table() diff --git a/pycds/database.py b/pycds/database.py index 145a0d74..e1f39e38 100644 --- a/pycds/database.py +++ b/pycds/database.py @@ -5,7 +5,7 @@ def check_migration_version( - executor, schema_name=get_schema_name(), version="bb2a222a1d4a" + executor, schema_name=get_schema_name(), version="78260d36e42b" ): """Check that the migration version of the database schema is compatible with the current version of this package. diff --git a/pycds/orm/tables.py b/pycds/orm/tables.py index 7f29dafd..21070736 100644 --- a/pycds/orm/tables.py +++ b/pycds/orm/tables.py @@ -33,6 +33,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, synonym from sqlalchemy.schema import UniqueConstraint +from sqlalchemy.schema import CheckConstraint from geoalchemy2 import Geometry from citext import CIText @@ -297,6 +298,12 @@ class Variable(Base): UniqueConstraint( "network_id", "net_var_name", name="network_variable_name_unique" ), + # Values should conform to valid postgres identifiers: + # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS + CheckConstraint( + "net_var_name ~ '^[a-zA-Z_][a-zA-Z0-9_$]*$'", + name="ck_net_var_name_valid_identifier", + ), ) def __repr__(self): diff --git a/tests/alembic_migrations/test_check_migration_version.py b/tests/alembic_migrations/test_check_migration_version.py index c822cb0b..58c37f85 100644 --- a/tests/alembic_migrations/test_check_migration_version.py +++ b/tests/alembic_migrations/test_check_migration_version.py @@ -7,7 +7,7 @@ def test_get_current_head(): - assert get_current_head() == "bb2a222a1d4a" + assert get_current_head() == "78260d36e42b" @pytest.mark.usefixtures("new_db_left") diff --git a/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/__init__.py b/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/conftest.py b/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/conftest.py new file mode 100644 index 00000000..094e18ef --- /dev/null +++ b/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/conftest.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.fixture(scope="module") +def target_revision(): + return "78260d36e42b" diff --git a/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/test_smoke.py b/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/test_smoke.py new file mode 100644 index 00000000..13651ba1 --- /dev/null +++ b/tests/alembic_migrations/versions/v_78260d36e42b_add_net_var_name_valid_identifier_check/test_smoke.py @@ -0,0 +1,161 @@ +"""Smoke tests: +- Upgrade adds check constraint to meta_vars.net_var_name such that they are valid sql identifiers + (this boils down to values inserted into the column should not contain whitespace) +- Check constraint can still enter valid data +- Check constraint rejects bad data after migration +""" + +# -*- coding: utf-8 -*- +import logging +import pytest +from alembic import command +from sqlalchemy import inspect +from sqlalchemy.exc import IntegrityError +from psycopg2.errors import CheckViolation + +logger = logging.getLogger("tests") + +# this is the revision *before* the migration is to be run. +prior_revision = "bb2a222a1d4a" +final_revision = "78260d36e42b" + + +def get_insert_statement(schema_name, insertion_value, vars_id=1): + return ( + f"INSERT INTO {schema_name}.meta_vars(vars_id, net_var_name, standard_name, cell_method, display_name)\n" + f"VALUES ({vars_id}, '{insertion_value}', 'test', 'test', 'test');\n" + ) + + +@pytest.mark.usefixtures("new_db_left") +@pytest.mark.parametrize( + "prepared_schema_from_migrations_left", (prior_revision,), indirect=True +) +def test_upgrade( + prepared_schema_from_migrations_left, + alembic_config_left, + schema_name, +): + """test migration from bb2a222a1d4a to 78260d36e42b.""" + + # Set up database to version bb2a222a1d4a (previous migration) + engine, script = prepared_schema_from_migrations_left + + # (insertion_id, insertion_value, expected_value) + test_values = [ + (1, "bad var name with whitespace", "bad_var_name_with_whitespace"), + (2, "bad\nnewline", "bad_newline"), + (3, "bad-dash", "bad_dash"), + (4, "1badnumber", "_badnumber"), + (5, "$badDollar", "_badDollar"), + (6, "schön", "schon"), + (7, "forêt", "foret"), + (8, "tréma", "trema"), + ] + + def inserter(x): + return get_insert_statement(schema_name, x[2], x[0]) + + statement = "\n".join(list(map(inserter, test_values))) + engine.execute(statement) + + command.upgrade(alembic_config_left, "+1") + + result = engine.execute( + f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars ORDER BY vars_id" + ) + + for index, row in enumerate(result): + test_sample = test_values[index] + assert row == (test_sample[0], test_sample[2]) + + +@pytest.mark.usefixtures("new_db_left") +@pytest.mark.parametrize( + "prepared_schema_from_migrations_left", (final_revision,), indirect=True +) +def test_downgrade( + prepared_schema_from_migrations_left, + alembic_config_left, + schema_name, +): + """test migration from 78260d36e42b to bb2a222a1d4a.""" + + # Set up database to version bb2a222a1d4a (previous migration) + engine, script = prepared_schema_from_migrations_left + + statement = get_insert_statement(schema_name, "good_var_name_with_underscores") + engine.execute(statement) + + command.downgrade(alembic_config_left, "-1") + + result = engine.execute( + f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars" + ) + + row = next(result) + + # After migration is run, bad strings should have whitespace replaced with underscores + assert row == (1, "good_var_name_with_underscores") + + +@pytest.mark.usefixtures("new_db_left") +@pytest.mark.parametrize( + "prepared_schema_from_migrations_left", (final_revision,), indirect=True +) +# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS +# Notably numbers and $ are allowed, but not as the first character +@pytest.mark.parametrize( + "test_value", + ["allow_underscores", "allowfullwords", "allowCapitals", "allowNumbers123"], +) +def test_check_good_constraint_values( + prepared_schema_from_migrations_left, schema_name, test_value +): + # Set up database to version 78260d36e42b (after migration) + engine, script = prepared_schema_from_migrations_left + + statement = get_insert_statement(schema_name, test_value) + engine.execute(statement) + + result = engine.execute( + f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars" + ) + + row = next(result) + + assert row == (1, test_value) + + +@pytest.mark.usefixtures("new_db_left") +@pytest.mark.parametrize( + "prepared_schema_from_migrations_left", (final_revision,), indirect=True +) +# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS +# Notably numbers and $ are allowed, but not as the first character +@pytest.mark.parametrize( + "test_value", + [ + "bad space", + "bad\nnewline", + "bad-dash", + "1badnumber", + "$badDollar", + "schön", + "forêt", + "tréma", + ], +) +def test_check_bad_constraint_values( + prepared_schema_from_migrations_left, schema_name, test_value +): + # Set up database to version 78260d36e42b (after migration) + engine, script = prepared_schema_from_migrations_left + # This test passes bad data and expects an integrity error back from SQLAlchemy when executed + with pytest.raises(IntegrityError) as excinfo: + statement = get_insert_statement(schema_name, test_value) + engine.execute(statement) + + # The specific exception raised by psycopg2 is stored internally in SQLAlchemy's IntegrityError + # By checking this inner exception we can know that it is specifically a Check Constraint violation + assert isinstance(excinfo.value.orig, CheckViolation) diff --git a/tests/helpers.py b/tests/helpers.py index e6c45ab7..e8e9a02f 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -230,7 +230,7 @@ def action(sesh): variables = [ TestVariable( - "air-temperature", + "air_temperature", "degC", "air_temperature", "time: point", @@ -241,7 +241,7 @@ def action(sesh): moti, ), TestVariable( - "average-direction", + "average_direction", "km/h", "wind_from_direction", "time: mean", @@ -252,7 +252,7 @@ def action(sesh): moti, ), TestVariable( - "dew-point", + "dew_point", "degC", "dew_point_temperature", "time: point",