Skip to content

Commit

Permalink
Merge pull request #191 from pacificclimate/i-183-nev_var_name-check-…
Browse files Browse the repository at this point in the history
…constraint

#183 Add check constraint to meta_vars.net_var_name
  • Loading branch information
Nospamas authored Jan 9, 2024
2 parents 583c5f5 + cec1d17 commit 975366c
Show file tree
Hide file tree
Showing 9 changed files with 242 additions and 5 deletions.
5 changes: 5 additions & 0 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
*.pyc
venv
*.egg*
# don't commit poetry cache
.cache
# ignore ide specific folders
.idea
.vscode
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Add net_var_name valid identifier check constraint
Revision ID: 78260d36e42b
Revises: bb2a222a1d4a
Create Date: 2024-01-04 23:22:47.992791
"""
from alembic import op
from pycds import get_schema_name

# revision identifiers, used by Alembic.
revision = "78260d36e42b"
down_revision = "bb2a222a1d4a"
branch_labels = None
depends_on = None

schema_name = get_schema_name()
table_name = "meta_vars"
constraint_name = "ck_net_var_name_valid_identifier"


# Updates existing table data to conform to the new constraint. Because this table is small it is written for
# readability over pure performance doing each replacement seperately.
def update_table():
op.execute(
f"""
CREATE EXTENSION unaccent;
-- strip diacritics from characters to make what we can valid ascii
UPDATE {schema_name}.{table_name} SET net_var_name=unaccent(net_var_name);
-- replace first characters that aren't letters or underscores with an underscore
UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '^[^a-zA-Z_]', '_', 'g');
-- replace all other whitespace & non valid characters
UPDATE {schema_name}.{table_name} SET net_var_name=regexp_replace(net_var_name, '[^a-zA-Z0-9_$]', '_', 'g');
"""
)


def regress_table():
# kept for convention.
# basically impossible to reverse as we can't know what has been removed and what was an underscore already
pass


def upgrade():
update_table()
op.create_check_constraint(
constraint_name,
table_name,
"net_var_name ~ '^[a-zA-Z_][a-zA-Z0-9_$]*$'",
schema=schema_name,
)


def downgrade():
op.drop_constraint(constraint_name, table_name, schema=schema_name, type_="check")
op.execute("DROP EXTENSION unaccent;")
regress_table()
2 changes: 1 addition & 1 deletion pycds/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


def check_migration_version(
executor, schema_name=get_schema_name(), version="bb2a222a1d4a"
executor, schema_name=get_schema_name(), version="78260d36e42b"
):
"""Check that the migration version of the database schema is compatible
with the current version of this package.
Expand Down
7 changes: 7 additions & 0 deletions pycds/orm/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, synonym
from sqlalchemy.schema import UniqueConstraint
from sqlalchemy.schema import CheckConstraint
from geoalchemy2 import Geometry

from citext import CIText
Expand Down Expand Up @@ -297,6 +298,12 @@ class Variable(Base):
UniqueConstraint(
"network_id", "net_var_name", name="network_variable_name_unique"
),
# Values should conform to valid postgres identifiers:
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
CheckConstraint(
"net_var_name ~ '^[a-zA-Z_][a-zA-Z0-9_$]*$'",
name="ck_net_var_name_valid_identifier",
),
)

def __repr__(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/alembic_migrations/test_check_migration_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_get_current_head():
assert get_current_head() == "bb2a222a1d4a"
assert get_current_head() == "78260d36e42b"


@pytest.mark.usefixtures("new_db_left")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import pytest


@pytest.fixture(scope="module")
def target_revision():
return "78260d36e42b"
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""Smoke tests:
- Upgrade adds check constraint to meta_vars.net_var_name such that they are valid sql identifiers
(this boils down to values inserted into the column should not contain whitespace)
- Check constraint can still enter valid data
- Check constraint rejects bad data after migration
"""

# -*- coding: utf-8 -*-
import logging
import pytest
from alembic import command
from sqlalchemy import inspect
from sqlalchemy.exc import IntegrityError
from psycopg2.errors import CheckViolation

logger = logging.getLogger("tests")

# this is the revision *before* the migration is to be run.
prior_revision = "bb2a222a1d4a"
final_revision = "78260d36e42b"


def get_insert_statement(schema_name, insertion_value, vars_id=1):
return (
f"INSERT INTO {schema_name}.meta_vars(vars_id, net_var_name, standard_name, cell_method, display_name)\n"
f"VALUES ({vars_id}, '{insertion_value}', 'test', 'test', 'test');\n"
)


@pytest.mark.usefixtures("new_db_left")
@pytest.mark.parametrize(
"prepared_schema_from_migrations_left", (prior_revision,), indirect=True
)
def test_upgrade(
prepared_schema_from_migrations_left,
alembic_config_left,
schema_name,
):
"""test migration from bb2a222a1d4a to 78260d36e42b."""

# Set up database to version bb2a222a1d4a (previous migration)
engine, script = prepared_schema_from_migrations_left

# (insertion_id, insertion_value, expected_value)
test_values = [
(1, "bad var name with whitespace", "bad_var_name_with_whitespace"),
(2, "bad\nnewline", "bad_newline"),
(3, "bad-dash", "bad_dash"),
(4, "1badnumber", "_badnumber"),
(5, "$badDollar", "_badDollar"),
(6, "schön", "schon"),
(7, "forêt", "foret"),
(8, "tréma", "trema"),
]

def inserter(x):
return get_insert_statement(schema_name, x[2], x[0])

statement = "\n".join(list(map(inserter, test_values)))
engine.execute(statement)

command.upgrade(alembic_config_left, "+1")

result = engine.execute(
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars ORDER BY vars_id"
)

for index, row in enumerate(result):
test_sample = test_values[index]
assert row == (test_sample[0], test_sample[2])


@pytest.mark.usefixtures("new_db_left")
@pytest.mark.parametrize(
"prepared_schema_from_migrations_left", (final_revision,), indirect=True
)
def test_downgrade(
prepared_schema_from_migrations_left,
alembic_config_left,
schema_name,
):
"""test migration from 78260d36e42b to bb2a222a1d4a."""

# Set up database to version bb2a222a1d4a (previous migration)
engine, script = prepared_schema_from_migrations_left

statement = get_insert_statement(schema_name, "good_var_name_with_underscores")
engine.execute(statement)

command.downgrade(alembic_config_left, "-1")

result = engine.execute(
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars"
)

row = next(result)

# After migration is run, bad strings should have whitespace replaced with underscores
assert row == (1, "good_var_name_with_underscores")


@pytest.mark.usefixtures("new_db_left")
@pytest.mark.parametrize(
"prepared_schema_from_migrations_left", (final_revision,), indirect=True
)
# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
# Notably numbers and $ are allowed, but not as the first character
@pytest.mark.parametrize(
"test_value",
["allow_underscores", "allowfullwords", "allowCapitals", "allowNumbers123"],
)
def test_check_good_constraint_values(
prepared_schema_from_migrations_left, schema_name, test_value
):
# Set up database to version 78260d36e42b (after migration)
engine, script = prepared_schema_from_migrations_left

statement = get_insert_statement(schema_name, test_value)
engine.execute(statement)

result = engine.execute(
f"SELECT vars_id, net_var_name FROM {schema_name}.meta_vars"
)

row = next(result)

assert row == (1, test_value)


@pytest.mark.usefixtures("new_db_left")
@pytest.mark.parametrize(
"prepared_schema_from_migrations_left", (final_revision,), indirect=True
)
# Correct values should conform to https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
# Notably numbers and $ are allowed, but not as the first character
@pytest.mark.parametrize(
"test_value",
[
"bad space",
"bad\nnewline",
"bad-dash",
"1badnumber",
"$badDollar",
"schön",
"forêt",
"tréma",
],
)
def test_check_bad_constraint_values(
prepared_schema_from_migrations_left, schema_name, test_value
):
# Set up database to version 78260d36e42b (after migration)
engine, script = prepared_schema_from_migrations_left
# This test passes bad data and expects an integrity error back from SQLAlchemy when executed
with pytest.raises(IntegrityError) as excinfo:
statement = get_insert_statement(schema_name, test_value)
engine.execute(statement)

# The specific exception raised by psycopg2 is stored internally in SQLAlchemy's IntegrityError
# By checking this inner exception we can know that it is specifically a Check Constraint violation
assert isinstance(excinfo.value.orig, CheckViolation)
6 changes: 3 additions & 3 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def action(sesh):

variables = [
TestVariable(
"air-temperature",
"air_temperature",
"degC",
"air_temperature",
"time: point",
Expand All @@ -241,7 +241,7 @@ def action(sesh):
moti,
),
TestVariable(
"average-direction",
"average_direction",
"km/h",
"wind_from_direction",
"time: mean",
Expand All @@ -252,7 +252,7 @@ def action(sesh):
moti,
),
TestVariable(
"dew-point",
"dew_point",
"degC",
"dew_point_temperature",
"time: point",
Expand Down

0 comments on commit 975366c

Please sign in to comment.