Skip to content

Commit

Permalink
refactor: move invariant check from DB staging to transformer
Browse files Browse the repository at this point in the history
  • Loading branch information
Josh-Cena committed Oct 19, 2024
1 parent da25013 commit 2c7ccae
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ferry/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
)
from .import_courses import import_courses
from .import_evaluations import import_evaluations
from .invariants import check_invariants


def write_csvs(tables: dict[str, pd.DataFrame], data_dir: Path):
Expand Down Expand Up @@ -110,6 +111,8 @@ def transform(data_dir: Path) -> dict[str, pd.DataFrame]:
.loc[:, [column.key for column in db_table.columns]]
)

check_invariants(all_tables)

print("\033[F", end="")
print("Computing secondary attributes... ✔")

Expand Down
51 changes: 51 additions & 0 deletions ferry/transform/invariants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pandas as pd


class InvariantError(Exception):
    """Raised when the transformed tables violate a data invariant."""


def check_invariants(tables: dict[str, pd.DataFrame]):
    """
    Validate cross-table consistency rules on the transformed tables.

    Rules, checked in this order (the first violation aborts the check):
    - a listing and the course it points to (same course_id) share the same
      season_code.
    - every course has at least one listing.
    - evaluation_questions.options is null iff
      evaluation_questions.is_narrative = True.

    Reads the "listings", "courses" and "evaluation_questions" entries of
    `tables`. Returns None when every rule holds; raises InvariantError with
    the offending rows embedded in the message otherwise.
    """

    # Rule 1: pair each listing with its course and compare season codes.
    listing_keys = tables["listings"][["course_id", "season_code"]]
    course_keys = tables["courses"][["course_id", "season_code"]]
    paired = listing_keys.merge(
        course_keys,
        on="course_id",
        suffixes=("_listing", "_course"),
    )
    season_differs = paired["season_code_listing"] != paired["season_code_course"]
    mismatched = paired[season_differs]
    if not mismatched.empty:
        raise InvariantError(
            f"listing.season_code != course.season_code for {mismatched}"
        )

    # Rule 2: a course whose id never shows up in listings has no listing.
    course_ids = tables["courses"]["course_id"]
    listed = course_ids.isin(tables["listings"]["course_id"])
    unlisted = ~listed
    if unlisted.any():
        raise InvariantError(f"courses with no listing {course_ids[unlisted]}")

    # Rule 3: narrative questions carry no options; all others must have them.
    # The flag is compared against True/False explicitly, so a row whose flag
    # is neither value is matched by neither filter.
    questions = tables["evaluation_questions"]
    narrative_flag = questions["is_narrative"]
    option_values = questions["options"]
    narrative_having_options = questions[
        (narrative_flag == True) & (option_values.notnull())  # noqa: E712
    ]
    plain_missing_options = questions[
        (narrative_flag == False) & (option_values.isnull())  # noqa: E712
    ]
    if not narrative_having_options.empty:
        raise InvariantError(f"narrative with options {narrative_having_options}")
    if not plain_missing_options.empty:
        raise InvariantError(
            f"non-narrative without options {plain_missing_options}"
        )

0 comments on commit 2c7ccae

Please sign in to comment.