Skip to content

Commit

Permalink
fix: account for cross-listing splitting and merging when assigning course_id
Browse files Browse the repository at this point in the history
  • Loading branch information
Josh-Cena committed Oct 23, 2024
1 parent 19694e2 commit 1dd6572
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
16 changes: 12 additions & 4 deletions ferry/transform/import_courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def resolve_cross_listings(listings: pd.DataFrame, data_dir: Path) -> pd.DataFra
)

next_course_id = max(course_id_cache.values(), default=0)
course_ids_assigned: set[int] = set()

def listing_group_to_id(group: pd.DataFrame) -> int:
nonlocal next_course_id
Expand All @@ -100,12 +101,19 @@ def listing_group_to_id(group: pd.DataFrame) -> int:
)
all_course_ids.discard(None)
if len(all_course_ids) > 1:
raise ValueError(
f"Unexpected: {group['temp_course_id']} is matched to multiple courses: {all_course_ids}"
logging.warning(
f"The following courses are mapped to multiple courses: {all_course_ids}:\n{listings.loc[group['temp_course_id'].index][['season_code', 'title', 'course_code', 'crns']]}\nThey will be merged into the first one"
)
if all_course_ids:
return cast(int, all_course_ids.pop())
already_assigned_ids = all_course_ids & course_ids_assigned
if already_assigned_ids:
logging.warning(f"Course ID {already_assigned_ids} is already used by another group; probably because cross-listings are split")
unassigned_ids = all_course_ids - course_ids_assigned
if unassigned_ids:
id = cast(int, unassigned_ids.pop())
course_ids_assigned.add(id)
return id
next_course_id += 1
course_ids_assigned.add(next_course_id)
return next_course_id

course_id = (
Expand Down
8 changes: 5 additions & 3 deletions ferry/transform/transform_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ def assign_code(row: pd.Series):
(tag for tag, condition in tag_candidates.items() if condition), None
)


evaluation_questions["tag"] = pd.Series(dtype="string") if len(evaluation_questions) == 0 else evaluation_questions.apply(assign_code, axis=1)

evaluation_questions["tag"] = (
pd.Series(dtype="string")
if len(evaluation_questions) == 0
else evaluation_questions.apply(assign_code, axis=1)
)

return evaluation_questions

Expand Down

0 comments on commit 1dd6572

Please sign in to comment.