Skip to content

Commit 3119fd3

Browse files
committed
For xPRO, conditionally ingest each resource's topics depending on the topic format (prolearn or mit-learn)
1 parent 94d9235 commit 3119fd3

File tree

3 files changed

+43
-18
lines changed

3 files changed

+43
-18
lines changed

learning_resources/etl/xpro.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,11 @@ def _parse_datetime(value):
5454

5555
def parse_topics(resource_data: dict) -> list[dict]:
5656
"""
57-
Get a list containing {"name": <topic>} dict objects
57+
Get a list containing {"name": <topic>} dict objects.
58+
May be a mix of prolearn and mit-learn topics.
59+
If all topics are prolearn topics, transform them to mit-learn topics.
60+
Otherwise, ignore the prolearn topics and return only the mit-learn topics.
61+
5862
Args:
5963
resource_data: course or program data
6064
Returns:
@@ -63,14 +67,18 @@ def parse_topics(resource_data: dict) -> list[dict]:
6367
extracted_topics = resource_data["topics"]
6468
if not extracted_topics:
6569
return []
66-
return transform_topics(
67-
[
68-
{"name": topic["name"].split(":")[-1].strip()}
69-
for topic in extracted_topics
70-
if topic
71-
],
72-
OfferedBy.xpro.name,
73-
)
70+
prolearn_topics = [topic for topic in extracted_topics if ":" in topic["name"]]
71+
if len(prolearn_topics) == len(extracted_topics):
72+
return transform_topics(
73+
[
74+
{"name": topic["name"].split(":")[-1].strip()}
75+
for topic in extracted_topics
76+
if topic
77+
],
78+
OfferedBy.xpro.name,
79+
)
80+
else:
81+
return [topic for topic in extracted_topics if ":" not in topic["name"]]
7482

7583

7684
def extract_programs():

learning_resources/etl/xpro_test.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,23 @@ def test_program_run_start_date_value(
326326
)
327327

328328

329-
def test_parse_topics_data():
329+
@pytest.mark.parametrize(
330+
("raw_topics", "expected_topics"),
331+
[
332+
(["Technology:AI/Machine Learning", "Management"], ["Management"]),
333+
(
334+
["Technology:AI/Machine Learning", "Business:Management"],
335+
["AI", "Machine Learning", "Management"],
336+
),
337+
(["Machine Learning", "Management"], ["Machine Learning", "Management"]),
338+
(["AI", "Machine Learning"], ["AI", "Machine Learning"]),
339+
(
340+
["AI", "Machine Learning", "Technology:AI/Machine Learning"],
341+
["AI", "Machine Learning"],
342+
),
343+
],
344+
)
345+
def test_parse_topics_data(raw_topics, expected_topics):
330346
"""Test that topics are correctly parsed from the xpro data"""
331347
offeror = LearningResourceOfferorFactory.create(is_xpro=True)
332348
LearningResourceTopicMappingFactory.create(
@@ -345,10 +361,8 @@ def test_parse_topics_data():
345361
topic_name="Management",
346362
)
347363
course_data = {
348-
"topics": [{"name": "AI/Machine Learning"}, {"name": "Management"}],
364+
"topics": [{"name": topic} for topic in raw_topics],
349365
}
350-
assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == [
351-
{"name": "AI"},
352-
{"name": "Machine Learning"},
353-
{"name": "Management"},
354-
]
366+
assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == sorted(
367+
[{"name": topic} for topic in expected_topics], key=lambda topic: topic["name"]
368+
)

test_json/xpro_courses.json

+5-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
"courseruns": [],
1010
"next_run_id": null,
1111
"platform": "xPRO",
12-
"topics": [{ "name": "Business:Leadership & Organizations" }],
12+
"topics": [
13+
{ "name": "Organizations & Leadership" },
14+
{ "name": "Business:Leadership & Organizations" }
15+
],
1316
"format": "Online",
1417
"availability": "dated",
1518
"credits": "1.25"
@@ -38,7 +41,7 @@
3841
}
3942
],
4043
"next_run_id": 49,
41-
"topics": [{ "name": "Business:Leadership & Organizations" }],
44+
"topics": [{ "name": "Organizations & Leadership" }],
4245
"format": "In person",
4346
"availability": "dated",
4447
"credits": "2.25"

0 commit comments

Comments
 (0)