Skip to content

Commit 5950b9b

Browse files
committed
Assume all topics are in xpro format (those that aren't won't match existing topics and will be ignored).
1 parent 92f74ec commit 5950b9b

File tree

3 files changed: 50 additions and 83 deletions

learning_resources/etl/loaders_test.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@
6060
LearningResourcePlatformFactory,
6161
LearningResourceRunFactory,
6262
LearningResourceTopicFactory,
63+
LearningResourceTopicMappingFactory,
6364
PodcastEpisodeFactory,
6465
PodcastFactory,
6566
ProgramFactory,
@@ -718,6 +719,49 @@ def test_load_topics(mocker, parent_factory, topics_exist):
718719
assert parent.learning_resource.topics.count() == 0
719720

720721

722+
@pytest.mark.parametrize(
723+
("raw_topics", "expected_topics"),
724+
[
725+
(["Technology:AI/Machine Learning", "Management"], ["Management"]),
726+
(
727+
["Technology:AI/Machine Learning", "Business:Management"],
728+
[],
729+
),
730+
(["Machine Learning", "Management"], ["Machine Learning", "Management"]),
731+
(["AI", "Machine Learning"], ["AI", "Machine Learning"]),
732+
(
733+
["AI", "Machine Learning", "Technology:AI/Machine Learning"],
734+
["AI", "Machine Learning"],
735+
),
736+
],
737+
)
738+
def test_load_mixed_topics_data(raw_topics, expected_topics):
739+
"""Test that topics are correctly parsed from data containing valid & invalid topics"""
740+
resource = LearningResourceFactory.create(is_course=True, topics=[])
741+
offeror = LearningResourceOfferorFactory.create(is_xpro=True)
742+
LearningResourceTopicMappingFactory.create(
743+
offeror=offeror,
744+
topic=LearningResourceTopicFactory.create(name="AI"),
745+
topic_name="AI/Machine Learning",
746+
)
747+
LearningResourceTopicMappingFactory.create(
748+
offeror=offeror,
749+
topic=LearningResourceTopicFactory.create(name="Machine Learning"),
750+
topic_name="AI/Machine Learning",
751+
)
752+
LearningResourceTopicMappingFactory.create(
753+
offeror=offeror,
754+
topic=LearningResourceTopicFactory.create(name="Management"),
755+
topic_name="Management",
756+
)
757+
758+
load_topics(resource, [{"name": topic} for topic in raw_topics])
759+
760+
assert sorted([topic.name for topic in resource.topics.all()]) == sorted(
761+
expected_topics
762+
)
763+
764+
721765
@pytest.mark.parametrize("instructor_exists", [True, False])
722766
def test_load_instructors(instructor_exists):
723767
"""Test that load_instructors creates and/or assigns instructors to the course run"""

learning_resources/etl/xpro.py

Lines changed: 2 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from learning_resources.etl.utils import (
2121
generate_course_numbers_json,
2222
transform_delivery,
23-
transform_topics,
2423
)
2524
from main.utils import clean_data
2625

@@ -52,35 +51,6 @@ def _parse_datetime(value):
5251
return parse(value).replace(tzinfo=UTC) if value else None
5352

5453

55-
def parse_topics(resource_data: dict) -> list[dict]:
56-
"""
57-
Get a list containing {"name": <topic>} dict objects.
58-
May be a mix of prolearn and mit-learn topics.
59-
If all prolearn topics, transform them to mit-learn topics.
60-
Otherwise, ignore the prolearn topics and return only mit-learn topics
61-
62-
Args:
63-
resource_data: course or program data
64-
Returns:
65-
list of dict: list containing topic dicts with a name attribute
66-
"""
67-
extracted_topics = resource_data["topics"]
68-
if not extracted_topics:
69-
return []
70-
prolearn_topics = [topic for topic in extracted_topics if ":" in topic["name"]]
71-
if len(prolearn_topics) == len(extracted_topics):
72-
return transform_topics(
73-
[
74-
{"name": topic["name"].split(":")[-1].strip()}
75-
for topic in extracted_topics
76-
if topic
77-
],
78-
OfferedBy.xpro.name,
79-
)
80-
else:
81-
return [topic for topic in extracted_topics if ":" not in topic["name"]]
82-
83-
8454
def extract_programs():
8555
"""Loads the xPro catalog data""" # noqa: D401
8656
if settings.XPRO_CATALOG_API_URL:
@@ -155,7 +125,7 @@ def _transform_learning_resource_course(course):
155125
"published": any(
156126
course_run.get("current_price", None) for course_run in course["courseruns"]
157127
),
158-
"topics": parse_topics(course),
128+
"topics": course["topics"],
159129
"runs": [
160130
_transform_run(course_run, course) for course_run in course["courseruns"]
161131
],
@@ -205,7 +175,7 @@ def transform_programs(programs):
205175
program["current_price"]
206176
), # a program is only considered published if it has a product/price
207177
"url": program["url"],
208-
"topics": parse_topics(program),
178+
"topics": program["topics"],
209179
"platform": XPRO_PLATFORM_TRANSFORM.get(program["platform"], None),
210180
"resource_type": LearningResourceType.program.name,
211181
"delivery": transform_delivery(program.get("format")),

learning_resources/etl/xpro_test.py

Lines changed: 4 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,7 @@
2020
from learning_resources.etl.utils import (
2121
transform_delivery,
2222
)
23-
from learning_resources.etl.xpro import _parse_datetime, parse_topics
24-
from learning_resources.factories import (
25-
LearningResourceOfferorFactory,
26-
LearningResourceTopicFactory,
27-
LearningResourceTopicMappingFactory,
28-
)
23+
from learning_resources.etl.xpro import _parse_datetime
2924
from learning_resources.test_utils import set_up_topics
3025
from main.test_utils import any_instance_of
3126

@@ -109,7 +104,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
109104
"published": bool(program_data["current_price"]),
110105
"url": program_data["url"],
111106
"availability": Availability.dated.name,
112-
"topics": parse_topics(program_data),
107+
"topics": program_data["topics"],
113108
"platform": PlatformType.xpro.name,
114109
"resource_type": LearningResourceType.program.name,
115110
"delivery": transform_delivery(program_data.get("format")),
@@ -156,7 +151,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
156151
for course_run in course_data["courseruns"]
157152
),
158153
"availability": Availability.dated.name,
159-
"topics": parse_topics(course_data),
154+
"topics": course_data["topics"],
160155
"resource_type": LearningResourceType.course.name,
161156
"continuing_ed_credits": course_data.get("credits"),
162157
"pace": [Pace.self_paced.name],
@@ -233,7 +228,7 @@ def test_xpro_transform_courses(mock_xpro_courses_data):
233228
for course_run in course_data["courseruns"]
234229
),
235230
"availability": Availability.dated.name,
236-
"topics": parse_topics(course_data),
231+
"topics": course_data["topics"],
237232
"resource_type": LearningResourceType.course.name,
238233
"runs": [
239234
{
@@ -324,45 +319,3 @@ def test_program_run_start_date_value(
324319
assert transformed_programs[0]["runs"][0]["start_date"] == _parse_datetime(
325320
expected_dt
326321
)
327-
328-
329-
@pytest.mark.parametrize(
330-
("raw_topics", "expected_topics"),
331-
[
332-
(["Technology:AI/Machine Learning", "Management"], ["Management"]),
333-
(
334-
["Technology:AI/Machine Learning", "Business:Management"],
335-
["AI", "Machine Learning", "Management"],
336-
),
337-
(["Machine Learning", "Management"], ["Machine Learning", "Management"]),
338-
(["AI", "Machine Learning"], ["AI", "Machine Learning"]),
339-
(
340-
["AI", "Machine Learning", "Technology:AI/Machine Learning"],
341-
["AI", "Machine Learning"],
342-
),
343-
],
344-
)
345-
def test_parse_topics_data(raw_topics, expected_topics):
346-
"""Test that topics are correctly parsed from the xpro data"""
347-
offeror = LearningResourceOfferorFactory.create(is_xpro=True)
348-
LearningResourceTopicMappingFactory.create(
349-
offeror=offeror,
350-
topic=LearningResourceTopicFactory.create(name="AI"),
351-
topic_name="AI/Machine Learning",
352-
)
353-
LearningResourceTopicMappingFactory.create(
354-
offeror=offeror,
355-
topic=LearningResourceTopicFactory.create(name="Machine Learning"),
356-
topic_name="AI/Machine Learning",
357-
)
358-
LearningResourceTopicMappingFactory.create(
359-
offeror=offeror,
360-
topic=LearningResourceTopicFactory.create(name="Management"),
361-
topic_name="Management",
362-
)
363-
course_data = {
364-
"topics": [{"name": topic} for topic in raw_topics],
365-
}
366-
assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == sorted(
367-
[{"name": topic} for topic in expected_topics], key=lambda topic: topic["name"]
368-
)

0 commit comments

Comments (0)