Assume all topics are in xPro format; any that aren't won't match an existing topic and will be ignored.

mbertrand · mbertrand · commit 5950b9bd02ac · 2024-10-01T11:52:09.000-04:00
diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py
@@ -60,6 +60,7 @@
     LearningResourcePlatformFactory,
     LearningResourceRunFactory,
     LearningResourceTopicFactory,
+    LearningResourceTopicMappingFactory,
     PodcastEpisodeFactory,
     PodcastFactory,
     ProgramFactory,
@@ -718,6 +719,49 @@ def test_load_topics(mocker, parent_factory, topics_exist):
     assert parent.learning_resource.topics.count() == 0
 
 
+@pytest.mark.parametrize(
+    ("raw_topics", "expected_topics"),
+    [
+        (["Technology:AI/Machine Learning", "Management"], ["Management"]),
+        (
+            ["Technology:AI/Machine Learning", "Business:Management"],
+            [],
+        ),
+        (["Machine Learning", "Management"], ["Machine Learning", "Management"]),
+        (["AI", "Machine Learning"], ["AI", "Machine Learning"]),
+        (
+            ["AI", "Machine Learning", "Technology:AI/Machine Learning"],
+            ["AI", "Machine Learning"],
+        ),
+    ],
+)
+def test_load_mixed_topics_data(raw_topics, expected_topics):
+    """Test that topics are correctly parsed from data containing valid & invalid topics"""
+    resource = LearningResourceFactory.create(is_course=True, topics=[])
+    offeror = LearningResourceOfferorFactory.create(is_xpro=True)
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="AI"),
+        topic_name="AI/Machine Learning",
+    )
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="Machine Learning"),
+        topic_name="AI/Machine Learning",
+    )
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="Management"),
+        topic_name="Management",
+    )
+
+    load_topics(resource, [{"name": topic} for topic in raw_topics])
+
+    assert sorted([topic.name for topic in resource.topics.all()]) == sorted(
+        expected_topics
+    )
+
+
 @pytest.mark.parametrize("instructor_exists", [True, False])
 def test_load_instructors(instructor_exists):
     """Test that load_instructors creates and/or assigns instructors to the course run"""
diff --git a/learning_resources/etl/xpro.py b/learning_resources/etl/xpro.py
@@ -20,7 +20,6 @@
 from learning_resources.etl.utils import (
     generate_course_numbers_json,
     transform_delivery,
-    transform_topics,
 )
 from main.utils import clean_data
 
@@ -52,35 +51,6 @@ def _parse_datetime(value):
     return parse(value).replace(tzinfo=UTC) if value else None
 
 
-def parse_topics(resource_data: dict) -> list[dict]:
-    """
-    Get a list containing {"name": <topic>} dict objects.
-    May be a mix of prolearn and mit-learn topics.
-    If all prolearn topics, transform them to mit-learn topics.
-    Otherwise, ignore the prolearn topics and return only mit-learn topics
-
-    Args:
-        resource_data: course or program data
-    Returns:
-        list of dict: list containing topic dicts with a name attribute
-    """
-    extracted_topics = resource_data["topics"]
-    if not extracted_topics:
-        return []
-    prolearn_topics = [topic for topic in extracted_topics if ":" in topic["name"]]
-    if len(prolearn_topics) == len(extracted_topics):
-        return transform_topics(
-            [
-                {"name": topic["name"].split(":")[-1].strip()}
-                for topic in extracted_topics
-                if topic
-            ],
-            OfferedBy.xpro.name,
-        )
-    else:
-        return [topic for topic in extracted_topics if ":" not in topic["name"]]
-
-
 def extract_programs():
     """Loads the xPro catalog data"""  # noqa: D401
     if settings.XPRO_CATALOG_API_URL:
@@ -155,7 +125,7 @@ def _transform_learning_resource_course(course):
         "published": any(
             course_run.get("current_price", None) for course_run in course["courseruns"]
         ),
-        "topics": parse_topics(course),
+        "topics": course["topics"],
         "runs": [
             _transform_run(course_run, course) for course_run in course["courseruns"]
         ],
@@ -205,7 +175,7 @@ def transform_programs(programs):
                 program["current_price"]
             ),  # a program is only considered published if it has a product/price
             "url": program["url"],
-            "topics": parse_topics(program),
+            "topics": program["topics"],
             "platform": XPRO_PLATFORM_TRANSFORM.get(program["platform"], None),
             "resource_type": LearningResourceType.program.name,
             "delivery": transform_delivery(program.get("format")),
diff --git a/learning_resources/etl/xpro_test.py b/learning_resources/etl/xpro_test.py
@@ -20,12 +20,7 @@
 from learning_resources.etl.utils import (
     transform_delivery,
 )
-from learning_resources.etl.xpro import _parse_datetime, parse_topics
-from learning_resources.factories import (
-    LearningResourceOfferorFactory,
-    LearningResourceTopicFactory,
-    LearningResourceTopicMappingFactory,
-)
+from learning_resources.etl.xpro import _parse_datetime
 from learning_resources.test_utils import set_up_topics
 from main.test_utils import any_instance_of
 
@@ -109,7 +104,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
             "published": bool(program_data["current_price"]),
             "url": program_data["url"],
             "availability": Availability.dated.name,
-            "topics": parse_topics(program_data),
+            "topics": program_data["topics"],
             "platform": PlatformType.xpro.name,
             "resource_type": LearningResourceType.program.name,
             "delivery": transform_delivery(program_data.get("format")),
@@ -156,7 +151,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
                         for course_run in course_data["courseruns"]
                     ),
                     "availability": Availability.dated.name,
-                    "topics": parse_topics(course_data),
+                    "topics": course_data["topics"],
                     "resource_type": LearningResourceType.course.name,
                     "continuing_ed_credits": course_data.get("credits"),
                     "pace": [Pace.self_paced.name],
@@ -233,7 +228,7 @@ def test_xpro_transform_courses(mock_xpro_courses_data):
                 for course_run in course_data["courseruns"]
             ),
             "availability": Availability.dated.name,
-            "topics": parse_topics(course_data),
+            "topics": course_data["topics"],
             "resource_type": LearningResourceType.course.name,
             "runs": [
                 {
@@ -324,45 +319,3 @@ def test_program_run_start_date_value(
     assert transformed_programs[0]["runs"][0]["start_date"] == _parse_datetime(
         expected_dt
     )
-
-
-@pytest.mark.parametrize(
-    ("raw_topics", "expected_topics"),
-    [
-        (["Technology:AI/Machine Learning", "Management"], ["Management"]),
-        (
-            ["Technology:AI/Machine Learning", "Business:Management"],
-            ["AI", "Machine Learning", "Management"],
-        ),
-        (["Machine Learning", "Management"], ["Machine Learning", "Management"]),
-        (["AI", "Machine Learning"], ["AI", "Machine Learning"]),
-        (
-            ["AI", "Machine Learning", "Technology:AI/Machine Learning"],
-            ["AI", "Machine Learning"],
-        ),
-    ],
-)
-def test_parse_topics_data(raw_topics, expected_topics):
-    """Test that topics are correctly parsed from the xpro data"""
-    offeror = LearningResourceOfferorFactory.create(is_xpro=True)
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="AI"),
-        topic_name="AI/Machine Learning",
-    )
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="Machine Learning"),
-        topic_name="AI/Machine Learning",
-    )
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="Management"),
-        topic_name="Management",
-    )
-    course_data = {
-        "topics": [{"name": topic} for topic in raw_topics],
-    }
-    assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == sorted(
-        [{"name": topic} for topic in expected_topics], key=lambda topic: topic["name"]
-    )