Skip to content

Commit 3c11fab

Browse files
committed
Some refactoring and unit tests
1 parent 411914f commit 3c11fab

File tree

5 files changed

+331
-17
lines changed

5 files changed

+331
-17
lines changed

learning_resources/etl/utils.py

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
import boto3
2424
import rapidjson
2525
import requests
26-
from defusedxml.ElementTree import ParseError, parse
26+
from defusedxml import ElementTree
2727
from django.conf import settings
2828
from django.utils.dateparse import parse_duration
2929
from django.utils.text import slugify
@@ -420,10 +420,10 @@ def get_root_url_for_source(etl_source: str) -> tuple[str, str]:
420420
tuple[str, str]: The base URL and path
421421
"""
422422
mapping = {
423-
ETLSource.mitxonline.value: "https://courses.mitxonline.mit.edu",
424-
ETLSource.xpro.value: "https://courses.xpro.mit.edu",
425-
ETLSource.mit_edx.value: "https://www.edx.org",
426-
ETLSource.oll.value: "https://openlearninglibrary.mit.edu",
423+
ETLSource.mitxonline.value: settings.CONTENT_BASE_URL_MITXONLINE,
424+
ETLSource.xpro.value: settings.CONTENT_BASE_URL_XPRO,
425+
ETLSource.oll.value: settings.CONTENT_BASE_URL_OLL,
426+
ETLSource.mit_edx.value: settings.CONTENT_BASE_URL_EDX,
427427
}
428428
return mapping.get(etl_source)
429429

@@ -460,23 +460,27 @@ def get_url_from_module_id(
460460
log.warning("Module ID is empty")
461461
return None
462462
root_url = get_root_url_for_source(run.learning_resource.etl_source)
463-
with Path.open("video_metadata.json", "w") as f:
464-
json.dump(video_srt_metadata, f, indent=2)
463+
# OLL needs to have 'course-v1:' added to the run_id
464+
run_id = (
465+
f"course-v1:{run.run_id}"
466+
if run.learning_resource.etl_source == ETLSource.oll.value
467+
else run.run_id
468+
)
465469
if module_id.startswith("asset"):
466-
log.info("Getting URL for asset %s", module_id)
470+
log.debug("Getting URL for asset %s", module_id)
467471
asset_meta = (
468472
assets_metadata.get(Path(olx_path).parts[-1], {}) if assets_metadata else {}
469473
)
470474
video_meta = video_srt_metadata.get(module_id, {}) if video_srt_metadata else {}
471475
if video_meta:
472-
log.info("Found video metadata for %s", module_id)
473-
return f"{root_url}/xblock/{video_meta}"
476+
log.debug("Found video metadata for %s", module_id)
477+
return f"{root_url}/courses/{run_id}/jump_to/{video_meta.split('@')[-1]}"
474478
elif module_id.endswith(".srt"):
475-
log.info("NO VIDEO METADATA FOR %s", module_id)
479+
log.debug("No video metadata for %s", module_id)
476480
middle_path = asset_meta.get("custom_md5", "")
477481
return f"{root_url}/{(middle_path + '/') if middle_path else ''}{module_id}"
478482
elif module_id.startswith("block") and is_valid_uuid(module_id.split("@")[-1]):
479-
return f"{root_url}/xblock/{module_id}"
483+
return f"{root_url}/courses/{run_id}/jump_to_id/{module_id.split('@')[-1]}"
480484
else:
481485
log.warning("Unknown module ID format: %s", module_id)
482486
return None
@@ -505,7 +509,7 @@ def parse_video_transcripts_xml(
505509
"""
506510
transcript_mapping = {}
507511
try:
508-
root = parse(xml_content)
512+
root = ElementTree.fromstring(xml_content)
509513

510514
# Get the video url_name from the root video element
511515
video_url_name = root.get("url_name")
@@ -520,7 +524,7 @@ def parse_video_transcripts_xml(
520524
transcript_mapping[
521525
get_edx_module_id(f"static/{transcript_src}", run)
522526
] = get_edx_module_id(str(path), run)
523-
except ParseError:
527+
except ElementTree.ParseError:
524528
log.exception("Error parsing video XML for %s: %s", run, path)
525529
return transcript_mapping
526530

@@ -537,7 +541,7 @@ def get_video_metadata(olx_path: str, run: LearningResourceRun) -> dict:
537541
for root, _, files in os.walk(str(Path(olx_path, "video"))):
538542
path = "/".join(root.split("/")[3:])
539543
for filename in files:
540-
log.info("Processing video file %s in %s", filename, path)
544+
log.debug("Processing video file %s in %s", filename, path)
541545
extension_lower = Path(filename).suffix.lower()
542546
if extension_lower == ".xml":
543547
with Path.open(Path(root, filename), "rb") as f:

0 commit comments

Comments
 (0)