Skip to content

Commit 58ecb04

Browse files
authored
Populate more fields for canvas courses (#2404)
* adding start date * adding end date * adding method to get context info * add url field * adding migration to fix model * adding migration to fix model * fixing tests * fixing migration order * fixing migration order * reverting model changes * fixing issue with end dates missing * add log * log only if dates are undefined * log only if dates are undefined
1 parent c1ae257 commit 58ecb04

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed

learning_resources/etl/canvas.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import zipfile
55
from collections import defaultdict
66
from collections.abc import Generator
7+
from datetime import datetime
78
from io import BytesIO
89
from pathlib import Path
910
from tempfile import TemporaryDirectory
@@ -73,20 +74,40 @@ def sync_canvas_archive(bucket, key: str, overwrite):
7374
return resource_readable_id, run
7475

7576

77+
def _course_url(course_archive_path) -> str:
    """
    Build the Canvas course URL for a course export archive.

    The domain and course id are read from the dict returned by
    parse_context_xml using .get(), so a value that is absent from that
    dict is interpolated into the URL as the text "None".

    Args:
        course_archive_path: path to the Canvas course archive, passed
            straight through to parse_context_xml

    Returns:
        str: URL of the form https://<canvas_domain>/courses/<course_id>/
    """
    context = parse_context_xml(course_archive_path)
    domain = context.get("canvas_domain")
    course_id = context.get("course_id")
    return f"https://{domain}/courses/{course_id}/"
80+
81+
7682
def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
7783
"""
7884
Generate and return a LearningResourceRun for a Canvas course
7985
"""
8086
checksum = calc_checksum(course_archive_path)
8187
course_info = parse_canvas_settings(course_archive_path)
8288
course_title = course_info.get("title")
89+
url = _course_url(course_archive_path)
90+
start_at = course_info.get("start_at")
91+
end_at = course_info.get("conclude_at")
92+
if start_at:
93+
try:
94+
start_at = datetime.fromisoformat(start_at)
95+
except (ValueError, TypeError):
96+
log.warning("Invalid start_at date format: %s", start_at)
97+
if end_at:
98+
try:
99+
end_at = datetime.fromisoformat(end_at)
100+
except (ValueError, TypeError):
101+
log.warning("Invalid end_at date format: %s", end_at)
102+
83103
readable_id = f"{course_folder}-{course_info.get('course_code')}"
84104
# create placeholder learning resource
85105
resource, _ = LearningResource.objects.update_or_create(
86106
readable_id=readable_id,
87107
defaults={
88108
"title": course_title,
89109
"published": False,
110+
"url": url,
90111
"test_mode": True,
91112
"etl_source": ETLSource.canvas.name,
92113
"platform": LearningResourcePlatform.objects.get(
@@ -100,6 +121,8 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
100121
run_id=f"{readable_id}+canvas",
101122
learning_resource=resource,
102123
published=True,
124+
start_date=start_at,
125+
end_date=end_at,
103126
)
104127
run = resource.runs.first()
105128
resource_readable_id = run.learning_resource.readable_id
@@ -206,6 +229,21 @@ def transform_canvas_problem_files(
206229
yield problem_file_data
207230

208231

232+
def parse_context_xml(course_archive_path: str) -> dict:
    """
    Parse course_settings/context.xml and return the Canvas context fields.

    Args:
        course_archive_path: path (or file-like object) of the Canvas course
            export zip archive

    Returns:
        dict: whichever of "course_id", "root_account_id", "canvas_domain"
            and "root_account_name" are present with non-blank text, mapped
            to that text with surrounding whitespace removed. Missing or
            empty elements are left out, so callers can rely on .get().

    Raises:
        KeyError: if the archive has no course_settings/context.xml member
    """
    with zipfile.ZipFile(course_archive_path, "r") as course_archive:
        context = course_archive.read("course_settings/context.xml")

    root = ElementTree.fromstring(context)
    namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"}
    item_keys = ["course_id", "root_account_id", "canvas_domain", "root_account_name"]

    context_info = {}
    for key in item_keys:
        element = root.find(f"ns:{key}", namespaces)
        if element is None:
            continue
        # element.text is None for an empty tag and may carry layout
        # whitespace in pretty-printed XML; neither should leak to callers.
        text = (element.text or "").strip()
        if text:
            context_info[key] = text

    return context_info
245+
246+
209247
def parse_module_meta(course_archive_path: str) -> dict:
210248
"""
211249
Parse module_meta.xml and return publish/active status of resources.

learning_resources/etl/canvas_test.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,17 @@ def test_run_for_canvas_archive_creates_resource_and_run(tmp_path, mocker):
111111
"learning_resources.etl.canvas.parse_canvas_settings",
112112
return_value={"title": "Test Course", "course_code": "TEST101"},
113113
)
114+
mocker.patch(
115+
"learning_resources.etl.canvas.parse_context_xml",
116+
return_value={"course_id": "123", "canvas_domain": "mit.edu"},
117+
)
118+
114119
mocker.patch("learning_resources.etl.canvas.calc_checksum", return_value="abc123")
115120
# No resource exists yet
116-
course_archive_path = tmp_path / "archive.zip"
117-
course_archive_path.write_text("dummy")
121+
zip_path = tmp_path / "archive.zip"
122+
118123
_, run = run_for_canvas_archive(
119-
course_archive_path, course_folder=course_folder, overwrite=True
124+
zip_path, course_folder=course_folder, overwrite=True
120125
)
121126
resource = LearningResource.objects.get(readable_id=f"{course_folder}-TEST101")
122127
assert resource.title == "Test Course"
@@ -138,6 +143,10 @@ def test_run_for_canvas_archive_creates_run_if_none_exists(tmp_path, mocker):
138143
"learning_resources.etl.canvas.parse_canvas_settings",
139144
return_value={"title": "Test Course", "course_code": "TEST104"},
140145
)
146+
mocker.patch(
147+
"learning_resources.etl.canvas.parse_context_xml",
148+
return_value={"course_id": "123", "canvas_domain": "mit.edu"},
149+
)
141150
mocker.patch(
142151
"learning_resources.etl.canvas.calc_checksum", return_value="checksum104"
143152
)

0 commit comments

Comments
 (0)