diff --git a/.github/workflows/openapi-diff.yml b/.github/workflows/openapi-diff.yml index 15e296c2b3..7611fe521b 100644 --- a/.github/workflows/openapi-diff.yml +++ b/.github/workflows/openapi-diff.yml @@ -41,7 +41,7 @@ jobs: issue-number: ${{ github.event.pull_request.number }} body-includes: "## OpenAPI Changes" - name: Post changes as comment - uses: peter-evans/create-or-update-comment@23ff15729ef2fc348714a3bb66d2f655ca9066f2 # v3 + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 # Even if no changes, make sure to update old comment if it was found. if: steps.oasdif_summary.outputs.summary || steps.find_comment.outputs.comment-id with: diff --git a/Dockerfile b/Dockerfile index 118ccce197..60e0cba4bf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,7 @@ COPY apt.txt /tmp/apt.txt RUN apt-get update && \ apt-get install -y --no-install-recommends $(grep -vE "^\s*#" apt.txt | tr "\n" " ") && \ apt-get install libpq-dev postgresql-client -y --no-install-recommends && \ + apt-get install poppler-utils -y && \ apt-get clean && \ apt-get purge && \ rm -rf /var/lib/apt/lists/* diff --git a/RELEASE.rst b/RELEASE.rst index e0b7b3f313..92c6b270ec 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,23 @@ Release Notes ============= +Version 0.39.3 +-------------- + +- Regenerate course metadata when course information updates (#2419) +- properly mock posthog.capture (#2409) +- Populate more fields for canvas courses (#2404) +- change problem set list permissions (#2406) +- fix(deps): update dependency ruff to v0.12.7 (#2413) +- fix(deps): update dependency litellm to v1.74.14 (#2412) +- raise page size of userlists and learning paths (#2408) +- Add support for ProgramCollection (#2369) +- Backgrounds for video buttons (#2405) +- fix(deps): update django-health-check digest to b0500d1 (#2391) +- chore(deps): update peter-evans/create-or-update-comment action to v4 (#2396) +- Process pdf problem sets (#2402) +- call 
user_created plugin method for SCIM users (#2387) + Version 0.39.0 (Released August 04, 2025) -------------- diff --git a/frontends/api/package.json b/frontends/api/package.json index 6aa15ba1b8..ba3005d1e0 100644 --- a/frontends/api/package.json +++ b/frontends/api/package.json @@ -30,7 +30,7 @@ "ol-test-utilities": "0.0.0" }, "dependencies": { - "@mitodl/mitxonline-api-axios": "^2025.6.23", + "@mitodl/mitxonline-api-axios": "2025.7.28", "@tanstack/react-query": "^5.66.0", "axios": "^1.6.3" } diff --git a/frontends/api/src/mitxonline/clients.ts b/frontends/api/src/mitxonline/clients.ts index 0bcf3e5366..5552e4ec46 100644 --- a/frontends/api/src/mitxonline/clients.ts +++ b/frontends/api/src/mitxonline/clients.ts @@ -2,6 +2,7 @@ import { B2bApi, CoursesApi, EnrollmentsApi, + ProgramCollectionsApi, ProgramsApi, UsersApi, } from "@mitodl/mitxonline-api-axios/v1" @@ -23,6 +24,11 @@ const usersApi = new UsersApi(undefined, BASE_PATH, axiosInstance) const b2bApi = new B2bApi(undefined, BASE_PATH, axiosInstance) const enrollmentsApi = new EnrollmentsApi(undefined, BASE_PATH, axiosInstance) const programsApi = new ProgramsApi(undefined, BASE_PATH, axiosInstance) +const programCollectionsApi = new ProgramCollectionsApi( + undefined, + BASE_PATH, + axiosInstance, +) const coursesApi = new CoursesApi(undefined, BASE_PATH, axiosInstance) export { @@ -30,6 +36,7 @@ export { b2bApi, enrollmentsApi, programsApi, + programCollectionsApi, coursesApi, axiosInstance, } diff --git a/frontends/api/src/mitxonline/hooks/courses/queries.ts b/frontends/api/src/mitxonline/hooks/courses/queries.ts index 4013379f36..83cd5b83c0 100644 --- a/frontends/api/src/mitxonline/hooks/courses/queries.ts +++ b/frontends/api/src/mitxonline/hooks/courses/queries.ts @@ -1,7 +1,7 @@ import { queryOptions } from "@tanstack/react-query" import type { CoursesApiApiV2CoursesListRequest, - PaginatedCourseWithCourseRunsList, + PaginatedV2CourseWithCourseRunsList, } from "@mitodl/mitxonline-api-axios/v1" import { 
coursesApi } from "../../clients" @@ -18,7 +18,7 @@ const coursesQueries = { coursesList: (opts?: CoursesApiApiV2CoursesListRequest) => queryOptions({ queryKey: coursesKeys.coursesList(opts), - queryFn: async (): Promise => { + queryFn: async (): Promise => { return coursesApi.apiV2CoursesList(opts).then((res) => res.data) }, }), diff --git a/frontends/api/src/mitxonline/hooks/programs/index.ts b/frontends/api/src/mitxonline/hooks/programs/index.ts index 40feec06f4..402720d0f3 100644 --- a/frontends/api/src/mitxonline/hooks/programs/index.ts +++ b/frontends/api/src/mitxonline/hooks/programs/index.ts @@ -1,3 +1,3 @@ -import { programsQueries } from "./queries" +import { programsQueries, programCollectionQueries } from "./queries" -export { programsQueries } +export { programsQueries, programCollectionQueries } diff --git a/frontends/api/src/mitxonline/hooks/programs/queries.ts b/frontends/api/src/mitxonline/hooks/programs/queries.ts index 8327aff306..891914f87a 100644 --- a/frontends/api/src/mitxonline/hooks/programs/queries.ts +++ b/frontends/api/src/mitxonline/hooks/programs/queries.ts @@ -1,20 +1,39 @@ import { queryOptions } from "@tanstack/react-query" import type { + PaginatedV2ProgramCollectionList, PaginatedV2ProgramList, + ProgramCollectionsApiProgramCollectionsListRequest, ProgramsApiProgramsListV2Request, + ProgramsApiProgramsRetrieveV2Request, + V2Program, } from "@mitodl/mitxonline-api-axios/v1" -import { programsApi } from "../../clients" +import { programCollectionsApi, programsApi } from "../../clients" const programsKeys = { root: ["mitxonline", "programs"], + programDetail: (opts: ProgramsApiProgramsRetrieveV2Request) => [ + ...programsKeys.root, + "detail", + opts, + ], programsList: (opts?: ProgramsApiProgramsListV2Request) => [ ...programsKeys.root, "list", opts, ], + programCollectionsList: ( + opts?: ProgramCollectionsApiProgramCollectionsListRequest, + ) => [...programsKeys.root, "collections", "list", opts], } const programsQueries = { + 
programDetail: (opts: ProgramsApiProgramsRetrieveV2Request) => + queryOptions({ + queryKey: programsKeys.programDetail(opts), + queryFn: async (): Promise => { + return programsApi.programsRetrieveV2(opts).then((res) => res.data) + }, + }), programsList: (opts: ProgramsApiProgramsListV2Request) => queryOptions({ queryKey: programsKeys.programsList(opts), @@ -24,4 +43,18 @@ const programsQueries = { }), } -export { programsQueries, programsKeys } +const programCollectionQueries = { + programCollectionsList: ( + opts: ProgramCollectionsApiProgramCollectionsListRequest, + ) => + queryOptions({ + queryKey: programsKeys.programCollectionsList(opts), + queryFn: async (): Promise => { + return programCollectionsApi + .programCollectionsList(opts) + .then((res) => res.data) + }, + }), +} + +export { programsQueries, programCollectionQueries, programsKeys } diff --git a/frontends/api/src/mitxonline/test-utils/factories/courses.ts b/frontends/api/src/mitxonline/test-utils/factories/courses.ts index b034489a3d..8ea469475a 100644 --- a/frontends/api/src/mitxonline/test-utils/factories/courses.ts +++ b/frontends/api/src/mitxonline/test-utils/factories/courses.ts @@ -1,14 +1,14 @@ import { mergeOverrides, makePaginatedFactory } from "ol-test-utilities" import type { PartialFactory } from "ol-test-utilities" -import type { CourseWithCourseRuns } from "@mitodl/mitxonline-api-axios/v1" +import type { V2CourseWithCourseRuns } from "@mitodl/mitxonline-api-axios/v1" import { faker } from "@faker-js/faker/locale/en" import { UniqueEnforcer } from "enforce-unique" const uniqueCourseId = new UniqueEnforcer() const uniqueCourseRunId = new UniqueEnforcer() -const course: PartialFactory = (overrides = {}) => { - const defaults: CourseWithCourseRuns = { +const course: PartialFactory = (overrides = {}) => { + const defaults: V2CourseWithCourseRuns = { id: uniqueCourseId.enforce(() => faker.number.int()), title: faker.lorem.words(3), readable_id: faker.lorem.slug(), @@ -80,9 +80,13 @@ const 
course: PartialFactory = (overrides = {}) => { approved_flexible_price_exists: faker.datatype.boolean(), }, ], + min_price: faker.number.int({ min: 0, max: 1000 }), + max_price: faker.number.int({ min: 1000, max: 2000 }), + include_in_learn_catalog: faker.datatype.boolean(), + ingest_content_files_for_ai: faker.datatype.boolean(), } - return mergeOverrides(defaults, overrides) + return mergeOverrides(defaults, overrides) } const courses = makePaginatedFactory(course) diff --git a/frontends/api/src/mitxonline/test-utils/factories/programs.ts b/frontends/api/src/mitxonline/test-utils/factories/programs.ts index 3600dfb6c5..e657cc5153 100644 --- a/frontends/api/src/mitxonline/test-utils/factories/programs.ts +++ b/frontends/api/src/mitxonline/test-utils/factories/programs.ts @@ -1,6 +1,9 @@ import { mergeOverrides, makePaginatedFactory } from "ol-test-utilities" import type { PartialFactory } from "ol-test-utilities" -import type { V2Program } from "@mitodl/mitxonline-api-axios/v1" +import type { + V2Program, + V2ProgramCollection, +} from "@mitodl/mitxonline-api-axios/v1" import { faker } from "@faker-js/faker/locale/en" import { UniqueEnforcer } from "enforce-unique" @@ -33,10 +36,17 @@ const program: PartialFactory = (overrides = {}) => { ], live: faker.datatype.boolean(), courses: [], + collections: [], req_tree: [], requirements: { - required: [faker.number.int()], - electives: [faker.number.int()], + courses: { + required: [faker.number.int()], + electives: [faker.number.int()], + }, + programs: { + required: [faker.number.int()], + electives: [faker.number.int()], + }, }, certificate_type: faker.lorem.word(), topics: [ @@ -52,6 +62,7 @@ const program: PartialFactory = (overrides = {}) => { availability: faker.helpers.arrayElement(["anytime", "dated"]), min_weekly_hours: `${faker.number.int({ min: 1, max: 5 })} hours`, max_weekly_hours: `${faker.number.int({ min: 6, max: 10 })} hours`, + start_date: faker.date.past().toISOString(), } return 
mergeOverrides(defaults, overrides) @@ -59,4 +70,19 @@ const program: PartialFactory = (overrides = {}) => { const programs = makePaginatedFactory(program) -export { program, programs } +const programCollection: PartialFactory = ( + overrides = {}, +) => { + const defaults: V2ProgramCollection = { + id: uniqueProgramId.enforce(() => faker.number.int()), + description: faker.lorem.paragraph(), + programs: programs({ count: 2 }).results.map((p) => p.id), + title: faker.lorem.words(3), + created_on: faker.date.past().toISOString(), + updated_on: faker.date.recent().toISOString(), + } + + return mergeOverrides(defaults, overrides) +} + +export { program, programs, programCollection } diff --git a/frontends/api/src/mitxonline/test-utils/urls.ts b/frontends/api/src/mitxonline/test-utils/urls.ts index bfa6d7bb62..8891cb38d2 100644 --- a/frontends/api/src/mitxonline/test-utils/urls.ts +++ b/frontends/api/src/mitxonline/test-utils/urls.ts @@ -1,5 +1,6 @@ import type { CoursesApiApiV2CoursesListRequest, + ProgramCollectionsApiProgramCollectionsListRequest, ProgramsApiProgramsListV2Request, } from "@mitodl/mitxonline-api-axios/v1" import { RawAxiosRequestConfig } from "axios" @@ -13,6 +14,7 @@ const currentUser = { } const enrollment = { + enrollmentsList: () => `${API_BASE_URL}/api/v1/enrollments/`, courseEnrollment: (id?: number) => `${API_BASE_URL}/api/v1/enrollments/${id ? 
`${id}/` : ""}`, } @@ -25,6 +27,13 @@ const b2b = { const programs = { programsList: (opts?: ProgramsApiProgramsListV2Request) => `${API_BASE_URL}/api/v2/programs/${queryify(opts)}`, + programDetail: (id: number) => `${API_BASE_URL}/api/v2/programs/${id}/`, +} + +const programCollections = { + programCollectionsList: ( + opts?: ProgramCollectionsApiProgramCollectionsListRequest, + ) => `${API_BASE_URL}/api/v2/program-collections/${queryify(opts)}`, } const courses = { @@ -37,4 +46,12 @@ const organization = { `${API_BASE_URL}/api/v0/b2b/organizations/${organizationSlug}/`, } -export { b2b, currentUser, enrollment, programs, courses, organization } +export { + b2b, + currentUser, + enrollment, + programs, + programCollections, + courses, + organization, +} diff --git a/frontends/main/package.json b/frontends/main/package.json index 2b8e1c6030..74657037d4 100644 --- a/frontends/main/package.json +++ b/frontends/main/package.json @@ -14,7 +14,7 @@ "@emotion/cache": "^11.13.1", "@emotion/styled": "^11.11.0", "@mitodl/course-search-utils": "3.3.2", - "@mitodl/mitxonline-api-axios": "^2025.6.23", + "@mitodl/mitxonline-api-axios": "2025.7.28", "@mitodl/smoot-design": "^6.10.0", "@next/bundle-analyzer": "^14.2.15", "@remixicon/react": "^4.2.0", diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.tsx b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.tsx index ea6459268a..2735425af8 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.tsx +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.tsx @@ -10,7 +10,11 @@ import { } from "ol-components" import NextLink from "next/link" import { EnrollmentStatus, EnrollmentMode } from "./types" -import type { DashboardCourse, DashboardCourseEnrollment } from "./types" +import type { + DashboardResource, + DashboardCourse, + DashboardCourseEnrollment, +} from "./types" import { ActionButton, Button, ButtonLink } 
from "@mitodl/smoot-design" import { RiArrowRightLine, @@ -309,7 +313,7 @@ const CourseStartCountdown: React.FC<{ type DashboardCardProps = { Component?: React.ElementType - dashboardResource: DashboardCourse + dashboardResource: DashboardResource showNotComplete?: boolean className?: string courseNoun?: string @@ -319,6 +323,7 @@ type DashboardCardProps = { titleHref?: string | null buttonHref?: string | null } + const DashboardCard: React.FC = ({ dashboardResource, showNotComplete = true, @@ -331,7 +336,8 @@ const DashboardCard: React.FC = ({ titleHref, buttonHref, }) => { - const { title, marketingUrl, enrollment, run } = dashboardResource + const course = dashboardResource as DashboardCourse + const { title, marketingUrl, enrollment, run } = course const titleSection = isLoading ? ( <> @@ -373,7 +379,7 @@ const DashboardCard: React.FC = ({ /> = ({ }) const updateEnrollment = useUpdateEnrollment({ id: enrollment.id, - PatchedCourseRunEnrollmentRequest: { - //@ts-expect-error This will be fixed after https://github.com/mitodl/mitxonline/pull/2737 is released + PatchedUpdateCourseRunEnrollmentRequest: { receive_emails: formik.values.receive_emails, }, }) diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts index 87c992e494..81e5eb815c 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts @@ -13,6 +13,7 @@ import moment from "moment" const makeCourses = factories.courses.courses const makeProgram = factories.programs.program +const makeProgramCollection = factories.programs.programCollection const makeEnrollment = factories.enrollment.courseEnrollment const makeGrade = factories.enrollment.grade @@ -123,10 +124,25 @@ const setupProgramsAndCourses = () => { const programB = makeProgram({ courses: coursesB.results.map((c) => c.id), }) + const 
programCollection = makeProgramCollection({ + title: "Program Collection", + programs: [], + }) setMockResponse.get(urls.programs.programsList({ org_id: orgX.id }), { results: [programA, programB], }) + setMockResponse.get(urls.programCollections.programCollectionsList(), { + results: [programCollection], + }) + setMockResponse.get( + urls.programs.programsList({ id: programA.id, org_id: orgX.id }), + { results: [programA] }, + ) + setMockResponse.get( + urls.programs.programsList({ id: programB.id, org_id: orgX.id }), + { results: [programB] }, + ) setMockResponse.get( urls.courses.coursesList({ id: programA.courses, org_id: orgX.id }), { @@ -146,6 +162,7 @@ const setupProgramsAndCourses = () => { mitxOnlineUser, programA, programB, + programCollection, coursesA: coursesA.results, coursesB: coursesB.results, } diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/transform.ts b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/transform.ts index 634f6d96f7..27337d4f04 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/transform.ts +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/transform.ts @@ -6,12 +6,17 @@ import { CourseRunEnrollment, - CourseWithCourseRuns, + V2CourseWithCourseRuns, V2Program, + V2ProgramCollection, } from "@mitodl/mitxonline-api-axios/v1" import { DashboardResourceType, EnrollmentStatus } from "./types" -import type { DashboardCourse, DashboardProgram } from "./types" +import type { + DashboardCourse, + DashboardProgram, + DashboardProgramCollection, +} from "./types" import { groupBy } from "lodash" const sources = { @@ -63,7 +68,7 @@ const mitxonlineEnrollments = (data: CourseRunEnrollment[]) => data.map((course) => mitxonlineEnrollment(course)) const mitxonlineUnenrolledCourse = ( - course: CourseWithCourseRuns, + course: V2CourseWithCourseRuns, ): DashboardCourse => { const run = course.courseruns.find((run) => run.id === course.next_run_id) return { @@ -89,7 +94,7 
@@ const mitxonlineUnenrolledCourse = ( } const mitxonlineCourses = (raw: { - courses: CourseWithCourseRuns[] + courses: V2CourseWithCourseRuns[] enrollments: CourseRunEnrollment[] }) => { const enrollmentsByCourseId = groupBy( @@ -115,6 +120,7 @@ const mitxonlineCourses = (raw: { const mitxonlineProgram = (raw: V2Program): DashboardProgram => { return { + id: raw.id, key: getKey({ source: sources.mitxonline, resourceType: DashboardResourceType.Program, @@ -124,10 +130,23 @@ const mitxonlineProgram = (raw: V2Program): DashboardProgram => { title: raw.title, programType: raw.program_type, courseIds: raw.courses, + collections: raw.collections, description: raw.page.description, } } +const mitxonlineProgramCollection = ( + raw: V2ProgramCollection, +): DashboardProgramCollection => { + return { + id: raw.id, + type: DashboardResourceType.ProgramCollection, + title: raw.title, + description: raw.description ?? null, + programIds: raw.programs, + } +} + const sortDashboardCourses = ( program: DashboardProgram, courses: DashboardCourse[], @@ -157,5 +176,6 @@ export { mitxonlineEnrollments, mitxonlineCourses, mitxonlineProgram, + mitxonlineProgramCollection, sortDashboardCourses, } diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/types.ts b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/types.ts index 96047b9b54..b408f7e83a 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/types.ts +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/types.ts @@ -1,6 +1,7 @@ const DashboardResourceType = { Course: "course", Program: "program", + ProgramCollection: "program_collection", } as const type DashboardResourceType = (typeof DashboardResourceType)[keyof typeof DashboardResourceType] @@ -47,15 +48,28 @@ type DashboardCourseEnrollment = { } type DashboardProgram = { + id: number key: string type: typeof DashboardResourceType.Program title: string programType?: string | null courseIds: number[] + 
collections: number[] description: string } -type DashboardResource = DashboardCourse | DashboardProgram +type DashboardProgramCollection = { + id: number + type: typeof DashboardResourceType.ProgramCollection + title: string + description?: string | null + programIds: number[] +} + +type DashboardResource = + | DashboardCourse + | DashboardProgram + | DashboardProgramCollection export { DashboardResourceType, EnrollmentStatus, EnrollmentMode } export type { @@ -63,4 +77,5 @@ export type { DashboardCourse, DashboardCourseEnrollment, DashboardProgram, + DashboardProgramCollection, } diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx index c1dbccd27f..747a700814 100644 --- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx +++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx @@ -33,6 +33,12 @@ describe("OrganizationContent", () => { await screen.findByRole("heading", { name: `Your ${orgX.name} Home`, }) + + const programAHeader = await screen.findByText(programA.title) + const programBHeader = await screen.findByText(programB.title) + expect(programAHeader).toBeInTheDocument() + expect(programBHeader).toBeInTheDocument() + const programs = await screen.findAllByTestId("org-program-root") expect(programs.length).toBe(2) @@ -86,4 +92,49 @@ describe("OrganizationContent", () => { } }) }) + + test("Renders program collections", async () => { + const { orgX, programA, programB, programCollection, coursesA, coursesB } = + setupProgramsAndCourses() + setMockResponse.get(urls.enrollment.enrollmentsList(), []) + programCollection.programs = [programA.id, programB.id] + setMockResponse.get(urls.programCollections.programCollectionsList(), { + results: [programCollection], + }) + + renderWithProviders() + + const collectionHeader = await screen.findByRole("heading", { + name: programCollection.title, + }) + 
expect(collectionHeader).toBeInTheDocument() + const collectionItems = await screen.findAllByTestId( + "org-program-collection-root", + ) + expect(collectionItems.length).toBe(1) + const collection = within(collectionItems[0]) + expect(collection.getByText(programCollection.title)).toBeInTheDocument() + // Check that the first course from each program is displayed + expect(collection.getAllByText(coursesA[0].title).length).toBeGreaterThan(0) + expect(collection.getAllByText(coursesB[0].title).length).toBeGreaterThan(0) + }) + + test("Does not render a program separately if it is part of a collection", async () => { + const { orgX, programA, programB, programCollection } = + setupProgramsAndCourses() + setMockResponse.get(urls.enrollment.enrollmentsList(), []) + programCollection.programs = [programA.id, programB.id] + setMockResponse.get(urls.programCollections.programCollectionsList(), { + results: [programCollection], + }) + + renderWithProviders() + + const collectionItems = await screen.findAllByTestId( + "org-program-collection-root", + ) + expect(collectionItems.length).toBe(1) + const programs = screen.queryAllByTestId("org-program-root") + expect(programs.length).toBe(0) + }) }) diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx index f1c6376d14..dedf5924fb 100644 --- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx +++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx @@ -4,16 +4,25 @@ import React from "react" import Image from "next/image" import { useFeatureFlagEnabled } from "posthog-js/react" import { FeatureFlags } from "@/common/feature_flags" -import { useQueries, useQuery } from "@tanstack/react-query" -import { programsQueries } from "api/mitxonline-hooks/programs" +import { useQuery } from "@tanstack/react-query" +import { + programsQueries, + programCollectionQueries, +} from "api/mitxonline-hooks/programs" 
import { coursesQueries } from "api/mitxonline-hooks/courses" import * as transform from "./CoursewareDisplay/transform" import { enrollmentQueries } from "api/mitxonline-hooks/enrollment" import { DashboardCard } from "./CoursewareDisplay/DashboardCard" -import { PlainList, Stack, styled, Typography } from "ol-components" -import { DashboardCourse, DashboardProgram } from "./CoursewareDisplay/types" +import { PlainList, Skeleton, Stack, styled, Typography } from "ol-components" +import { + DashboardProgram, + DashboardProgramCollection, +} from "./CoursewareDisplay/types" import graduateLogo from "@/public/images/dashboard/graduate.png" -import { OrganizationPage, V2Program } from "@mitodl/mitxonline-api-axios/v1" +import { + CourseRunEnrollment, + OrganizationPage, +} from "@mitodl/mitxonline-api-axios/v1" import { useMitxOnlineCurrentUser } from "api/mitxonline-hooks/user" const HeaderRoot = styled.div({ @@ -61,24 +70,6 @@ const OrganizationHeader: React.FC<{ org?: OrganizationPage }> = ({ org }) => { ) } -/** - * For an array of programs, fetch the associated courses. - * [program1, program2] => [[...courses1], [...courses2]] - */ - -const useMitxonlineProgramsCourses = (programs: V2Program[], orgId: number) => { - const courseGroupIds = - programs.map((program) => program.courses.map((id) => id as number)) ?? 
[] - - const courseGroups = useQueries({ - queries: courseGroupIds.map((courseIds) => - coursesQueries.coursesList({ org_id: orgId, id: courseIds }), - ), - }) - - return courseGroups -} - const DashboardCardStyled = styled(DashboardCard)({ borderRadius: "0px", borderTop: "none", @@ -102,12 +93,51 @@ const ProgramHeader = styled.div(({ theme }) => ({ borderRadius: "8px 8px 0px 0px", border: `1px solid ${theme.custom.colors.lightGray2}`, })) + +const OrgProgramCollectionDisplay: React.FC<{ + collection: DashboardProgramCollection + enrollments?: CourseRunEnrollment[] + orgId: number +}> = ({ collection, enrollments, orgId }) => { + return ( + + + + {collection.title} + + + + {collection.programIds.map((programId) => ( + + ))} + + + ) +} + const OrgProgramDisplay: React.FC<{ program: DashboardProgram - courses: DashboardCourse[] - coursesLoading: boolean + enrollments?: CourseRunEnrollment[] programLoading: boolean -}> = ({ program, courses }) => { + orgId?: number +}> = ({ program, enrollments, programLoading, orgId }) => { + const courses = useQuery( + coursesQueries.coursesList({ id: program.courseIds, org_id: orgId }), + ) + const skeleton = ( + + ) + if (programLoading || courses.isLoading) return skeleton + const transformedCourses = transform.mitxonlineCourses({ + courses: courses.data?.results ?? [], + enrollments: enrollments ?? 
[], + }) + return ( @@ -116,22 +146,85 @@ const OrgProgramDisplay: React.FC<{ {program.description} - - {transform.sortDashboardCourses(program, courses).map((course) => ( - - ))} + + {transform + .sortDashboardCourses(program, transformedCourses) + .map((course) => ( + + ))} ) } + +const ProgramCollectionItem: React.FC<{ + programId: number + enrollments?: CourseRunEnrollment[] + orgId: number +}> = ({ programId, enrollments, orgId }) => { + const program = useQuery( + programsQueries.programsList({ id: programId, org_id: orgId }), + ) + if (program.isLoading || !program.data?.results.length) { + return ( + + ) + } + const transformedProgram = transform.mitxonlineProgram( + program.data?.results[0], + ) + return ( + + ) +} + +const ProgramCard: React.FC<{ + program: DashboardProgram + enrollments?: CourseRunEnrollment[] + orgId?: number +}> = ({ program, enrollments, orgId }) => { + const courses = useQuery( + coursesQueries.coursesList({ + id: program.courseIds, + org_id: orgId, + }), + ) + const skeleton = ( + + ) + if (courses.isLoading) return skeleton + const transformedCourses = transform.mitxonlineCourses({ + courses: courses.data?.results ?? [], + enrollments: enrollments ?? [], + }) + if (courses.isLoading || !transformedCourses.length) return skeleton + // For now we assume the first course is the main one for the program. + const course = transformedCourses[0] + return ( + + ) +} + const OrganizationRoot = styled.div({ display: "flex", flexDirection: "column", @@ -150,42 +243,56 @@ const OrganizationContentInternal: React.FC< const orgId = org.id const enrollments = useQuery(enrollmentQueries.enrollmentsList()) const programs = useQuery(programsQueries.programsList({ org_id: orgId })) - const courseGroups = useMitxonlineProgramsCourses( - programs.data?.results ?? 
[], - orgId, + const programCollections = useQuery( + programCollectionQueries.programCollectionsList({}), ) if (!isOrgDashboardEnabled) return null - const transformedCourseGroups = courseGroups.map((courseGroup) => { - if (!courseGroup.data || !enrollments.data) return [] - return transform.mitxonlineCourses({ - courses: courseGroup.data?.results ?? [], - enrollments: enrollments.data ?? [], - }) - }) - const transformedPrograms = programs.data?.results.map((program) => - transform.mitxonlineProgram(program), + const transformedPrograms = programs.data?.results + .filter((program) => program.collections.length === 0) + .map((program) => transform.mitxonlineProgram(program)) + + const skeleton = ( + + + + + ) return ( - {programs.isLoading - ? "Programs Loading" - : transformedPrograms?.map((program, index) => { - const courses = transformedCourseGroups[index] - const programLoading = courseGroups[index].isLoading + {programs.isLoading || !transformedPrograms + ? skeleton + : transformedPrograms.map((program) => ( + + ))} + {programCollections.isLoading ? ( + skeleton + ) : ( + + {programCollections.data?.results.map((collection) => { + const transformedCollection = + transform.mitxonlineProgramCollection(collection) return ( - ) })} + + )} ) } @@ -201,7 +308,10 @@ const OrganizationContent: React.FC = ({ const b2bOrganization = mitxOnlineUser?.b2b_organizations.find( (org) => org.slug.replace("org-", "") === orgSlug, ) - if (isLoadingMitxOnlineUser || isLoadingMitxOnlineUser) return "Loading" + const skeleton = ( + + ) + if (isLoadingMitxOnlineUser || isLoadingMitxOnlineUser) return skeleton return b2bOrganization ? 
( ) : null diff --git a/frontends/main/src/app-pages/DashboardPage/UserListDetailsContent.tsx b/frontends/main/src/app-pages/DashboardPage/UserListDetailsContent.tsx index 6df0045290..a8f171e915 100644 --- a/frontends/main/src/app-pages/DashboardPage/UserListDetailsContent.tsx +++ b/frontends/main/src/app-pages/DashboardPage/UserListDetailsContent.tsx @@ -21,7 +21,11 @@ const UserListDetailsContent: React.FC = ( const { data: user } = useUserMe() const listQuery = useUserListsDetail(userListId) - const itemsQuery = useInfiniteUserListItems({ userlist_id: userListId }) + // Very high limit set here because pagination is not implemented on the frontend + const itemsQuery = useInfiniteUserListItems({ + userlist_id: userListId, + limit: 1000, + }) const router = useRouter() const items = useMemo(() => { diff --git a/frontends/main/src/app-pages/HomePage/HomePage.test.tsx b/frontends/main/src/app-pages/HomePage/HomePage.test.tsx index 1005423c91..792ed47070 100644 --- a/frontends/main/src/app-pages/HomePage/HomePage.test.tsx +++ b/frontends/main/src/app-pages/HomePage/HomePage.test.tsx @@ -17,10 +17,16 @@ import { import invariant from "tiny-invariant" import * as routes from "@/common/urls" import { assertHeadings } from "ol-test-utilities" -import { useFeatureFlagEnabled } from "posthog-js/react" +import { useFeatureFlagEnabled, usePostHog } from "posthog-js/react" jest.mock("posthog-js/react") const mockedUseFeatureFlagEnabled = jest.mocked(useFeatureFlagEnabled) +const mockedPostHogCapture = jest.fn() +jest.mock("posthog-js/react") +jest.mocked(usePostHog).mockReturnValue( + // @ts-expect-error Not mocking all of posthog + { capture: mockedPostHogCapture }, +) const assertLinksTo = ( el: HTMLElement, diff --git a/frontends/main/src/app-pages/HomePage/VideoShortsModal.tsx b/frontends/main/src/app-pages/HomePage/VideoShortsModal.tsx index 465253a159..651721217c 100644 --- a/frontends/main/src/app-pages/HomePage/VideoShortsModal.tsx +++ 
b/frontends/main/src/app-pages/HomePage/VideoShortsModal.tsx @@ -19,25 +19,41 @@ const Overlay = styled.div(({ theme }) => ({ }, })) -const CloseButton = styled(ActionButton)({ +const BaseButton = styled(ActionButton)(({ theme }) => ({ position: "absolute", - top: "16px", - right: "16px", zIndex: 1201, svg: { fill: "white", }, -}) + [`${theme.breakpoints.down("md")} and (orientation: portrait)`]: { + backgroundColor: "rgba(0, 0, 0, 0.3)", + borderRadius: "50%", + height: "58px", + width: "58px", + right: "26px", + "&&:hover": { + backgroundColor: "rgba(40, 40, 40, 0.6)", + }, + }, +})) -const MuteButton = styled(ActionButton)({ - position: "absolute", +const CloseButton = styled(BaseButton)(({ theme }) => ({ + top: "16px", + right: "16px", + [`${theme.breakpoints.down("md")} and (orientation: portrait)`]: { + top: "26px", + right: "26px", + }, +})) + +const MuteButton = styled(BaseButton)(({ theme }) => ({ right: "16px", bottom: "16px", - zIndex: 1201, - svg: { - fill: "white", + [`${theme.breakpoints.down("md")} and (orientation: portrait)`]: { + bottom: "26px", + right: "26px", }, -}) +})) const CarouselSlide = styled.div<{ width: number }>(({ width, theme }) => ({ width, diff --git a/frontends/main/src/app-pages/LearningPathDetailsPage/LearningPathDetailsPage.tsx b/frontends/main/src/app-pages/LearningPathDetailsPage/LearningPathDetailsPage.tsx index 57160dfd5e..42b93fd56b 100644 --- a/frontends/main/src/app-pages/LearningPathDetailsPage/LearningPathDetailsPage.tsx +++ b/frontends/main/src/app-pages/LearningPathDetailsPage/LearningPathDetailsPage.tsx @@ -28,9 +28,10 @@ const LearningPathDetailsPage: React.FC = () => { const id = parseInt(params.id) const pathQuery = useLearningPathsDetail(id) + // Very high limit set here because pagination is not implemented on the frontend const itemsQuery = useInfiniteLearningPathItems({ learning_resource_id: id, - limit: 100, + limit: 1000, }) const items = useMemo(() => { const pages = itemsQuery.data?.pages diff --git 
a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py index 24b9b62c56..040e86a151 100644 --- a/learning_resources/etl/canvas.py +++ b/learning_resources/etl/canvas.py @@ -1,13 +1,19 @@ +import base64 import logging import sys import zipfile from collections import defaultdict from collections.abc import Generator +from datetime import datetime +from io import BytesIO from pathlib import Path from tempfile import TemporaryDirectory from defusedxml import ElementTree from django.conf import settings +from litellm import completion +from pdf2image import convert_from_path +from PIL import Image from learning_resources.constants import ( VALID_TUTOR_PROBLEM_TYPES, @@ -68,6 +74,11 @@ def sync_canvas_archive(bucket, key: str, overwrite): return resource_readable_id, run +def _course_url(course_archive_path) -> str: + context_info = parse_context_xml(course_archive_path) + return f"https://{context_info.get('canvas_domain')}/courses/{context_info.get('course_id')}/" + + def run_for_canvas_archive(course_archive_path, course_folder, overwrite): """ Generate and return a LearningResourceRun for a Canvas course @@ -75,6 +86,20 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite): checksum = calc_checksum(course_archive_path) course_info = parse_canvas_settings(course_archive_path) course_title = course_info.get("title") + url = _course_url(course_archive_path) + start_at = course_info.get("start_at") + end_at = course_info.get("conclude_at") + if start_at: + try: + start_at = datetime.fromisoformat(start_at) + except (ValueError, TypeError): + log.warning("Invalid start_at date format: %s", start_at) + if end_at: + try: + end_at = datetime.fromisoformat(end_at) + except (ValueError, TypeError): + log.warning("Invalid end_at date format: %s", end_at) + readable_id = f"{course_folder}-{course_info.get('course_code')}" # create placeholder learning resource resource, _ = LearningResource.objects.update_or_create( @@ -82,6 +107,7 @@ def 
run_for_canvas_archive(course_archive_path, course_folder, overwrite): defaults={ "title": course_title, "published": False, + "url": url, "test_mode": True, "etl_source": ETLSource.canvas.name, "platform": LearningResourcePlatform.objects.get( @@ -95,6 +121,8 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite): run_id=f"{readable_id}+canvas", learning_resource=resource, published=True, + start_date=start_at, + end_date=end_at, ) run = resource.runs.first() resource_readable_id = run.learning_resource.readable_id @@ -180,6 +208,7 @@ def transform_canvas_problem_files( problem_file_data = { key: file_data[key] for key in keys_to_keep if key in file_data } + path = file_data["source_path"] path = path[len(settings.CANVAS_TUTORBOT_FOLDER) :] path_parts = path.split("/", 1) @@ -188,10 +217,33 @@ def transform_canvas_problem_files( if problem_type in path_parts[1].lower(): problem_file_data["type"] = problem_type break - + if ( + problem_file_data["file_extension"].lower() == ".pdf" + and settings.CANVAS_PDF_TRANSCRIPTION_MODEL + ): + markdown_content = _pdf_to_markdown( + Path(olx_path) / Path(problem_file_data["source_path"]) + ) + if markdown_content: + problem_file_data["content"] = markdown_content yield problem_file_data +def parse_context_xml(course_archive_path: str) -> dict: + with zipfile.ZipFile(course_archive_path, "r") as course_archive: + context = course_archive.read("course_settings/context.xml") + root = ElementTree.fromstring(context) + namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"} + context_info = {} + item_keys = ["course_id", "root_account_id", "canvas_domain", "root_account_name"] + for key in item_keys: + element = root.find(f"ns:{key}", namespaces) + if element is not None: + context_info[key] = element.text + + return context_info + + def parse_module_meta(course_archive_path: str) -> dict: """ Parse module_meta.xml and return publish/active status of resources. 
@@ -269,3 +321,75 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict: {"title": title, "files": files, "type": resource.get("type")} ) return dict(resources_dict) + + +def pdf_to_base64_images(pdf_path, dpi=200, fmt="JPEG", max_size=2000, quality=85): + """ + Convert a PDF file to a list of base64 encoded images (one per page). + Resizes images to reduce file size while keeping good OCR quality. + + Args: + pdf_path (str): Path to the PDF file + dpi (int): DPI for the output images (default: 200) + fmt (str): Output format ('JPEG' or 'PNG') (default: 'JPEG') + max_size (int): Maximum width/height in pixels (default: 2000) + quality (int): JPEG quality (1-100, default: 85) + + Returns: + list: List of base64 encoded strings (one per page) + """ + images = convert_from_path(pdf_path, dpi=dpi) + base64_images = [] + + for image in images: + # Resize the image if it's too large (preserving aspect ratio) + if max(image.size) > max_size: + image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) + + buffered = BytesIO() + + # Save with optimized settings + if fmt.upper() == "JPEG": + image.save(buffered, format="JPEG", quality=quality, optimize=True) + else: # PNG + image.save(buffered, format="PNG", optimize=True) + + img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") + base64_images.append(img_str) + + return base64_images + + +def _pdf_to_markdown(pdf_path): + markdown = "" + for im in pdf_to_base64_images(pdf_path): + response = completion( + api_base=settings.LITELLM_API_BASE, + custom_llm_provider=settings.LITELLM_CUSTOM_PROVIDER, + model=settings.CANVAS_PDF_TRANSCRIPTION_MODEL, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": settings.CANVAS_TRANSCRIPTION_PROMPT, + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{im}", + }, + }, + ], + } + ], + ) + markdown_snippet = ( + response.json()["choices"][0]["message"]["content"] + .removeprefix("```markdown\n") + 
.removesuffix("\n```") + ) + + markdown += markdown_snippet + return markdown diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py index 79873d87b8..14723526f9 100644 --- a/learning_resources/etl/canvas_test.py +++ b/learning_resources/etl/canvas_test.py @@ -11,6 +11,7 @@ parse_module_meta, run_for_canvas_archive, transform_canvas_content_files, + transform_canvas_problem_files, ) from learning_resources.etl.constants import ETLSource from learning_resources.etl.utils import get_edx_module_id @@ -32,6 +33,18 @@ def canvas_platform(): return LearningResourcePlatformFactory.create(code=PlatformType.canvas.name) +def canvas_zip_with_problem_files(tmp_path: str, files: list[tuple[str, bytes]]) -> str: + """ + Create a Canvas zip with problem files in the tutorbot folder. + `files` is a list of tuples: (filename, content_bytes) + """ + zip_path = tmp_path / "canvas_course_with_problems.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + for filename, content in files: + zf.writestr(f"tutorbot/{filename}", content) + return zip_path + + @pytest.fixture def canvas_settings_zip(tmp_path): # Create a minimal XML for course_settings.xml @@ -98,12 +111,17 @@ def test_run_for_canvas_archive_creates_resource_and_run(tmp_path, mocker): "learning_resources.etl.canvas.parse_canvas_settings", return_value={"title": "Test Course", "course_code": "TEST101"}, ) + mocker.patch( + "learning_resources.etl.canvas.parse_context_xml", + return_value={"course_id": "123", "canvas_domain": "mit.edu"}, + ) + mocker.patch("learning_resources.etl.canvas.calc_checksum", return_value="abc123") # No resource exists yet - course_archive_path = tmp_path / "archive.zip" - course_archive_path.write_text("dummy") + zip_path = tmp_path / "archive.zip" + _, run = run_for_canvas_archive( - course_archive_path, course_folder=course_folder, overwrite=True + zip_path, course_folder=course_folder, overwrite=True ) resource = 
LearningResource.objects.get(readable_id=f"{course_folder}-TEST101") assert resource.title == "Test Course" @@ -125,6 +143,10 @@ def test_run_for_canvas_archive_creates_run_if_none_exists(tmp_path, mocker): "learning_resources.etl.canvas.parse_canvas_settings", return_value={"title": "Test Course", "course_code": "TEST104"}, ) + mocker.patch( + "learning_resources.etl.canvas.parse_context_xml", + return_value={"course_id": "123", "canvas_domain": "mit.edu"}, + ) mocker.patch( "learning_resources.etl.canvas.calc_checksum", return_value="checksum104" ) @@ -338,3 +360,80 @@ def test_transform_canvas_content_files_removes_unpublished_content(mocker, tmp_ # Ensure unpublished content is deleted and unpublished actions called bulk_unpub.assert_called_once_with([unpublished_cf.id], CONTENT_FILE_TYPE) + + +def test_transform_canvas_problem_files_pdf_calls_pdf_to_markdown( + tmp_path, mocker, settings +): + """ + Test that transform_canvas_problem_files calls _pdf_to_markdown for PDF files. + """ + + settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/" + settings.CANVAS_PDF_TRANSCRIPTION_MODEL = "fake-model" + pdf_filename = "problemset1/problem.pdf" + pdf_content = b"%PDF-1.4 fake pdf content" + zip_path = canvas_zip_with_problem_files(tmp_path, [(pdf_filename, pdf_content)]) + + # return a file with pdf extension + fake_file_data = { + "run": "run", + "content": "original pdf content", + "archive_checksum": "checksum", + "source_path": f"tutorbot/{pdf_filename}", + "file_extension": ".pdf", + } + mocker.patch( + "learning_resources.etl.canvas._process_olx_path", + return_value=iter([fake_file_data]), + ) + + # Patch _pdf_to_markdown to return a known value + pdf_to_md = mocker.patch( + "learning_resources.etl.canvas._pdf_to_markdown", + return_value="markdown content from pdf", + ) + + # Patch Path(olx_path) / Path(problem_file_data["source_path"]) to exist + run = mocker.Mock() + + results = list(transform_canvas_problem_files(zip_path, run, overwrite=True)) + + 
pdf_to_md.assert_called_once() + assert results[0]["content"] == "markdown content from pdf" + assert results[0]["problem_title"] == "problemset1" + + +def test_transform_canvas_problem_files_non_pdf_does_not_call_pdf_to_markdown( + tmp_path, mocker, settings +): + """ + Test that transform_canvas_problem_files does not call _pdf_to_markdown for non-PDF files. + """ + settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/" + settings.CANVAS_PDF_TRANSCRIPTION_MODEL = "fake-model" + html_filename = "problemset2/problem.html" + html_content = b"problem" + zip_path = canvas_zip_with_problem_files(tmp_path, [(html_filename, html_content)]) + + fake_file_data = { + "run": "run", + "content": "original html content", + "archive_checksum": "checksum", + "source_path": f"tutorbot/{html_filename}", + "file_extension": ".html", + } + mocker.patch( + "learning_resources.etl.canvas._process_olx_path", + return_value=iter([fake_file_data]), + ) + + pdf_to_md = mocker.patch("learning_resources.etl.canvas._pdf_to_markdown") + + run = mocker.Mock() + + results = list(transform_canvas_problem_files(zip_path, run, overwrite=True)) + + pdf_to_md.assert_not_called() + assert results[0]["content"] == "original html content" + assert results[0]["problem_title"] == "problemset2" diff --git a/learning_resources/models.py b/learning_resources/models.py index 88f7eec32e..bad9d0b830 100644 --- a/learning_resources/models.py +++ b/learning_resources/models.py @@ -3,7 +3,6 @@ import uuid from abc import abstractmethod from functools import cached_property -from hashlib import md5 from typing import TYPE_CHECKING, Optional from django.conf import settings @@ -25,6 +24,7 @@ PrivacyLevel, ) from main.models import TimestampedModel, TimestampedModelQuerySet +from main.utils import checksum_for_content if TYPE_CHECKING: from django.contrib.auth import get_user_model @@ -942,15 +942,8 @@ class ContentFile(TimestampedModel): summary = models.TextField(blank=True, default="") flashcards = 
models.JSONField(blank=True, default=list) - def content_checksum(self): - hasher = md5() # noqa: S324 - if self.content: - hasher.update(self.content.encode("utf-8")) - return hasher.hexdigest() - return None - def save(self, **kwargs): - self.checksum = self.content_checksum() + self.checksum = checksum_for_content(self.content) super().save(**kwargs) class Meta: diff --git a/learning_resources/permissions.py b/learning_resources/permissions.py index 50deec908d..0abc98dae2 100644 --- a/learning_resources/permissions.py +++ b/learning_resources/permissions.py @@ -6,7 +6,11 @@ from rest_framework.generics import get_object_or_404 from rest_framework.permissions import SAFE_METHODS, BasePermission -from learning_resources.constants import GROUP_STAFF_LISTS_EDITORS, PrivacyLevel +from learning_resources.constants import ( + GROUP_STAFF_LISTS_EDITORS, + GROUP_TUTOR_PROBLEM_VIEWERS, + PrivacyLevel, +) from learning_resources.models import LearningPath, UserList from main.permissions import is_admin_user, is_readonly @@ -115,3 +119,13 @@ def has_object_permission(self, request, view, obj): # noqa: ARG002 or obj.parent.privacy_level == PrivacyLevel.unlisted.value ) return request.user == obj.parent.author + + +class IsAdminOrTutorProblemViewer(BasePermission): + def has_permission(self, request, view): # noqa: ARG002 + user = request.user + if not user or not user.is_authenticated: + return False + if user.is_staff or user.is_superuser: + return True + return user.groups.filter(name=GROUP_TUTOR_PROBLEM_VIEWERS).exists() diff --git a/learning_resources/views.py b/learning_resources/views.py index 8f06668406..2c032cb4e4 100644 --- a/learning_resources/views.py +++ b/learning_resources/views.py @@ -23,7 +23,7 @@ from rest_framework.filters import OrderingFilter from rest_framework.generics import get_object_or_404 from rest_framework.pagination import LimitOffsetPagination -from rest_framework.permissions import BasePermission, IsAuthenticated +from 
rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework_nested.viewsets import NestedViewSetMixin @@ -33,7 +33,6 @@ from learning_resources import permissions from learning_resources.constants import ( GROUP_CONTENT_FILE_CONTENT_VIEWERS, - GROUP_TUTOR_PROBLEM_VIEWERS, LearningResourceRelationTypes, LearningResourceType, PlatformType, @@ -1391,16 +1390,7 @@ class CourseRunProblemsViewSet(viewsets.ViewSet): """Viewset for all tutorbot problems and solutions for a course run""" - class IsAdminOrTutorProblemViewer(BasePermission): - def has_permission(self, request, view): # noqa: ARG002 - user = request.user - if not user or not user.is_authenticated: - return False - if user.is_staff or user.is_superuser: - return True - return user.groups.filter(name=GROUP_TUTOR_PROBLEM_VIEWERS).exists() - - permission_classes = (IsAdminOrTutorProblemViewer,) + permission_classes = () http_method_names = ["get"] lookup_field = "run_readable_id" @@ -1410,13 +1400,15 @@ def has_permission(self, request, view): # noqa: ARG002 detail=False, methods=["get"], url_path=r"(?P<run_readable_id>[^/]+)", + permission_classes=[AnonymousAccessReadonlyPermission], ) def list_problems(self, request, run_readable_id): # noqa: ARG002 """ - Generate a QuerySet for fetching Tutorbot problems for a course run + Fetch Tutorbot problem titles for a course run Returns: - QuerySet of CourseRunProblem objects for the course run + Array of strings of problem titles from TutorProblemFile objects for a + course run """ run = LearningResourceRun.objects.filter( run_id=run_readable_id, @@ -1434,8 +1426,15 @@ detail=False, methods=["get"], url_path=r"(?P<run_readable_id>[^/]+)/(?P<problem_title>[^/]+)", + permission_classes=[permissions.IsAdminOrTutorProblemViewer], ) def retrieve_problem(self, request, run_readable_id, problem_title): # noqa: ARG002 + """ + Fetch Tutorbot problem and solution content 
for a course run + + Returns: + json object with problem and solution content for a specific problem + """ run = LearningResourceRun.objects.filter( run_id=run_readable_id, learning_resource__platform=PlatformType.canvas.name ).first() diff --git a/learning_resources/views_test.py b/learning_resources/views_test.py index 6cfb9e33b2..a0d5f70786 100644 --- a/learning_resources/views_test.py +++ b/learning_resources/views_test.py @@ -1418,7 +1418,17 @@ def test_learning_resources_summary_listing_endpoint_large_pagesize(): ], ) def test_course_run_problems_endpoint(client, user_role, django_user_model): - """Test course run problems endpoint""" + """ + Test course run problems endpoint. + + All types of users should be able to get a list of problem set titles + for a course run from api/v0/tutorproblems/<run_readable_id>/ + + Only admin and group_tutor_problem_viewer users should be able to get + problem and solution content from + api/v0/tutorproblems/<run_readable_id>/<problem_title>/ + Normal users and anonymous users should receive a 403 response + """ course_run = LearningResourceRunFactory.create( learning_resource=CourseFactory.create( platform=PlatformType.canvas.name @@ -1463,20 +1473,7 @@ def test_course_run_problems_endpoint(client, user_role, django_user_model): reverse("lr:v0:tutorproblem_api-list-problems", args=[course_run.run_id]) ) - if user_role in ["admin", "group_tutor_problem_viewer"]: - assert resp.json() == {"problem_set_titles": ["Problem Set 1", "Problem Set 2"]} - elif user_role == "normal": - assert resp.status_code == 403 - assert resp.json() == { - "detail": "You do not have permission to perform this action.", - "error_type": "PermissionDenied", - } - elif user_role == "anonymous": - assert resp.status_code == 403 - assert resp.json() == { - "detail": "Authentication credentials were not provided.", - "error_type": "NotAuthenticated", - } + assert resp.json() == {"problem_set_titles": ["Problem Set 1", "Problem Set 2"]} detail_resp = client.get( reverse( diff --git a/main/settings.py 
b/main/settings.py index ffdda65844..03c9c5c86b 100644 --- a/main/settings.py +++ b/main/settings.py @@ -34,7 +34,7 @@ from main.settings_pluggy import * # noqa: F403 from openapi.settings_spectacular import open_spectacular_settings -VERSION = "0.39.0" +VERSION = "0.39.3" log = logging.getLogger() diff --git a/main/settings_course_etl.py b/main/settings_course_etl.py index 4eec2f63db..a1bef5b17f 100644 --- a/main/settings_course_etl.py +++ b/main/settings_course_etl.py @@ -68,6 +68,14 @@ CANVAS_COURSE_BUCKET_PREFIX = get_string( "CANVAS_COURSE_BUCKET_PREFIX", "canvas/course_content" ) +CANVAS_PDF_TRANSCRIPTION_MODEL = get_string( + name="CANVAS_PDF_TRANSCRIPTION_MODEL", default=None +) +CANVAS_TRANSCRIPTION_PROMPT = get_string( + "CANVAS_TRANSCRIPTION_PROMPT", + """Transcribe the contents of this file into markdown. + Do not include anything but the markdown content in your response""", +) # More MIT URLs SEE_API_URL = get_string("SEE_API_URL", None) SEE_API_ACCESS_TOKEN_URL = get_string("SEE_API_ACCESS_TOKEN_URL", None) diff --git a/main/utils.py b/main/utils.py index 7c68eae647..bf8c03ace8 100644 --- a/main/utils.py +++ b/main/utils.py @@ -5,6 +5,7 @@ import os from enum import Flag, auto from functools import wraps +from hashlib import md5 from itertools import islice from urllib.parse import urljoin @@ -364,3 +365,14 @@ def clear_search_cache(): search_keys = cache.keys("views.decorators.cache.cache_header.search.*") cleared += cache.delete_many(search_keys) or 0 return cleared + + +def checksum_for_content(content: str) -> str: + """ + Generate a checksum based on the provided content string + """ + hasher = md5() # noqa: S324 + if content: + hasher.update(content.encode("utf-8")) + return hasher.hexdigest() + return None diff --git a/poetry.lock b/poetry.lock index f441fed477..782f978038 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1568,7 +1568,7 @@ django = ">=3.2" [[package]] name = "django-health-check" -version = "3.20.1.dev3+g7fc1e3a" +version = 
"3.20.1.dev4+gb0500d1" description = "Monitor the health of your Django app and its connected services." optional = false python-versions = ">=3.9" @@ -1587,8 +1587,8 @@ test = ["boto3", "celery", "django-storages", "pytest", "pytest-cov", "pytest-dj [package.source] type = "git" url = "https://github.com/revsys/django-health-check" -reference = "7fc1e3a70d2d98ccc388926a2fab0626acf0acff" -resolved_reference = "7fc1e3a70d2d98ccc388926a2fab0626acf0acff" +reference = "b0500d14c338040984f02ee34ffbe6643b005084" +resolved_reference = "b0500d14c338040984f02ee34ffbe6643b005084" [[package]] name = "django-imagekit" @@ -3523,14 +3523,14 @@ pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"] [[package]] name = "litellm" -version = "1.74.8" +version = "1.74.14" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" groups = ["main"] files = [ - {file = "litellm-1.74.8-py3-none-any.whl", hash = "sha256:f9433207d1e12e545495e5960fe02d93e413ecac4a28225c522488e1ab1157a1"}, - {file = "litellm-1.74.8.tar.gz", hash = "sha256:6e0a18aecf62459d465ee6d9a2526fcb33719a595b972500519abe95fe4906e0"}, + {file = "litellm-1.74.14-py3-none-any.whl", hash = "sha256:39aad802d7aa3eabb1678552017d705fe76ba09fdeab8122206a24781ddc4248"}, + {file = "litellm-1.74.14.tar.gz", hash = "sha256:0e6029314a235dbce5d03376a48f7504a221b2e1e8b24f0b090ff3580a6e78be"}, ] [package.dependencies] @@ -3549,7 +3549,9 @@ tokenizers = "*" [package.extras] caching = ["diskcache (>=5.6.1,<6.0.0)"] extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "redisvl (>=0.4.1,<0.5.0) ; python_version >= \"3.9\" and python_version < \"3.14\"", "resend (>=0.8.0,<0.9.0)"] -proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "azure-identity (>=1.15.0,<2.0.0)", "azure-storage-blob 
(>=12.25.1,<13.0.0)", "backoff", "boto3 (==1.34.34)", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=23.0.0,<24.0.0)", "litellm-enterprise (==0.1.15)", "litellm-proxy-extras (==0.2.11)", "mcp (==1.10.0) ; python_version >= \"3.10\"", "orjson (>=3.9.7,<4.0.0)", "polars (>=1.31.0,<2.0.0) ; python_version >= \"3.10\"", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rich (==13.7.1)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0) ; sys_platform != \"win32\"", "websockets (>=13.1.0,<14.0.0)"] +mlflow = ["mlflow (>3.1.4) ; python_version >= \"3.10\""] +proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "azure-identity (>=1.15.0,<2.0.0)", "azure-storage-blob (>=12.25.1,<13.0.0)", "backoff", "boto3 (==1.34.34)", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=23.0.0,<24.0.0)", "litellm-enterprise (==0.1.16)", "litellm-proxy-extras (==0.2.14)", "mcp (>=1.10.0,<2.0.0) ; python_version >= \"3.10\"", "orjson (>=3.9.7,<4.0.0)", "polars (>=1.31.0,<2.0.0) ; python_version >= \"3.10\"", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rich (==13.7.1)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0) ; sys_platform != \"win32\"", "websockets (>=13.1.0,<14.0.0)"] +semantic-router = ["semantic-router ; python_version >= \"3.9\""] utils = ["numpydoc"] [[package]] @@ -5510,6 +5512,21 @@ pygments = "*" [package.extras] testing = ["ipython", "pexpect", "pytest", "pytest-cov"] +[[package]] +name = "pdf2image" +version = "1.17.0" +description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." 
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"}, + {file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"}, +] + +[package.dependencies] +pillow = "*" + [[package]] name = "pexpect" version = "4.9.0" @@ -7482,30 +7499,30 @@ files = [ [[package]] name = "ruff" -version = "0.12.5" +version = "0.12.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92"}, - {file = "ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a"}, - {file = "ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a"}, - {file = "ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a"}, - {file = "ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5"}, - {file = "ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293"}, - {file = "ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb"}, - {file = "ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb"}, - {file = "ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9"}, - {file = "ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5"}, - {file = "ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805"}, - {file = "ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e"}, + {file = "ruff-0.12.7-py3-none-linux_armv6l.whl", hash = "sha256:76e4f31529899b8c434c3c1dede98c4483b89590e15fb49f2d46183801565303"}, + {file = "ruff-0.12.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:789b7a03e72507c54fb3ba6209e4bb36517b90f1a3569ea17084e3fd295500fb"}, + {file = "ruff-0.12.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e1c2a3b8626339bb6369116e7030a4cf194ea48f49b64bb505732a7fce4f4e3"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32dec41817623d388e645612ec70d5757a6d9c035f3744a52c7b195a57e03860"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:47ef751f722053a5df5fa48d412dbb54d41ab9b17875c6840a58ec63ff0c247c"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a828a5fc25a3efd3e1ff7b241fd392686c9386f20e5ac90aa9234a5faa12c423"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5726f59b171111fa6a69d82aef48f00b56598b03a22f0f4170664ff4d8298efb"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74e6f5c04c4dd4aba223f4fe6e7104f79e0eebf7d307e4f9b18c18362124bccd"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0bfe4e77fba61bf2ccadf8cf005d6133e3ce08793bbe870dd1c734f2699a3e"}, + {file = "ruff-0.12.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06bfb01e1623bf7f59ea749a841da56f8f653d641bfd046edee32ede7ff6c606"}, + {file = "ruff-0.12.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e41df94a957d50083fd09b916d6e89e497246698c3f3d5c681c8b3e7b9bb4ac8"}, + {file = "ruff-0.12.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4000623300563c709458d0ce170c3d0d788c23a058912f28bbadc6f905d67afa"}, + {file = "ruff-0.12.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:69ffe0e5f9b2cf2b8e289a3f8945b402a1b19eff24ec389f45f23c42a3dd6fb5"}, + {file = "ruff-0.12.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a07a5c8ffa2611a52732bdc67bf88e243abd84fe2d7f6daef3826b59abbfeda4"}, + {file = "ruff-0.12.7-py3-none-win32.whl", hash = "sha256:c928f1b2ec59fb77dfdf70e0419408898b63998789cc98197e15f560b9e77f77"}, + {file = "ruff-0.12.7-py3-none-win_amd64.whl", hash = "sha256:9c18f3d707ee9edf89da76131956aba1270c6348bfee8f6c647de841eac7194f"}, + {file = "ruff-0.12.7-py3-none-win_arm64.whl", hash = "sha256:dfce05101dbd11833a0776716d5d1578641b7fddb537fe7fa956ab85d1769b69"}, + {file = "ruff-0.12.7.tar.gz", hash = "sha256:1fc3193f238bc2d7968772c82831a4ff69252f673be371fb49663f0068b7ec71"}, ] [[package]] @@ -9088,4 +9105,4 @@ 
cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "5f3bc029ac5f5738e82afac9782fb1199535f8ea69b82c645f89332bd0d5959c" +content-hash = "63e448c31390942ead05c25627a91c91d5bfec50d2e6460432ef7868c5e0ffe8" diff --git a/pyproject.toml b/pyproject.toml index 42acffc0a6..551a4983fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ django-cache-memoize = "^0.2.0" django-cors-headers = "^4.0.0" django-filter = "^2.4.0" django-guardian = "^3.0.0" -django-health-check = { git = "https://github.com/revsys/django-health-check", rev="7fc1e3a70d2d98ccc388926a2fab0626acf0acff" } # pragma: allowlist secret +django-health-check = { git = "https://github.com/revsys/django-health-check", rev="b0500d14c338040984f02ee34ffbe6643b005084" } # pragma: allowlist secret django-imagekit = "^5.0.0" django-ipware = "^7.0.0" django-json-widget = "^2.0.0" @@ -56,7 +56,7 @@ jedi = "^0.19.0" langchain = "^0.3.11" langchain-experimental = "^0.3.4" langchain-openai = "^0.3.2" -litellm = "1.74.8" +litellm = "1.74.14" llama-index = "^0.12.6" llama-index-agent-openai = "^0.4.1" llama-index-llms-openai = "^0.4.0" @@ -95,7 +95,7 @@ qdrant-client = {extras = ["fastembed"], version = "^1.14.3"} redis = "^5.0.0" requests = "^2.31.0" retry2 = "^0.9.5" -ruff = "0.12.5" +ruff = "0.12.7" selenium = "^4.30.0" sentry-sdk = "^2.25.1" social-auth-app-django = "^5.2.0" @@ -111,6 +111,7 @@ uwsgi = "^2.0.29" uwsgitop = "^0.12" wrapt = "^1.14.1" youtube-transcript-api = "^1.0.0" +pdf2image = "^1.17.0" [tool.poetry.group.dev.dependencies] bpython = "^0.25" diff --git a/users/adapters.py b/users/adapters.py index 5ce84983b8..09cd7bb6eb 100644 --- a/users/adapters.py +++ b/users/adapters.py @@ -1,6 +1,6 @@ from mitol.scim.adapters import UserAdapter -from authentication.api import user_created_actions +from authentication.hooks import get_plugin_manager from profiles.models import Profile @@ -40,13 +40,23 @@ def from_dict(self, d): self.obj.profile.name = 
d.get("fullName", d.get("name", "")) self.obj.profile.email_optin = d.get("emailOptIn", 1) == 1 + def save(self): + """ + Save the user object and any related objects, such as the profile. + """ + newly_created = self.is_new_user + super().save() + if newly_created: + pm = get_plugin_manager() + hook = pm.hook + hook.user_created(user=self.obj, user_data={}) + def _save_related(self): """ Save models related to the user """ self.obj.profile.user = self.obj self.obj.profile.save() - user_created_actions(user=self.obj, details=self.to_dict(), is_new=True) def _handle_replace_nested_path(self, nested_path, nested_value): """Per-path replacement handling""" diff --git a/users/adapters_test.py b/users/adapters_test.py index 63300562fe..bb153ba046 100644 --- a/users/adapters_test.py +++ b/users/adapters_test.py @@ -11,8 +11,12 @@ def test_save_related_creates_profile_and_favorites_if_missing(mocker, settings) Test that saving a LearnUserAdapter creates a profile and default favorites list """ settings.MITOL_AUTHENTICATION_PLUGINS = "learning_resources.plugins.FavoritesListPlugin,profiles.plugins.CreateProfilePlugin" - user = UserFactory() + # simulate a new and blank unsaved user + user = UserFactory.create() + user.scim_id = None + user.global_id = None + user.id = None scimUser = LearnUserAdapter(user) scimUser.request = RequestFactory() scimUser.obj.request = RequestFactory() diff --git a/vector_search/utils.py b/vector_search/utils.py index c100a95bb5..62c1199636 100644 --- a/vector_search/utils.py +++ b/vector_search/utils.py @@ -1,5 +1,6 @@ import logging import uuid +from typing import Optional from django.conf import settings from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -21,6 +22,7 @@ serialize_bulk_content_files, serialize_bulk_learning_resources, ) +from main.utils import checksum_for_content from vector_search.constants import ( CONTENT_FILES_COLLECTION_NAME, QDRANT_CONTENT_FILE_INDEXES, @@ -253,11 +255,13 @@ def 
update_learning_resource_payload(serialized_document): def update_content_file_payload(serialized_document): - params = { - "resource_readable_id": serialized_document["resource_readable_id"], - "key": serialized_document["key"], - "run_readable_id": serialized_document["run_readable_id"], - } + search_keys = ["resource_readable_id", "key", "run_readable_id"] + params = {} + for key in search_keys: + if key in serialized_document: + params[key] = serialized_document[key] + if not params: + return points = [ point.id for point in retrieve_points_matching_params( @@ -319,18 +323,20 @@ def should_generate_resource_embeddings(serialized_document): return True -def should_generate_content_embeddings(serialized_document): +def should_generate_content_embeddings( + serialized_document: dict, point_id: Optional[str] = None +) -> bool: """ Determine if we should generate embeddings for a content file """ client = qdrant_client() - - # we just need metadata from the first chunk - point_id = vector_point_id( - f"{serialized_document['resource_readable_id']}." - f"{serialized_document.get('run_readable_id', '')}." - f"{serialized_document['key']}.0" - ) + if not point_id: + # we just need metadata from the first chunk + point_id = vector_point_id( + f"{serialized_document['resource_readable_id']}." + f"{serialized_document.get('run_readable_id', '')}." 
+ f"{serialized_document['key']}.0" + ) response = client.retrieve( collection_name=CONTENT_FILES_COLLECTION_NAME, ids=[point_id], @@ -353,11 +359,25 @@ def _embed_course_metadata_as_contentfile(serialized_resources): ids = [] docs = [] for doc in serialized_resources: - if not should_generate_resource_embeddings(doc): - continue readable_id = doc["readable_id"] resource_vector_point_id = str(vector_point_id(readable_id)) serializer = LearningResourceMetadataDisplaySerializer(doc) + serialized_document = serializer.render_document() + checksum = checksum_for_content(str(serialized_document)) + key = f"{doc['readable_id']}.course_metadata" + serialized_document["checksum"] = checksum + serialized_document["key"] = key + document_point_id = vector_point_id( + f"{doc['readable_id']}.course_information.0" + ) + if not should_generate_content_embeddings( + serialized_document, document_point_id + ): + continue + # remove existing course info docs + remove_points_matching_params( + {"key": key}, collection_name=CONTENT_FILES_COLLECTION_NAME + ) split_texts = serializer.render_chunks() split_metadatas = [ { @@ -365,7 +385,11 @@ def _embed_course_metadata_as_contentfile(serialized_resources): "chunk_number": chunk_id, "chunk_content": chunk_content, "resource_readable_id": doc["readable_id"], + "run_readable_id": doc["readable_id"], "file_extension": ".txt", + "file_type": "course_metadata", + "key": key, + "checksum": checksum, **{key: doc[key] for key in ["offered_by", "platform"]}, } for chunk_id, chunk_content in enumerate(split_texts) diff --git a/vector_search/utils_test.py b/vector_search/utils_test.py index 10a9955819..f5af5abbd8 100644 --- a/vector_search/utils_test.py +++ b/vector_search/utils_test.py @@ -12,10 +12,12 @@ LearningResourceRunFactory, ) from learning_resources.models import LearningResource +from learning_resources.serializers import LearningResourceMetadataDisplaySerializer from learning_resources_search.serializers import ( 
serialize_bulk_content_files, serialize_bulk_learning_resources, ) +from main.utils import checksum_for_content from vector_search.constants import ( CONTENT_FILES_COLLECTION_NAME, QDRANT_CONTENT_FILE_PARAM_MAP, @@ -635,3 +637,89 @@ def test_embed_learning_resources_summarizes_only_contentfiles_with_summary(mock # Only contentfiles with summary should be passed expected_ids = [cf.id for cf in contentfiles_with_summary] summarize_mock.assert_called_once_with(expected_ids, True) # noqa: FBT003 + + +@pytest.mark.django_db +def test_embed_course_metadata_as_contentfile_uploads_points_on_change(mocker): + """ + Test that _embed_course_metadata_as_contentfile uploads points to Qdrant + if any property of a serialized_resource has changed + """ + + mock_client = mocker.patch("vector_search.utils.qdrant_client").return_value + mock_encoder = mocker.patch("vector_search.utils.dense_encoder").return_value + mock_encoder.model_short_name.return_value = "test-model" + mock_encoder.embed_documents.return_value = [[0.1, 0.2, 0.3]] + resource = LearningResourceFactory.create() + serialized_resource = next(serialize_bulk_learning_resources([resource.id])) + serializer = LearningResourceMetadataDisplaySerializer(serialized_resource) + rendered_document = serializer.render_document() + resource_checksum = checksum_for_content(str(rendered_document)) + + """ + Simulate qdrant returning a checksum for an existing + record that does not match the checksum of the metadata doc + """ + mock_point = mocker.Mock() + mock_point.payload = {"checksum": "checksum2"} + mock_client.retrieve.return_value = [mock_point] + + _embed_course_metadata_as_contentfile([serialized_resource]) + + # Assert upload_points was called + assert mock_client.upload_points.called + args, kwargs = mock_client.upload_points.call_args + assert args[0] == CONTENT_FILES_COLLECTION_NAME + points = list(kwargs["points"]) + assert len(points) == 1 + assert points[0].payload["resource_readable_id"] == resource.readable_id + assert 
points[0].payload["checksum"] == resource_checksum + + # simulate qdrant returning the same checksum for the metadata doc + mock_point.payload = {"checksum": resource_checksum} + mock_client.upload_points.reset_mock() + _embed_course_metadata_as_contentfile([serialized_resource]) + + # nothing has changed - no updates to make + assert not mock_client.upload_points.called + + +@pytest.mark.parametrize( + ("serialized_document", "expected_params"), + [ + ( + {"resource_readable_id": "r1", "key": "k1", "run_readable_id": "run1"}, + {"resource_readable_id": "r1", "key": "k1", "run_readable_id": "run1"}, + ), + ( + {"resource_readable_id": "r2", "key": "k2"}, + {"resource_readable_id": "r2", "key": "k2"}, + ), + ( + {"run_readable_id": "run3"}, + {"run_readable_id": "run3"}, + ), + ({"test": "run3"}, None), + ], +) +def test_update_content_file_payload_only_includes_existing_keys( + mocker, serialized_document, expected_params +): + """ + Test that params only includes keys + that are defined in the input document + """ + mock_retrieve = mocker.patch( + "vector_search.utils.retrieve_points_matching_params", return_value=[] + ) + mocker.patch("vector_search.utils._set_payload") + + update_content_file_payload(serialized_document) + if expected_params: + # Check that retrieve_points_matching_params was called with only the expected keys + mock_retrieve.assert_called_once_with( + expected_params, + collection_name=CONTENT_FILES_COLLECTION_NAME, + ) + else: + mock_retrieve.assert_not_called() diff --git a/yarn.lock b/yarn.lock old mode 100644 new mode 100755 index 44a34201fc..06d15fcc97 --- a/yarn.lock +++ b/yarn.lock @@ -3018,13 +3018,13 @@ __metadata: languageName: node linkType: hard -"@mitodl/mitxonline-api-axios@npm:^2025.6.23": - version: 2025.6.23 - resolution: "@mitodl/mitxonline-api-axios@npm:2025.6.23" +"@mitodl/mitxonline-api-axios@npm:2025.7.28": + version: 2025.7.28 + resolution: "@mitodl/mitxonline-api-axios@npm:2025.7.28" dependencies: "@types/node": 
"npm:^20.11.19" axios: "npm:^1.6.5" - checksum: 10/0299c73b9bb18493f79d1ca6eb373286228c20d58b842cf3d69559aa38525e07ac9a833aa19995674eb1bcc4ef40cbc1572790f8726400bd7280c530798799cc + checksum: 10/87e696455f1f6555aa40f5e966f34f39484b58953eebc257e0a4e78bc30c9b6dc489577d02213970f72196f725557e53f0986fe32df1d5f44514a64bf5043201 languageName: node linkType: hard @@ -7356,7 +7356,7 @@ __metadata: resolution: "api@workspace:frontends/api" dependencies: "@faker-js/faker": "npm:^9.9.0" - "@mitodl/mitxonline-api-axios": "npm:^2025.6.23" + "@mitodl/mitxonline-api-axios": "npm:2025.7.28" "@tanstack/react-query": "npm:^5.66.0" "@testing-library/react": "npm:^16.1.0" axios: "npm:^1.6.3" @@ -13931,7 +13931,7 @@ __metadata: "@emotion/styled": "npm:^11.11.0" "@faker-js/faker": "npm:^9.9.0" "@mitodl/course-search-utils": "npm:3.3.2" - "@mitodl/mitxonline-api-axios": "npm:^2025.6.23" + "@mitodl/mitxonline-api-axios": "npm:2025.7.28" "@mitodl/smoot-design": "npm:^6.10.0" "@next/bundle-analyzer": "npm:^14.2.15" "@remixicon/react": "npm:^4.2.0"