Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions kolibri/core/content/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,19 @@ def list(self, request, *args, **kwargs):
return super(RemoteViewSet, self).list(request, *args, **kwargs)


class CharInFilter(BaseInFilter, CharFilter):
pass


class ChannelMetadataFilter(FilterSet):
available = BooleanFilter(method="filter_available", label="Available")
contains_exercise = BooleanFilter(
method="filter_contains_exercise", label="Has exercises"
)
contains_quiz = BooleanFilter(method="filter_contains_quiz", label="Has quizzes")
included_languages = CharInFilter(
field_name="included_languages", label="Languages", distinct=True
)

class Meta:
model = models.ChannelMetadata
Expand Down Expand Up @@ -419,10 +426,6 @@ class UUIDInFilter(BaseInFilter, UUIDFilter):
pass


class CharInFilter(BaseInFilter, CharFilter):
pass


contentnode_filter_fields = [
"parent",
"parent__isnull",
Expand Down Expand Up @@ -473,6 +476,7 @@ class ContentNodeFilter(FilterSet):
keywords = CharFilter(method="filter_keywords")
channels = UUIDInFilter(field_name="channel_id")
languages = CharInFilter(field_name="lang_id")
included_languages = CharInFilter(field_name="included_languages")
categories__isnull = BooleanFilter(field_name="categories", lookup_expr="isnull")
lft__gt = NumberFilter(field_name="lft", lookup_expr="gt")
rght__lt = NumberFilter(field_name="rght", lookup_expr="lt")
Expand Down Expand Up @@ -673,10 +677,11 @@ def get_queryset(self):
return models.ContentNode.objects.filter(available=True)

def get_related_data_maps(self, items, queryset):
ids = [item["id"] for item in items]
assessmentmetadata_map = {
a["contentnode"]: a
for a in models.AssessmentMetaData.objects.filter(
contentnode__in=queryset
contentnode__in=ids
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During the PR review today, we were discussing why the queryset was replaced with this ids array. Some ideas were that the queryset had never been required, since we had the items array, and items might be a smaller set than the queryset. Or, that this might be an optimization. Is that right? were there other motivations here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was an optimization, yes. Because we've already got the items query from the DB, doing the subquery with the queryset, rather than just passing in the materialized ids was slowing down the query somewhat. As the content node endpoint is only performant when we do limited queries (and hence we almost always paginate requests to it) - the risk of there being too many ids is mitigated.

).values(
"assessment_item_ids",
"number_of_assessments",
Expand All @@ -690,7 +695,7 @@ def get_related_data_maps(self, items, queryset):
files_map = {}

files = list(
models.File.objects.filter(contentnode__in=queryset).values(
models.File.objects.filter(contentnode__in=ids).values(
"id",
"contentnode",
"local_file__id",
Expand Down Expand Up @@ -725,7 +730,7 @@ def get_related_data_maps(self, items, queryset):
tags_map = {}

for t in (
models.ContentTag.objects.filter(tagged_content__in=queryset)
models.ContentTag.objects.filter(tagged_content__in=ids)
.values(
"tag_name",
"tagged_content",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ class ContentContentnodeHasPrerequisite(Base):
to_contentnode_id = Column(CHAR(32), nullable=False, index=True)


class ContentContentnodeIncludedLanguages(Base):
__tablename__ = "content_contentnode_included_languages"
__table_args__ = (
Index(
"content_contentnode_included_languages_contentnode_id_language_id_7d14ec8b_uniq",
"contentnode_id",
"language_id",
unique=True,
),
)

id = Column(Integer, primary_key=True)
contentnode_id = Column(CHAR(32), nullable=False, index=True)
language_id = Column(String(14), nullable=False, index=True)


class ContentContentnodeRelated(Base):
__tablename__ = "content_contentnode_related"
__table_args__ = (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.25 on 2024-12-18 00:14
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):

dependencies = [
("content", "0039_channelmetadata_ordered_fields"),
]

operations = [
migrations.AddField(
model_name="contentnode",
name="included_languages",
field=models.ManyToManyField(
blank=True,
related_name="contentnodes",
to="content.Language",
verbose_name="languages",
),
),
]
12 changes: 12 additions & 0 deletions kolibri/core/content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,18 @@ class ContentNode(base_models.ContentNode):
# needs a subsequent Kolibri upgrade step to backfill these values.
admin_imported = models.BooleanField(null=True)

# Languages that are in this node and/or any descendant nodes of this node
# for non-topic nodes, this is the language of the node itself
# for topic nodes, this is the union of all languages of all descendant nodes
# any language directly set on the topic nodes is ignored,
# as it is not meaningful to set a language on a topic node if it does not apply
# to any descendants.
# We do this to allow filtering of a topic tree by a specific language for
# multi-language channels.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think here I'm understanding the why but not the how of the M2M field. Meaning, I understand what's trying to happen and that the M2M field choice facilitates it, but I can't fully wrap my mind around why it has to be a many to many.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because languages are a separate table - and each contentnode can contain multiple languages. So it needs to be a many to many field, because for every content node, we could be keying to multiple other languages (and vice versa).

included_languages = models.ManyToManyField(
"Language", related_name="contentnodes", verbose_name="languages", blank=True
)

objects = ContentNodeManager()

class Meta:
Expand Down
Loading