Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions kolibri/core/content/public_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from uuid import UUID

from django.core.exceptions import EmptyResultSet
from django.db import connection
from django.db.models import Q
from django.shortcuts import get_object_or_404
from rest_framework.exceptions import ValidationError
from rest_framework.response import Response
Expand Down Expand Up @@ -82,32 +82,46 @@ def _get_retrieve_queryset(self):

def _serialize(self, nodes, node, content_schema):
data = {}
files = models.File.objects.filter(contentnode__in=nodes)
# Materialise FK lists and use `__uuidin` (inlines as SQL literals) instead of
# `__in=<queryset>`, which gave postgres correlated subqueries it sometimes
# mis-planned into multi-hour query times on a fresh test database.
node_ids = list(nodes.values_list("id", flat=True))

files = models.File.objects.filter(contentnode_id__uuidin=node_ids)
through_tags = models.ContentNode.tags.through.objects.filter(
contentnode__in=nodes
contentnode_id__uuidin=node_ids
)
assessmentmetadata = models.AssessmentMetaData.objects.filter(
contentnode__in=nodes
contentnode_id__uuidin=node_ids
)
localfiles = models.LocalFile.objects.filter(files__in=files).distinct()
tags = models.ContentTag.objects.filter(
id__in=through_tags.values_list("contenttag_id", flat=True)

file_ids = list(files.values_list("id", flat=True))
localfiles = models.LocalFile.objects.filter(
files__id__uuidin=file_ids
).distinct()
languages = models.Language.objects.filter(
Q(id__in=files.values_list("lang_id", flat=True))
| Q(id__in=nodes.values_list("lang_id", flat=True))

contenttag_ids = list(through_tags.values_list("contenttag_id", flat=True))
tags = models.ContentTag.objects.filter(id__uuidin=contenttag_ids).distinct()

# Lang codes are short strings (not UUIDs) and the distinct set across a
# channel is tiny — dedupe and use a literal IN.
lang_ids = set(files.values_list("lang_id", flat=True)) | set(
nodes.values_list("lang_id", flat=True)
)
node_ids = nodes.values_list("id", flat=True)
lang_ids.discard(None)
languages = models.Language.objects.filter(id__in=lang_ids)

prerequisites = models.ContentNode.has_prerequisite.through.objects.filter(
from_contentnode_id__in=node_ids, to_contentnode_id__in=node_ids
from_contentnode_id__uuidin=node_ids,
to_contentnode_id__uuidin=node_ids,
)
related = models.ContentNode.related.through.objects.filter(
from_contentnode_id__in=node_ids, to_contentnode_id__in=node_ids
from_contentnode_id__uuidin=node_ids,
to_contentnode_id__uuidin=node_ids,
)
channel_metadata = models.ChannelMetadata.objects.filter(id=node.channel_id)

cursor = connection.cursor()

base = BASES[content_schema]

for qs in [
Expand All @@ -131,8 +145,14 @@ def _serialize(self, nodes, node, content_schema):
# directly from the database.
# One example is for JSON field data that is stored as a string in the database,
# we want to avoid that being coerced to Python objects.
cursor.execute(*qs.query.sql_with_params())
data[qs.model._meta.db_table] = [
try:
sql, params = qs.query.sql_with_params()
except EmptyResultSet:
# `__uuidin=[]` raises EmptyResultSet rather than emitting an empty IN.
data[table_name] = []
continue
cursor.execute(sql, params)
data[table_name] = [
# Coerce any UUIDs to their hex representation, as Postgres raw values will be UUIDs
dict(
zip(
Expand Down
13 changes: 12 additions & 1 deletion kolibri/core/content/test/test_public_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,18 @@ def _assert_data(self, Model, queryset):
field_names.add(BaseModel._mptt_meta.left_attr)
field_names.add(BaseModel._mptt_meta.right_attr)
field_names.add(BaseModel._mptt_meta.level_attr)
for response_data, obj in zip(response.data[Model._meta.db_table], queryset):
# Row order is not part of the API contract for these tables (only the
# ContentNode descendants response guarantees order, asserted separately).
# Sort both sides by pk so we test row equality, not incidental order.
pk_col = Model._meta.pk.column
response_rows = sorted(
response.data[Model._meta.db_table],
key=lambda r: str(r[pk_col]).replace("-", ""),
)
expected_rows = sorted(
queryset, key=lambda o: str(getattr(o, pk_col)).replace("-", "")
)
for response_data, obj in zip(response_rows, expected_rows):
# Ensure that we are not returning any empty objects
self.assertNotEqual(response_data, {})
for field in fields:
Expand Down
Loading