I am using the set of serializers below to achieve a join. They work fine on my development setup but perform terribly when there is any distance between the web server and the database server. I got suspicious about the SQL being run and did some logging; it seems that a new query is issued for every entry and the results are combined, rather than the entire join being done at once and returned as I want. Here are my serializers:
class UserSerializer(serializers.ModelSerializer):
    """Serialize a ``User`` while leaving out credential and permission fields."""

    class Meta:
        model = User
        # Fields withheld from the serialized output.
        exclude = (
            'password',
            'last_login',
            'is_superuser',
            'is_staff',
            'is_active',
            'date_joined',
            'groups',
            'user_permissions',
        )
class DepartmentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize a ``Department`` with its curator embedded as a nested object."""

    # The curator is rendered through UserSerializer rather than as a link.
    curator = UserSerializer()

    class Meta:
        fields = '__all__'
        model = Department
class CategorySerializer(serializers.HyperlinkedModelSerializer):
    """Serialize every field of a ``Category``."""

    class Meta:
        fields = '__all__'
        model = Category
class DetailedLinkedContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``LinkedContent`` with nested category and department data."""

    # Related objects are embedded through their own serializers.
    category = CategorySerializer()
    department = DepartmentSerializer()
    # Computed by get_type() below.
    type = serializers.SerializerMethodField()

    class Meta:
        model = LinkedContent
        fields = '__all__'

    def get_type(self, obj):
        """Return the constant type tag used for linked content."""
        return 'link'
class DetailedFileContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent`` with nested category and department data."""

    # Related objects are embedded through their own serializers.
    category = CategorySerializer()
    department = DepartmentSerializer()
    # Both of these are computed by the get_* methods below.
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        model = FileContent
        fields = '__all__'

    def get_link_url(self, obj):
        """Return the ``url`` of the object's ``file`` attribute."""
        attached_file = obj.file
        return attached_file.url

    def get_type(self, obj):
        """Return the object's ``file_type`` as its type tag."""
        return obj.file_type
As you can see, I'm doing my 'join' by including fields in a serializer as serializers of other models, such as category = CategorySerializer(). It looks like that's what DRF recommends, unless I'm misunderstanding something. Here is a small sample of the hundreds and hundreds of queries being run on my development environment:
(0.001) SELECT "content_linkedcontent"."id", "content_linkedcontent"."link_text", "content_linkedcontent"."department_id", "content_linkedcontent"."category_id", "content_linkedcontent"."visibility_rank", "content_linkedcontent"."link_url" FROM "content_linkedcontent"; args=()
(0.001) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.001) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 24; args=(24,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 3; args=(3,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 29; args=(29,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 4; args=(4,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 25; args=(25,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 29; args=(29,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 25; args=(25,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 24; args=(24,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 3; args=(3,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 28; args=(28,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 28; args=(28,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 4; args=(4,)
So how can I do a real join with the information that I want using serializers in DRF?
I have managed to cut the query time in half by following the advice in this blog entry. Here are my updated serializers and the view that uses them:
class DetailedLinkedContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``LinkedContent`` with nested category and department data.

    Pass the queryset through :meth:`setup_eager_loading` before handing it
    to this serializer so the nested objects are loaded with joins instead
    of one query per row.
    """

    category = CategorySerializer()
    # Declared without arguments: eager loading is configured on the parent
    # queryset in setup_eager_loading(), not on the nested field. A queryset
    # passed to a nested serializer field does not drive what it serializes.
    department = DepartmentSerializer()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = LinkedContent

    def get_type(self, obj):
        """Return the constant type tag used for linked content."""
        return 'link'

    @staticmethod
    def setup_eager_loading(queryset):
        """Return ``queryset`` with the relations this serializer reads joined in.

        ``department__curator`` is included because the nested department
        serializer also serializes the department's curator.
        """
        return queryset.select_related(
            'category',
            'department',
            'department__curator',
        )
class DetailedFileContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent`` with nested category and department data.

    Pass the queryset through :meth:`setup_eager_loading` before handing it
    to this serializer so the nested objects are loaded with joins instead
    of one query per row.
    """

    category = CategorySerializer()
    # Declared without arguments: eager loading is configured on the parent
    # queryset in setup_eager_loading(), not on the nested field. A queryset
    # passed to a nested serializer field does not drive what it serializes.
    department = DepartmentSerializer()
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = FileContent

    def get_link_url(self, obj):
        """Return the ``url`` of the object's ``file`` attribute."""
        return obj.file.url

    def get_type(self, obj):
        """Return the object's ``file_type`` as its type tag."""
        return obj.file_type

    @staticmethod
    def setup_eager_loading(queryset):
        """Return ``queryset`` with the relations this serializer reads joined in.

        ``department__curator`` is included because the nested department
        serializer also serializes the department's curator.
        """
        return queryset.select_related(
            'category',
            'department',
            'department__curator',
        )
and my view that makes use of those serializers:
class DetailedContentView(views.APIView):
    """Return all linked and file content as one list sorted by visibility."""

    permission_classes = [IsAuthenticated, ContentCuratorOrReadOnly, IsGroupMember, ]

    def get(self, request, *args, **kwargs):
        """Serialize every ``LinkedContent`` and ``FileContent`` row.

        The combined list is ordered by the department's ``visibility_rank``
        first and the item's own ``visibility_rank`` second.
        """
        context = {"request": request}

        # Each queryset is prepared by the serializer that will consume it,
        # so the eager-loading rules stay next to the fields that need them.
        linked_content = DetailedLinkedContentSerializer.setup_eager_loading(
            LinkedContent.objects.all()
        )
        file_content = DetailedFileContentSerializer.setup_eager_loading(
            FileContent.objects.all()
        )

        linked_content_serializer = DetailedLinkedContentSerializer(
            linked_content, many=True, context=context
        )
        file_content_serializer = DetailedFileContentSerializer(
            file_content, many=True, context=context
        )

        response = linked_content_serializer.data + file_content_serializer.data
        response = sorted(
            response,
            key=lambda x: (x['department']['visibility_rank'], x['visibility_rank']),
        )
        return Response(response)
However, my attempt to prefetch the users serialized inside my Department serializer doesn't seem to be doing the trick. Specifically, after updating my department serializer to:
class DepartmentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize a ``Department`` with its curator embedded as a nested object."""

    # The curator is rendered through UserSerializer rather than as a link.
    curator = UserSerializer()

    class Meta:
        fields = '__all__'
        model = Department

    @staticmethod
    def setup_eager_loading(queryset):
        """Return ``queryset`` with ``curator`` added via ``select_related``."""
        return queryset.select_related('curator')
The following lines:
# NOTE(review): presumably this is why the curators are not prefetched - a
# serializer declared as a nested field appears to serialize each parent's
# related object, so the queryset passed here is likely never used.
# TODO confirm against the DRF nested-serializer documentation.
department_query = Department.objects.all()
department_query = DepartmentSerializer.setup_eager_loading(department_query)
department = DepartmentSerializer(department_query)
don't seem to be prefetching my curators as I want.
I updated my serializers using a mixin from a comment found on this blog:
class EagerLoadingMixin:
    """Apply class-declared eager-loading options to a queryset.

    Subclasses may define ``_SELECT_RELATED_FIELDS`` and/or
    ``_PREFETCH_RELATED_FIELDS``; each one that exists is unpacked into the
    matching queryset method.
    """

    @classmethod
    def setup_eager_loading(cls, queryset):
        """Return ``queryset`` after applying whichever options ``cls`` defines."""
        steps = (
            ("_SELECT_RELATED_FIELDS", "select_related"),
            ("_PREFETCH_RELATED_FIELDS", "prefetch_related"),
        )
        for option_name, method_name in steps:
            # An attribute that is absent is skipped; one that is present is
            # always applied, even when it is empty.
            if hasattr(cls, option_name):
                queryset = getattr(queryset, method_name)(*getattr(cls, option_name))
        return queryset
and added 'department__curator' to my list of eagerly loaded (select_related) fields. Now it looks like this:
class DetailedFileContentSerializer(EagerLoadingMixin, serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent`` with nested category and department data."""

    # Read by EagerLoadingMixin.setup_eager_loading().
    _SELECT_RELATED_FIELDS = ['department', 'category', 'department__curator']

    # Related objects are embedded through their own serializers.
    category = CategorySerializer()
    department = DepartmentSerializer()
    # Both of these are computed by the get_* methods below.
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        model = FileContent
        fields = '__all__'

    def get_link_url(self, obj):
        """Return the ``url`` of the object's ``file`` attribute."""
        attached_file = obj.file
        return attached_file.url

    def get_type(self, obj):
        """Return the object's ``file_type`` as its type tag."""
        return obj.file_type
The query now takes roughly 1/3 of the time and doesn't contain hundreds of SELECTs. It still takes too long, but I believe I can solve that by switching to a different hosting solution for my database.
I'm not sure if my solution also works, but I tried solving this problem using a custom manager, as someone else commented on the blog post. (This is more of a Django solution than a DRF solution.) Note that this also fetches related objects even when you're not using a serializer (e.g. SomeModel.objects.all()), which may or may not be what you want. Hopefully this answer will get better after a peer review.
Assumptions:
User - Listing: one-to-many
Listing - Item: many-to-many
Listing - Like: one-to-many
class PreFetchMixin:
    """Extend ``get_queryset`` with eager-loading options declared on the class.

    Recognised attributes are ``_SELECT_RELATED_FIELDS``,
    ``_PREFETCH_RELATED_FIELDS`` and ``_ANNOTATIONS``; each is applied only
    if it exists.
    """

    def get_queryset(self):
        """Return the parent queryset with the declared options applied in order."""
        qs = super().get_queryset()

        positional_steps = (
            ('_SELECT_RELATED_FIELDS', 'select_related'),
            ('_PREFETCH_RELATED_FIELDS', 'prefetch_related'),
        )
        for option_name, method_name in positional_steps:
            if hasattr(self, option_name):
                qs = getattr(qs, method_name)(*getattr(self, option_name))

        # Annotations are a mapping, so they are passed as keyword arguments.
        if hasattr(self, '_ANNOTATIONS'):
            qs = qs.annotate(**self._ANNOTATIONS)
        return qs
# PreFetchMixin must come first considering MRO
from django.db import models
from django.db.models import Count
class ListingManager(PreFetchMixin, models.Manager):
    """Manager whose querysets load the listing's user, items and like count."""

    # Options consumed by PreFetchMixin.get_queryset().
    _ANNOTATIONS = {'num_likes': Count('like')}
    _PREFETCH_RELATED_FIELDS = ('items',)
    _SELECT_RELATED_FIELDS = ('user',)
And add a line to Listing:
class Listing(models.Model):
    """Listing model; the remaining fields are elided in this excerpt."""

    ...
    # Use ListingManager as the manager exposed on ``Listing.objects``.
    objects = ListingManager()
    ...
If you want to use a DRF serializer, you need to make minor changes:
from rest_framework import serializers
# assumes that ItemSerializer is defined
class ListingSerializer(serializers.ModelSerializer):
    """Serialize a ``Listing`` with its items nested and a read-only like count."""

    # Read-only nested list of the listing's items.
    items = ItemSerializer(many=True, read_only=True)
    # Read-only integer exposed as ``num_likes``.
    num_likes = serializers.IntegerField(read_only=True)

    class Meta:
        fields = '__all__'
        model = Listing
In your view:
def get_queryset(self):
    """Return the parent queryset with the listed relations loaded eagerly."""
    base_queryset = super().get_queryset()
    # The relation names are placeholders in this example.
    joined = base_queryset.select_related(relation1, relation2, ...)
    return joined.prefetch_related(relation3, relation4, ...)
That's it.
DRF under the hood does:
nested_instance_for_serialization = getattr(instance, fk_field_name)
serialize_nested(instance.one_to_many_relation_field_name.all())
The queryset returned by qs.all() has a result cache: it is evaluated once, and the cached rows are then reused by DRF and by any other loops over it.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.