[英]Efficient join using Django REST Framework serializers
我正在使用下面的一組序列化程序來實現連接(join),這在我的開發環境中運行良好,但只要網站服務器和數據庫服務器之間存在一定的網絡距離,性能就會變得非常糟糕。 我懷疑問題出在底層實際執行的 SQL 上,於是做了一些日志記錄; 結果發現它似乎是對每個條目分別發起新的查詢再組合結果,而不是像我希望的那樣一次完成整個連接並返回連接后的結果。 這是我的序列化程序:
class UserSerializer(serializers.ModelSerializer):
    """Serialize ``User`` without its credential and permission fields."""

    class Meta:
        model = User
        # Fields listed here are left out of the serialized representation.
        exclude = (
            'password', 'last_login', 'is_superuser', 'is_staff', 'is_active',
            'date_joined', 'groups', 'user_permissions',
        )
class DepartmentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``Department`` with its curator embedded as a nested object."""

    # Nested serializer: the curator is rendered inline via UserSerializer.
    curator = UserSerializer()

    class Meta:
        model = Department
        fields = '__all__'
class CategorySerializer(serializers.HyperlinkedModelSerializer):
    """Serialize every field of ``Category``."""

    class Meta:
        model = Category
        fields = '__all__'
class DetailedLinkedContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``LinkedContent`` with nested category and department.

    The nested serializers read ``category`` and ``department`` from each
    instance; the accompanying query log shows one extra query per related
    object when the queryset is not eager-loaded.
    """

    category = CategorySerializer()
    department = DepartmentSerializer()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = LinkedContent

    def get_type(self, obj):
        """Return the constant type tag ``'link'`` for every instance."""
        return 'link'
class DetailedFileContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent`` with nested category and department.

    Adds two computed fields: ``link_url`` (the file's URL) and ``type``
    (the instance's ``file_type``).
    """

    category = CategorySerializer()
    department = DepartmentSerializer()
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = FileContent

    def get_link_url(self, obj):
        """Return the URL of the instance's ``file``."""
        return obj.file.url

    def get_type(self, obj):
        """Return the instance's ``file_type``."""
        return obj.file_type
如您所見,我通過將序列化程序中的字段包含為其他模型的序列化程序(例如 category = CategorySerializer())來進行“連接”。 看起來這就是DRF 推薦的,除非我誤解了什么。 這是在我的開發環境中運行的成百上千個查詢的一個小示例:
(0.001) SELECT "content_linkedcontent"."id", "content_linkedcontent"."link_text", "content_linkedcontent"."department_id", "content_linkedcontent"."category_id", "content_linkedcontent"."visibility_rank", "content_linkedcontent"."link_url" FROM "content_linkedcontent"; args=()
(0.001) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.001) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 24; args=(24,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 3; args=(3,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 29; args=(29,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 4; args=(4,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 25; args=(25,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 29; args=(29,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 25; args=(25,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 24; args=(24,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 3; args=(3,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 3; args=(3,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 28; args=(28,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 1; args=(1,)
(0.000) SELECT "content_department"."id", "content_department"."name", "content_department"."description", "content_department"."curator_id", "content_department"."visibility_rank" FROM "content_department" WHERE "content_department"."id" = 28; args=(28,)
(0.000) SELECT "auth_user"."id", "auth_user"."password", "auth_user"."last_login", "auth_user"."is_superuser", "auth_user"."username", "auth_user"."first_name", "auth_user"."last_name", "auth_user"."email", "auth_user"."is_staff", "auth_user"."is_active", "auth_user"."date_joined" FROM "auth_user" WHERE "auth_user"."id" = 6; args=(6,)
(0.000) SELECT "content_category"."id", "content_category"."name", "content_category"."description" FROM "content_category" WHERE "content_category"."id" = 4; args=(4,)
那么如何在 DRF 中使用序列化程序對我想要的信息進行真正的連接呢?
通過遵循此博客條目中的建議,我設法將查詢時間縮短了一半,以下是我更新的序列化程序和使用它們的視圖:
class DetailedLinkedContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``LinkedContent``; ``setup_eager_loading`` prepares its queryset."""

    category = CategorySerializer()
    # Attempt to eager-load the department's curator by handing the nested
    # serializer a pre-configured Department queryset.
    # NOTE(review): a nested serializer field presumably reads its value from
    # the parent instance rather than from this queryset; the surrounding
    # text reports that the curator is still not prefetched this way.
    department_query = Department.objects.all()
    department_query = DepartmentSerializer.setup_eager_loading(department_query)
    department = DepartmentSerializer(department_query)
    # department = DepartmentSerializer()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = LinkedContent

    def get_type(self, obj):
        """Return the constant type tag ``'link'`` for every instance."""
        return 'link'

    @staticmethod
    def setup_eager_loading(queryset):
        """Perform necessary eager loading of data.

        Returns ``queryset`` with ``category`` and ``department`` added via
        ``select_related``.
        """
        queryset = queryset.select_related('category', 'department')
        return queryset
class DetailedFileContentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent``; ``setup_eager_loading`` prepares its queryset."""

    category = CategorySerializer()
    # Attempt to eager-load the department's curator by handing the nested
    # serializer a pre-configured Department queryset.
    # NOTE(review): a nested serializer field presumably reads its value from
    # the parent instance rather than from this queryset; the surrounding
    # text reports that the curator is still not prefetched this way.
    department_query = Department.objects.all()
    department_query = DepartmentSerializer.setup_eager_loading(department_query)
    department = DepartmentSerializer(department_query)
    # department = DepartmentSerializer()
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = FileContent

    def get_link_url(self, obj):
        """Return the URL of the instance's ``file``."""
        return obj.file.url

    def get_type(self, obj):
        """Return the instance's ``file_type``."""
        return obj.file_type

    @staticmethod
    def setup_eager_loading(queryset):
        """Perform necessary eager loading of data.

        Returns ``queryset`` with ``category`` and ``department`` added via
        ``select_related``.
        """
        queryset = queryset.select_related('category', 'department')
        return queryset
以及我使用這些序列化程序的視圖(view):
class DetailedContentView(views.APIView):
    """Return linked and file content merged into a single sorted list."""

    permission_classes = [IsAuthenticated, ContentCuratorOrReadOnly, IsGroupMember, ]

    def get(self, request, *args, **kwargs):
        """Serialize all content, ordered by department rank then item rank."""
        context = {"request": request}
        linked_content = LinkedContent.objects.all()
        file_content = FileContent.objects.all()
        # this line is newly added
        linked_content = DetailedLinkedContentSerializer.setup_eager_loading(linked_content)
        # this line too
        # Use the file serializer's own helper so each queryset is prepared by
        # the serializer that will consume it.
        file_content = DetailedFileContentSerializer.setup_eager_loading(file_content)
        linked_content_serializer = DetailedLinkedContentSerializer(linked_content, many=True, context=context)
        file_content_serializer = DetailedFileContentSerializer(file_content, many=True, context=context)
        # Merge both serialized lists, then sort on the nested department's
        # visibility_rank first and the item's own visibility_rank second.
        response = linked_content_serializer.data + file_content_serializer.data
        response = sorted(response, key=lambda x: (x['department']['visibility_rank'], x['visibility_rank']))
        return Response(response)
但是,我在 Department 序列化程序中預取我的 Users 序列化程序的解決方案似乎並沒有奏效。 具體來說,在將我的部門序列化程序更新為:
class DepartmentSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize ``Department`` with its curator embedded as a nested object."""

    # Nested serializer: the curator is rendered inline via UserSerializer.
    curator = UserSerializer()

    class Meta:
        model = Department
        fields = '__all__'

    @staticmethod
    def setup_eager_loading(queryset):
        """Perform necessary eager loading of data.

        Returns ``queryset`` with ``curator`` added via ``select_related``.
        """
        queryset = queryset.select_related('curator')
        return queryset
以下幾行:
# Excerpt repeated from the serializer class bodies: build a Department
# queryset, run it through DepartmentSerializer.setup_eager_loading, and pass
# the result to the nested DepartmentSerializer.
department_query = Department.objects.all()
department_query = DepartmentSerializer.setup_eager_loading(department_query)
department = DepartmentSerializer(department_query)
似乎沒有按照我的意願預取我的策展人。
我使用此博客中的評論中的 mixin 更新了我的序列化程序:
class EagerLoadingMixin:
    """Apply a class's declared eager-loading relations to a queryset.

    Subclasses may define ``_SELECT_RELATED_FIELDS`` and/or
    ``_PREFETCH_RELATED_FIELDS`` as iterables of relation names.
    """

    @classmethod
    def setup_eager_loading(cls, queryset):
        """Return ``queryset`` with the declared relations eager-loaded.

        A declaration that is missing, ``None`` or empty is skipped, so the
        queryset is returned unchanged when nothing is declared.
        """
        select_fields = getattr(cls, "_SELECT_RELATED_FIELDS", None)
        # An empty declaration must not reach Django: ``select_related()``
        # with no arguments follows every non-null foreign key.
        if select_fields:
            queryset = queryset.select_related(*select_fields)
        prefetch_fields = getattr(cls, "_PREFETCH_RELATED_FIELDS", None)
        if prefetch_fields:
            queryset = queryset.prefetch_related(*prefetch_fields)
        return queryset
並將 'department__curator' 添加到我的 select_related 字段列表(_SELECT_RELATED_FIELDS)中。 現在它看起來像這樣:
class DetailedFileContentSerializer(EagerLoadingMixin, serializers.HyperlinkedModelSerializer):
    """Serialize ``FileContent`` with nested category and department.

    Eager loading is declared through ``_SELECT_RELATED_FIELDS`` and applied
    by ``EagerLoadingMixin.setup_eager_loading``.
    """

    category = CategorySerializer()
    department = DepartmentSerializer()
    link_url = serializers.SerializerMethodField()
    type = serializers.SerializerMethodField()

    class Meta:
        fields = '__all__'
        model = FileContent

    def get_link_url(self, obj):
        """Return the URL of the instance's ``file``."""
        return obj.file.url

    def get_type(self, obj):
        """Return the instance's ``file_type``."""
        return obj.file_type

    # 'department__curator' follows the department's curator relation in the
    # same select_related call as the department itself.
    _SELECT_RELATED_FIELDS = ['department', 'category', 'department__curator']
查詢現在大約需要 1/3 的時間並且不包含數百個 SELECT。 它仍然需要很長時間,但我相信我可以通過為我的數據庫切換到不同的托管解決方案來解決這個問題。
我不確定我的解決方案是否也有效,但我嘗試用自定義管理器(custom manager)來解決這個問題,正如其他人在博客文章的評論中所建議的那樣。 (這更像是 Django 層面的解決方案,而不是 DRF 層面的解決方案。)請注意,即使您沒有使用序列化程序(例如直接調用 SomeModel.objects.all()),這也會獲取相關對象,這可能是您想要的,也可能不是。 希望這個答案在同行評審后會變得更好。
假設:
User(用戶) - Listing(列表):一對多
Listing(列表) - Item(項目):多對多
Listing(列表) - Like(點贊):一對多
class PreFetchMixin:
    """Manager mixin that eager-loads and annotates every queryset it returns.

    The host class may define ``_SELECT_RELATED_FIELDS`` and
    ``_PREFETCH_RELATED_FIELDS`` (iterables of relation names) and
    ``_ANNOTATIONS`` (a mapping of annotation name to expression).
    """

    def get_queryset(self):
        """Return the parent queryset with the declared options applied.

        A declaration that is missing, ``None`` or empty is skipped.
        """
        queryset = super().get_queryset()
        select_fields = getattr(self, '_SELECT_RELATED_FIELDS', None)
        # An empty declaration must not reach Django: ``select_related()``
        # with no arguments follows every non-null foreign key.
        if select_fields:
            queryset = queryset.select_related(*select_fields)
        prefetch_fields = getattr(self, '_PREFETCH_RELATED_FIELDS', None)
        if prefetch_fields:
            queryset = queryset.prefetch_related(*prefetch_fields)
        annotations = getattr(self, '_ANNOTATIONS', None)
        if annotations:
            queryset = queryset.annotate(**annotations)
        return queryset
# PreFetchMixin must come first considering MRO
from django.db import models
from django.db.models import Count
class ListingManager(PreFetchMixin, models.Manager):
    """Manager whose querysets come pre-configured by ``PreFetchMixin``.

    ``PreFetchMixin`` is listed before ``models.Manager`` so that its
    ``get_queryset`` comes first in the MRO.
    """

    _SELECT_RELATED_FIELDS = ('user',)
    _PREFETCH_RELATED_FIELDS = ('items',)
    # Exposes the number of related ``like`` rows as ``num_likes``.
    _ANNOTATIONS = {'num_likes': Count('like')}
並向 Listing 模型添加一行:
class Listing(models.Model):
    # Remaining model fields are elided in this excerpt.
    ...
    # Install the eager-loading manager as the model's ``objects`` manager.
    objects = ListingManager()
    ...
如果要使用 DRF 序列化程序,則需要做一些小改動:
from rest_framework import serializers
# assumes that ItemSerializer is defined
class ListingSerializer(serializers.ModelSerializer):
    """Serialize ``Listing`` with nested items and the ``num_likes`` count."""

    items = ItemSerializer(many=True, read_only=True)
    # Read-only integer; the manager's ``_ANNOTATIONS`` defines an annotation
    # with this same name.
    num_likes = serializers.IntegerField(read_only=True)

    class Meta:
        model = Listing
        fields = '__all__'
在您的視圖(view)中:
def get_queryset(self):
    """Return the parent queryset with related objects eager-loaded.

    ``relation1`` … ``relation4`` and the trailing ``...`` are placeholders
    for the relation names of the concrete view.
    """
    return (
        super().get_queryset()
        .select_related(relation1, relation2, ...)
        .prefetch_related(relation3, relation4, ...)
    )
就是這樣。
DRF 在底層執行以下操作:
# Illustrative pseudo-code: a nested foreign-key object is read as a plain
# attribute of the instance being serialized.
nested_instance_for_serialization = getattr(instance, fk_field_name)
# A nested one-to-many relation is read by calling ``.all()`` on the
# instance's related manager.
serialize_nested(instance.one_to_many_relation_field_name.all())
qs.all() 帶有緩存:它只會被求值一次,然后被 DRF 和其他循環重復使用。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.