LensKit Recommender 只返回部分用户的结果，否则返回空的 DataFrame。为什么会这样？

Question

我正在使用 Django 框架实现一个群组推荐系统，其中用到了 Python 的 LensKit 工具包（具体来说，是由 UserUser 算法通过 Recommender.adapt 适配得到的推荐器对象）。但是，个人推荐只在某些情况下（即只对某些特定用户）才会返回结果，而针对用户组的推荐却总能返回结果（我创建了一个“混合用户”，其评分是组内成员评分的平均值，并为该混合用户请求推荐）。下面是我为单个用户和用户组请求推荐的实现：

from rest_framework import viewsets, status
from .models import Movie, Rating, Customer, Recommendation
from .serializers import MovieSerializer, RatingSerializer, UserSerializer, GroupSerializer, CustomerSerializer, RecommendationSerializer
from rest_framework.response import Response
from rest_framework.decorators import action
from django.contrib.auth.models import User, Group
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from pandas import Series
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
import lenskit.datasets as ds


class CustomerViewSet(viewsets.ModelViewSet):
    """Customer endpoints plus an action recommending movies to the caller."""
    queryset = Customer.objects.all()
    serializer_class = CustomerSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=False)
    def recommendations(self, request):
        """Return up to 20 recommended movies for the authenticated user.

        An optional ``genre`` entry in ``request.data`` restricts the result
        to movies whose ``genres`` contain it; when it is absent, 'All' is
        used and no filtering happens.

        Returns:
            Response: the serialized movies ordered by predicted score
            (highest first) with HTTP 200. The list is empty when the
            recommender yields no items for the user's ratings.
        """
        genre = request.data['genre'] if 'genre' in request.data else 'All'

        user = request.user
        name = user.username

        # Drop the recommendations stored by previous requests before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # Map movieId -> stars for every rating given by the requesting user.
        user_dict = {
            int(rating.movie.movieId): rating.stars
            for rating in Rating.objects.filter(user=user.id)
        }

        # NOTE(review): the model is loaded and re-fitted on every request;
        # consider fitting once and reusing it if this becomes a bottleneck.
        data = ds.MovieLens('datasets/')
        user_user = UserUser(15, min_nbrs=3)
        algo = Recommender.adapt(user_user)
        algo.fit(data.ratings)

        # The requesting user is not part of the training set, so a
        # placeholder id (-1) is used and the ratings are passed explicitly.
        recs = algo.recommend(user=-1, ratings=Series(user_dict))

        # Persist the recommended items, stopping at 20 stored entries.
        for _, row in recs.iterrows():
            # The genre filter needs the Movie row; previously `movie` was
            # referenced here without ever being defined (NameError).
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            stars = row['score']

            if genre == 'All' or genre in movie.genres:
                Recommendation.objects.create(
                    name=name, movieId=movie.movieId, pred_stars=stars
                )

            count = Recommendation.objects.filter(name=name).count()
            if count >= 20:
                break

        # Return the movies ordered by their predicted rating.
        stored = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in stored]

        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)


class GroupViewSet(viewsets.ModelViewSet):
    """Group endpoints plus an action recommending movies to a whole group."""
    queryset = Group.objects.all()
    serializer_class = GroupSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated, )

    @action(methods=['GET'], detail=True)
    def recommendations(self, request, pk=None):
        """Return up to 20 recommended movies for the group with id ``pk``.

        The group is represented by a hybrid user whose rating for each movie
        is the average of the ratings given by the group's members. An
        optional ``genre`` entry in ``request.data`` restricts the result to
        movies whose ``genres`` contain it; when absent, 'All' is used.

        Returns:
            Response: the serialized movies ordered by predicted score
            (highest first) with HTTP 200, or a message with HTTP 400 when
            the requesting user is not a member of the group.
        """
        genre = request.data['genre'] if 'genre' in request.data else 'All'

        group = Group.objects.get(id=pk)
        users = group.user_set.all()

        # Only members of the group may request its recommendations.
        if request.user not in users:
            response = {'message': 'You are not a member of this group'}
            return Response(response, status=status.HTTP_400_BAD_REQUEST)

        name = group.name

        # Drop the recommendations stored by previous requests before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # movieId -> [running average of stars, number of ratings seen].
        rating_dict = {}
        for member in users:
            for rating in Rating.objects.filter(user=member.id):
                stars = rating.stars
                movieId = int(rating.movie.movieId)

                if movieId in rating_dict:
                    # Another member already rated this movie: fold the new
                    # rating into the running average.
                    avg, seen = rating_dict[movieId]
                    rating_dict[movieId] = [(avg * seen + stars) / (seen + 1), seen + 1]
                else:
                    # First rating for this movie: store it as-is.
                    rating_dict[movieId] = [stars, 1]

        # NOTE(review): the model is loaded and re-fitted on every request;
        # consider fitting once and reusing it if this becomes a bottleneck.
        data = ds.MovieLens('datasets/')
        user_user = UserUser(15, min_nbrs=3)
        algo = Recommender.adapt(user_user)
        algo.fit(data.ratings)

        # Keep only the average for each movie.
        rating_dict = {movie_id: entry[0] for movie_id, entry in rating_dict.items()}

        # Request recommendations for the hybrid user; it is not part of the
        # training set, so a placeholder id (-1) is used.
        recs = algo.recommend(user=-1, ratings=Series(rating_dict))

        # `genre` keeps the value resolved at the top of the method; it used
        # to be re-read here unconditionally, which raised KeyError whenever
        # the request carried no genre.

        # Persist the recommended items, stopping at 20 stored entries.
        for _, row in recs.iterrows():
            print(row['item'])
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            stars = row['score']
            if genre == 'All' or genre in movie.genres:
                Recommendation.objects.create(
                    name=name, movieId=movie.movieId, pred_stars=stars
                )

            count = Recommendation.objects.filter(name=name).count()
            print('count', count)
            if count >= 20:
                break

        # Return the movies ordered by the predicted score for the group.
        stored = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in stored]
        serializer = MovieSerializer(rec_movies, many=True)

        return Response(serializer.data, status=status.HTTP_200_OK)

这是正常响应的示例：

[
    {
        "id": 17521,
        "movieId": "318",
        "title": "Shawshank Redemption, The (1994)",
        "genres": "Crime|Drama",
        "link": "https://www.imdb.com/title/tt0111161/",
        "average_rating": 4.487138263665595,
        "no_ratings": 311,
        "poster": "/default-movie.jpg"
    },
    {
        "id": 17503,
        "movieId": "296",
        "title": "Pulp Fiction (1994)",
        "genres": "Comedy|Crime|Drama|Thriller",
        "link": "https://www.imdb.com/title/tt0110912/",
        "average_rating": 4.256172839506172,
        "no_ratings": 324,
        "poster": "/default-movie.jpg"
    },
    ...
]

出问题时的响应（空列表）：

[]

在后一种情况下，打印 Recommender 返回的 DataFrame 显示：

Empty DataFrame
Columns: [item, score]
Index: []

我不确定我做错了什么。 有人可以帮忙吗？

Answer 1

此问题的最可能原因是用户-用户推荐器无法构建足够的可行邻域来提供推荐。 这是基于邻域的推荐的缺点。

解决方案是改用一种只要用户有评分就总能给出推荐的算法（例如某种矩阵分解算法），和/或在个性化协同过滤无法给出推荐时，使用 Popular 之类的后备算法进行推荐。

（另一种解决方案是为 LensKit 实现各种冷启动推荐器或基于内容的推荐器之一，但该项目目前没有提供任何一个。）

LensKit Recommender 只返回部分用户的结果，否则返回空的 DataFrame。为什么会这样？

问题描述

1 个解决方案

解决方案1
0 已采纳 2021-05-23 02:53:56

LensKit Recommender 只返回部分用户的结果，否则返回空的 DataFrame。 为什么会这样？

问题描述

1 个解决方案

解决方案1 0 已采纳 2021-05-23 02:53:56

LensKit Recommender 只返回部分用户的结果，否则返回空的 DataFrame。为什么会这样？

解决方案1
0 已采纳 2021-05-23 02:53:56