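计算偏差:物品 i 相对于物品 j 的平均评分偏差为 $\mathrm{dev}_{i,j} = \sum_{u \in S_{i,j}(X)} \frac{u_i - u_j}{\mathrm{card}(S_{i,j}(X))}$,其中 $S_{i,j}(X)$ 是同时对物品 i 和物品 j 评过分的用户集合,$u_i$ 是用户 u 对物品 i 的评分。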
card() 表示集合包含的元素数量。
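加权Slope One算法:用户 u 对尚未评分的物品 j 的预测评分为 $P^{wS1}(u)_j = \frac{\sum_{i \in S(u) \setminus \{j\}} (\mathrm{dev}_{j,i} + u_i)\, c_{j,i}}{\sum_{i \in S(u) \setminus \{j\}} c_{j,i}}$,其中 $c_{j,i} = \mathrm{card}(S_{j,i}(X))$ 是同时对物品 j 和物品 i 评过分的用户数。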
# coding:utf-8
__author__ = 'similarface'

import codecs
import os
import sys
from math import sqrt

# Sample data, shaped as {"user": {"band": rating}}.
users2 = {
    "Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4},
    "Ben": {"Taylor Swift": 5, "PSY": 2},
    "Clara": {"PSY": 3.5, "Whitney Houston": 4},
    "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3},
}


class recommender:
    """Weighted Slope One recommender over a ``{user: {item: rating}}`` dict.

    Typical use: construct with the rating data, call
    ``computerDeviation()`` once to build the item-to-item deviation and
    frequency tables, then call ``slopeOneRecommendations()`` with one
    user's ratings.
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Store the configuration and the rating data.

        :param data: ratings as ``{user: {item: rating}}``. Anything that is
            not a dict is ignored and the recommender starts with no data.
        :param k: stored as ``self.k``; not used by the Slope One methods
            in this class.
        :param metric: name of the similarity metric; stored as
            ``self.metric``.
        :param n: stored as ``self.n``; not used by the Slope One methods
            in this class.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        # No ``pearson`` method is defined on this class, so look it up
        # defensively instead of raising AttributeError at construction.
        self.fn = None
        if self.metric == 'pearson':
            self.fn = getattr(self, 'pearson', None)
        # isinstance() also accepts dict subclasses; always define
        # ``self.data`` so later methods never hit a missing attribute.
        self.data = data if isinstance(data, dict) else {}
        # frequencies[a][b]: number of users who rated both a and b.
        self.frequencies = {}
        # deviations[a][b]: average of (rating of a - rating of b).
        self.deviations = {}

    def computerDeviation(self):
        """Build the item-to-item deviation and frequency tables.

        For every ordered pair of distinct items (a, b) rated by the same
        user, counts those users in ``self.frequencies[a][b]`` and stores the
        mean of ``rating(a) - rating(b)`` in ``self.deviations[a][b]``.

        The tables are rebuilt from scratch on every call, so calling this
        method repeatedly yields the same result.

        :return: None
        """
        frequencies = {}
        deviations = {}
        # Each ``ratings`` is one user's {item: rating} mapping.
        for ratings in self.data.values():
            for item, rating in ratings.items():
                # Every rated item gets an entry, even with no co-rated item.
                item_freq = frequencies.setdefault(item, {})
                item_dev = deviations.setdefault(item, {})
                for item2, rating2 in ratings.items():
                    if item == item2:
                        continue
                    item_freq[item2] = item_freq.get(item2, 0) + 1
                    item_dev[item2] = (
                        item_dev.get(item2, 0.0) + (rating - rating2)
                    )
        # Turn the accumulated sums into averages: dev(i, j).
        for item, item_dev in deviations.items():
            for item2 in item_dev:
                item_dev[item2] /= frequencies[item][item2]
        self.frequencies = frequencies
        self.deviations = deviations

    def convertProductID2name(self, id):
        """Return the product name for ``id``, or ``id`` itself if unknown."""
        return self.productid2name.get(id, id)

    def slopeOneRecommendations(self, userRatings):
        """Predict ratings for items the user has not rated yet.

        For each unrated item j, the prediction is the weighted average over
        the user's rated items i of ``dev[j][i] + u[i]``, weighted by
        ``frequencies[j][i]``.

        :param userRatings: one user's ratings as ``{item: rating}``.
        :return: list of ``(item name, predicted rating)`` tuples sorted by
            predicted rating, highest first. Empty when nothing can be
            predicted (for example before ``computerDeviation()`` has run).
        """
        weighted_sums = {}
        total_weights = {}
        for useritem, userRating in userRatings.items():
            for diffItem, diffRatings in self.deviations.items():
                # Only predict unrated items that share at least one rater
                # with the item the user did rate.
                if diffItem in userRatings or useritem not in diffRatings:
                    continue
                freq = self.frequencies[diffItem][useritem]
                weighted_sums[diffItem] = (
                    weighted_sums.get(diffItem, 0.0)
                    + (diffRatings[useritem] + userRating) * freq
                )
                total_weights[diffItem] = total_weights.get(diffItem, 0) + freq
        recommendations = [
            (self.convertProductID2name(item), total / total_weights[item])
            for item, total in weighted_sums.items()
        ]
        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        return recommendations


if __name__ == '__main__':
    r = recommender(users2)
    r.computerDeviation()
    g = users2['Ben']
    result = r.slopeOneRecommendations(g)
    print(result)
[('Whitney Houston', 3.375)]
时间: 2024-10-03 13:27:34