转:TopN推荐系统——推荐的实现与推荐效果的评价指标

转自:用户推荐系统_python 代码-豆瓣
书籍:项亮的《推荐系统实践》

import random
import math

class UserBasedCF:
    """User-based collaborative filtering for Top-N recommendation.

    Ratings are held as nested dicts ``{user_id: {item_id: rating}}``.  Ids and
    ratings stay the raw strings read from the data files.  Call
    ``userSimilarityBest()`` before ``recommend()`` or any of the metric
    methods, because they all read ``self.userSimBest``.
    """

    def __init__(self, train=None, test=None):
        """Remember the two file paths and load both files immediately.

        train -- path of the training ratings file
        test  -- path of the test ratings file
        """
        self.trainfile = train
        self.testfile = test
        self.readData()

    @staticmethod
    def _loadRatings(path):
        """Parse one ratings file into ``{user_id: {item_id: rating}}``.

        Every non-blank line must split into exactly four whitespace-separated
        fields: user id, item id, rating and a fourth field that is ignored
        (presumably a timestamp -- confirm against the data set).  A line with
        a different field count raises ValueError.
        """
        ratings = {}
        # ``with`` guarantees the handle is closed, even on a malformed line.
        with open(path) as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines such as a trailing newline
                userid, itemid, record, _ = fields
                ratings.setdefault(userid, {})[itemid] = record
        return ratings

    @staticmethod
    def _safeRatio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 when denominator is 0."""
        if not denominator:
            return 0.0
        return numerator / (denominator * 1.0)

    def readData(self, train=None, test=None):
        """(Re)load ``self.traindata`` and ``self.testdata`` from disk.

        train / test -- optional replacement paths; when omitted the paths
        stored on the instance are used.
        """
        self.trainfile = train or self.trainfile
        self.testfile = test or self.testfile
        self.traindata = {}
        self.testdata = {}
        self.traindata = self._loadRatings(self.trainfile)
        self.testdata = self._loadRatings(self.testfile)

    def userSimilarityBest(self, train=None):
        """Build ``self.userSimBest``, the user-to-user similarity table.

        The similarity of u and v is the number of items both rated, divided
        by sqrt(|items of u| * |items of v|), i.e. cosine similarity over the
        sets of rated items.  Only pairs sharing at least one item get an
        entry, so a user who shares nothing with anyone has no row at all.

        train -- optional ``{user: {item: rating}}``; defaults to the loaded
        training data.
        """
        train = train or self.traindata
        self.userSimBest = dict()

        # Inverted index item -> users, so only co-rating pairs are visited.
        item_users = dict()
        for u, items in train.items():
            for i in items:
                item_users.setdefault(i, set()).add(u)

        user_item_count = dict()   # u -> number of items u rated
        count = dict()             # u -> {v: number of items rated by both}
        for users in item_users.values():
            for u in users:
                user_item_count[u] = user_item_count.get(u, 0) + 1
                for v in users:
                    if u == v:
                        continue
                    row = count.setdefault(u, {})
                    row[v] = row.get(v, 0) + 1

        for u, related_users in count.items():
            sims = self.userSimBest.setdefault(u, dict())
            for v, cuv in related_users.items():
                sims[v] = cuv / math.sqrt(user_item_count[u] * user_item_count[v] * 1.0)

    def recommend(self, user, train=None, k=8, nitem=40):
        """Return up to ``nitem`` recommendations for ``user`` as ``{item: score}``.

        user  -- user id
        train -- optional training data; defaults to the loaded training data
        k     -- number of most similar users consulted
        nitem -- maximum number of items returned

        An item's score is the sum of the similarities of the consulted
        neighbours who rated it.  A user without a similarity row gets an
        empty dict.
        """
        train = train or self.traindata
        rank = dict()
        interacted_items = train.get(user, {})
        # .get(): a user with no co-rating neighbour (or an unknown user) has
        # no similarity row; treat that as "no neighbours" instead of KeyError.
        neighbours = sorted(self.userSimBest.get(user, {}).items(),
                            key=lambda x: x[1], reverse=True)[0:k]  # the k most similar users
        for v, wuv in neighbours:
            # .get(): ``train`` may differ from the data the similarities came from.
            for i in train.get(v, {}):
                if i in interacted_items:
                    continue  # only recommend items the user has not rated yet
                # The book accumulates rvi * wuv here; this version sums wuv only.
                rank[i] = rank.get(i, 0) + wuv
        # Highest score first, truncated to nitem entries.
        return dict(sorted(rank.items(), key=lambda x: x[1], reverse=True)[0:nitem])

    def recallAndPrecision(self, train=None, test=None, k=8, nitem=10):
        """Return ``(recall, precision)`` of the Top-N lists over all training users.

        recall is hits / total number of test items, and precision is
        hits / (nitem * number of training users).  Either value is 0.0 when
        its denominator is 0.
        """
        train = train or self.traindata
        test = test or self.testdata
        hit = 0
        recall = 0
        precision = 0
        for user in train.keys():
            tu = test.get(user, {})  # users absent from the test set contribute nothing
            rank = self.recommend(user, train=train, k=k, nitem=nitem)
            hit += sum(1 for item in rank if item in tu)
            recall += len(tu)
            precision += nitem
        return (self._safeRatio(hit, recall), self._safeRatio(hit, precision))

    def coverage(self, train=None, test=None, k=8, nitem=10):
        """Return the fraction of training items recommended to at least one user.

        ``test`` is accepted for signature symmetry with the other metrics but
        is not used.  Returns 0.0 when the training data holds no items.
        """
        train = train or self.traindata
        recommend_items = set()
        all_items = set()
        for user in train.keys():
            all_items.update(train[user].keys())
            recommend_items.update(self.recommend(user, train, k=k, nitem=nitem).keys())
        return self._safeRatio(len(recommend_items), len(all_items))

    def popularity(self, train=None, test=None, k=8, nitem=10):
        """Return the mean of log(1 + popularity) over all recommended items.

        An item's popularity is the number of training users who rated it.
        ``test`` is accepted for signature symmetry but is not used.  Returns
        0.0 when nothing is recommended.
        """
        train = train or self.traindata
        item_popularity = dict()
        for items in train.values():
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1
        ret = 0
        n = 0
        for user in train.keys():
            rank = self.recommend(user, train, k=k, nitem=nitem)
            for item in rank:
                ret += math.log(1 + item_popularity[item])
                n += 1
        return self._safeRatio(ret, n)

def testUserBasedCF(train='u1.base', test='u1.test'):
    """Print precision, recall, coverage and popularity of UserBasedCF per K.

    train -- path of the training ratings file (default 'u1.base')
    test  -- path of the test ratings file (default 'u1.test')

    The similarity table is built once and then each neighbourhood size K in
    [5, 10, 20, 40, 80, 160] is evaluated; one table row is printed per K.
    """
    cf = UserBasedCF(train, test)
    cf.userSimilarityBest()
    print("%3s%20s%20s%20s%20s" % ('K', "precision", 'recall', 'coverage', 'popularity'))
    for k in [5, 10, 20, 40, 80, 160]:
        # recallAndPrecision returns (recall, precision), in that order.
        recall, precision = cf.recallAndPrecision(k=k)
        coverage = cf.coverage(k=k)
        popularity = cf.popularity(k=k)
        print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k, precision * 100, recall * 100, coverage * 100, popularity))


# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    testUserBasedCF()

基于项目的推荐系统,IBCF:

'''
Created on 2013-10-10

@author: Administrator
'''
import random
import math

class KNN:
    """Item-based collaborative filtering (item k-nearest-neighbours) for Top-N.

    Ratings are held as nested dicts ``{user_id: {item_id: rating}}``.  Ids and
    ratings stay the raw strings read from the data files.  Call ``ItemSim()``
    before ``recommend()`` or any of the metric methods, because they all read
    ``self.ItemSimlist``.
    """

    def __init__(self, train=None, test=None):
        """Remember the two file paths and load both files immediately.

        train -- path of the training ratings file
        test  -- path of the test ratings file
        """
        self.trainfile = train
        self.testfile = test
        self.readData()

    @staticmethod
    def _loadRatings(path):
        """Parse one ratings file into ``{user_id: {item_id: rating}}``.

        Every non-blank line must split into exactly four whitespace-separated
        fields: user id, item id, rating and a fourth field that is ignored
        (presumably a timestamp -- confirm against the data set).  A line with
        a different field count raises ValueError.
        """
        ratings = {}
        # ``with`` guarantees the handle is closed, even on a malformed line.
        with open(path) as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines such as a trailing newline
                userid, itemid, record, _ = fields
                ratings.setdefault(userid, {})[itemid] = record
        return ratings

    @staticmethod
    def _safeRatio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 when denominator is 0."""
        if not denominator:
            return 0.0
        return numerator / (denominator * 1.0)

    def readData(self, train=None, test=None):
        """(Re)load ``self.traindata`` and ``self.testdata`` from disk.

        train / test -- optional replacement paths; when omitted the paths
        stored on the instance are used.
        """
        self.trainfile = train or self.trainfile
        self.testfile = test or self.testfile
        self.traindata = {}
        self.testdata = {}
        self.traindata = self._loadRatings(self.trainfile)
        self.testdata = self._loadRatings(self.testfile)

    def ItemSim(self, train=None):
        """Build ``self.ItemSimlist``, the item-to-item similarity table.

        The similarity of i and j is the number of users who rated both,
        divided by sqrt(|users of i| * |users of j|).  Only pairs rated
        together by at least one user get an entry, so an item never co-rated
        with another item has no row at all.

        train -- optional ``{user: {item: rating}}``; defaults to the loaded
        training data.
        """
        train = train or self.traindata
        ItemSimcount = dict()   # i -> {j: number of users who rated both}
        Item_count = dict()     # i -> number of users who rated i
        for items in train.values():
            for itemidi in items:
                Item_count[itemidi] = Item_count.get(itemidi, 0) + 1
                for itemidj in items:
                    if itemidi == itemidj:
                        continue
                    row = ItemSimcount.setdefault(itemidi, {})
                    row[itemidj] = row.get(itemidj, 0) + 1

        self.ItemSimlist = dict()
        for itemidi, related_item in ItemSimcount.items():
            sims = self.ItemSimlist.setdefault(itemidi, {})
            for itemidj, wij in related_item.items():
                sims[itemidj] = wij / math.sqrt(Item_count[itemidi] * Item_count[itemidj] * 1.0)

    def recommend(self, user, train=None, k=5, nitem=10):
        """Return up to ``nitem`` recommendations for ``user`` as ``{item: score}``.

        user  -- user id
        train -- optional training data; defaults to the loaded training data
        k     -- number of most similar items consulted per rated item
        nitem -- maximum number of items returned

        For every item the user rated, its k most similar items that the user
        has not rated receive ``rating * similarity``; contributions are
        summed per candidate item.
        """
        train = train or self.traindata
        recommendlist = dict()
        User_Itemlist = train.get(user, {})
        for i, ri in User_Itemlist.items():
            # .get(): an item never co-rated with another item has no
            # similarity row; treat that as "no neighbours" instead of KeyError.
            nearest = sorted(self.ItemSimlist.get(i, {}).items(),
                             key=lambda x: x[1], reverse=True)[0:k]
            for j, wij in nearest:
                if j in User_Itemlist:
                    continue  # never recommend something the user already rated
                # Ratings are stored as strings, hence float().
                recommendlist[j] = recommendlist.get(j, 0) + float(ri) * wij
        # Highest score first, truncated to nitem entries.
        return dict(sorted(recommendlist.items(), key=lambda x: x[1], reverse=True)[0:nitem])

    def recallAndPrecision(self, train=None, test=None, k=5, nitem=10):
        """Return ``(recall, precision)`` of the Top-N lists over all training users.

        recall is hits / total number of test items, and precision is
        hits / (nitem * number of training users).  Either value is 0.0 when
        its denominator is 0.
        """
        train = train or self.traindata
        test = test or self.testdata
        hit = 0
        recall = 0
        precision = 0
        for user in train.keys():
            tu = test.get(user, {})  # users absent from the test set contribute nothing
            rank = self.recommend(user, train=train, k=k, nitem=nitem)
            hit += sum(1 for item in rank if item in tu)
            recall += len(tu)
            precision += nitem
        return (self._safeRatio(hit, recall), self._safeRatio(hit, precision))

    def coverage(self, train=None, test=None, k=5, nitem=10):
        """Return the fraction of training items recommended to at least one user.

        ``test`` is accepted for signature symmetry with the other metrics but
        is not used.  Returns 0.0 when the training data holds no items.
        """
        train = train or self.traindata
        recommend_items = set()
        all_items = set()
        for user in train.keys():
            all_items.update(train[user].keys())
            recommend_items.update(self.recommend(user, train, k=k, nitem=nitem).keys())
        return self._safeRatio(len(recommend_items), len(all_items))

    def popularity(self, train=None, test=None, k=5, nitem=10):
        """Return the mean of log(1 + popularity) over all recommended items.

        An item's popularity is the number of training users who rated it.
        Recommended items without a popularity entry are skipped.  ``test`` is
        accepted for signature symmetry but is not used.  Returns 0.0 when
        nothing is counted.
        """
        train = train or self.traindata
        item_popularity = dict()
        for items in train.values():
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1
        ret = 0
        n = 0
        for user in train.keys():
            rank = self.recommend(user, train, k=k, nitem=nitem)
            for item in rank:
                if item in item_popularity:
                    ret += math.log(1 + item_popularity[item])
                    n += 1
        return self._safeRatio(ret, n)

def testKNNCF(train='u1.base', test='u1.test'):
    """Print precision, recall, coverage and popularity of KNN (item-based CF) per K.

    train -- path of the training ratings file (default 'u1.base')
    test  -- path of the test ratings file (default 'u1.test')

    The item similarity table is built once and then each neighbourhood size K
    in [5, 10, 20, 40, 80, 160] is evaluated; one table row is printed per K.
    """
    cf = KNN(train, test)
    cf.ItemSim()
    print("%3s%20s%20s%20s%20s" % ('K', "precision", 'recall', 'coverage', 'popularity'))
    for k in [5, 10, 20, 40, 80, 160]:
        # recallAndPrecision returns (recall, precision), in that order.
        recall, precision = cf.recallAndPrecision(k=k)
        coverage = cf.coverage(k=k)
        popularity = cf.popularity(k=k)
        print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k, precision * 100, recall * 100, coverage * 100, popularity))


# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    testKNNCF()
时间: 2024-08-30 16:43:37

转:TopN推荐系统——推荐的实现与推荐效果的评价指标的相关文章

推荐系统根据最近浏览进行推荐

前些日子,公司要求做推荐系统,不言而喻,推荐系统对于电商来说是很有好处的.当然我们是刚开始做,还得从简单的开始做,首先第一版是根据用户最近浏览的进行推荐.接下来,可能要根据相似用户进行相似用户的推荐.这样还要对会员进行分等级.所以后面的工作还有很多. 博客是个好东西,我热衷于博客记录下我做过的每一件事. 实现的过程如下: 流程图: 思路: 实现的步骤为: 1.得到用户信息. 技术实现:抓取用户最近浏览的一款商品的 SKUid;同时获取用户的UV(独立访客),这样用户一开网页我们就知道是哪个用户了

新闻推荐系统:基于内容的推荐算法(Recommender System:Content-based Recommendation)

因为开发了一个新闻推荐系统的模块,在推荐算法这一块涉及到了基于内容的推荐算法(Content-Based Recommendation),于是借此机会,基于自己看了网上各种资料后对该分类方法的理解,用尽量清晰明了的语言,结合算法和自己开发推荐模块本身,记录下这些过程,供自己回顾,也供大家参考~ 目录 一.基于内容的推荐算法 + TFIDF 二.在推荐系统中的具体实现技巧 正文 一.基于内容的推荐算法 + TFIDF 主流推荐算法大致可分为: 基于内容(相似度)的推荐 基于用户/物品相似度的协同过

图书馆推荐阅读:香港大学推荐的44本经典书籍

如果你半夜醒来发现自己已经好长时间没读书,而且没有任何负罪感的时候,你就必须知道,你已经堕落了.好书分享,值得收藏! 1.《拖延心理学》 作者: [美]简·博克/ [美]莱诺拉·袁 你想要向拖延的恶习开刀吗?这两位加利福尼亚心理学家在她们治疗拖延者的实践中精准地捕捉到了拖延的根本原因.这本书可以帮助读者减轻拖延,更好地享受生活. 2.《梦的解析》 作者: [奥地利]弗洛伊德 弗洛伊德的《梦的解析》被誉为精神分析的第一名著.它通过对梦境的科学探索和解释,打破了几千年来人类

推荐算法-协同过滤推荐算法

layout: post title: "推荐算法-协同过滤推荐算法" date: 2020-4-13 9:00:00 categories: [Algorithm] excerpt: "协同过滤推荐算法简介,参考https://zhuanlan.zhihu.com/p/40463528" 协同过滤推荐算法步骤 1.收集用户偏好 推测用户的喜好就要收集用户的信息,用户收藏的类别,评论,下载,转发这些信息可以数字化,作为用户信息二维矩阵的变量. 2.用户数据的减噪和归

项亮《推荐系统实践》读书笔记1-推荐系统评价指标

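推荐系统评价指标 1.评分预测 预测准确度: 均方根误差(RMSE): $\mathrm{RMSE}=\sqrt{\frac{\sum_{(u,i)\in T}(r_{ui}-\hat{r}_{ui})^2}{|T|}}$ 平均绝对误差(MAE): $\mathrm{MAE}=\frac{\sum_{(u,i)\in T}|r_{ui}-\hat{r}_{ui}|}{|T|}$ (其中 $T$ 为测试集,$r_{ui}$ 为用户u对物品i的实际评分,$\hat{r}_{ui}$ 为预测评分) 关于这两个指标的优缺点,Netflix认为RMSE加大了对预测不准的用户物品评分的惩罚(平方项的惩罚),因而对系统的评测更加苛刻.研究表明,如果评分系统是基于整数建立的(即用户给的评分都是整数),那么对预测结果取整会降低MAE的误差. 2.Top N推荐:给用户一个个性化的推荐列表,预测准确率一般通过准确率和召回率度量. 令 R(u)是根据用户在训练集上的行为给用户作出的推荐列表,而T(u)是用户在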

【推荐系统论文笔记】个性化推荐系统评价方法综述(了解概念——入门篇)

Overview of  the Evaluated Algorithms for the Personal Recommendation Systems   顾名思义,这篇中文论文讲述的是推荐系统的评价方法,也就是,如何去评价一个推荐系统的好与不好. 引言 1.个性化推荐系统通过建立用户与产品之间的二元关系 ,利用用户已有的选择过程或相似性关系挖掘每个用户潜在感兴趣的对象 ,进而进行个性化推荐 ,其本质就是信息过滤. 2.一个完整的推荐系统由3部分组成: 收集用户信息的行为记录模块: 分析用户

《推荐系统》基于图的推荐算法

1:概述 2:原理简介 3:代码实现 4:问题说明 一:概述 基于图的模型(graph-based model)是推荐系统中的重要内容.其实,很多研究人员把基于邻域的模型也称为基于图的模型,因为可以把基于邻域的模型看做基于图的模型的简单形式 在研究基于图的模型之前,首先需要将用户的行为数据,表示成图的形式,下面我们讨论的用户行为数据是用二元数组组成的,其中每个二元组(u,i)表示用户u对物品i的产生过行为,这种数据很容易用一个二分图表示 令G(V,E)表示用户物品二分图,其中 由用户顶点集合 和

推荐算法之用户推荐(UserCF)和物品推荐(ItemCF)对比

一.定义 UserCF:推荐那些和他有共同兴趣爱好的用户喜欢的物品 ItemCF:推荐那些和他之前喜欢的物品类似的物品 根据用户推荐重点是反映和用户兴趣相似的小群体的热点,根据物品推荐着重于用户过去的历史兴趣,即: UserCF是某个群体内的物品热门程度 ItemCF是反映本人的兴趣爱好,更加个性化 二.新闻类网站采用UserCF的原因: 用户大都喜欢热门新闻,特别细粒度的个性化可忽略不计 个性化新闻推荐更强调热点,热门程度和时效性是推荐的重点,个性化重要性则可降低 ItemCF需要维护一张物品

linux学习书籍推荐linux学习书籍推荐

引用地址:http://www.cnblogs.com/notepi/archive/2013/06/15/3137103.html Linux 学习书目推荐 Linux基础 1.<Linux与Unix Shell 编程指南> C语言基础 1.<C Primer Plus,5th Edition>[美]Stephen Prata著 2.<The  C Programming Language, 2nd Edition>[美]Brian W. Kernighan Davi