个性化推荐算法:GRM,CF,NBI的实现

All three algorithms, GRM, CF, and NBI, can provide each user with an ordered queue
of all of that user's uncollected movies. For an arbitrary user
$u_i$, if the edge $u_i$–$o_j$ is in the probe set
(so that, according to the training set,
$o_j$ is an uncollected movie for
$u_i$), we measure the position of $o_j$
in the ordered queue. For example, if there are 1500 uncollected movies for $u_i$, and $o_j$
is the 30th from the top, we say the position of $o_j$
is the top 30/1500, denoted by $r_{ij}=0.02$. --Tao Zhou

The global ranking method (GRM) sorts
all the objects in the descending order of degree and recommends those with highest degrees. 

/***************************************************************************
 *  对电影按照度进行排序,将排序的结果存放在rankResult_GRM.data
 ***************************************************************************/
#include<stdio.h>
#include<stdlib.h>

#define MOVIE_SIZE 	1682
#define USER_SIZE 	943	

// One entry of the ranking table: a movie ID paired with that movie's degree.
typedef struct MovieInfo{
	int number;			// movie ID
	int degree;			// degree of the movie
	//int rank;			// rank of the movie
}MovieInfo;

// Ranking table; main fills entries 1..MOVIE_SIZE.
MovieInfo movieInfo[MOVIE_SIZE + 1];
int matrix[MOVIE_SIZE + 1][USER_SIZE + 1];	// file scope, so zero-initialized automatically; the author notes that an uninitialized copy inside main caused errors
int degree[MOVIE_SIZE + 1];			// degree of every movie; degree[0] is unused
						// degree[i] holds the degree of the movie numbered i

int main(){
	FILE* fMatrix;				//获得然后计算电影的度 <--matrix.data
						//数组的下标是电影的代号
	FILE* fRankOfGRM;			//将电影的排名写到文件中

	int i, j;
	int max_index;
	if( NULL == (fMatrix = fopen("matrix.data", "r"))){
		printf("open file(marix.data) error!\n");
		exit(0);
	}
	if( NULL == (fRankOfGRM = fopen("rankResult_GRM.data", "w"))){
		printf("open file(rankResult_GRM.data) error!\n");
		exit(0);
	}
	//将matrixData中的数据读入数组中
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		//printf("i = %d\n", i);
		for( j = 1; j <= USER_SIZE; j++ ){
			if( 1 != fscanf(fMatrix, "%d ", &matrix[i][j])){
				printf("fscanf error\n");
				exit(0);
			}
		}
	}
	/*
	for( i = 0; i < MOVIE_SIZE; i++ ){
		for( j = 0; j < USER_SIZE; j++ ){
			printf("%d", matrix[i][j]);
		}
		printf("\n");
	}
	*/

	//计算电影的度
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			degree[i] = degree[i] + matrix[i][j];
		}
		movieInfo[i].number = i;		//得到每一个电影的ID,保留编号
		movieInfo[i].degree = degree[i];	//和电影的度
	}
	/*
	printf("show the degree of movie\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%4d", degree[i]);
		if( i % 10 == 0 ){
			printf("    row%d\n", i/10);
		}
	}
	*/
	//对电影按照度的大小进行排序
	for( i = 1; i < MOVIE_SIZE; i++ ){
		max_index = i;
		for( j = i + 1; j <= MOVIE_SIZE; j++ ){
			if( movieInfo[j].degree > movieInfo[max_index].degree ){
				max_index = j;
			}
		}
		if( max_index != i ){
			movieInfo[0].degree = movieInfo[i].degree;
			movieInfo[0].number = movieInfo[i].number;
			movieInfo[i].degree = movieInfo[max_index].degree;
			movieInfo[i].number = movieInfo[max_index].number;
			movieInfo[max_index].degree = movieInfo[0].degree;
			movieInfo[max_index].number = movieInfo[0].number;
		}
	}
	//将排序的结果输出
	/*
	printf("\nsrot\n\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%5d", movieInfo[i].degree);
		if( i % 10 == 0 ){
			printf("     row%d\n", i/10);
		}
	}
	*/
	//将排序的结果写入文件,               名次(by degree)    电影的ID,           度
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		fprintf(fRankOfGRM, "%d\t%d\t%d\t\n", i, movieInfo[i].number, movieInfo[i].degree);
	}
	fclose(fMatrix);
	fclose(fRankOfGRM);
	return 0;
}</span><span style="font-size:18px;font-family: Times-Roman;">
</span>
/*
 * 对ProbeSet中的每一条记录,
 * 	用户: 得到该用户的没有选择过的电影的个数
 * 	电影: 得到该电影的排名
 * */
#include<stdio.h>
#include<stdlib.h>

//size of porbe
#define SIZE_PORBE 	8252		//预测集的大小
#define SIZE_USER 	943		//数据集中用户的大小
#define SIZE_MOVIE 	1682		//数据集中电影的大小

// One probe-set record: a user ID and a movie ID.
typedef struct ItemPorbe{		// used when reading the probe set
	int user;
	int movie;
}ItemPorbe;

// A movie ID together with that movie's degree.
typedef struct ItemMovie{
	int movieId;
	int degree;
}ItemMovie;

// All probe-set records, 0-based.
ItemPorbe itemPorbe[SIZE_PORBE];
int unSelcMovNumOfUser[SIZE_PORBE+1];	// element 0 is unused; element i belongs to user i
					// value: number of movies that user has not selected
					// NOTE(review): sized by SIZE_PORBE although only 1..SIZE_USER is filled
ItemMovie movieRank[SIZE_MOVIE+1];	// element 0 is unused; the index is the rank
					// value: the movie's ID (and degree)

// Returns the rank of the movie with the given ID in movieRank.
int findMovieRank(int numberOfMovie);
// ratio[i]: rank / unselected-count for probe record i.
double ratio[SIZE_PORBE];

/*
 * GRM, step 2: score the global ranking against the probe set.
 *
 * Inputs:
 *   porbeSet.data                      SIZE_PORBE records "user movie rate"
 *   unselectedMovieNumberForUser.data  SIZE_USER records "index count"
 *   rankResult_GRM.data                SIZE_MOVIE records "rank movieId degree"
 *
 * For every probe record the movie's rank (ties share the best rank, see
 * findMovieRank) is divided by the number of movies the user has not
 * selected; the mean of these ratios is printed.
 *
 * Returns 0 on success. I/O failures and out-of-range IDs in the probe set
 * are reported on stderr and terminate the process with EXIT_FAILURE.
 */
int main(void){
	FILE* fReadPorbeSet;
	FILE* fUnselectedMoiveOfUser;
	FILE* fMoveRank;
	int skiprate;				/* third column of the probe set, ignored */
	int skipindex;				/* leading index column, ignored */
	int skipindex_2;			/* leading rank column, implied by position */
	int i;
	int user;
	int movie;
	int temp_unselectForUser;
	double sum = 0.0;
	double average_ratio;

	/* --- probe set --- */
	if( NULL == (fReadPorbeSet = fopen("porbeSet.data", "r"))){
		fprintf(stderr, "open file(porbeSet.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 0; i < SIZE_PORBE; i++ ){
		if( 3 != fscanf(fReadPorbeSet, "%d %d %d", &itemPorbe[i].user, &itemPorbe[i].movie, &skiprate)){
			fprintf(stderr, "fscanf 1 error\n");
			fclose(fReadPorbeSet);
			exit(EXIT_FAILURE);
		}
		/* These IDs are used as array indices below, so reject bad ones early. */
		if( itemPorbe[i].user < 1 || itemPorbe[i].user > SIZE_USER ||
		    itemPorbe[i].movie < 1 || itemPorbe[i].movie > SIZE_MOVIE ){
			fprintf(stderr, "porbeSet.data: record %d has an out-of-range user or movie\n", i);
			fclose(fReadPorbeSet);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fReadPorbeSet);

	/* --- number of unselected movies per user --- */
	if( NULL == (fUnselectedMoiveOfUser = fopen("unselectedMovieNumberForUser.data", "r"))){
		fprintf(stderr, "open file(unselectedMovieNumberForUser.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		if( 2 != fscanf(fUnselectedMoiveOfUser, "%d %d", &skipindex, &unSelcMovNumOfUser[i])){
			fprintf(stderr, "fscanf 2 error\n");
			fclose(fUnselectedMoiveOfUser);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fUnselectedMoiveOfUser);

	/* --- movie ID and degree; the rank is the array index --- */
	if( NULL == (fMoveRank = fopen("rankResult_GRM.data", "r"))){
		fprintf(stderr, "open file(rankResult_GRM.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( 3 != fscanf(fMoveRank, "%d %d %d", &skipindex_2, &movieRank[i].movieId, &movieRank[i].degree)){
			fprintf(stderr, "fscanf 3 error!");
			fclose(fMoveRank);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fMoveRank);

	/* ratio = rank of the probe movie / number of movies the user has not selected */
	for( i = 0; i < SIZE_PORBE; i++ ){
		user = itemPorbe[i].user;
		movie = itemPorbe[i].movie;
		temp_unselectForUser = unSelcMovNumOfUser[user];
		if( temp_unselectForUser <= 0 ){
			/* would otherwise produce inf/negative ratios and ruin the mean */
			fprintf(stderr, "user %d has no unselected movies\n", user);
			exit(EXIT_FAILURE);
		}
		ratio[i] = (double)findMovieRank(movie) / (double)temp_unselectForUser;
		sum += ratio[i];
	}
	average_ratio = sum / (double)SIZE_PORBE;
	printf("\n\nThe Rssult of GRM is %f\n", average_ratio);

	return 0;
}

/*
 * Find the rank of a movie in the degree-sorted table movieRank.
 *
 * numberOfMovie: ID of the movie to look up.
 *
 * Movies with equal degree share a rank: the result is the smallest index
 * of the run of equal-degree entries that contains the movie.
 *
 * Returns the rank in 1..SIZE_MOVIE, or 0 when the ID is not in the table
 * (previously control fell off the end of the function in that case).
 */
int findMovieRank(int numberOfMovie){
	int i, j;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( numberOfMovie != movieRank[i].movieId ){
			continue;
		}
		/* Walk back to the first entry with the same degree. */
		j = i;
		while( j > 1 && movieRank[j - 1].degree == movieRank[i].degree ){
			j--;
		}
		return j;
	}
	return 0;
}
/*
	for( i = 0; i < 10; i++ ){
		if( 1005 == a[i].name ){
			degree = a[i].degree;
			for( j = i; j > 0 && a[j].degree == degree; j-- );
				printf("%d\n", j+1);
		}
	}
*/

Thus far, the most widely applied personalized recommendation algorithm is collaborative filtering (CF),
based on a similarity measure between users.

/*
 * 计算CF算法中的相似矩阵
 * */

#include<stdio.h>
#include<stdlib.h>

#define SIZE_MOVIE 1682
#define SIZE_USER  943

// Movie-by-user matrix read from matrix.data; both indices are 0-based here.
int adjacentMateix[SIZE_MOVIE][SIZE_USER];
// Degree of each user; index = user number (1-based), element 0 unused.
int degreeOfUser[SIZE_USER + 1];

// similarity[i][j]: similarity of users i and j (1-based indices).
double similarity[SIZE_USER + 1][SIZE_USER + 1];	

void calculateSimilarity();
void showSimilarity();
void calculateDegreeOfUSer();
void readAdjacentMatrix();
void writeSimilarityToFile();

/*
 * Driver for the CF similarity step: load the matrix, derive the user
 * degrees, build the user-user similarity matrix and store it on disk.
 */
int main(){
	readAdjacentMatrix();		/* matrix.data -> adjacentMateix */
	calculateDegreeOfUSer();	/* column sums -> degreeOfUser */
	calculateSimilarity();		/* fills similarity[][] */
	/* showSimilarity() can be called here to dump the matrix to stdout */
	writeSimilarityToFile();	/* similarity[][] -> similarityMatrix.data */
	return 0;
}

/*
 * Load the movie-by-user matrix.
 *
 * Reads SIZE_MOVIE x SIZE_USER integers from "matrix.data" into
 * adjacentMateix (0-based, row = movie, column = user).
 * On any failure a message is written to stderr and the process exits with
 * EXIT_FAILURE (it used to exit with status 0, which looks like success).
 */
void readAdjacentMatrix(){
	FILE* fReadAdjacent = fopen("matrix.data", "r");
	int i, j;

	if( NULL == fReadAdjacent ){
		fprintf(stderr, "open file(matrix.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 0; i < SIZE_MOVIE; i++ ){
		for( j = 0; j < SIZE_USER; j++ ){
			if( 1 != fscanf(fReadAdjacent, "%d ", &adjacentMateix[i][j])){
				fprintf(stderr, "fscanf 1 error\n");
				fclose(fReadAdjacent);
				exit(EXIT_FAILURE);
			}
		}
	}
	fclose(fReadAdjacent);
}

/*
 * Compute the degree of every user as the sum of that user's column in
 * adjacentMateix. Element 0 of degreeOfUser is left unused so that the
 * index equals the user number; the column sum is added onto whatever the
 * slot already holds.
 */
void calculateDegreeOfUSer(){
	int user, movie;
	for( user = 1; user <= SIZE_USER; user++ ){
		int columnSum = 0;
		for( movie = 0; movie < SIZE_MOVIE; movie++ ){
			columnSum += adjacentMateix[movie][user - 1];
		}
		degreeOfUser[user] += columnSum;
	}
}

/*
 * Build the user-user similarity matrix S.
 *
 *   S[i][j] = (sum over movies l of a[l][i] * a[l][j]) / min(k(i), k(j))
 *
 * where a is adjacentMateix and k is degreeOfUser. Both the numerator and
 * the denominator are symmetric in i and j, so only the upper triangle is
 * computed and mirrored.
 *
 * When the smaller degree is 0 the similarity is defined as 0; the original
 * code divided by zero there and stored NaN.
 */
void calculateSimilarity(){
	int i;
	int j;
	int l;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = i; j <= SIZE_USER; j++ ){
			int molecule = 0;
			int denominator = (degreeOfUser[i] < degreeOfUser[j]) ? degreeOfUser[i] : degreeOfUser[j];
			double s = 0.0;
			if( denominator != 0 ){
				for( l = 0; l < SIZE_MOVIE; l++ ){
					molecule += adjacentMateix[l][i-1] * adjacentMateix[l][j-1];
				}
				s = (double)molecule / denominator;
			}
			similarity[i][j] = s;
			similarity[j][i] = s;
		}
	}
}

/*
 * Write the similarity matrix to "similarityMatrix.data": one text line per
 * user i, holding S[i][1..SIZE_USER] each formatted as "%10.6f ".
 * Open, write and close failures are reported on stderr and end the process
 * with EXIT_FAILURE.
 */
void writeSimilarityToFile(){
	FILE* fOut;
	int i;
	int j;

	if( NULL == (fOut = fopen("similarityMatrix.data", "w"))){
		fprintf(stderr, "open file(similarityMatrix.data) error");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( fprintf(fOut, "%10.6f ", similarity[i][j]) < 0 ){
				fprintf(stderr, "write file(similarityMatrix.data) error\n");
				fclose(fOut);
				exit(EXIT_FAILURE);
			}
		}
		fprintf(fOut, "\n");
	}
	/* fclose flushes the buffer; a failure here means the file is incomplete. */
	if( 0 != fclose(fOut) ){
		fprintf(stderr, "close file(similarityMatrix.data) error\n");
		exit(EXIT_FAILURE);
	}
}

/*
 * Debug helper: print the similarity matrix to stdout, one row per user,
 * each value as "%10.6f", followed by a marker carrying the row number.
 * (Stray HTML markup that followed the closing brace has been removed.)
 */
void showSimilarity(){
	int row;
	int col;
	for( row = 1; row <= SIZE_USER; row++ ){
		for( col = 1; col <= SIZE_USER; col++ ){
			printf("%10.6f", similarity[row][col]);
		}
		printf("---------%d\n", row);
	}
}
#include<stdio.h>
#include<stdlib.h>

#define SIZE_USER 	943
#define SIZE_MOVIE 	1682
#define SIZE_PROBE_SET 	8252

// One probe-set record: a user ID and a movie ID.
typedef struct ProbeItem{
	int user;
	int movie;
}ProbeItem; 

// Per-user record of the movies that user has not selected.
typedef struct UnselectedMovieListOFUser{
	int user;		// user ID
	int unselectedNumber;	// number of movies the user has not selected
	int list[SIZE_MOVIE+1];	// list[j] == j when movie j is unselected; slots of selected movies stay 0
}UnselectedMovieListOFUser;

// A movie ID paired with the score predicted for it.
typedef struct MovieIdAndScore{
	int movieId;
	double score;
}MovieIdAndScore;

ProbeItem probeItem[SIZE_PROBE_SET];
UnselectedMovieListOFUser unselectedMovieListOfuser[SIZE_USER+1];	// per-user list of unselected movies
double similarityMatrix[SIZE_USER + 1][SIZE_USER + 1];			// similarity between users
int unselectedMovieNumberOfUser[SIZE_USER + 1];				// per-user count of unselected movies
int adjacentMatrix[SIZE_MOVIE+1][SIZE_USER+1];				// adjacency matrix, indexed [movie][user]
MovieIdAndScore movieIdAndScore[SIZE_MOVIE + 1][SIZE_USER + 1];		// for every user: each unselected movie and its predicted score

// ratio[i]: rank / unselected-count for probe record i.
double ratio[SIZE_PROBE_SET + 1];

void readSimilarityMatrix();
void readAdjacentMatrix();
void readUnSelectedMovieNumberOfUser();	/*
					 *  This only gives the number of movies each user
					 *  has not selected. That is no longer sufficient:
					 *  we need to know exactly which movies each user
					 *  has not selected.
					 */
void readUnselectedMoviteItemOfUser();
void calculateScore_Matrix();
void printMovieIdAndScoreMatrix();
double calculateScore(int user, int movie);
void sortUnselectedMovieOfUser();
void readProbeSet();
void CF();
int rankOfMovie(int user, int movie);

/*
 * Driver for the CF evaluation: load all inputs, score and sort every
 * user's candidate movies, then evaluate against the probe set.
 */
int main(){
	/* inputs */
	readAdjacentMatrix();
	readSimilarityMatrix();
	readUnSelectedMovieNumberOfUser();

	/* per-user candidate lists, scores and ordering */
	readUnselectedMoviteItemOfUser();
	calculateScore_Matrix();
	sortUnselectedMovieOfUser();
	/* printMovieIdAndScoreMatrix() can be called here for debugging */

	/* evaluation */
	readProbeSet();
	CF();
	return 0;
}

/*
 * Load the user-user similarity matrix from "similarityMatrix.data".
 * similarityMatrix[i][j] is the similarity of users i and j (1-based).
 * Prints "read over" on stdout when done. Failures are reported on stderr
 * and end the process with EXIT_FAILURE instead of status 0.
 */
void readSimilarityMatrix(){
	FILE* fIn = fopen("similarityMatrix.data", "r");
	int i;
	int j;

	if( NULL == fIn ){
		fprintf(stderr, "open file(similarityMatrix.data) error\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%lf ", &similarityMatrix[i][j])){
				fprintf(stderr, "fscanf error: %d %d\n", i,j);
				fclose(fIn);
				exit(EXIT_FAILURE);
			}
		}
	}
	printf("read over\n");
	fclose(fIn);
}

/*
 * Load, for every user, the number of movies that user has not selected.
 * "unselectedMovieNumberForUser.data" holds SIZE_USER records "index count";
 * the index column is discarded and the count is stored at
 * unselectedMovieNumberOfUser[1..SIZE_USER] in file order.
 * Failures are reported on stderr and end the process with EXIT_FAILURE.
 */
void readUnSelectedMovieNumberOfUser(){
	FILE* fIn = fopen("unselectedMovieNumberForUser.data", "r");
	int i;
	int skipIndex;

	if( NULL == fIn ){
		fprintf(stderr, "open file(unselectedMovieNumberForUser.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		if( 2 != fscanf(fIn, "%d %d", &skipIndex, &unselectedMovieNumberOfUser[i])){
			fprintf(stderr, "fscanf error:%d\n", i);
			fclose(fIn);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fIn);
}

/*
 * 读入每个用户的没有选择过的电影是哪些,
 * */
void readUnselectedMoviteItemOfUser(){
	/*
	FILE* fIn;
	if( NULL == (fIn = fopen("matrix.data", "r"))){
		printf("open file(matrix.data) error!\n");
		exit(0);
	}
	int i,j;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%d ", &adjacentMatrix[i][j])){
				printf("fscanf error:%d %d", i, j);
				exit(0);
			}
		}
	}
	*/
	int i,j;
	//readAdjacentMatrix();
	for( i = 1; i <= SIZE_USER; i++ ){
		unselectedMovieListOfuser[i].user = i;
		for( j = 1; j <= SIZE_MOVIE; j++ ){
			if( adjacentMatrix[j][i] == 0 ){			//没有选过的电影
				unselectedMovieListOfuser[i].unselectedNumber += 1;
				unselectedMovieListOfuser[i].list[j] = j;	//记录了没有选择的电影的代号
										//用户i已经选择的电影在list
										//中的值为0
			}
			//printf("---------%d\n", unselectedMovieListOfuser[2].list[j]);
		}
		//printf("%d\n", unselectedMovieListOfuser[i].unselectedNumber);
	}

	//fclose(fIn);
	//得到了每一个用户ui的没有选择过的电影列表的数据
	/*
	 *
	 * 测试用户i的list
	 *

	for( i = 0; i < SIZE_MOVIE; i++ ){
		printf("%d\t%d\n", unselectedMovieListOfuser[2].list[i], adjacentMatrix[i][2]);
	}
	 * */
}

/*
 * 计算每一个用户每一个没有选择的电影的score
 * */
void calculateScore_Matrix(){
	int i;
	int j;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_MOVIE; j++ ){
			movieIdAndScore[j][i].movieId = unselectedMovieListOfuser[i].list[j];
			if( movieIdAndScore[j][i].movieId != 0 ){	//没有选过的电影,才要计算分数,
									//选择过的电影的分数是0
				movieIdAndScore[j][i].score = calculateScore(i, j);
			}
		}
	}
}

/*
 * Debug helper: for each user print a header line, all SIZE_MOVIE scores as
 * "%10.6f", and finally the number of scores printed.
 */
void printMovieIdAndScoreMatrix(){
	int user;
	int slot;
	for( user = 1; user <= SIZE_USER; user++ ){
		int printed = 0;
		printf(" user ----- %d \n", user);
		for( slot = 1; slot <= SIZE_MOVIE; slot++ ){
			printf("%10.6f", movieIdAndScore[slot][user].score);
			printed += 1;
		}
		printf("%d  \n\n", printed);
	}
}

/*
 * Predicted CF score of `movie` for `user`:
 *
 *   sum over other users i of s[i][user] * a[movie][i]
 *   --------------------------------------------------
 *          sum over other users i of s[i][user]
 *
 * with s = similarityMatrix and a = adjacentMatrix.
 *
 * Returns 0.0 when the similarity sum is zero; the original divided by zero
 * in that case and returned NaN, which also makes the later score
 * comparisons during sorting meaningless.
 */
double calculateScore(int user, int movie){
	double molecular = 0.0;
	double denominator = 0.0;
	int i;
	/* One pass over all other users accumulates both sums. */
	for( i = 1; i <= SIZE_USER; i++ ){
		if( user == i ){
			continue;
		}
		molecular = molecular + similarityMatrix[i][user] * adjacentMatrix[movie][i];
		denominator = denominator + similarityMatrix[i][user];
	}
	if( denominator == 0.0 ){
		return 0.0;
	}
	return molecular/denominator;
}

/*
 * Load the adjacency matrix: SIZE_MOVIE x SIZE_USER integers from
 * "matrix.data" into adjacentMatrix[1..SIZE_MOVIE][1..SIZE_USER]
 * (row = movie, column = user).
 * Failures are reported on stderr and end the process with EXIT_FAILURE.
 */
void readAdjacentMatrix(){
	FILE* fIn = fopen("matrix.data", "r");
	int i,j;

	if( NULL == fIn ){
		fprintf(stderr, "open file(matrix.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%d ", &adjacentMatrix[i][j])){
				fprintf(stderr, "fscanf error:%d %d\n", i, j);
				fclose(fIn);
				exit(EXIT_FAILURE);
			}
		}
	}
	fclose(fIn);
}

/*
 * For every user, order that user's column of movieIdAndScore by descending
 * score with a selection sort over rows 1..SIZE_MOVIE.
 * Row 0 of the same column serves as the scratch slot during a swap.
 */
void sortUnselectedMovieOfUser(){
	int user;
	int pos;
	int probe;
	int best;
	for( user = 1; user <= SIZE_USER; user++ ){
		for( pos = 1; pos < SIZE_MOVIE; pos++ ){
			/* find the highest score in pos..SIZE_MOVIE */
			best = pos;
			for( probe = pos + 1; probe <= SIZE_MOVIE; probe++ ){
				if( movieIdAndScore[probe][user].score > movieIdAndScore[best][user].score ){
					best = probe;
				}
			}
			if( best == pos ){
				continue;
			}
			/* swap through the unused row 0 */
			movieIdAndScore[0][user] = movieIdAndScore[pos][user];
			movieIdAndScore[pos][user] = movieIdAndScore[best][user];
			movieIdAndScore[best][user] = movieIdAndScore[0][user];
		}
	}
}
/*
void SelectSrot(int a[], int n){
	int i,j,min_index,temp;
	for( i = 0; i < n - 1; i++ ){
		min_index = i;
		for( j = i+1; j < n; j++ ){
			if( a[j] < a[min_index] )
				min_index = j;
		}
		if( min_index != i ){
			temp = a[i];
			a[i] = a[min_index];
			a[min_index] = temp;
		}
	}
}
*/

/*
 * Load the probe set: SIZE_PROBE_SET records "user movie rate" from
 * "porbeSet.data" into probeItem[0..SIZE_PROBE_SET-1]; the third column is
 * discarded.
 * The file is now closed after reading (it was leaked before), and failures
 * are reported on stderr with exit status EXIT_FAILURE.
 */
void readProbeSet(){
	FILE* fIn = fopen("porbeSet.data", "r");
	int i;
	int skipDegree;

	if( NULL == fIn ){
		/* message now names the file that is actually opened */
		fprintf(stderr, "open file(porbeSet.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 0; i < SIZE_PROBE_SET; i++ ){
		if( 3 != fscanf(fIn, "%d %d %d", &probeItem[i].user, &probeItem[i].movie, &skipDegree)){
			fprintf(stderr, "fscanf error %d\n", i);
			fclose(fIn);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fIn);
}

/*
 * Evaluate CF on the probe set: for each record store
 *   ratio = rank of the movie in the user's sorted list
 *           / number of movies the user has not selected
 * and print the mean ratio as "%10.6f".
 */
void CF(){
	double total = 0.0;
	int k;
	for( k = 0; k < SIZE_PROBE_SET; k++ ){
		const int user = probeItem[k].user;
		const int movie = probeItem[k].movie;
		const int rank = rankOfMovie(user, movie);
		ratio[k] = (double) rank / unselectedMovieNumberOfUser[user];
		total += ratio[k];
	}
	printf("%10.6f\n", total/SIZE_PROBE_SET);
}

/*
 * Return the position of `movie` in the personal ranking of `user`, i.e.
 * the first row i in 1..SIZE_MOVIE with movieIdAndScore[i][user].movieId
 * equal to `movie`.
 *
 * Returns 0 when the movie does not appear in that column (for example a
 * movie the user already selected, whose ID slot is 0). Previously control
 * reached the end of the function without a return value in that case.
 */
int rankOfMovie(int user, int movie){
	int i;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( movieIdAndScore[i][user].movieId == movie ){
			return i;
		}
	}
	return 0;
}

NBI

/*
 * 计算NBI中的W矩阵
 * 	W矩阵是针对object即电影的。
 * */
#include<stdio.h>
#include<stdlib.h>

#define USER_SIZE 	943
#define MOVIE_SIZE 	1682
#define PROBESET_SIZE	8252

// Per-user record of the movies that user has not selected.
typedef struct UnselectedMovieOfUser{
	int user;			// user ID
	int count_unselected;		// number of movies the user has not selected
	int list[MOVIE_SIZE + 1];	// unselected movies, stored as movie IDs
}UnselectedMovieOfUser;

// A movie ID paired with the score computed for it.
typedef struct ScoreOfUnselectedMovie{
	int movieId;			// movie ID
	double score;			// score of the movie
}ScoreOfUnselectedMovie;

typedef struct Item{			// holds one probe-set record
	int user;
	int movie;
}Item;

// Score table indexed [row][user]; rows 1..MOVIE_SIZE, row 0 is used as swap scratch by sort().
ScoreOfUnselectedMovie scoreOfUnselectedMovie[MOVIE_SIZE + 1][USER_SIZE + 1];

UnselectedMovieOfUser unselectMovieOfUser[USER_SIZE + 1];
Item item[PROBESET_SIZE];

int adjacentMatrix[MOVIE_SIZE + 1][USER_SIZE + 1];	// adjacency matrix, indexed [movie][user]
double wMatrix[MOVIE_SIZE + 1][MOVIE_SIZE + 1];		// W matrix, w[i,j]: how likely j to choose i
int degreeOfUser[USER_SIZE + 1];			// degree of each user
int degreeOfMoive[MOVIE_SIZE + 1];			// degree of each movie
// ratio[i]: rank / unselected-count for probe record i.
double ratio[PROBESET_SIZE];

void readAdjacentMatrix();
void calculateDegreeOFUser();
void calculateDegreeOfMovie();
void calculateWMatrix();
double calculateW_molecular(int f,int s);
void statisticsUnselectedMovieOfUser();
void calculateScoreForUnselectedMovieOFUser();
double calculateScore(int user, int movie);
void sort();
void printSortResult();
void readProbeSet();
void NBI();
int getRank(int user, int movie);

/*
 * Driver for NBI: load the matrix, compute degrees and the W matrix, score
 * and sort every user's movies, then evaluate against the probe set.
 */
int main(){
	/* input and degrees */
	readAdjacentMatrix();
	calculateDegreeOfMovie();
	calculateDegreeOFUser();

	/* resource-allocation weights and per-user scores */
	calculateWMatrix();
	statisticsUnselectedMovieOfUser();
	calculateScoreForUnselectedMovieOFUser();
	sort();
	/* printSortResult() can be called here for debugging */

	/* evaluation */
	readProbeSet();
	NBI();
	return 0;
}
/*
 * Load the adjacency matrix: MOVIE_SIZE x USER_SIZE integers from
 * "matrix.data" into adjacentMatrix[1..MOVIE_SIZE][1..USER_SIZE]
 * (row = movie, column = user).
 * The file is now closed after reading (it was leaked before), and failures
 * are reported on stderr with exit status EXIT_FAILURE.
 */
void readAdjacentMatrix(){
	FILE* fin = fopen("matrix.data", "r");
	int i;
	int j;

	if( NULL == fin ){
		fprintf(stderr, "open file(matrix.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			if( 1 != fscanf(fin, "%d ", &adjacentMatrix[i][j])){
				fprintf(stderr, "fscanf error::%d\t%d\n", i,j);
				fclose(fin);
				exit(EXIT_FAILURE);
			}
		}
	}
	fclose(fin);
}
/*
 * Compute the degree of every user: the sum of that user's column of
 * adjacentMatrix is added to degreeOfUser[user].
 */
void calculateDegreeOFUser(){
	int user;
	int movie;
	for( user = 1; user <= USER_SIZE; user++ ){
		int columnSum = 0;
		for( movie = 1; movie <= MOVIE_SIZE; movie++ ){
			columnSum += adjacentMatrix[movie][user];
		}
		degreeOfUser[user] += columnSum;
	}
}

/*
 * Compute the degree of every movie: the sum of that movie's row of
 * adjacentMatrix is added to degreeOfMoive[movie].
 */
void calculateDegreeOfMovie(){
	int movie;
	int user;
	for( movie = 1; movie <= MOVIE_SIZE; movie++ ){
		int rowSum = 0;
		for( user = 1; user <= USER_SIZE; user++ ){
			rowSum += adjacentMatrix[movie][user];
		}
		degreeOfMoive[movie] += rowSum;
	}
}
/*
 * Compute the W matrix:
 *   wMatrix[i][j] = calculateW_molecular(i, j) / degreeOfMoive[j]
 * for every pair of movies; entries whose column movie j has degree 0 are
 * set to 0.0.
 */
void calculateWMatrix(){
	int row;
	int col;
	for( row = 1; row <= MOVIE_SIZE; row++ ){
		for( col = 1; col <= MOVIE_SIZE; col++ ){
			if( 0 == degreeOfMoive[col] ){
				wMatrix[row][col] = 0.0;	/* nothing to distribute from this movie */
			}else{
				wMatrix[row][col] = calculateW_molecular(row, col)/degreeOfMoive[col];
			}
		}
	}
}
/*
 * Numerator of the W-matrix entry for movies f and s:
 *
 *   sum over users i of a[f][i] * a[s][i] / k(i)
 *
 * with a = adjacentMatrix and k = degreeOfUser.
 *
 * Users with degree 0 are skipped. The original evaluated 0 / 0.0 for such
 * a user, which is NaN and turned the whole sum (and every W entry built
 * from it) into NaN.
 */
double calculateW_molecular(int f, int s){
	int i;
	double sum = 0.0;
	for( i = 1; i <= USER_SIZE; i++ ){
		if( 0 == degreeOfUser[i] ){
			continue;
		}
		sum = sum + ( (adjacentMatrix[f][i]*adjacentMatrix[s][i])/(double)degreeOfUser[i] );
	}
	return sum;
}

/*
 * 统计用户的没有选择过的电影的列表
 * 	初始化结构体数组: unselectMovieOfUser
 * 		typedef struct UnselectedMovieOfUser{
			int user;			//用户的ID
			int count_unselected;		//没有选择过的电影的数量
			int list[MOVIE_SIZE + 1];	//没有选择过的电影的列表,即电影的ID
		}UnselectedMovieOfUser;
 * */
void statisticsUnselectedMovieOfUser(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		unselectMovieOfUser[i].user = i;
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			if( 0 == adjacentMatrix[j][i] ){			//用户没有选过的电影, 计算其f'(oj)的值
				unselectMovieOfUser[i].count_unselected += 1;	//用户i的没有选择的电影数+1
				unselectMovieOfUser[i].list[j] = j;		//用户i的list数组中的j = j,表示第j部电影的度不为0
			}
		}
	}

	//test
	/*
	printf("below should be equal\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", unselectMovieOfUser[i].count_unselected);
		if( i % 10 == 0 )
			printf(" --------row%d\n", i / 10);
	}
	printf("\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", MOVIE_SIZE - degreeOfUser[i]);
		if( i % 10 == 0 ){
			printf(" --------row%d\n", i / 10);
		}
	}
	*/

}
/*
 * 对用户没有选择过的电影计算分数
 * 	初始化结构体数组: scoreOfUnselectedMovie
 * 			typedef struct ScoreOfUnselectedMovie{
				int movieId;			//电影的Id
				double score;			//电影的评分
			}ScoreOfUnselectedMovie;
 * */
void calculateScoreForUnselectedMovieOFUser(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			scoreOfUnselectedMovie[j][i].movieId = unselectMovieOfUser[i].list[j];
			if( scoreOfUnselectedMovie[j][i].movieId != 0 ){	//没有选过的电影
				scoreOfUnselectedMovie[j][i].score = calculateScore(i, j);
			}else{							//
				scoreOfUnselectedMovie[j][i].movieId = j;
			}
		}
	}

	/*
	printf("\n###############\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			printf("%d ", adjacentMatrix[i][j]);
		}
		printf("\n");
	}

	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			printf("%10.6f", scoreOfUnselectedMovie[j][i].score);
		}
		printf("-------------------%d\n\n", i);
	}
	*/
}
/*
 * Helper of calculateScoreForUnselectedMovieOFUser.
 * Returns the sum over all movies k of wMatrix[movie][k] * adjacentMatrix[k][user].
 */
double calculateScore(int user, int movie){
	const double *weights = wMatrix[movie];
	double total = 0.0;
	int k;
	for( k = 1; k <= MOVIE_SIZE; k++ ){
		total = total + weights[k] * adjacentMatrix[k][user];
	}
	return total;
}
/*
 * For every user, order that user's column of scoreOfUnselectedMovie by
 * descending score with a selection sort over rows 1..MOVIE_SIZE.
 * Row 0 of the same column serves as the scratch slot during a swap.
 */
void sort(){
	int user;
	int pos;
	int probe;
	int best;
	for( user = 1; user <= USER_SIZE; user++ ){
		for( pos = 1; pos < MOVIE_SIZE; pos++ ){
			/* find the highest score in pos..MOVIE_SIZE */
			best = pos;
			for( probe = pos + 1; probe <= MOVIE_SIZE; probe++ ){
				if( scoreOfUnselectedMovie[probe][user].score > scoreOfUnselectedMovie[best][user].score ){
					best = probe;
				}
			}
			if( best == pos ){
				continue;
			}
			/* swap through the unused row 0 */
			scoreOfUnselectedMovie[0][user] = scoreOfUnselectedMovie[pos][user];
			scoreOfUnselectedMovie[pos][user] = scoreOfUnselectedMovie[best][user];
			scoreOfUnselectedMovie[best][user] = scoreOfUnselectedMovie[0][user];
		}
	}
}
/*
 * 测试函数:输出一下排序的结果
 * */
void printSortResult(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			printf("%d\t%10.6f\n",scoreOfUnselectedMovie[j][i].movieId, scoreOfUnselectedMovie[j][i].score);
		}
		printf("----------------uer %d\n", i);
	}
}

/*
 * Load the probe set: PROBESET_SIZE records "user movie rate" from
 * "porbeSet.data" into item[0..PROBESET_SIZE-1]; the third column is
 * discarded.
 * Failures are reported on stderr and end the process with EXIT_FAILURE
 * (previously status 0, which looks like success to the caller).
 */
void readProbeSet(){
	FILE* fin = fopen("porbeSet.data", "r");
	int i;
	int skiprate;

	if( NULL == fin ){
		fprintf(stderr, "open file(porbeSet.data) error!\n");
		exit(EXIT_FAILURE);
	}
	for( i = 0; i < PROBESET_SIZE; i++ ){
		if( 3 != fscanf(fin, "%d %d %d", &item[i].user, &item[i].movie, &skiprate)){
			fprintf(stderr, "fscanf error: %d\n", i);
			fclose(fin);
			exit(EXIT_FAILURE);
		}
	}
	fclose(fin);
}

/*
 * Evaluate NBI on the probe set: for each record store
 *   ratio = getRank(user, movie) / (MOVIE_SIZE - degreeOfUser[user])
 * then print the sum of all ratios followed by their mean.
 */
void NBI(){
	double final_result = 0.0;
	int k;

	printf("\n");
	printf("\n\n");
	for( k = 0; k < PROBESET_SIZE; k++ ){
		const int user = item[k].user;
		const int movie = item[k].movie;
		const int unselected = MOVIE_SIZE - degreeOfUser[user];
		ratio[k] = (double)getRank(user, movie)/unselected;
	}
	for( k = 0; k < PROBESET_SIZE; k++ ){
		final_result += ratio[k];
	}
	printf("\n");
	printf("%10.6f", final_result);
	printf("%10.6f\n", (double)final_result/PROBESET_SIZE);
}

/*
 * Return the rank of `movie` in the sorted score column of `user`.
 *
 * Entries with equal score share a rank: the result is the smallest row
 * index of the run of equal-score entries that contains the movie.
 *
 * Returns the rank in 1..MOVIE_SIZE, or 0 when the movie ID is not present
 * in that column (previously control fell off the end of the function in
 * that case).
 */
int getRank(int user, int movie){
	int i;
	int j;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		if( scoreOfUnselectedMovie[i][user].movieId != movie ){
			continue;
		}
		/* Walk back to the first entry with the same score. */
		j = i;
		while( j > 1 && scoreOfUnselectedMovie[j - 1][user].score == scoreOfUnselectedMovie[i][user].score ){
			j--;
		}
		return j;
	}
	return 0;
}
/*
int findMovieRank(int numberOfMovie){
	int i,j;
	int degree;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( numberOfMovie == movieRank[i].movieId ){
			degree = movieRank[i].degree;
			for( j = i; j > 0 && movieRank[j].degree == degree; j-- );
				return j+1;
		}
	}
}
*/
时间: 2024-10-07 11:32:21

个性化推荐算法:GRM,CF,NBI的实现的相关文章

BAT大牛亲授-个性化推荐算法实战

第1章 个性化推荐算法综述 个性化推荐算法综述部分,主要介绍个性化推荐算法综述,本课程内容大纲以及本课程所需要准备的编程环境与基础知识. 1-1 个性化推荐算法综述 1-2 个性化召回算法综述 第2章 基于邻域的个性化召回算法LFM 本章节重点介绍一种基于邻域的个性化召回算法,LFM.从LFM算法的理论知识与数学原理进行介绍.并结合公开数据集,代码实战LFM算法. 2-1 LFM算法综述 2-2 LFM算法的理论基础与公式推导 2-3 基础工具函数的代码书写 2-4 LFM算法训练数据抽取 2-

产品学习之个性化推荐和热度算法详解

今日头条的走红带动了"个性化推荐"的概念,自此之后,内容型的产品,个性化算法就逐渐从卖点变为标配. 伴随着"机器学习","大数据"之类的热词和概念,产品的档次瞬间提高了很多.而各种推荐算法绝不仅仅是研发自己的任务,作为产品经理,必须深入到算法内部,参与算法的设计,以及结合内容对算法不断"调教",才能让产品的推荐算法不断完善,最终与自己的内容双剑合璧. 本文以新闻产品为例,结合了我之前产品从零积累用户的经验,整理了作为PM需要了

网易云音乐的歌单推荐算法

[转载]原文地址:https://www.zhihu.com/question/26743347 原文: 不是广告党,但我却成为网易云音乐的的重度患者,不管是黑红的用户界面,还是高质量音乐质量都用起来很舒服.我喜欢听歌,几乎每周不低于15小时,但其实听得不是特别多,并没有经常刻意地去搜歌名,所以曲目数量我并不是很在乎.但是比起其它,网音给我推荐的歌单几乎次次惊艳,而且大多都没听过,或者好久以前听过早就忘记了名字,或者之前不知道在哪听过 只是知道其中一部分旋律,根本不知道名字,等等,听起来整个人大

推荐算法相关

目录 推荐算法相关 推荐系统介绍 评估指标 评估方法 推荐系统发展 相关算法 LFM算法 Personal Rank算法 item2vec算法 Content Based LR + GBDT FM.FFM MLR WDL FFN PNN DeepFM DIN Deep & Cross Network(DCN) 推荐算法相关 推荐系统介绍 What:分类目录.搜索引擎.推荐系统 Why:需要在信息过载.用户需求不明确的背景下,留住用户和内容生产者,实现商业目标 评估指标 准确性 学界:RMSE.M

今日头条核心技术“个性推荐算法”揭秘

今日头条核心技术"个性推荐算法"揭秘 最近面试华兴资本, 他们比较关注今日头条算法的实现, 今天特转载网上 今日头条算法解密 [IT168 评论]互联网给用户带来了大量的信息,满足了用户在信息时代对信息的需求,也使得用户在面对大量信息时无法从中获得对自己真正有用的那部分信息,对信息的使用效率反而降低了,而通常解决 1月20日,新生代移动互联网企业今日头条在北京国家会议中心举办了"算数·年度数据发布会",数据发布会的主题名为"算数",实际上指的是&

聊聊淘宝天猫个性化推荐技术演进史

引言:个性化推荐技术直面用户,可以说是站在最前线的那个.如今,从用户打开手机淘宝客户端(简称"手淘")或是手机天猫客户端(简称"猫客")的那一刻起,个性化推荐技术就已经启动,为你我带来一场个性化的购物之旅.本文将细数个性化推荐的一路风雨,讲讲个性化推荐技术的演进史. 本文选自[<尽在双11--阿里巴巴技术演进与超越>. 1.个性化推荐All-in无线 无线个性化推荐起步于2013年10月.现在往回看,当时的阿里很好地把握住了移动端快速发展的浪潮,以集团A

协同过滤推荐算法的原理及实现

一.协同过滤算法的原理及实现 协同过滤推荐算法是诞生最早,并且较为著名的推荐算法.主要的功能是预测和推荐.算法通过对用户历史行为数据的挖掘发现用户的偏好,基于不同的偏好对用户进行群组划分并推荐品味相似的商品.协同过滤推荐算法分为两类,分别是基于用户的协同过滤算法(user-based collaboratIve filtering),和基于物品的协同过滤算法(item-based collaborative filtering).简单的说就是:人以类聚,物以群分.下面我们将分别说明这两类推荐算法

推荐算法入门

推荐算法大致能够分为下面几类 基于流行度的算法 协同过滤算法(user-based CF and item-based CF) 基于内容的算法(content-based) 基于模型的算法 混合算法 1. 基于流行度的算法 能够依照一个项目的流行度进行排序,将最流行的项目推荐给用户.比方在微博推荐中.将最为流行的大V用户推荐给普通用户. 微博每日都有最热门话题榜等等. 算法十分简单,通过简单热度排序就可以. 文章> Predict Whom One Will Follow:Followee Re

推荐算法学习笔记

推荐算法举个简单的例子,比如有个用户进来看了一堆内容,我们把他看的所有的历史行为,嵌入到推荐引擎当中去.这个推荐引擎就会生成个性化的频道,下次这个用户再登录,或者都不用下一次,过几分钟之后,他看到的内容就会根据他最近发生的历史行为发生变化,这就是推荐系统的基本逻辑.这种方法叫基于用户行为的推荐,当然是有一定局限性的.比如你只有一个用户行为的时候,你就不知道他会不会看一个从来没人看过的内容,这其实就是长尾问题.当你可以积累越来越多的用户,用户的历史行为会有助于你对长尾内容的理解. 推荐系统本质是在