mirror of
https://github.com/apachecn/ailearning.git
synced 2026-02-11 14:26:04 +08:00
更新svdRec.py 添加示例
This commit is contained in:
@@ -1,30 +1,74 @@
|
||||
# coding=utf-8
|
||||
def loadExData():
|
||||
return[[1,1,1,0,0],
|
||||
[2,2,2,0,0],
|
||||
[1,1,1,0,0],
|
||||
[5,5,5,0,0],
|
||||
[1,1,0,2,2],
|
||||
[0,0,0,3,3],
|
||||
[0,0,0,1,1]]
|
||||
#!/usr/bin/python
|
||||
# encoding: utf-8
|
||||
|
||||
from numpy import *
|
||||
from numpy import *
|
||||
from numpy import linalg as la
|
||||
|
||||
|
||||
def loadExData():
|
||||
return[[1, 1, 1, 0, 0],
|
||||
[2, 2, 2, 0, 0],
|
||||
[1, 1, 1, 0, 0],
|
||||
[5, 5, 5, 0, 0],
|
||||
[1, 1, 0, 2, 2],
|
||||
[0, 0, 0, 3, 3],
|
||||
[0, 0, 0, 1, 1]]
|
||||
|
||||
|
||||
# 欧氏距离相似度,假定inA和inB 都是列向量
|
||||
# 计算向量的第二范式,相当于计算了欧氏距离
|
||||
def ecludSim(inA,inB):
|
||||
def ecludSim(inA, inB):
|
||||
return 1.0/(1.0 + la.norm(inA - inB))
|
||||
|
||||
|
||||
# pearsSim()函数会检查是否存在3个或更多的点。
|
||||
# corrcoef直接计算皮尔逊相关系数
|
||||
def pearsSim(inA,inB):
|
||||
def pearsSim(inA, inB):
|
||||
# 如果不存在,该函数返回1.0,此时两个向量完全相关。
|
||||
if len(inA)< 3 :return 1.0
|
||||
return 0.5 + 0.5*corrcoef(inA,inB,rowvar = 0)[0][1]
|
||||
if len(inA) < 3:
|
||||
return 1.0
|
||||
return 0.5 + 0.5*corrcoef(inA, inB, rowvar=0)[0][1]
|
||||
|
||||
|
||||
# 计算余弦相似度
|
||||
def cosSim(inA,inB):
|
||||
def cosSim(inA, inB):
|
||||
num = float(inA.T*inB)
|
||||
denom = la.norm(inA)*la.norm(inB)
|
||||
return 0.5 +0.5*(num/denom)
|
||||
return 0.5 + 0.5*(num/denom)
|
||||
|
||||
|
||||
# 基于物品相似度的推荐引擎
|
||||
# standEst()函数,用来计算在给定相似度计算方法的条件下,用户对物品的估计评分值。
|
||||
# standEst()函数的参数包括数据矩阵、用户编号、物品编号和相似度计算方法
|
||||
def standEst(dataMat, user, simMeas, item):
|
||||
n = shape(dataMat)[1]
|
||||
simTotal = 0.0
|
||||
ratSimTotal = 0.0
|
||||
for j in range(n):
|
||||
userRating = dataMat[user, j]
|
||||
if userRating == 0:
|
||||
continue
|
||||
# 寻找两个用户都评级的物品
|
||||
overLap = nonzero(logical_and(dataMat[:, item].A>0, dataMat[:, j].A>0))[0]
|
||||
if len(overLap) == 0:similarity =0
|
||||
else: similarity = simMeas(dataMat[overLap,item], \
|
||||
dataMat[overLap,j])
|
||||
#print 'the %d and %d similarity is : %f'(iten,j,similarity)
|
||||
simTotal += similarity
|
||||
ratSimTotal += similarity * userRating
|
||||
if simTotal == 0: return 0
|
||||
else: return ratSimTotal/simTotal
|
||||
|
||||
|
||||
#recommend()函数,就是推荐引擎,它会调用standEst()函数。
|
||||
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
|
||||
# 寻找未评级的物品
|
||||
unratedItems = nonzero(dataMat[user, :].A == 0)[1]
|
||||
if len(unratedItems) == 0:
|
||||
return 'you rated everything'
|
||||
itemScores = []
|
||||
for item in unratedItems:
|
||||
estimatedScore = estMethod(dataMat, user, simMeas, item)
|
||||
# 寻找前N个未评级物品
|
||||
itemScores.append((item, estimatedScore))
|
||||
return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[: N]
|
||||
|
||||
Reference in New Issue
Block a user