更新 PCA.md文档

This commit is contained in:
jiangzhonglian
2017-08-28 14:32:03 +08:00
parent 21fbd14c76
commit c1fe6d683c
3 changed files with 113 additions and 5 deletions

View File

@@ -74,10 +74,11 @@ def pca(dataMat, topNfeat=9999999):
# -1表示倒序返回topN的特征值[-1 到 -(topNfeat+1) 但是不包括-(topNfeat+1)本身的倒序]
eigValInd = eigValInd[:-(topNfeat+1):-1]
# print 'eigValInd2=', eigValInd
# 重组eig vects 最大到最小
# 重组 eigVects 最大到最小
redEigVects = eigVects[:, eigValInd]
# print 'redEigVects=', redEigVects.T
# 将数据转换到新空间
# print "---", shape(meanRemoved), shape(redEigVects)
lowDDataMat = meanRemoved * redEigVects
reconMat = (lowDDataMat * redEigVects.T) + meanVals
# print 'lowDDataMat=', lowDDataMat
@@ -114,10 +115,10 @@ def analyse_data(dataMat):
topNfeat = 20
eigValInd = eigValInd[:-(topNfeat+1):-1]
cov_all_score = sum(eigvals)
cov_all_score = float(sum(eigvals))
sum_cov_score = 0
for i in range(0, len(eigValInd)):
line_cov_score = eigvals[eigValInd[i]]
line_cov_score = float(eigvals[eigValInd[i]])
sum_cov_score += line_cov_score
'''
我们发现其中有超过20%的特征值都是0。
@@ -128,7 +129,7 @@ def analyse_data(dataMat):
最后，我们可能会注意到有一些小的负值，它们主要源自数值误差，应该四舍五入成0。
'''
print '主成分:%s, 方差占比:%s%%, 累积方差占比:%s%%' % (format(i+1, '2.0f'), format(line_cov_score/cov_all_score*100, '4.1f'), format(sum_cov_score/cov_all_score*100, '4.1f'))
print '主成分:%s, 方差占比:%s%%, 累积方差占比:%s%%' % (format(i+1, '2.0f'), format(line_cov_score/cov_all_score*100, '4.2f'), format(sum_cov_score/cov_all_score*100, '4.1f'))
if __name__ == "__main__":