mirror of
https://github.com/apachecn/ailearning.git
synced 2026-02-12 06:46:14 +08:00
Merge branch 'master' of https://github.com/apachecn/MachineLearning
This commit is contained in:
45
src/python/13.PCA/pca.py
Normal file
45
src/python/13.PCA/pca.py
Normal file
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/python
|
||||
# coding:utf8
|
||||
|
||||
'''
|
||||
Created on Jun 1, 2011
|
||||
Update on 2017-04-06
|
||||
@author: Peter Harrington/片刻
|
||||
'''
|
||||
print(__doc__)
|
||||
from numpy import *
|
||||
|
||||
|
||||
def loadDataSet(fileName, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Each non-blank line of the file becomes one row; the line is stripped
    of surrounding whitespace, split on ``delim`` and every field is
    converted with ``float`` (so textual ``NaN`` entries become NaN).

    Args:
        fileName: path of the text file to read.
        delim: field separator used within each line (default: tab).

    Returns:
        A ``numpy.matrix`` holding one row per non-blank line of the file.

    Raises:
        ValueError: if a field cannot be parsed as a float.
    """
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        # Build real lists of floats: on Python 3 ``map`` is lazy and would
        # otherwise leave map objects inside the matrix.
        # Blank lines (e.g. a trailing newline) are skipped, not parsed.
        datArr = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()
        ]
    # ``asmatrix`` is the spelling that survives NumPy 2.0 (``mat`` was removed).
    return asmatrix(datArr)
|
||||
|
||||
|
||||
def pca(dataMat, topNfeat=9999999):
    """Project data onto its top principal components.

    Args:
        dataMat: 2-D data, one sample per row and one feature per column
            (a ``numpy.matrix`` or anything ``asmatrix`` accepts).
        topNfeat: number of principal components to keep; the default is
            large enough to keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``:
        lowDDataMat -- the mean-centred data expressed in the kept
            components, shape (samples, kept components);
        reconMat -- the data mapped back to the original feature space
            from those components, with the column means added back.
    """
    dataMat = asmatrix(dataMat)
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals  # centre every feature on zero
    # rowvar=0: columns are the variables, rows are the observations.
    covMat = asmatrix(cov(meanRemoved, rowvar=0))
    # A covariance matrix is symmetric, so use ``eigh``: it always returns
    # real eigenvalues and orthonormal eigenvectors, whereas the general
    # ``eig`` may return complex or non-orthogonal results.
    eigVals, eigVects = linalg.eigh(covMat)
    eigValInd = argsort(eigVals)  # argsort orders smallest to largest
    # Walk backwards from the end to take the topNfeat largest eigenvalues.
    eigValInd = eigValInd[:-(topNfeat + 1):-1]
    redEigVects = eigVects[:, eigValInd]  # columns ordered largest first
    lowDDataMat = meanRemoved * redEigVects  # project into the new basis
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat
|
||||
|
||||
|
||||
def replaceNanWithMean(fileName='secom.data', delim=' '):
    """Load a data file and replace NaN entries with their column mean.

    Args:
        fileName: path of the data file passed to ``loadDataSet``
            (default: ``'secom.data'``).
        delim: field separator passed to ``loadDataSet`` (default: a
            single space).

    Returns:
        The loaded matrix with every NaN replaced by the mean of the
        non-NaN values in the same column. A column containing only NaN
        values is left unchanged, since it has no mean to fill in.
    """
    datMat = loadDataSet(fileName, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # ``.A`` gives a plain ndarray so the mask is element-wise.
        nanMask = isnan(datMat[:, i].A)
        validRows = nonzero(~nanMask)[0]  # rows holding a real number
        if len(validRows) == 0:
            # Nothing to average: avoid the mean of an empty selection.
            continue
        meanVal = mean(datMat[validRows, i])
        datMat[nonzero(nanMask)[0], i] = meanVal  # set NaN values to mean
    return datMat
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: reduce the sample data set to one principal component and
    # show the shape of the reduced matrix.
    dataMat = loadDataSet('data/13.PCA/testSet.txt')
    lowDmat, reconMat = pca(dataMat, 1)
    # Call form of print works on both Python 2 and 3 and matches the
    # ``print(__doc__)`` call at the top of the file.
    print(shape(lowDmat))
|
||||
Reference in New Issue
Block a user