mirror of
https://github.com/apachecn/ailearning.git
synced 2026-05-16 14:04:18 +08:00
Merge branch 'master' of https://github.com/apachecn/MachineLearning
This commit is contained in:
@@ -80,14 +80,14 @@ def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
|
|||||||
dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
|
dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
|
||||||
b = 0; m,n = shape(dataMatrix)
|
b = 0; m,n = shape(dataMatrix)
|
||||||
alphas = mat(zeros((m,1)))
|
alphas = mat(zeros((m,1)))
|
||||||
iter = 0
|
iter = 0 # 没有任何alpha改变的情况下遍历数据的次数
|
||||||
while (iter < maxIter):
|
while (iter < maxIter):
|
||||||
alphaPairsChanged = 0
|
alphaPairsChanged = 0 #记录alpha是否已经进行优化,每次循环时设为0,然后再对整个集合顺序遍历
|
||||||
for i in range(m):
|
for i in range(m):
|
||||||
fXi = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T)) + b
|
fXi = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T)) + b # 我们预测的类别
|
||||||
Ei = fXi - float(labelMat[i])#if checks if an example violates KKT conditions
|
Ei = fXi - float(labelMat[i])#if checks if an example violates KKT conditions 误差
|
||||||
if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
|
if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
|
||||||
j = selectJrand(i,m)
|
j = selectJrand(i,m) # 误差很大时进行优化
|
||||||
fXj = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T)) + b
|
fXj = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T)) + b
|
||||||
Ej = fXj - float(labelMat[j])
|
Ej = fXj - float(labelMat[j])
|
||||||
alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy()
|
alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy()
|
||||||
@@ -195,18 +195,15 @@ def innerL(i, oS):
|
|||||||
else:
|
else:
|
||||||
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
|
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
|
||||||
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
|
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
|
||||||
if L == H: print
|
if L == H: print("L==H")
|
||||||
"L==H";
|
|
||||||
return 0
|
return 0
|
||||||
eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j] # changed for kernel
|
eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j] # changed for kernel
|
||||||
if eta >= 0: print
|
if eta >= 0: print("eta>=0")
|
||||||
"eta>=0";
|
|
||||||
return 0
|
return 0
|
||||||
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
|
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
|
||||||
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
|
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
|
||||||
updateEk(oS, j) # added this for the Ecache
|
updateEk(oS, j) # added this for the Ecache
|
||||||
if (abs(oS.alphas[j] - alphaJold) < 0.00001): print
|
if (abs(oS.alphas[j] - alphaJold) < 0.00001): print("j not moving enough")
|
||||||
"j not moving enough";
|
|
||||||
return 0
|
return 0
|
||||||
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j]) # update i by the same amount as j
|
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j]) # update i by the same amount as j
|
||||||
updateEk(oS, i) # added this for the Ecache #the update is in the opposite direction
|
updateEk(oS, i) # added this for the Ecache #the update is in the opposite direction
|
||||||
@@ -235,22 +232,19 @@ def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('lin', 0)): # full Pl
|
|||||||
if entireSet: # go over all
|
if entireSet: # go over all
|
||||||
for i in range(oS.m):
|
for i in range(oS.m):
|
||||||
alphaPairsChanged += innerL(i, oS)
|
alphaPairsChanged += innerL(i, oS)
|
||||||
print
|
print("fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
|
||||||
"fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)
|
|
||||||
iter += 1
|
iter += 1
|
||||||
else: # go over non-bound (railed) alphas
|
else: # go over non-bound (railed) alphas
|
||||||
nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
|
nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
|
||||||
for i in nonBoundIs:
|
for i in nonBoundIs:
|
||||||
alphaPairsChanged += innerL(i, oS)
|
alphaPairsChanged += innerL(i, oS)
|
||||||
print
|
print("non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged))
|
||||||
"non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)
|
|
||||||
iter += 1
|
iter += 1
|
||||||
if entireSet:
|
if entireSet:
|
||||||
entireSet = False # toggle entire set loop
|
entireSet = False # toggle entire set loop
|
||||||
elif (alphaPairsChanged == 0):
|
elif (alphaPairsChanged == 0):
|
||||||
entireSet = True
|
entireSet = True
|
||||||
print
|
print("iteration number: %d" % iter)
|
||||||
"iteration number: %d" % iter
|
|
||||||
return oS.b, oS.alphas
|
return oS.b, oS.alphas
|
||||||
|
|
||||||
|
|
||||||
@@ -272,16 +266,14 @@ def testRbf(k1=1.3):
|
|||||||
svInd = nonzero(alphas.A > 0)[0]
|
svInd = nonzero(alphas.A > 0)[0]
|
||||||
sVs = datMat[svInd] # get matrix of only support vectors
|
sVs = datMat[svInd] # get matrix of only support vectors
|
||||||
labelSV = labelMat[svInd];
|
labelSV = labelMat[svInd];
|
||||||
print
|
print("there are %d Support Vectors" % shape(sVs)[0])
|
||||||
"there are %d Support Vectors" % shape(sVs)[0]
|
|
||||||
m, n = shape(datMat)
|
m, n = shape(datMat)
|
||||||
errorCount = 0
|
errorCount = 0
|
||||||
for i in range(m):
|
for i in range(m):
|
||||||
kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1))
|
kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1))
|
||||||
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
||||||
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
||||||
print
|
print("the training error rate is: %f" % (float(errorCount) / m))
|
||||||
"the training error rate is: %f" % (float(errorCount) / m)
|
|
||||||
dataArr, labelArr = loadDataSet('testSetRBF2.txt')
|
dataArr, labelArr = loadDataSet('testSetRBF2.txt')
|
||||||
errorCount = 0
|
errorCount = 0
|
||||||
datMat = mat(dataArr);
|
datMat = mat(dataArr);
|
||||||
@@ -291,8 +283,7 @@ def testRbf(k1=1.3):
|
|||||||
kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1))
|
kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1))
|
||||||
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
||||||
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
||||||
print
|
print("the test error rate is: %f" % (float(errorCount) / m))
|
||||||
"the test error rate is: %f" % (float(errorCount) / m)
|
|
||||||
|
|
||||||
|
|
||||||
def img2vector(filename):
|
def img2vector(filename):
|
||||||
@@ -338,8 +329,7 @@ def testDigits(kTup=('rbf', 10)):
|
|||||||
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
|
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
|
||||||
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
||||||
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
||||||
print
|
print("the training error rate is: %f" % (float(errorCount) / m))
|
||||||
"the training error rate is: %f" % (float(errorCount) / m)
|
|
||||||
dataArr, labelArr = loadImages('testDigits')
|
dataArr, labelArr = loadImages('testDigits')
|
||||||
errorCount = 0
|
errorCount = 0
|
||||||
datMat = mat(dataArr);
|
datMat = mat(dataArr);
|
||||||
@@ -349,8 +339,7 @@ def testDigits(kTup=('rbf', 10)):
|
|||||||
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
|
kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
|
||||||
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b
|
||||||
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
if sign(predict) != sign(labelArr[i]): errorCount += 1
|
||||||
print
|
print("the test error rate is: %f" % (float(errorCount) / m))
|
||||||
"the test error rate is: %f" % (float(errorCount) / m)
|
|
||||||
|
|
||||||
|
|
||||||
'''#######********************************
|
'''#######********************************
|
||||||
@@ -416,18 +405,15 @@ def innerLK(i, oS):
|
|||||||
else:
|
else:
|
||||||
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
|
L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
|
||||||
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
|
H = min(oS.C, oS.alphas[j] + oS.alphas[i])
|
||||||
if L == H: print
|
if L == H: print("L==H")
|
||||||
"L==H";
|
|
||||||
return 0
|
return 0
|
||||||
eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - oS.X[j, :] * oS.X[j, :].T
|
eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - oS.X[j, :] * oS.X[j, :].T
|
||||||
if eta >= 0: print
|
if eta >= 0: print("eta>=0")
|
||||||
"eta>=0";
|
|
||||||
return 0
|
return 0
|
||||||
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
|
oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
|
||||||
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
|
oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
|
||||||
updateEk(oS, j) # added this for the Ecache
|
updateEk(oS, j) # added this for the Ecache
|
||||||
if (abs(oS.alphas[j] - alphaJold) < 0.00001): print
|
if (abs(oS.alphas[j] - alphaJold) < 0.00001): print("j not moving enough")
|
||||||
"j not moving enough";
|
|
||||||
return 0
|
return 0
|
||||||
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j]) # update i by the same amount as j
|
oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j]) # update i by the same amount as j
|
||||||
updateEk(oS, i) # added this for the Ecache #the update is in the opposite direction
|
updateEk(oS, i) # added this for the Ecache #the update is in the opposite direction
|
||||||
|
|||||||
Reference in New Issue
Block a user