diff --git a/src/python/6.SVM/svm-complete.py b/src/python/6.SVM/svm-complete.py index b537d385..e2e0c0cc 100644 --- a/src/python/6.SVM/svm-complete.py +++ b/src/python/6.SVM/svm-complete.py @@ -531,123 +531,3 @@ def testDigits(kTup=('rbf', 10)): predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b if sign(predict) != sign(labelArr[i]): errorCount += 1 print("the test error rate is: %f" % (float(errorCount) / m)) - - -'''#######******************************** -Non-Kernel VErsions below -''' #######******************************** - - -class optStructK: - def __init__(self, dataMatIn, classLabels, C, toler): # Initialize the structure with the parameters - self.X = dataMatIn - self.labelMat = classLabels - self.C = C - self.tol = toler - self.m = shape(dataMatIn)[0] - self.alphas = mat(zeros((self.m, 1))) - self.b = 0 - self.eCache = mat(zeros((self.m, 2))) # first column is valid flag - - -def calcEkK(oS, k): - fXk = float(multiply(oS.alphas, oS.labelMat).T * (oS.X * oS.X[k, :].T)) + oS.b - Ek = fXk - float(oS.labelMat[k]) - return Ek - - -def selectJK(i, oS, Ei): # this is the second choice -heurstic, and calcs Ej - maxK = -1 - maxDeltaE = 0 - Ej = 0 - oS.eCache[i] = [1, Ei] # set valid #choose the alpha that gives the maximum delta E - validEcacheList = nonzero(oS.eCache[:, 0].A)[0] - if (len(validEcacheList)) > 1: - for k in validEcacheList: # loop through valid Ecache values and find the one that maximizes delta E - if k == i: continue # don't calc for i, waste of time - Ek = calcEk(oS, k) - deltaE = abs(Ei - Ek) - if (deltaE > maxDeltaE): - maxK = k - maxDeltaE = deltaE - Ej = Ek - return maxK, Ej - else: # in this case (first time around) we don't have any valid eCache values - j = selectJrand(i, oS.m) - Ej = calcEk(oS, j) - return j, Ej - - -def updateEkK(oS, k): # after any alpha has changed update the new value in the cache - Ek = calcEk(oS, k) - oS.eCache[k] = [1, Ek] - - -def innerLK(i, oS): - Ei = calcEk(oS, i) - if ((oS.labelMat[i] * Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ( - (oS.labelMat[i] * Ei > oS.tol) and (oS.alphas[i] > 0)): - j, Ej = selectJ(i, oS, Ei) # this has been changed from selectJrand - alphaIold = oS.alphas[i].copy() - alphaJold = oS.alphas[j].copy() - if (oS.labelMat[i] != oS.labelMat[j]): - L = max(0, oS.alphas[j] - oS.alphas[i]) - H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i]) - else: - L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C) - H = min(oS.C, oS.alphas[j] + oS.alphas[i]) - if L == H: - print("L==H") - return 0 - eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - oS.X[j, :] * oS.X[j, :].T - if eta >= 0: - print("eta>=0") - return 0 - oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta - oS.alphas[j] = clipAlpha(oS.alphas[j], H, L) - updateEk(oS, j) # added this for the Ecache - if (abs(oS.alphas[j] - alphaJold) < 0.00001): - print("j not moving enough") - return 0 - oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j]) # update i by the same amount as j - updateEk(oS, i) # added this for the Ecache #the update is in the oppostie direction - b1 = oS.b - Ei - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.X[i, :] * oS.X[i, :].T - oS.labelMat[j] * ( - oS.alphas[j] - alphaJold) * oS.X[i, :] * oS.X[j, :].T - b2 = oS.b - Ej - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.X[i, :] * oS.X[j, :].T - oS.labelMat[j] * ( - oS.alphas[j] - alphaJold) * oS.X[j, :] * oS.X[j, :].T - if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]): - oS.b = b1 - elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]): - oS.b = b2 - else: - oS.b = (b1 + b2) / 2.0 - return 1 - else: - return 0 - - -def smoPK(dataMatIn, classLabels, C, toler, maxIter): # full Platt SMO - oS = optStruct(mat(dataMatIn), mat(classLabels).transpose(), C, toler) - iter = 0 - entireSet = True - alphaPairsChanged = 0 - while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)): - alphaPairsChanged = 0 - if entireSet: # go over all - for i in range(oS.m): - alphaPairsChanged += innerL(i, oS) - print("fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)) - iter += 1 - else: # go over non-bound (railed) alphas - nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0] - for i in nonBoundIs: - alphaPairsChanged += innerL(i, oS) - print("non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)) - iter += 1 - if entireSet: - entireSet = False # toggle entire set loop - elif (alphaPairsChanged == 0): - entireSet = True - print("iteration number: %d" % iter) - return oS.b, oS.alphas - diff --git a/src/python/6.SVM/svm-complete_Non-Kernel.py b/src/python/6.SVM/svm-complete_Non-Kernel.py index 3f5fa11c..25dfdaad 100644 --- a/src/python/6.SVM/svm-complete_Non-Kernel.py +++ b/src/python/6.SVM/svm-complete_Non-Kernel.py @@ -257,119 +257,3 @@ if __name__ == "__main__": # 画图 ws = calcWs(alphas, dataArr, labelArr) plotfig_SVM(dataArr, labelArr, ws, b, alphas) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -def testRbf(k1=1.3): - dataArr, labelArr = loadDataSet('testSetRBF.txt') - b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, ('rbf', k1)) # C=200 important - datMat = mat(dataArr) - labelMat = mat(labelArr).transpose() - svInd = nonzero(alphas.A > 0)[0] - sVs = datMat[svInd] # get matrix of only support vectors - labelSV = labelMat[svInd] - print("there are %d Support Vectors" % shape(sVs)[0]) - m, n = shape(datMat) - errorCount = 0 - for i in range(m): - kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1)) - predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b - if sign(predict) != sign(labelArr[i]): errorCount += 1 - print("the training error rate is: %f" % (float(errorCount) / m)) - dataArr, labelArr = loadDataSet('testSetRBF2.txt') - errorCount = 0 - datMat = mat(dataArr) - labelMat = mat(labelArr).transpose() - m, n = shape(datMat) - for i in range(m): - kernelEval = kernelTrans(sVs, datMat[i, :], ('rbf', k1)) - predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b - if sign(predict) != sign(labelArr[i]): errorCount += 1 - print("the test error rate is: %f" % (float(errorCount) / m)) - - -def img2vector(filename): - returnVect = zeros((1, 1024)) - fr = open(filename) - for i in range(32): - lineStr = fr.readline() - for j in range(32): - returnVect[0, 32 * i + j] = int(lineStr[j]) - return returnVect - - -def loadImages(dirName): - from os import listdir - hwLabels = [] - print(dirName) - trainingFileList = listdir(dirName) # load the training set - m = len(trainingFileList) - trainingMat = zeros((m, 1024)) - for i in range(m): - fileNameStr = trainingFileList[i] - fileStr = fileNameStr.split('.')[0] # take off .txt - classNumStr = int(fileStr.split('_')[0]) - if classNumStr == 9: - hwLabels.append(-1) - else: - hwLabels.append(1) - trainingMat[i, :] = img2vector('%s/%s' % (dirName, fileNameStr)) - return trainingMat, hwLabels - - -def testDigits(kTup=('rbf', 10)): - dataArr, labelArr = loadImages('trainingDigits') - b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup) - datMat = mat(dataArr) - labelMat = mat(labelArr).transpose() - svInd = nonzero(alphas.A > 0)[0] - sVs = datMat[svInd] - labelSV = labelMat[svInd] - print("there are %d Support Vectors" % shape(sVs)[0]) - m, n = shape(datMat) - errorCount = 0 - for i in range(m): - kernelEval = kernelTrans(sVs, datMat[i, :], kTup) - predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b - if sign(predict) != sign(labelArr[i]): errorCount += 1 - print("the training error rate is: %f" % (float(errorCount) / m)) - dataArr, labelArr = loadImages('testDigits') - errorCount = 0 - datMat = mat(dataArr) - labelMat = mat(labelArr).transpose() - m, n = shape(datMat) - for i in range(m): - kernelEval = kernelTrans(sVs, datMat[i, :], kTup) - predict = kernelEval.T * multiply(labelSV, alphas[svInd]) + b - if sign(predict) != sign(labelArr[i]): errorCount += 1 - print("the test error rate is: %f" % (float(errorCount) / m)) -