mirror of
https://github.com/apachecn/ailearning.git
synced 2026-05-07 14:13:14 +08:00
更新6.支持向量机.md文件
This commit is contained in:
@@ -115,3 +115,32 @@ SVM的一般流程
|
||||
测试算法:十分简单的计算过程就可以实现。
|
||||
使用算法:几乎所有分类问题都可以使用SVM,值得一提的是,SVM本身是一个二类分类器,对多类问题应用SVM需要对代码做一些修改。
|
||||
```
|
||||
* 到目前为止,我们已经了解了一些理论知识,现在我们通过`Code`来实现我们的算法吧。
|
||||
|
||||
## SMO高效优化算法
|
||||
|
||||
> 序列最小优化(Sequential Minimal Optimization, SMO)
|
||||
|
||||
* 创建作者:John Platt
|
||||
* 创建时间:1996年
|
||||
* SMO用途:用于训练SVM
|
||||
* SMO目标:求出一系列alpha和b,一旦求出alpha,就很容易计算出权重向量w并得到分隔超平面。
|
||||
* SMO思想:是将大优化问题分解为多个小优化问题来求解的。
|
||||
* SMO原理:每次循环选择两个alpha进行优化处理,一旦找出一对合适的alpha,那么就增大一个同时减少一个。
|
||||
* 这里指的合适必须要符合一定的条件
|
||||
* 1.这两个alpha必须要在间隔边界之外
|
||||
* 2.这两个alpha还没有进行过区间化处理或者不在边界上。
|
||||
* 之所以要同时改变2个alpha,原因是我们有一个约束条件:`Σ a[i]*label(i)=0`;如果只修改一个alpha,很可能导致约束条件失效。
|
||||
|
||||
```
|
||||
SMO伪代码大致如下:
|
||||
|
||||
创建一个alpha向量并将其初始化为0向量
|
||||
当迭代次数小于最大迭代次数时(外循环)
|
||||
对数据集中的每个数据向量(内循环):
|
||||
如果该数据向量可以被优化
|
||||
随机选择另外一个数据向量
|
||||
同时优化这两个向量
|
||||
如果两个向量都不能被优化,退出内循环
|
||||
如果所有向量都没被优化,增加迭代数目,继续下一次循环
|
||||
```
|
||||
|
||||
100
input/6.SVM/testSet.txt
Executable file
100
input/6.SVM/testSet.txt
Executable file
@@ -0,0 +1,100 @@
|
||||
3.542485 1.977398 -1
|
||||
3.018896 2.556416 -1
|
||||
7.551510 -1.580030 1
|
||||
2.114999 -0.004466 -1
|
||||
8.127113 1.274372 1
|
||||
7.108772 -0.986906 1
|
||||
8.610639 2.046708 1
|
||||
2.326297 0.265213 -1
|
||||
3.634009 1.730537 -1
|
||||
0.341367 -0.894998 -1
|
||||
3.125951 0.293251 -1
|
||||
2.123252 -0.783563 -1
|
||||
0.887835 -2.797792 -1
|
||||
7.139979 -2.329896 1
|
||||
1.696414 -1.212496 -1
|
||||
8.117032 0.623493 1
|
||||
8.497162 -0.266649 1
|
||||
4.658191 3.507396 -1
|
||||
8.197181 1.545132 1
|
||||
1.208047 0.213100 -1
|
||||
1.928486 -0.321870 -1
|
||||
2.175808 -0.014527 -1
|
||||
7.886608 0.461755 1
|
||||
3.223038 -0.552392 -1
|
||||
3.628502 2.190585 -1
|
||||
7.407860 -0.121961 1
|
||||
7.286357 0.251077 1
|
||||
2.301095 -0.533988 -1
|
||||
-0.232542 -0.547690 -1
|
||||
3.457096 -0.082216 -1
|
||||
3.023938 -0.057392 -1
|
||||
8.015003 0.885325 1
|
||||
8.991748 0.923154 1
|
||||
7.916831 -1.781735 1
|
||||
7.616862 -0.217958 1
|
||||
2.450939 0.744967 -1
|
||||
7.270337 -2.507834 1
|
||||
1.749721 -0.961902 -1
|
||||
1.803111 -0.176349 -1
|
||||
8.804461 3.044301 1
|
||||
1.231257 -0.568573 -1
|
||||
2.074915 1.410550 -1
|
||||
-0.743036 -1.736103 -1
|
||||
3.536555 3.964960 -1
|
||||
8.410143 0.025606 1
|
||||
7.382988 -0.478764 1
|
||||
6.960661 -0.245353 1
|
||||
8.234460 0.701868 1
|
||||
8.168618 -0.903835 1
|
||||
1.534187 -0.622492 -1
|
||||
9.229518 2.066088 1
|
||||
7.886242 0.191813 1
|
||||
2.893743 -1.643468 -1
|
||||
1.870457 -1.040420 -1
|
||||
5.286862 -2.358286 1
|
||||
6.080573 0.418886 1
|
||||
2.544314 1.714165 -1
|
||||
6.016004 -3.753712 1
|
||||
0.926310 -0.564359 -1
|
||||
0.870296 -0.109952 -1
|
||||
2.369345 1.375695 -1
|
||||
1.363782 -0.254082 -1
|
||||
7.279460 -0.189572 1
|
||||
1.896005 0.515080 -1
|
||||
8.102154 -0.603875 1
|
||||
2.529893 0.662657 -1
|
||||
1.963874 -0.365233 -1
|
||||
8.132048 0.785914 1
|
||||
8.245938 0.372366 1
|
||||
6.543888 0.433164 1
|
||||
-0.236713 -5.766721 -1
|
||||
8.112593 0.295839 1
|
||||
9.803425 1.495167 1
|
||||
1.497407 -0.552916 -1
|
||||
1.336267 -1.632889 -1
|
||||
9.205805 -0.586480 1
|
||||
1.966279 -1.840439 -1
|
||||
8.398012 1.584918 1
|
||||
7.239953 -1.764292 1
|
||||
7.556201 0.241185 1
|
||||
9.015509 0.345019 1
|
||||
8.266085 -0.230977 1
|
||||
8.545620 2.788799 1
|
||||
9.295969 1.346332 1
|
||||
2.404234 0.570278 -1
|
||||
2.037772 0.021919 -1
|
||||
1.727631 -0.453143 -1
|
||||
1.979395 -0.050773 -1
|
||||
8.092288 -1.372433 1
|
||||
1.667645 0.239204 -1
|
||||
9.854303 1.365116 1
|
||||
7.921057 -1.327587 1
|
||||
8.500757 1.492372 1
|
||||
1.339746 -0.291183 -1
|
||||
3.107511 0.758367 -1
|
||||
2.609525 0.902979 -1
|
||||
3.263585 1.367898 -1
|
||||
2.912122 -0.202359 -1
|
||||
1.731786 0.589096 -1
|
||||
2.387003 1.573131 -1
|
||||
@@ -1,49 +1,50 @@
|
||||
#!/usr/bin/python
|
||||
# coding:utf8
|
||||
|
||||
"""
|
||||
Created on Nov 4, 2010
|
||||
Update on 2017-03-21
|
||||
Chapter 5 source file for Machine Learning in Action
|
||||
@author: Peter/geekidentity
|
||||
@author: Peter/geekidentity/片刻
|
||||
"""
|
||||
from numpy import *
|
||||
import pylab
|
||||
from time import sleep
|
||||
|
||||
def main():
|
||||
dataArr, labelArr = loadDataSet('testSet.txt')
|
||||
smoSimple(dataArr, labelArr, 0.6, 0.001, 40)
|
||||
|
||||
def loadDataSet(fileName):
|
||||
"""
|
||||
对文件进行逐行解析,从而得到每行的类标签和整个数据矩阵
|
||||
Args:
|
||||
fileName: testSet.txt
|
||||
|
||||
fileName 文件名
|
||||
Returns:
|
||||
数据矩阵, 类标签
|
||||
dataMat 数据矩阵
|
||||
labelMat 类标签
|
||||
"""
|
||||
dataMat = []; labelMat = []
|
||||
dataMat = []
|
||||
labelMat = []
|
||||
fr = open(fileName)
|
||||
for line in fr.readlines():
|
||||
lineArr = line.strip().split('\t')
|
||||
dataMat.append([float(lineArr[0]), float(lineArr[1])])
|
||||
labelMat.append(float(lineArr[2]))
|
||||
return dataMat,labelMat
|
||||
return dataMat, labelMat
|
||||
|
||||
def selectJrand(i,m):
|
||||
|
||||
def selectJrand(i, m):
|
||||
"""
|
||||
随机选择一个整数
|
||||
Args:
|
||||
i: 第一个alpha的下标
|
||||
m: 所有alpha的数目
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
j=i #we want to select any J not equal to i
|
||||
while (j==i):
|
||||
j = int(random.uniform(0,m))
|
||||
return j
|
||||
|
||||
|
||||
def clipAlpha(aj,H,L):
|
||||
"""
|
||||
用于调整大于H或小于L的alpha值
|
||||
@@ -51,7 +52,6 @@ def clipAlpha(aj,H,L):
|
||||
aj:
|
||||
H:
|
||||
L:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
@@ -61,17 +61,10 @@ def clipAlpha(aj,H,L):
|
||||
aj = L
|
||||
return aj
|
||||
|
||||
|
||||
def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
|
||||
"""
|
||||
SVM SMO算法的简单实现:
|
||||
创建一个alpha向量并将其初始化为0向量
|
||||
当迭代次数小于最大迭代次数时(外循环)
|
||||
对数据集中的每个数据向量(内循环):
|
||||
如果该数据向量可以被优化:
|
||||
随机选择另外一个数据向量
|
||||
同时优化这两个向量
|
||||
如果两个向量都不能被优化,退出内循环
|
||||
如果所有向量都没有被优化,增加迭代数目,继续下一次循环
|
||||
"""smoSimple
|
||||
|
||||
Args:
|
||||
dataMatIn: 数据集
|
||||
classLabels: 类别标签
|
||||
@@ -80,7 +73,6 @@ def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
|
||||
可以通过调节该参数达到不同的结果。
|
||||
toler: 容错率
|
||||
maxIter: 退出前最大的循环次数
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
@@ -132,6 +124,13 @@ def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
|
||||
return b,alphas
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 获取特征和目标变量
|
||||
dataArr, labelArr = loadDataSet('input/6.SVM/testSet.txt')
|
||||
# print labelArr
|
||||
# smoSimple(dataArr, labelArr, 0.6, 0.001, 40)
|
||||
|
||||
|
||||
def kernelTrans(X, A, kTup): # calc the kernel or transform data to a higher dimensional space
|
||||
"""
|
||||
核转换函数
|
||||
@@ -566,5 +565,3 @@ def smoPK(dataMatIn, classLabels, C, toler, maxIter): # full Platt SMO
|
||||
print("iteration number: %d" % iter)
|
||||
return oS.b, oS.alphas
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user