ailearning/src/python/05.Logistic/core/logRegression01.py
#!/usr/bin/env python
# encoding: utf-8
from numpy import *
import matplotlib.pyplot as plt
import time
'''
1. Install the matplotlib module: pip install matplotlib-1.5.0-cp27-none-win_amd64.whl
   Installing it directly with pip can fail, so it is recommended to download the .whl
   package from https://pypi.python.org/pypi/matplotlib/1.5.0 and install that instead.
2. You can then see the plotted figure.
'''
"""
@version:
@author: yangjf
@license: ApacheCN
@contact: highfei2011@126.com
@site: https://github.com/apachecn/MachineLearning
@software: PyCharm
@file: logRegression01.py
@time: 2017/3/3 22:03
@test result: ok
"""
# Sigmoid function: maps any real value into (0, 1)
def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))
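

# Added note (not part of the original file): trainLogRegres below maximizes the
# log-likelihood of the logistic model. With h = sigmoid(X * w), the gradient of the
# log-likelihood is X^T * (y - h), which gives the update
#     w <- w + alpha * X^T * (y - h)
# used by all three branches (full-batch, stochastic, and smoothed stochastic).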
# Train a logistic regression model with the optimizer selected in opts
def trainLogRegres(train_x, train_y, opts):
    # record the training time
    startTime = time.time()

    numSamples, numFeatures = shape(train_x)
    alpha = opts['alpha']; maxIter = opts['maxIter']
    weights = ones((numFeatures, 1))

    # optimize the weights by gradient-based updates
    for k in range(maxIter):
        if opts['optimizeType'] == 'gradDescent':  # full-batch gradient method
            output = sigmoid(train_x * weights)
            error = train_y - output
            weights = weights + alpha * train_x.transpose() * error
        elif opts['optimizeType'] == 'stocGradDescent':  # stochastic gradient method
            for i in range(numSamples):
                output = sigmoid(train_x[i, :] * weights)
                error = train_y[i, 0] - output
                weights = weights + alpha * train_x[i, :].transpose() * error
        elif opts['optimizeType'] == 'smoothStocGradDescent':  # smoothed stochastic gradient method
            # pick the samples in random order to reduce periodic fluctuations
            dataIndex = range(numSamples)
            for i in range(numSamples):
                alpha = 4.0 / (1.0 + k + i) + 0.01
                randIndex = int(random.uniform(0, len(dataIndex)))
                sampleIndex = dataIndex[randIndex]
                output = sigmoid(train_x[sampleIndex, :] * weights)
                error = train_y[sampleIndex, 0] - output
                weights = weights + alpha * train_x[sampleIndex, :].transpose() * error
                del(dataIndex[randIndex])  # drop this sample so it is not picked again in this pass
        else:
            raise NameError('Not support optimize method type!')

    print 'Congratulations, training complete! Took %fs!' % (time.time() - startTime)
    return weights
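

# Descriptive summary of the supported opts['optimizeType'] values (added comment):
# 'gradDescent' performs one full-batch update per iteration, 'stocGradDescent' one pass
# of per-sample updates per iteration, and 'smoothStocGradDescent' per-sample updates in
# random order with a decaying step size alpha.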
# Test the trained logistic regression model on a given test set
def testLogRegres(weights, test_x, test_y):
    numSamples, numFeatures = shape(test_x)
    matchCount = 0
    for i in xrange(numSamples):
        # classify as positive when the predicted probability exceeds 0.5
        predict = sigmoid(test_x[i, :] * weights)[0, 0] > 0.5
        if predict == bool(test_y[i, 0]):
            matchCount += 1
    accuracy = float(matchCount) / numSamples
    return accuracy
# Show the trained logistic regression model (only available for 2-D data)
def showLogRegres(weights, train_x, train_y):
    # note: train_x and train_y are numpy mat (matrix) type
    numSamples, numFeatures = shape(train_x)
    if numFeatures != 3:
        print "Sorry! I can not draw because the dimension of your data is not 2!"
        return 1

    # draw all the samples (class 0 in red, class 1 in blue)
    for i in xrange(numSamples):
        if int(train_y[i, 0]) == 0:
            plt.plot(train_x[i, 1], train_x[i, 2], 'or')
        elif int(train_y[i, 0]) == 1:
            plt.plot(train_x[i, 1], train_x[i, 2], 'ob')

    # draw the decision boundary: w0 + w1*x1 + w2*x2 = 0
    min_x = min(train_x[:, 1])[0, 0]
    max_x = max(train_x[:, 1])[0, 0]
    weights = weights.getA()  # convert the mat to an ndarray
    y_min_x = float(-weights[0] - weights[1] * min_x) / weights[2]
    y_max_x = float(-weights[0] - weights[1] * max_x) / weights[2]
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    plt.xlabel('X1'); plt.ylabel('X2')
    # show the figure
    plt.show()
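

# Minimal driver sketch (an illustrative assumption, not taken from the original file;
# the repository may ship its own loading/driver code). It assumes a whitespace-separated
# text file, e.g. 'testSet.txt', where each line holds two feature values and a 0/1 label.
def loadData(fileName='testSet.txt'):
    train_x = []
    train_y = []
    for line in open(fileName).readlines():
        lineArr = line.strip().split()
        # prepend the constant 1.0 bias term expected by trainLogRegres / showLogRegres
        train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
        train_y.append(float(lineArr[2]))
    return mat(train_x), mat(train_y).transpose()


if __name__ == '__main__':
    train_x, train_y = loadData()
    opts = {'alpha': 0.01, 'maxIter': 200, 'optimizeType': 'smoothStocGradDescent'}
    optimalWeights = trainLogRegres(train_x, train_y, opts)
    accuracy = testLogRegres(optimalWeights, train_x, train_y)
    print 'The classify accuracy is: %.3f%%' % (accuracy * 100)
    showLogRegres(optimalWeights, train_x, train_y)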