mirror of
https://github.com/apachecn/ailearning.git
synced 2026-07-01 02:36:12 +08:00
添加决策树,朴素贝叶斯和回归的sklearn版本,logistic回归的sklearn版本
This commit is contained in:
280
src/python/5.Logistic/sklearn_logisticRegression_demo.py
Normal file
280
src/python/5.Logistic/sklearn_logisticRegression_demo.py
Normal file
@@ -0,0 +1,280 @@
|
||||
#!/usr/bin/python
|
||||
# coding: utf8
|
||||
|
||||
'''
|
||||
Created on Oct 27, 2010
|
||||
Update on 2017-05-18
|
||||
Logistic Regression Working Module
|
||||
@author: 小瑶
|
||||
《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
|
||||
scikit-learn的例子地址:http://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
||||
'''
|
||||
|
||||
# L1 Penalty and Sparsity in Logistic Regression
# (example kept disabled inside a string literal; remove the surrounding
# triple quotes to run it)
'''
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

digits = datasets.load_digits()

X, y = digits.data, digits.target
X = StandardScaler().fit_transform(X)

# Turn the task into a binary one: label 1 when the target is above 4,
# label 0 otherwise.  The builtin int replaces the np.int alias, which is
# no longer available in current NumPy releases.
y = (y > 4).astype(int)


# Loop over the regularization parameter values
for i, C in enumerate((100, 1, 0.01)):
    # Loose tolerance to keep training time short.  The L1 model names its
    # solver explicitly because not every LogisticRegression solver
    # supports the L1 penalty.
    clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01,
                                   solver='liblinear')
    clf_l2_LR = LogisticRegression(C=C, penalty='l2', tol=0.01)
    clf_l1_LR.fit(X, y)
    clf_l2_LR.fit(X, y)

    coef_l1_LR = clf_l1_LR.coef_.ravel()
    coef_l2_LR = clf_l2_LR.coef_.ravel()

    # coef_l1_LR contains zeros due to the
    # L1 sparsity inducing norm

    # Percentage of coefficients that are exactly zero
    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100

    print("C=%.2f" % C)
    print("Sparsity with L1 penalty: %.2f%%" % sparsity_l1_LR)
    print("score with L1 penalty: %.4f" % clf_l1_LR.score(X, y))
    print("Sparsity with L2 penalty: %.2f%%" % sparsity_l2_LR)
    print("score with L2 penalty: %.4f" % clf_l2_LR.score(X, y))

    # One row per C value: L1 coefficients on the left, L2 on the right
    l1_plot = plt.subplot(3, 2, 2 * i + 1)
    l2_plot = plt.subplot(3, 2, 2 * (i + 1))
    if i == 0:
        l1_plot.set_title("L1 penalty")
        l2_plot.set_title("L2 penalty")

    # Show the absolute coefficients as an 8x8 image
    l1_plot.imshow(np.abs(coef_l1_LR.reshape(8, 8)), interpolation='nearest',
                   cmap='binary', vmax=1, vmin=0)
    l2_plot.imshow(np.abs(coef_l2_LR.reshape(8, 8)), interpolation='nearest',
                   cmap='binary', vmax=1, vmin=0)
    plt.text(-8, 3, "C = %.2f" % C)

    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l2_plot.set_xticks(())
    l2_plot.set_yticks(())

plt.show()
'''
|
||||
|
||||
# Regularization path of L1-penalized logistic regression
# (example kept disabled inside a string literal; remove the surrounding
# triple quotes to run it)
'''
print(__doc__)

from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn import datasets
from sklearn.svm import l1_min_c

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Drop every sample whose target is 2, leaving a two-class problem
X = X[y != 2]
y = y[y != 2]

# Subtract the per-column mean in place
X -= np.mean(X, 0)

# Candidate C values: the l1_min_c result scaled by a log-spaced grid
# running from 10**0 to 10**3
cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3)


print("Computing regularization path ...")
start = datetime.now()
# The solver is named explicitly because not every LogisticRegression
# solver supports the L1 penalty.
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6,
                                      solver='liblinear')
coefs_ = []
for c in cs:
    # Reuse one estimator, refitting it for each C and keeping a copy of
    # the flattened coefficients
    clf.set_params(C=c)
    clf.fit(X, y)
    coefs_.append(clf.coef_.ravel().copy())
print("This took ", datetime.now() - start)

# One row per C value, one column per coefficient
coefs_ = np.array(coefs_)
plt.plot(np.log10(cs), coefs_)
# The y limits are read here but not used further below
ymin, ymax = plt.ylim()
plt.xlabel('log(C)')
plt.ylabel('Coefficients')
plt.title('Logistic Regression Path')
plt.axis('tight')
plt.show()
'''
|
||||
|
||||
# Plot multinomial and One-vs-Rest Logistic Regression
# (example kept disabled inside a string literal; remove the surrounding
# triple quotes to run it)
'''
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression

# Make a 3-class dataset for classification
centers = [[-5, 0], [0, 1.5], [5, -1]]
X, y = make_blobs(n_samples=1000, centers=centers, random_state=40)
# Apply a fixed 2x2 linear map to the generated points
transformation = [[0.4, 0.2], [-0.4, 1.2]]
X = np.dot(X, transformation)

# NOTE(review): newer scikit-learn releases deprecate the multi_class
# argument -- confirm against the installed version before enabling.
for multi_class in ('multinomial', 'ovr'):
    clf = LogisticRegression(solver='sag', max_iter=100, random_state=42,
                             multi_class=multi_class).fit(X, y)

    # Print the training score
    print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))

    # Create a mesh to plot in
    h = .02  # step size in the mesh
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Plot the decision boundary. For that, we assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
    plt.axis('tight')

    # Plot the training points as well, one color per class
    colors = "bry"
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired)

    # Plot the three one-against-all classifiers
    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    coef = clf.coef_
    intercept = clf.intercept_

    def plot_hyperplane(c, color):
        # Draw, as a dashed line between xmin and xmax, the points where
        # coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] equals zero.
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
        plt.plot([xmin, xmax], [line(xmin), line(xmax)],
                 ls="--", color=color)

    for i, color in zip(clf.classes_, colors):
        plot_hyperplane(i, color)

plt.show()
'''
|
||||
|
||||
# Logistic Regression 3-class Classifier
# (example kept disabled inside a string literal; remove the surrounding
# triple quotes to run it)

'''
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets

# Import some data to play with
iris = datasets.load_iris()
# Only the first two features of each sample are used
X = iris.data[:, :2]
Y = iris.target

h = .02  # step size in the mesh

logreg = linear_model.LogisticRegression(C=1e5)

# Fit the LogisticRegression instance created above to the data
logreg.fit(X, Y)

# Plot the decision boundary. For that, we assign a color to each point in
# the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

# Plot the training points on top of the color plot
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# Clip the axes to the mesh extent and hide the ticks
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()
'''
|
||||
|
||||
# Logistic function
# Similar to the Sigmoid function covered in the earlier logistic
# regression material: a smooth stand-in for a step function.

print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model

# Synthetic data: n_samples normal draws with a fixed seed.  Positive draws
# get label 1.0 and are stretched by a factor of 4; noise scaled by 0.3 is
# then added to every sample.
xmin, xmax = -5, 5
n_samples = 100
np.random.seed(0)
X = np.random.normal(size=n_samples)
# The builtin float replaces the np.float alias, which is no longer
# available in current NumPy releases.
y = (X > 0).astype(float)
X[X > 0] *= 4
X += .3 * np.random.normal(size=n_samples)

# Reshape to a single-feature column, as fit() below is given a 2-D array
X = X[:, np.newaxis]
# Run the classifier
clf = linear_model.LogisticRegression(C=1e5)
clf.fit(X, y)

# Plot the result: the raw samples first
plt.figure(1, figsize=(4, 3))
plt.clf()
plt.scatter(X.ravel(), y, color='black', zorder=20)
# Evenly spaced inputs at which the fitted curves are drawn
X_test = np.linspace(-5, 10, 300)
|
||||
|
||||
|
||||
def model(x):
    """Evaluate the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
|
||||
# Logistic curve: the sigmoid of the fitted linear score at each X_test
# point.  Despite its name, `loss` holds the values of that curve.
linear_score = X_test * clf.coef_ + clf.intercept_
loss = model(linear_score).ravel()
plt.plot(X_test, loss, color='red', linewidth=3)

# Ordinary least squares fit on the same data, drawn for comparison
ols = linear_model.LinearRegression()
ols.fit(X, y)
ols_line = ols.coef_ * X_test + ols.intercept_
plt.plot(X_test, ols_line, linewidth=1)
# Horizontal reference line at y = 0.5
plt.axhline(.5, color='.5')

# Axis labels, ticks and limits
plt.xlabel('X')
plt.ylabel('y')
plt.xticks(range(-5, 10))
plt.yticks([0, 0.5, 1])
plt.xlim(-4, 10)
plt.ylim(-.25, 1.25)
legend_labels = ('Logistic Regression Model', 'Linear Regression Model')
plt.legend(legend_labels, loc="lower right", fontsize='small')
plt.show()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user