添加决策树，朴素贝叶斯和回归的sklearn版本，logistic回归的sklearn版本

2026-07-01 02:36:12 +08:00 · 2017-07-04 19:59:10 +08:00
parent 3a42d3a7d2
commit 8de61578d9
4 changed files with 381 additions and 97 deletions
--- a/src/python/5.Logistic/sklearn_logisticRegression_demo.py
+++ b/src/python/5.Logistic/sklearn_logisticRegression_demo.py
@@ -0,0 +1,280 @@
+#!/usr/bin/python
+# coding: utf8
+
+'''
+Created on Oct 27, 2010
+Update  on 2017-05-18
+Logistic Regression Working Module
+@author: 小瑶
+《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
+scikit-learn的例子地址：http://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+'''
+
+# 逻辑回归中的 L1 惩罚和稀疏性 L1 Penalty and Sparsity in Logistic Regression
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.linear_model import LogisticRegression
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+
+digits = datasets.load_digits()
+
+X, y = digits.data, digits.target
+X = StandardScaler().fit_transform(X)
+
+# 将大小数字分类为小
+y = (y > 4).astype(np.int)
+
+
+# 设置正则化参数
+for i, C in enumerate((100, 1, 0.01)):
+    # 减少训练时间短的容忍度
+    clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
+    clf_l2_LR = LogisticRegression(C=C, penalty='l2', tol=0.01)
+    clf_l1_LR.fit(X, y)
+    clf_l2_LR.fit(X, y)
+
+    coef_l1_LR = clf_l1_LR.coef_.ravel()
+    coef_l2_LR = clf_l2_LR.coef_.ravel()
+
+    # coef_l1_LR contains zeros due to the
+    # L1 sparsity inducing norm
+    # 由于 L1 稀疏诱导规范，coef_l1_LR 包含零
+
+    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
+    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
+
+    print("C=%.2f" % C)
+    print("Sparsity with L1 penalty: %.2f%%" % sparsity_l1_LR)
+    print("score with L1 penalty: %.4f" % clf_l1_LR.score(X, y))
+    print("Sparsity with L2 penalty: %.2f%%" % sparsity_l2_LR)
+    print("score with L2 penalty: %.4f" % clf_l2_LR.score(X, y))
+
+    l1_plot = plt.subplot(3, 2, 2 * i + 1)
+    l2_plot = plt.subplot(3, 2, 2 * (i + 1))
+    if i == 0:
+        l1_plot.set_title("L1 penalty")
+        l2_plot.set_title("L2 penalty")
+
+    l1_plot.imshow(np.abs(coef_l1_LR.reshape(8, 8)), interpolation='nearest',
+                   cmap='binary', vmax=1, vmin=0)
+    l2_plot.imshow(np.abs(coef_l2_LR.reshape(8, 8)), interpolation='nearest',
+                   cmap='binary', vmax=1, vmin=0)
+    plt.text(-8, 3, "C = %.2f" % C)
+
+    l1_plot.set_xticks(())
+    l1_plot.set_yticks(())
+    l2_plot.set_xticks(())
+    l2_plot.set_yticks(())
+
+plt.show()
+'''
+
+# 具有 L1-逻辑回归的路径
+'''
+print(__doc__)
+
+from datetime import datetime
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import linear_model
+from sklearn import datasets
+from sklearn.svm import l1_min_c
+
+iris = datasets.load_iris()
+X = iris.data
+y = iris.target
+
+X = X[y != 2]
+y = y[y != 2]
+
+X -= np.mean(X, 0)
+
+cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3)
+
+
+print("Computing regularization path ...")
+start = datetime.now()
+clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
+coefs_ = []
+for c in cs:
+    clf.set_params(C=c)
+    clf.fit(X, y)
+    coefs_.append(clf.coef_.ravel().copy())
+print("This took ", datetime.now() - start)
+
+coefs_ = np.array(coefs_)
+plt.plot(np.log10(cs), coefs_)
+ymin, ymax = plt.ylim()
+plt.xlabel('log(C)')
+plt.ylabel('Coefficients')
+plt.title('Logistic Regression Path')
+plt.axis('tight')
+plt.show()
+'''
+
+# 绘制多项式和一对其余（One-vs-Rest）的逻辑回归 Plot multinomial and One-vs-Rest Logistic Regression
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.datasets import make_blobs
+from sklearn.linear_model import LogisticRegression
+
+# 制作 3 类数据集进行分类
+centers = [[-5, 0], [0, 1.5], [5, -1]]
+X, y = make_blobs(n_samples=1000, centers=centers, random_state=40)
+transformation = [[0.4, 0.2], [-0.4, 1.2]]
+X = np.dot(X, transformation)
+
+for multi_class in ('multinomial', 'ovr'):
+    clf = LogisticRegression(solver='sag', max_iter=100, random_state=42,
+                             multi_class=multi_class).fit(X, y)
+
+    # 打印训练分数
+    print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
+
+    # 创建一个网格来绘制
+    h = .02  # 网格中的步长
+    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
+    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                         np.arange(y_min, y_max, h))
+
+    # 绘制决策边界。为此，我们将为网格 [x_min, x_max]x[y_min, y_max]中的每个点分配一个颜色。
+    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
+    # 将结果放入彩色图
+    Z = Z.reshape(xx.shape)
+    plt.figure()
+    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
+    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
+    plt.axis('tight')
+
+    # 将训练点也绘制进入
+    colors = "bry"
+    for i, color in zip(clf.classes_, colors):
+        idx = np.where(y == i)
+        plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired)
+
+    # 绘制三个一对数分类器
+    xmin, xmax = plt.xlim()
+    ymin, ymax = plt.ylim()
+    coef = clf.coef_
+    intercept = clf.intercept_
+
+    def plot_hyperplane(c, color):
+        def line(x0):
+            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
+        plt.plot([xmin, xmax], [line(xmin), line(xmax)],
+                 ls="--", color=color)
+
+    for i, color in zip(clf.classes_, colors):
+        plot_hyperplane(i, color)
+
+plt.show()
+'''
+
+# Logistic Regression 3-class Classifier 逻辑回归 3-类 分类器 
+
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import linear_model, datasets
+
+# 引入一些数据来玩
+iris = datasets.load_iris()
+# 我们只采用样本数据的前两个feature
+X = iris.data[:, :2]  
+Y = iris.target
+
+h = .02  # 网格中的步长
+
+logreg = linear_model.LogisticRegression(C=1e5)
+
+# 我们创建了一个 Neighbours Classifier 的实例，并拟合数据。
+logreg.fit(X, Y)
+
+# 绘制决策边界。为此我们将为网格 [x_min, x_max]x[y_min, y_max] 中的每个点分配一个颜色。
+x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
+y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
+xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
+Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
+
+# 将结果放入彩色图中
+Z = Z.reshape(xx.shape)
+plt.figure(1, figsize=(4, 3))
+plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
+
+# 将训练点也同样放入彩色图中
+plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
+plt.xlabel('Sepal length')
+plt.ylabel('Sepal width')
+
+plt.xlim(xx.min(), xx.max())
+plt.ylim(yy.min(), yy.max())
+plt.xticks(())
+plt.yticks(())
+
+plt.show()
+'''
+
+# Logistic function 逻辑回归函数
+# 这个类似于咱们之前讲解 logistic 回归的 Sigmoid 函数，模拟的阶跃函数
+
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import linear_model
+
+# Synthetic 1-D data set: n_samples standard-normal draws, each labelled
+# 1.0 if the clean draw is positive and 0.0 otherwise.
+# xmin/xmax are not referenced by the rest of the visible script; they are
+# kept so the module namespace is unchanged.
+xmin, xmax = -5, 5
+n_samples = 100
+np.random.seed(0)  # fixed seed so every run produces the same figure
+X = np.random.normal(size=n_samples)
+# Builtin float instead of np.float: the alias was deprecated in NumPy 1.20
+# and removed in 1.24, where it raises AttributeError. It was only ever an
+# alias for the builtin, so the resulting dtype (float64) is the same.
+y = (X > 0).astype(float)
+X[X > 0] *= 4  # stretch the positive samples (labels were taken first)
+X += .3 * np.random.normal(size=n_samples)  # additive Gaussian noise, std 0.3
+
+# Turn the 1-D sample vector into a single-feature column, shape (n_samples, 1).
+X = X[:, np.newaxis]
+# Fit the classifier. C is scikit-learn's inverse regularization strength,
+# so the large value 1e5 leaves the fit almost unregularized.
+clf = linear_model.LogisticRegression(C=1e5)
+clf.fit(X, y)
+
+# Plot the results: select figure 1, clear whatever it held, and draw the
+# raw samples (X flattened back to 1-D) against their labels.
+plt.figure(1, figsize=(4, 3))
+plt.clf()
+plt.scatter(X.ravel(), y, color='black', zorder=20)
+# 300 evenly spaced points on [-5, 10] at which the fitted curves are drawn.
+X_test = np.linspace(-5, 10, 300)
+
+
+def model(x):
+    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.
+
+    The exponential is computed with ``np.exp``, so *x* may be a scalar or
+    a NumPy array; arrays are processed element-wise.
+    """
+    denominator = 1 + np.exp(-x)
+    return 1 / denominator
+# Logistic curve: the sigmoid of the fitted linear score coef * x + intercept.
+# ravel() flattens the broadcast result back to 1-D so it lines up with X_test
+# (clf.coef_ is 2-D per scikit-learn's API -- presumably (1, 1) here).
+# NOTE: despite its name, `loss` holds the curve values, not a loss.
+loss = model(X_test * clf.coef_ + clf.intercept_).ravel()
+plt.plot(X_test, loss, color='red', linewidth=3,
+         label='Logistic Regression Model')
+
+# Ordinary least-squares line fitted to the same data, for comparison.
+ols = linear_model.LinearRegression()
+ols.fit(X, y)
+plt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1,
+         label='Linear Regression Model')
+plt.axhline(.5, color='.5')  # grey horizontal reference line at y = 0.5
+
+plt.ylabel('y')
+plt.xlabel('X')
+plt.xticks(range(-5, 10))
+plt.yticks([0, 0.5, 1])
+plt.ylim(-.25, 1.25)
+plt.xlim(-4, 10)
+# The labels are attached to the two curves via label= above rather than
+# passed positionally to legend(): positional labels are matched to artists
+# by order only, so they can land on the scatter points or the reference
+# line instead of the intended curves.
+plt.legend(loc="lower right", fontsize='small')
+plt.show()
+
+