更新决策树的内容,做ppt

This commit is contained in:
jiangzhonglian
2017-03-13 20:47:12 +08:00
parent b237c0fd2a
commit b587a8eed2
6 changed files with 60 additions and 8 deletions

View File

@@ -51,7 +51,7 @@ def predict_train(x_train, y_train):
return y_pre, clf
def show_precision_recall(x, clf, y_train, y_pre):
def show_precision_recall(x, y, clf, y_train, y_pre):
'''
准确率与召回率
参考链接: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_curve.html#sklearn.metrics.precision_recall_curve
@@ -110,7 +110,7 @@ if __name__ == '__main__':
y_pre, clf = predict_train(x_train, y_train)
# 展现 准确率与召回率
show_precision_recall(x, clf, y_train, y_pre)
show_precision_recall(x, y, clf, y_train, y_pre)
# 可视化输出
show_pdf(clf)

View File

@@ -7,9 +7,9 @@ Update on 2017-02-27
Decision Tree Source Code for Machine Learning in Action Ch. 3
@author: Peter Harrington/jiangzhonglian
'''
from math import log
import operator
import DecisionTreePlot as dtPlot
from math import log
import decisionTreePlot as dtPlot
def createDataSet():
@@ -130,7 +130,9 @@ def chooseBestFeatureToSplit(dataSet):
prob = len(subDataSet)/float(len(dataSet))
newEntropy += prob * calcShannonEnt(subDataSet)
# gain[信息增益] 值越大,意味着该分类提供的信息量越大,该特征对分类的不确定程度越小
# 也就说: 列进行group分组后对应的类别越多信息量越大那么香农熵越小那么信息增益就越大所以gain越大
infoGain = baseEntropy - newEntropy
# print 'infoGain=', infoGain, 'bestFeature=', i
if (infoGain > bestInfoGain):
bestInfoGain = infoGain
bestFeature = i

View File

@@ -128,5 +128,5 @@ def retrieveTree(i):
return listOfTrees[i]
myTree = retrieveTree(0)
myTree = retrieveTree(1)
createPlot(myTree)

View File

@@ -0,0 +1,50 @@
#!/usr/bin/python
# coding:utf8
'''
Created on 2017-03-10
Update on 2017-03-10
author: jiangzhonglian
content: 回归树
'''
print(__doc__)
# Import the necessary modules and libraries
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt
# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
print X, '\n\n\n-----------\n\n\n', y
y[::5] += 3 * (0.5 - rng.rand(16))
# Fit regression model
regr_1 = DecisionTreeRegressor(max_depth=2, min_samples_leaf=5)
regr_2 = DecisionTreeRegressor(max_depth=5, min_samples_leaf=5)
regr_1.fit(X, y)
regr_2.fit(X, y)
# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)
# Plot the results
plt.figure()
plt.scatter(X, y, c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()

View File

@@ -7,14 +7,14 @@ Update on 2017-03-08
Tree-Based Regression Methods Source Code for Machine Learning in Action Ch. 9
@author: jiangzhonglian
'''
import regTrees
from Tkinter import *
from numpy import *
import regTrees
import matplotlib
matplotlib.use('TkAgg')
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
matplotlib.use('TkAgg')
def test_widget_text(root):