更新决策树的内容，做ppt

2026-02-13 07:15:26 +08:00 · 2017-03-13 20:47:12 +08:00
parent b237c0fd2a
commit b587a8eed2
6 changed files with 60 additions and 8 deletions
--- a/src/python/03.DecisionTree/DecisionTree.py
+++ b/src/python/03.DecisionTree/DecisionTree.py
@@ -7,9 +7,9 @@ Update on 2017-02-27
 Decision Tree Source Code for Machine Learning in Action Ch. 3
 @author: Peter Harrington/jiangzhonglian
 '''
-from math import log
 import operator
-import DecisionTreePlot as dtPlot
+from math import log
+import decisionTreePlot as dtPlot


 def createDataSet():
@@ -130,7 +130,9 @@ def chooseBestFeatureToSplit(dataSet):
            prob = len(subDataSet)/float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)
        # gain[信息增益] 值越大，意味着该分类提供的信息量越大，该特征对分类的不确定程度越小
+        # 也就是说：按该列进行 group 分组后，对应的类别越多，信息量越大，分组后的加权香农熵（newEntropy）就越小，信息增益（baseEntropy - newEntropy）就越大，所以 gain 越大
        infoGain = baseEntropy - newEntropy
+        # print 'infoGain=', infoGain, 'bestFeature=', i
        if (infoGain > bestInfoGain):
            bestInfoGain = infoGain
            bestFeature = i