Mirror of https://github.com/apachecn/ailearning.git
Synced 2026-02-11 14:26:04 +08:00
FIX: FILE MOVE BUG
This commit is contained in branch(es):
12
src/python/03.DecisionTree/DecisionTree.py
Normal file → Executable file
12
src/python/03.DecisionTree/DecisionTree.py
Normal file → Executable file
@@ -10,7 +10,7 @@ Decision Tree Source Code for Machine Learning in Action Ch. 3
|
||||
print(__doc__)
|
||||
import operator
|
||||
from math import log
|
||||
import DecisionTreePlot as dtPlot
|
||||
import decisionTreePlot as dtPlot
|
||||
|
||||
|
||||
def createDataSet():
|
||||
@@ -116,24 +116,14 @@ def chooseBestFeatureToSplit(dataSet):
|
||||
# get a set of unique values
|
||||
# 获取剔重后的集合
|
||||
uniqueVals = set(featList)
|
||||
print('uniqueVals:'+str(uniqueVals))
|
||||
# 创建一个临时的信息熵
|
||||
newEntropy = 0.0
|
||||
# 遍历某一列的value集合,计算该列的信息熵
|
||||
for value in uniqueVals:
|
||||
subDataSet = splitDataSet(dataSet, i, value)
|
||||
print (subDataSet)
|
||||
prob = len(subDataSet)/float(len(dataSet))
|
||||
<<<<<<< Updated upstream
|
||||
newEntropy += prob * calcShannonEnt(subDataSet)
|
||||
# gain[信息增益]: 划分数据集前后的信息变化, 获取信息熵最大的值
|
||||
=======
|
||||
text = calcShannonEnt(subDataSet)
|
||||
print('----------'+str(text))
|
||||
newEntropy += prob * text
|
||||
# gain[信息增益] 值越大,意味着该分类提供的信息量越大,该特征对分类的不确定程度越小
|
||||
# 也就说: 列进行group分组后,对应的类别越多,信息量越大,那么香农熵越小,那么信息增益就越大,所以gain越大
|
||||
>>>>>>> Stashed changes
|
||||
infoGain = baseEntropy - newEntropy
|
||||
print 'infoGain=', infoGain, 'bestFeature=', i, baseEntropy, newEntropy
|
||||
if (infoGain > bestInfoGain):
|
||||
|
||||
Reference in New Issue
Block a user