FIX: FILE MOVE BUG

This commit is contained in:
sheepmen
2017-03-25 14:17:59 +08:00
parent 6000e5f36c
commit 9aad9c3b0c

12
src/python/03.DecisionTree/DecisionTree.py Normal file → Executable file
View File

@@ -10,7 +10,7 @@ Decision Tree Source Code for Machine Learning in Action Ch. 3
print(__doc__)
import operator
from math import log
import DecisionTreePlot as dtPlot
import decisionTreePlot as dtPlot
def createDataSet():
@@ -116,24 +116,14 @@ def chooseBestFeatureToSplit(dataSet):
# get a set of unique values
# 获取剔重后的集合
uniqueVals = set(featList)
print('uniqueVals:'+str(uniqueVals))
# 创建一个临时的信息熵
newEntropy = 0.0
# 遍历某一列的value集合计算该列的信息熵
for value in uniqueVals:
subDataSet = splitDataSet(dataSet, i, value)
print (subDataSet)
prob = len(subDataSet)/float(len(dataSet))
<<<<<<< Updated upstream
newEntropy += prob * calcShannonEnt(subDataSet)
# gain[信息增益]: 划分数据集前后信息熵的变化 (baseEntropy - newEntropy), 选取信息增益最大的特征
=======
text = calcShannonEnt(subDataSet)
print('----------'+str(text))
newEntropy += prob * text
# gain[信息增益] 值越大,意味着该分类提供的信息量越大,该特征对分类的不确定程度越小
# 也就是说: 列进行 group 分组后对应的类别越多, 信息量越大, 那么香农熵越小, 信息增益就越大, 所以 gain 越大
>>>>>>> Stashed changes
infoGain = baseEntropy - newEntropy
print 'infoGain=', infoGain, 'bestFeature=', i, baseEntropy, newEntropy
if (infoGain > bestInfoGain):