更新文件路径到input目录下

This commit is contained in:
jiangzhonglian
2017-04-07 16:01:04 +08:00
parent 1eecf70e5c
commit 4df7935b5d
15 changed files with 45 additions and 45 deletions

View File

@@ -12,7 +12,7 @@ def createDataSet():
''' 数据读入 '''
data = []
labels = []
-with open("testData/DT_data.txt") as ifile:
+with open("input/03.DecisionTree/data.txt") as ifile:
for line in ifile:
# 特征: 身高 体重 label 胖瘦
tokens = line.strip().split(' ')

View File

@@ -258,47 +258,47 @@ def plotROC(predStrengths, classLabels):
if __name__ == "__main__":
-# 我们要将5个点进行分类
-dataArr, labelArr = loadSimpData()
-print 'dataArr', dataArr, 'labelArr', labelArr
+# # 我们要将5个点进行分类
+# dataArr, labelArr = loadSimpData()
+# print 'dataArr', dataArr, 'labelArr', labelArr
-# D表示最初值对1进行均分为5份平均每一个初始的概率都为0.2
-# D的目的是为了计算错误概率 weightedError = D.T*errArr
-D = mat(ones((5, 1))/5)
-print 'D=', D.T
+# # D表示最初值对1进行均分为5份平均每一个初始的概率都为0.2
+# # D的目的是为了计算错误概率 weightedError = D.T*errArr
+# D = mat(ones((5, 1))/5)
+# print 'D=', D.T
-# bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D)
-# print 'bestStump=', bestStump
-# print 'minError=', minError
-# print 'bestClasEst=', bestClasEst.T
+# # bestStump, minError, bestClasEst = buildStump(dataArr, labelArr, D)
+# # print 'bestStump=', bestStump
+# # print 'minError=', minError
+# # print 'bestClasEst=', bestClasEst.T
-# 分类器weakClassArr
-# 历史累计的分类结果集
-weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9)
-print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T
+# # 分类器weakClassArr
+# # 历史累计的分类结果集
+# weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 9)
+# print '\nweakClassArr=', weakClassArr, '\naggClassEst=', aggClassEst.T
-"""
-发现:
-分类的权重值最大的值为alpha的加和最小值为-最大值
-特征的权重值如果一个值误判的几率越小那么D的特征权重越少
-"""
+# """
+# 发现:
+# 分类的权重值最大的值为alpha的加和最小值为-最大值
+# 特征的权重值如果一个值误判的几率越小那么D的特征权重越少
+# """
-# 测试数据的分类结果, 观测aggClassEst分类的最终权重
-print adaClassify([0, 0], weakClassArr).T
-print adaClassify([[5, 5], [0, 0]], weakClassArr).T
+# # 测试数据的分类结果, 观测aggClassEst分类的最终权重
+# print adaClassify([0, 0], weakClassArr).T
+# print adaClassify([[5, 5], [0, 0]], weakClassArr).T
-# # 马疝病数据集
-# # 训练集合
-# dataArr, labelArr = loadDataSet("testData/AB_horseColicTraining2.txt")
-# weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40)
-# print weakClassArr, '\n-----\n', aggClassEst.T
-# # 计算ROC下面的AUC的面积大小
-# plotROC(aggClassEst.T, labelArr)
-# # 测试集合
-# dataArrTest, labelArrTest = loadDataSet("testData/AB_horseColicTest2.txt")
-# m = shape(dataArrTest)[0]
-# predicting10 = adaClassify(dataArrTest, weakClassArr)
-# errArr = mat(ones((m, 1)))
-# # 测试:计算总样本数,错误样本数,错误率
-# print m, errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m
+# 马疝病数据集
+# 训练集合
+dataArr, labelArr = loadDataSet("input/07.AdaBoost/horseColicTraining2.txt")
+weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40)
+print weakClassArr, '\n-----\n', aggClassEst.T
+# 计算ROC下面的AUC的面积大小
+plotROC(aggClassEst.T, labelArr)
+# 测试集合
+dataArrTest, labelArrTest = loadDataSet("input/07.AdaBoost/horseColicTest2.txt")
+m = shape(dataArrTest)[0]
+predicting10 = adaClassify(dataArrTest, weakClassArr)
+errArr = mat(ones((m, 1)))
+# 测试:计算总样本数,错误样本数,错误率
+print m, errArr[predicting10 != mat(labelArrTest).T].sum(), errArr[predicting10 != mat(labelArrTest).T].sum()/m

View File

@@ -290,8 +290,8 @@ if __name__ == "__main__":
# print mat0, '\n-----------\n', mat1
# # 回归树
-# myDat = loadDataSet('testData/RT_data1.txt')
-# # myDat = loadDataSet('testData/RT_data2.txt')
+# myDat = loadDataSet('input/09.RegTrees/data1.txt')
+# # myDat = loadDataSet('input/09.RegTrees/data2.txt')
# # print 'myDat=', myDat
# myMat = mat(myDat)
# # print 'myMat=', myMat
@@ -299,13 +299,13 @@ if __name__ == "__main__":
# print myTree
# # 1. 预剪枝就是:提起设置最大误差数和最少元素数
-# myDat = loadDataSet('testData/RT_data3.txt')
+# myDat = loadDataSet('input/09.RegTrees/data3.txt')
# myMat = mat(myDat)
# myTree = createTree(myMat, ops=(0, 1))
# print myTree
# # 2. 后剪枝就是:通过测试数据,对预测模型进行合并判断
-# myDatTest = loadDataSet('testData/RT_data3test.txt')
+# myDatTest = loadDataSet('input/09.RegTrees/data3test.txt')
# myMat2Test = mat(myDatTest)
# myFinalTree = prune(myTree, myMat2Test)
# print '\n\n\n-------------------'
@@ -313,14 +313,14 @@ if __name__ == "__main__":
# # --------
# # 模型树求解
-# myDat = loadDataSet('testData/RT_data4.txt')
+# myDat = loadDataSet('input/09.RegTrees/data4.txt')
# myMat = mat(myDat)
# myTree = createTree(myMat, modelLeaf, modelErr)
# print myTree
# 回归树 VS 模型树 VS 线性回归
-trainMat = mat(loadDataSet('testData/RT_bikeSpeedVsIq_train.txt'))
-testMat = mat(loadDataSet('testData/RT_bikeSpeedVsIq_test.txt'))
+trainMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_train.txt'))
+testMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_test.txt'))
# 回归树
myTree1 = createTree(trainMat, ops=(1, 20))
print myTree1