From 3c4fc66fd2a88785560dafa49fa88df2787e3301 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Fri, 7 Apr 2017 16:51:24 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=96=87=E4=BB=B6=E8=B7=AF?= =?UTF-8?q?=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{05.Logistic => 5.Logistic}/HorseColicTest.txt | 0 .../HorseColicTraining.txt | 0 input/{05.Logistic => 5.Logistic}/TestSet.txt | 0 .../horseColicTest2.txt | 0 .../horseColicTraining2.txt | 0 input/{08.Regression => 8.Regression}/abalone.txt | 0 input/{08.Regression => 8.Regression}/data.txt | 0 .../bikeSpeedVsIq_test.txt | 0 .../bikeSpeedVsIq_train.txt | 0 input/{09.RegTrees => 9.RegTrees}/data1.txt | 0 input/{09.RegTrees => 9.RegTrees}/data2.txt | 0 input/{09.RegTrees => 9.RegTrees}/data3.txt | 0 input/{09.RegTrees => 9.RegTrees}/data3test.txt | 0 input/{09.RegTrees => 9.RegTrees}/data4.txt | 0 input/{09.RegTrees => 9.RegTrees}/sine.txt | 0 src/python/5.Logistic/logistic.py | 2 +- src/python/7.AdaBoost/adaboost.py | 4 ++-- .../regression.py | 8 ++++---- src/python/9.RegTrees/regTrees.py | 14 +++++++------- 19 files changed, 14 insertions(+), 14 deletions(-) rename input/{05.Logistic => 5.Logistic}/HorseColicTest.txt (100%) rename input/{05.Logistic => 5.Logistic}/HorseColicTraining.txt (100%) rename input/{05.Logistic => 5.Logistic}/TestSet.txt (100%) rename input/{07.AdaBoost => 7.AdaBoost}/horseColicTest2.txt (100%) rename input/{07.AdaBoost => 7.AdaBoost}/horseColicTraining2.txt (100%) rename input/{08.Regression => 8.Regression}/abalone.txt (100%) rename input/{08.Regression => 8.Regression}/data.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/bikeSpeedVsIq_test.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/bikeSpeedVsIq_train.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/data1.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/data2.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/data3.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/data3test.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/data4.txt (100%) rename input/{09.RegTrees => 9.RegTrees}/sine.txt (100%) diff --git a/input/05.Logistic/HorseColicTest.txt b/input/5.Logistic/HorseColicTest.txt similarity index 100% rename from input/05.Logistic/HorseColicTest.txt rename to input/5.Logistic/HorseColicTest.txt diff --git a/input/05.Logistic/HorseColicTraining.txt b/input/5.Logistic/HorseColicTraining.txt similarity index 100% rename from input/05.Logistic/HorseColicTraining.txt rename to input/5.Logistic/HorseColicTraining.txt diff --git a/input/05.Logistic/TestSet.txt b/input/5.Logistic/TestSet.txt similarity index 100% rename from input/05.Logistic/TestSet.txt rename to input/5.Logistic/TestSet.txt diff --git a/input/07.AdaBoost/horseColicTest2.txt b/input/7.AdaBoost/horseColicTest2.txt similarity index 100% rename from input/07.AdaBoost/horseColicTest2.txt rename to input/7.AdaBoost/horseColicTest2.txt diff --git a/input/07.AdaBoost/horseColicTraining2.txt b/input/7.AdaBoost/horseColicTraining2.txt similarity index 100% rename from input/07.AdaBoost/horseColicTraining2.txt rename to input/7.AdaBoost/horseColicTraining2.txt diff --git a/input/08.Regression/abalone.txt b/input/8.Regression/abalone.txt similarity index 100% rename from input/08.Regression/abalone.txt rename to input/8.Regression/abalone.txt diff --git a/input/08.Regression/data.txt b/input/8.Regression/data.txt similarity index 100% rename from input/08.Regression/data.txt rename to input/8.Regression/data.txt diff --git a/input/09.RegTrees/bikeSpeedVsIq_test.txt b/input/9.RegTrees/bikeSpeedVsIq_test.txt similarity index 100% rename from input/09.RegTrees/bikeSpeedVsIq_test.txt rename to input/9.RegTrees/bikeSpeedVsIq_test.txt diff --git a/input/09.RegTrees/bikeSpeedVsIq_train.txt b/input/9.RegTrees/bikeSpeedVsIq_train.txt similarity index 100% rename from input/09.RegTrees/bikeSpeedVsIq_train.txt rename to input/9.RegTrees/bikeSpeedVsIq_train.txt diff --git a/input/09.RegTrees/data1.txt b/input/9.RegTrees/data1.txt similarity index 100% rename from input/09.RegTrees/data1.txt rename to input/9.RegTrees/data1.txt diff --git a/input/09.RegTrees/data2.txt b/input/9.RegTrees/data2.txt similarity index 100% rename from input/09.RegTrees/data2.txt rename to input/9.RegTrees/data2.txt diff --git a/input/09.RegTrees/data3.txt b/input/9.RegTrees/data3.txt similarity index 100% rename from input/09.RegTrees/data3.txt rename to input/9.RegTrees/data3.txt diff --git a/input/09.RegTrees/data3test.txt b/input/9.RegTrees/data3test.txt similarity index 100% rename from input/09.RegTrees/data3test.txt rename to input/9.RegTrees/data3test.txt diff --git a/input/09.RegTrees/data4.txt b/input/9.RegTrees/data4.txt similarity index 100% rename from input/09.RegTrees/data4.txt rename to input/9.RegTrees/data4.txt diff --git a/input/09.RegTrees/sine.txt b/input/9.RegTrees/sine.txt similarity index 100% rename from input/09.RegTrees/sine.txt rename to input/9.RegTrees/sine.txt diff --git a/src/python/5.Logistic/logistic.py b/src/python/5.Logistic/logistic.py index 3c1b9b8a..94501394 100644 --- a/src/python/5.Logistic/logistic.py +++ b/src/python/5.Logistic/logistic.py @@ -129,7 +129,7 @@ def plotBestFit(dataArr, labelMat, weights): def main(): # 1.收集并准备数据 - dataMat, labelMat = loadDataSet("input/05.Logistic/TestSet.txt") + dataMat, labelMat = loadDataSet("input/5.Logistic/TestSet.txt") # print dataMat, '---\n', labelMat # 2.训练模型, f(x)=a1*x1+b2*x2+..+nn*xn中 (a1,b2, .., nn).T的矩阵值 diff --git a/src/python/7.AdaBoost/adaboost.py b/src/python/7.AdaBoost/adaboost.py index 427559d4..115eee4b 100644 --- a/src/python/7.AdaBoost/adaboost.py +++ b/src/python/7.AdaBoost/adaboost.py @@ -290,13 +290,13 @@ if __name__ == "__main__": # # 马疝病数据集 # # 训练集合 - # dataArr, labelArr = loadDataSet("input/07.AdaBoost/horseColicTraining2.txt") + # dataArr, labelArr = loadDataSet("input/7.AdaBoost/horseColicTraining2.txt") # weakClassArr, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 40) # print weakClassArr, '\n-----\n', aggClassEst.T # # 计算ROC下面的AUC的面积大小 # plotROC(aggClassEst.T, labelArr) # # 测试集合 - # dataArrTest, labelArrTest = loadDataSet("input/07.AdaBoost/horseColicTest2.txt") + # dataArrTest, labelArrTest = loadDataSet("input/7.AdaBoost/horseColicTest2.txt") # m = shape(dataArrTest)[0] # predicting10 = adaClassify(dataArrTest, weakClassArr) # errArr = mat(ones((m, 1))) diff --git a/src/python/8.Predictive numerical data regression/regression.py b/src/python/8.Predictive numerical data regression/regression.py index 480cffba..41139a7c 100644 --- a/src/python/8.Predictive numerical data regression/regression.py +++ b/src/python/8.Predictive numerical data regression/regression.py @@ -233,7 +233,7 @@ def crossValidation(xArr,yArr,numVal=10): #test for standRegression def regression1(): - xArr, yArr = loadDataSet("testData/Regression_data.txt") + xArr, yArr = loadDataSet("input/8.Regression/data.txt") xMat = mat(xArr) yMat = mat(yArr) ws = standRegres(xArr, yArr) @@ -251,7 +251,7 @@ def regression1(): #test for LWLR def regression2(): - xArr, yArr = loadDataSet("input/08.Regression/data.txt") + xArr, yArr = loadDataSet("input/8.Regression/data.txt") yHat = lwlrTest(xArr, xArr, yArr, 0.003) xMat = mat(xArr) srtInd = xMat[:,1].argsort(0) #argsort()函数是将x中的元素从小到大排列,提取其对应的index(索引),然后输出 @@ -265,7 +265,7 @@ def regression2(): #test for ridgeRegression def regression3(): - abX,abY = loadDataSet("input/08.Regression/abalone.txt") + abX,abY = loadDataSet("input/8.Regression/abalone.txt") ridgeWeights = ridgeTest(abX, abY) fig = plt.figure() ax = fig.add_subplot(111) @@ -275,7 +275,7 @@ def regression3(): #test for stageWise def regression4(): - xArr,yArr=loadDataSet("input/08.Regression/abalone.txt") + xArr,yArr=loadDataSet("input/8.Regression/abalone.txt") stageWise(xArr,yArr,0.01,200) xMat = mat(xArr) yMat = mat(yArr).T diff --git a/src/python/9.RegTrees/regTrees.py b/src/python/9.RegTrees/regTrees.py index fec88808..7f210dfd 100644 --- a/src/python/9.RegTrees/regTrees.py +++ b/src/python/9.RegTrees/regTrees.py @@ -290,8 +290,8 @@ if __name__ == "__main__": # print mat0, '\n-----------\n', mat1 # # 回归树 - # myDat = loadDataSet('input/09.RegTrees/data1.txt') - # # myDat = loadDataSet('input/09.RegTrees/data2.txt') + # myDat = loadDataSet('input/9.RegTrees/data1.txt') + # # myDat = loadDataSet('input/9.RegTrees/data2.txt') # # print 'myDat=', myDat # myMat = mat(myDat) # # print 'myMat=', myMat @@ -299,13 +299,13 @@ if __name__ == "__main__": # print myTree # # 1. 预剪枝就是:提起设置最大误差数和最少元素数 - # myDat = loadDataSet('input/09.RegTrees/data3.txt') + # myDat = loadDataSet('input/9.RegTrees/data3.txt') # myMat = mat(myDat) # myTree = createTree(myMat, ops=(0, 1)) # print myTree # # 2. 后剪枝就是:通过测试数据,对预测模型进行合并判断 - # myDatTest = loadDataSet('input/09.RegTrees/data3test.txt') + # myDatTest = loadDataSet('input/9.RegTrees/data3test.txt') # myMat2Test = mat(myDatTest) # myFinalTree = prune(myTree, myMat2Test) # print '\n\n\n-------------------' @@ -313,14 +313,14 @@ if __name__ == "__main__": # # -------- # # 模型树求解 - # myDat = loadDataSet('input/09.RegTrees/data4.txt') + # myDat = loadDataSet('input/9.RegTrees/data4.txt') # myMat = mat(myDat) # myTree = createTree(myMat, modelLeaf, modelErr) # print myTree # 回归树 VS 模型树 VS 线性回归 - trainMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_train.txt')) - testMat = mat(loadDataSet('input/09.RegTrees/bikeSpeedVsIq_test.txt')) + trainMat = mat(loadDataSet('input/9.RegTrees/bikeSpeedVsIq_train.txt')) + testMat = mat(loadDataSet('input/9.RegTrees/bikeSpeedVsIq_test.txt')) # 回归树 myTree1 = createTree(trainMat, ops=(1, 20)) print myTree1