diff --git a/src/python/09.RegTrees/regTrees.py b/src/python/09.RegTrees/regTrees.py index c6077e0f..b621c1cb 100644 --- a/src/python/09.RegTrees/regTrees.py +++ b/src/python/09.RegTrees/regTrees.py @@ -103,6 +103,7 @@ def chooseBestSplit(dataSet, leafType=regLeaf, errType=regErr, ops=(1, 4)): bestS, bestIndex, bestValue = inf, 0, 0 # 循环处理每一列对应的feature值 for featIndex in range(n-1): + # [0]表示这一列的[所有行],不要[0]就是一个array[[所有行]] for splitVal in set(dataSet[:, featIndex].T.tolist()[0]): # 对该列进行分组,然后组内的成员的val值进行 二元切分 mat0, mat1 = binSplitDataSet(dataSet, featIndex, splitVal) @@ -236,7 +237,7 @@ def linearSolve(dataSet): # 如果矩阵的逆不存在,会造成程序异常 if linalg.det(xTx) == 0.0: raise NameError('This matrix is singular, cannot do inverse,\ntry increasing the second value of ops') - # 最小二乘法求最优解 + # 最小二乘法求最优解: w0*1+w1*x1=y ws = xTx.I * (X.T * Y) return ws, X, Y @@ -291,7 +292,9 @@ if __name__ == "__main__": # # 回归树 # myDat = loadDataSet('testData/RT_data1.txt') # # myDat = loadDataSet('testData/RT_data2.txt') + # # print 'myDat=', myDat # myMat = mat(myDat) + # # print 'myMat=', myMat # myTree = createTree(myMat) # print myTree @@ -301,7 +304,7 @@ if __name__ == "__main__": # myTree = createTree(myMat, ops=(0, 1)) # print myTree - # # 2.后剪枝就是:通过测试数据,对预测模型进行合并判断 + # # 2. 后剪枝就是:通过测试数据,对预测模型进行合并判断 # myDatTest = loadDataSet('testData/RT_data3test.txt') # myMat2Test = mat(myDatTest) # myFinalTree = prune(myTree, myMat2Test) @@ -330,11 +333,11 @@ if __name__ == "__main__": print myTree2 print "模型树:", corrcoef(yHat2, testMat[:, 1],rowvar=0)[0, 1] - # # 线性回归 - # ws, X, Y = linearSolve(trainMat) - # print ws - # m = len(testMat[:, 0]) - # yHat3 = mat(zeros((m, 1))) - # for i in range(shape(testMat)[0]): - # yHat3[i] = testMat[i, 0]*ws[1, 0] + ws[0, 0] - # print "线性回归:", corrcoef(yHat3, testMat[:, 1],rowvar=0)[0, 1] + # 线性回归 + ws, X, Y = linearSolve(trainMat) + print ws + m = len(testMat[:, 0]) + yHat3 = mat(zeros((m, 1))) + for i in range(shape(testMat)[0]): + yHat3[i] = testMat[i, 0]*ws[1, 0] + ws[0, 0] + print "线性回归:", corrcoef(yHat3, testMat[:, 1],rowvar=0)[0, 1] diff --git a/src/python/09.RegTrees/treeExplore.py b/src/python/09.RegTrees/treeExplore.py index a426a342..33665a6a 100644 --- a/src/python/09.RegTrees/treeExplore.py +++ b/src/python/09.RegTrees/treeExplore.py @@ -100,7 +100,6 @@ def main(root): # 退出按钮 Button(root, text="退出", fg="black", command=quit).grid(row=1, column=2) - # 创建一个画板 canvas reDraw.f = Figure(figsize=(5, 4), dpi=100) reDraw.canvas = FigureCanvasTkAgg(reDraw.f, master=root) diff --git a/src/python/11.Apriori/apriori.py b/src/python/11.Apriori/apriori.py index 3ba163d0..0babab1f 100644 --- a/src/python/11.Apriori/apriori.py +++ b/src/python/11.Apriori/apriori.py @@ -188,7 +188,7 @@ def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7): """ # H[0]是freqSet的元素组合的第一个元素 m = len(H[0]) - # 判断,freqSet的长度是否>组合的长度+1, 避免过度匹配 例如:计算过一边{1,2,3} 和 {1, 2} {1, 3},就没必要再计算了 {1,2,3}和{1,2,3}的组合关系 + # 判断,freqSet的长度是否>组合的长度+1, 避免过度匹配 例如:计算过一边{1,2,3} 和 {1, 2} {1, 3},就没必要再计算了进一步合并来计算 {1,2,3}和{1,2,3}的组合关系 if (len(freqSet) > (m + 1)): print 'freqSet******************', len(freqSet), m + 1, freqSet, H, H[0] # 合并数据集集合,组合为2/3/..n的集合