mirror of
https://github.com/apachecn/ailearning.git
synced 2026-02-12 14:55:51 +08:00
更新 9.树回归的注释
This commit is contained in:
@@ -103,6 +103,7 @@ def chooseBestSplit(dataSet, leafType=regLeaf, errType=regErr, ops=(1, 4)):
|
||||
bestS, bestIndex, bestValue = inf, 0, 0
|
||||
# 循环处理每一列对应的feature值
|
||||
for featIndex in range(n-1):
|
||||
# [0]表示这一列的[所有行],不要[0]就是一个array[[所有行]]
|
||||
for splitVal in set(dataSet[:, featIndex].T.tolist()[0]):
|
||||
# 对该列进行分组,然后组内的成员的val值进行 二元切分
|
||||
mat0, mat1 = binSplitDataSet(dataSet, featIndex, splitVal)
|
||||
@@ -236,7 +237,7 @@ def linearSolve(dataSet):
|
||||
# 如果矩阵的逆不存在,会造成程序异常
|
||||
if linalg.det(xTx) == 0.0:
|
||||
raise NameError('This matrix is singular, cannot do inverse,\ntry increasing the second value of ops')
|
||||
# 最小二乘法求最优解
|
||||
# 最小二乘法求最优解: w0*1+w1*x1=y
|
||||
ws = xTx.I * (X.T * Y)
|
||||
return ws, X, Y
|
||||
|
||||
@@ -291,7 +292,9 @@ if __name__ == "__main__":
|
||||
# # 回归树
|
||||
# myDat = loadDataSet('testData/RT_data1.txt')
|
||||
# # myDat = loadDataSet('testData/RT_data2.txt')
|
||||
# # print 'myDat=', myDat
|
||||
# myMat = mat(myDat)
|
||||
# # print 'myMat=', myMat
|
||||
# myTree = createTree(myMat)
|
||||
# print myTree
|
||||
|
||||
@@ -301,7 +304,7 @@ if __name__ == "__main__":
|
||||
# myTree = createTree(myMat, ops=(0, 1))
|
||||
# print myTree
|
||||
|
||||
# # 2.后剪枝就是:通过测试数据,对预测模型进行合并判断
|
||||
# # 2. 后剪枝就是:通过测试数据,对预测模型进行合并判断
|
||||
# myDatTest = loadDataSet('testData/RT_data3test.txt')
|
||||
# myMat2Test = mat(myDatTest)
|
||||
# myFinalTree = prune(myTree, myMat2Test)
|
||||
@@ -330,11 +333,11 @@ if __name__ == "__main__":
|
||||
print myTree2
|
||||
print "模型树:", corrcoef(yHat2, testMat[:, 1],rowvar=0)[0, 1]
|
||||
|
||||
# # 线性回归
|
||||
# ws, X, Y = linearSolve(trainMat)
|
||||
# print ws
|
||||
# m = len(testMat[:, 0])
|
||||
# yHat3 = mat(zeros((m, 1)))
|
||||
# for i in range(shape(testMat)[0]):
|
||||
# yHat3[i] = testMat[i, 0]*ws[1, 0] + ws[0, 0]
|
||||
# print "线性回归:", corrcoef(yHat3, testMat[:, 1],rowvar=0)[0, 1]
|
||||
# 线性回归
|
||||
ws, X, Y = linearSolve(trainMat)
|
||||
print ws
|
||||
m = len(testMat[:, 0])
|
||||
yHat3 = mat(zeros((m, 1)))
|
||||
for i in range(shape(testMat)[0]):
|
||||
yHat3[i] = testMat[i, 0]*ws[1, 0] + ws[0, 0]
|
||||
print "线性回归:", corrcoef(yHat3, testMat[:, 1],rowvar=0)[0, 1]
|
||||
|
||||
@@ -100,7 +100,6 @@ def main(root):
|
||||
# 退出按钮
|
||||
Button(root, text="退出", fg="black", command=quit).grid(row=1, column=2)
|
||||
|
||||
|
||||
# 创建一个画板 canvas
|
||||
reDraw.f = Figure(figsize=(5, 4), dpi=100)
|
||||
reDraw.canvas = FigureCanvasTkAgg(reDraw.f, master=root)
|
||||
|
||||
@@ -188,7 +188,7 @@ def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
|
||||
"""
|
||||
# H[0]是freqSet的元素组合的第一个元素
|
||||
m = len(H[0])
|
||||
# 判断,freqSet的长度是否>组合的长度+1, 避免过度匹配 例如:计算过一边{1,2,3} 和 {1, 2} {1, 3},就没必要再计算了 {1,2,3}和{1,2,3}的组合关系
|
||||
# 判断,freqSet的长度是否>组合的长度+1, 避免过度匹配 例如:计算过一遍{1,2,3} 和 {1, 2} {1, 3},就没必要再进一步合并来计算 {1,2,3}和{1,2,3}的组合关系
|
||||
if (len(freqSet) > (m + 1)):
|
||||
print 'freqSet******************', len(freqSet), m + 1, freqSet, H, H[0]
|
||||
# 合并数据集集合,组合为2/3/..n的集合
|
||||
|
||||
Reference in New Issue
Block a user