diff --git a/src/python/3.DecisionTree/skelearn_dts_regressor_demo.py b/src/python/3.DecisionTree/skelearn_dts_regressor_demo.py
index 4aaa2cc0..a87999f4 100644
--- a/src/python/3.DecisionTree/skelearn_dts_regressor_demo.py
+++ b/src/python/3.DecisionTree/skelearn_dts_regressor_demo.py
@@ -19,36 +19,38 @@ import matplotlib.pyplot as plt
 # 创建一个随机的数据集
 # 参考 https://docs.scipy.org/doc/numpy-1.6.0/reference/generated/numpy.random.mtrand.RandomState.html
 rng = np.random.RandomState(1)
-print 'lalalalala===', rng
+# print 'lalalalala===', rng
 # rand() 是给定形状的随机值，rng.rand(80, 1)即矩阵的形状是 80行，1列
 # sort() 
 X = np.sort(5 * rng.rand(80, 1), axis=0)
-print 'X=', X
+# print 'X=', X
 y = np.sin(X).ravel()
-print 'y=', y
+# print 'y=', y
 y[::5] += 3 * (0.5 - rng.rand(16))
-print 'yyy=', y
+# print 'yyy=', y
 
 # 拟合回归模型
-regr_1 = DecisionTreeRegressor(max_depth=2)
+# regr_1 = DecisionTreeRegressor(max_depth=2)
+# Keep max_depth=5 and additionally set min_samples_leaf=6; this improves the fit further.
 regr_2 = DecisionTreeRegressor(max_depth=5)
-regr_3 = DecisionTreeRegressor(max_depth=3)
-regr_1.fit(X, y)
+regr_2 = DecisionTreeRegressor(max_depth=5, min_samples_leaf=6)
+# regr_3 = DecisionTreeRegressor(max_depth=4)
+# regr_1.fit(X, y)
 regr_2.fit(X, y)
-regr_3.fit(X, y)
+# regr_3.fit(X, y)
 
 # 预测
 X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
-y_1 = regr_1.predict(X_test)
+# y_1 = regr_1.predict(X_test)
 y_2 = regr_2.predict(X_test)
-y_3 = regr_3.predict(X_test)
+# y_3 = regr_3.predict(X_test)
 
 # 绘制结果
 plt.figure()
 plt.scatter(X, y, c="darkorange", label="data")
-plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
+# plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
 plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
-plt.plot(X_test, y_3, color="red", label="max_depth=3", linewidth=2)
+# plt.plot(X_test, y_3, color="red", label="max_depth=3", linewidth=2)
 plt.xlabel("data")
 plt.ylabel("target")
 plt.title("Decision Tree Regression")
diff --git a/src/python/4.NaiveBayes/sklearn-nb-demo.py b/src/python/4.NaiveBayes/sklearn-nb-demo.py
index 36e758cf..4262ac35 100644
--- a/src/python/4.NaiveBayes/sklearn-nb-demo.py
+++ b/src/python/4.NaiveBayes/sklearn-nb-demo.py
@@ -8,73 +8,38 @@ NaiveBayes：朴素贝叶斯
 @author: 小瑶
 《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
 """
+
+
+# GaussianNB_高斯朴素贝叶斯
 import numpy as np
-import matplotlib.pyplot as plt
-from sklearn import svm
-print(__doc__)
-
-
-# 创建40个分离点
-np.random.seed(0)
-# X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
-# Y = [0] * 20 + [1] * 20
-
-
-def loadDataSet(fileName):
-    """
-    对文件进行逐行解析，从而得到第行的类标签和整个数据矩阵
-    Args:
-        fileName 文件名
-    Returns:
-        dataMat  数据矩阵
-        labelMat 类标签
-    """
-    dataMat = []
-    labelMat = []
-    fr = open(fileName)
-    for line in fr.readlines():
-        lineArr = line.strip().split('\t')
-        dataMat.append([float(lineArr[0]), float(lineArr[1])])
-        labelMat.append(float(lineArr[2]))
-    return dataMat, labelMat
-
-
-X, Y = loadDataSet('input/6.SVM/testSet.txt')
-X = np.mat(X)
-
-print("X=", X)
-print("Y=", Y)
-
-# 拟合一个SVM模型
-clf = svm.SVC(kernel='linear')
+X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])  # six 2-D training samples
+Y = np.array([1, 1, 1, 2, 2, 2])  # class label of each sample
+from sklearn.naive_bayes import GaussianNB
+clf = GaussianNB()
 clf.fit(X, Y)
+print(clf.predict([[-0.8, -1]]))  # call form of print runs on both Python 2 and Python 3
+clf_pf = GaussianNB()
+clf_pf.partial_fit(X, Y, np.unique(Y))  # incremental fit; the third argument lists the classes
+print(clf_pf.predict([[-0.8, -1]]))
 
-# 获取分割超平面
-w = clf.coef_[0]
-# 斜率
-a = -w[0] / w[1]
-# 从-5到5，顺序间隔采样50个样本，默认是num=50
-# xx = np.linspace(-5, 5)  # , num=50)
-xx = np.linspace(-2, 10)  # , num=50)
-# 二维的直线方程
-yy = a * xx - (clf.intercept_[0]) / w[1]
-print("yy=", yy)
+# MultinomialNB_多项朴素贝叶斯
+'''
+import numpy as np
+X = np.random.randint(5, size=(6, 100))
+y = np.array([1, 2, 3, 4, 5, 6])
+from sklearn.naive_bayes import MultinomialNB
+clf = MultinomialNB()
+clf.fit(X, y)
+print clf.predict(X[2:3])
+'''
 
-# plot the parallels to the separating hyperplane that pass through the support vectors
-# 通过支持向量绘制分割超平面
-print("support_vectors_=", clf.support_vectors_)
-b = clf.support_vectors_[0]
-yy_down = a * xx + (b[1] - a * b[0])
-b = clf.support_vectors_[-1]
-yy_up = a * xx + (b[1] - a * b[0])
-
-# plot the line, the points, and the nearest vectors to the plane
-plt.plot(xx, yy, 'k-')
-plt.plot(xx, yy_down, 'k--')
-plt.plot(xx, yy_up, 'k--')
-
-plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none')
-plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
-
-plt.axis('tight')
-plt.show()
+# BernoulliNB_伯努利朴素贝叶斯
+'''
+import numpy as np
+X = np.random.randint(2, size=(6, 100))
+Y = np.array([1, 2, 3, 4, 4, 5])
+from sklearn.naive_bayes import BernoulliNB
+clf = BernoulliNB()
+clf.fit(X, Y)
+print clf.predict(X[2:3])
+'''
diff --git a/src/python/5.Logistic/logistic.py b/src/python/5.Logistic/logistic.py
index af834873..c8e85021 100644
--- a/src/python/5.Logistic/logistic.py
+++ b/src/python/5.Logistic/logistic.py
@@ -19,7 +19,7 @@ def loadDataSet(file_name):
     fr = open(file_name)
     for line in fr.readlines():
         lineArr = line.strip().split()
-        # 将 X0 的值设为 1.0
+        # 为了方便计算，我们将 X0 的值设为 1.0 ，也就是在每一行的开头添加一个 1.0 作为 X0
         dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
         labelMat.append(int(lineArr[2]))
     return dataMat,labelMat
@@ -57,6 +57,7 @@ def gradAscent(dataMatIn, classLabels):
         # print 'weights====', weights
         # n*3   *  3*1  = n*1
         h = sigmoid(dataMatrix*weights)     # 矩阵乘法
+        # print 'hhhhhhh====', h
         # labelMat是实际值
         error = (labelMat - h)              # 向量相减
         # 0.001* (3*m)*(m*1) 表示在每一个列上的一个误差情况，最后得出 x1,x2,xn的系数的偏移量
@@ -110,6 +111,17 @@ def stocGradAscent1(dataMatrix, classLabels, numIter=150):
 
 # 可视化展示
 def plotBestFit(dataArr, labelMat, weights):
+    '''
+        Desc:
+            将我们得到的数据可视化展示出来
+        Args:
+            dataArr:样本数据的特征
+            labelMat:样本数据的类别标签，即目标变量
+            weights:回归系数
+        Returns:
+            None
+    '''
+    
     n = shape(dataArr)[0]
     xcord1 = []; ycord1 = []
     xcord2 = []; ycord2 = []
@@ -146,8 +158,8 @@ def main():
     # 因为数组没有是复制n份， array的乘法就是乘法
     dataArr = array(dataMat)
     # print dataArr
-    # weights = gradAscent(dataArr, labelMat)
-    weights = stocGradAscent0(dataArr, labelMat)
+    weights = gradAscent(dataArr, labelMat)
+    # weights = stocGradAscent0(dataArr, labelMat)
     # weights = stocGradAscent1(dataArr, labelMat)
     # print '*'*30, weights
 
diff --git a/src/python/5.Logistic/sklearn_logisticRegression_demo.py b/src/python/5.Logistic/sklearn_logisticRegression_demo.py
new file mode 100644
index 00000000..12a712e2
--- /dev/null
+++ b/src/python/5.Logistic/sklearn_logisticRegression_demo.py
@@ -0,0 +1,281 @@
+#!/usr/bin/python
+# coding: utf8
+
+'''
+Created on Oct 27, 2010
+Update  on 2017-05-18
+Logistic Regression Working Module
+@author: 小瑶
+《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
+scikit-learn的例子地址：http://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+'''
+
+# 逻辑回归中的 L1 惩罚和稀疏性 L1 Penalty and Sparsity in Logistic Regression
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn.linear_model import LogisticRegression
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+
+digits = datasets.load_digits()
+
+X, y = digits.data, digits.target
+X = StandardScaler().fit_transform(X)
+
+# 将大小数字分类为小
+y = (y > 4).astype(np.int)
+
+
+# 设置正则化参数
+for i, C in enumerate((100, 1, 0.01)):
+    # 减少训练时间短的容忍度
+    clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
+    clf_l2_LR = LogisticRegression(C=C, penalty='l2', tol=0.01)
+    clf_l1_LR.fit(X, y)
+    clf_l2_LR.fit(X, y)
+
+    coef_l1_LR = clf_l1_LR.coef_.ravel()
+    coef_l2_LR = clf_l2_LR.coef_.ravel()
+
+    # coef_l1_LR contains zeros due to the
+    # L1 sparsity inducing norm
+    # 由于 L1 稀疏诱导规范，coef_l1_LR 包含零
+
+    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
+    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
+
+    print("C=%.2f" % C)
+    print("Sparsity with L1 penalty: %.2f%%" % sparsity_l1_LR)
+    print("score with L1 penalty: %.4f" % clf_l1_LR.score(X, y))
+    print("Sparsity with L2 penalty: %.2f%%" % sparsity_l2_LR)
+    print("score with L2 penalty: %.4f" % clf_l2_LR.score(X, y))
+
+    l1_plot = plt.subplot(3, 2, 2 * i + 1)
+    l2_plot = plt.subplot(3, 2, 2 * (i + 1))
+    if i == 0:
+        l1_plot.set_title("L1 penalty")
+        l2_plot.set_title("L2 penalty")
+
+    l1_plot.imshow(np.abs(coef_l1_LR.reshape(8, 8)), interpolation='nearest',
+                   cmap='binary', vmax=1, vmin=0)
+    l2_plot.imshow(np.abs(coef_l2_LR.reshape(8, 8)), interpolation='nearest',
+                   cmap='binary', vmax=1, vmin=0)
+    plt.text(-8, 3, "C = %.2f" % C)
+
+    l1_plot.set_xticks(())
+    l1_plot.set_yticks(())
+    l2_plot.set_xticks(())
+    l2_plot.set_yticks(())
+
+plt.show()
+'''
+
+# 具有 L1-逻辑回归的路径
+'''
+print(__doc__)
+
+from datetime import datetime
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import linear_model
+from sklearn import datasets
+from sklearn.svm import l1_min_c
+
+iris = datasets.load_iris()
+X = iris.data
+y = iris.target
+
+X = X[y != 2]
+y = y[y != 2]
+
+X -= np.mean(X, 0)
+
+cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3)
+
+
+print("Computing regularization path ...")
+start = datetime.now()
+clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
+coefs_ = []
+for c in cs:
+    clf.set_params(C=c)
+    clf.fit(X, y)
+    coefs_.append(clf.coef_.ravel().copy())
+print("This took ", datetime.now() - start)
+
+coefs_ = np.array(coefs_)
+plt.plot(np.log10(cs), coefs_)
+ymin, ymax = plt.ylim()
+plt.xlabel('log(C)')
+plt.ylabel('Coefficients')
+plt.title('Logistic Regression Path')
+plt.axis('tight')
+plt.show()
+'''
+
+# 绘制多项（multinomial）和一对其余（One-vs-Rest）的逻辑回归 Plot multinomial and One-vs-Rest Logistic Regression
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.datasets import make_blobs
+from sklearn.linear_model import LogisticRegression
+
+# 制作 3 类数据集进行分类
+centers = [[-5, 0], [0, 1.5], [5, -1]]
+X, y = make_blobs(n_samples=1000, centers=centers, random_state=40)
+transformation = [[0.4, 0.2], [-0.4, 1.2]]
+X = np.dot(X, transformation)
+
+for multi_class in ('multinomial', 'ovr'):
+    clf = LogisticRegression(solver='sag', max_iter=100, random_state=42,
+                             multi_class=multi_class).fit(X, y)
+
+    # 打印训练分数
+    print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
+
+    # 创建一个网格来绘制
+    h = .02  # 网格中的步长
+    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
+    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                         np.arange(y_min, y_max, h))
+
+    # 绘制决策边界。为此，我们将为网格 [x_min, x_max]x[y_min, y_max]中的每个点分配一个颜色。
+    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
+    # 将结果放入彩色图
+    Z = Z.reshape(xx.shape)
+    plt.figure()
+    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
+    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
+    plt.axis('tight')
+
+    # 将训练点也绘制进入
+    colors = "bry"
+    for i, color in zip(clf.classes_, colors):
+        idx = np.where(y == i)
+        plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired)
+
+    # 绘制三个一对数分类器
+    xmin, xmax = plt.xlim()
+    ymin, ymax = plt.ylim()
+    coef = clf.coef_
+    intercept = clf.intercept_
+
+    def plot_hyperplane(c, color):
+        def line(x0):
+            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
+        plt.plot([xmin, xmax], [line(xmin), line(xmax)],
+                 ls="--", color=color)
+
+    for i, color in zip(clf.classes_, colors):
+        plot_hyperplane(i, color)
+
+plt.show()
+'''
+
+# Logistic Regression 3-class Classifier 逻辑回归 3-类 分类器 
+
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import linear_model, datasets
+
+# load a sample data set (iris)
+iris = datasets.load_iris()
+# use only the first two features of each sample
+X = iris.data[:, :2]  
+Y = iris.target
+
+h = .02  # step size of the mesh
+
+logreg = linear_model.LogisticRegression(C=1e5)
+
+# fit the LogisticRegression instance created above to the data
+logreg.fit(X, Y)
+
+# plot the decision boundary: predict a class for every point of the mesh [x_min, x_max]x[y_min, y_max]
+x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
+y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
+xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
+Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
+
+# reshape the predictions to the mesh shape and draw them as a colour plot
+Z = Z.reshape(xx.shape)
+plt.figure(1, figsize=(4, 3))
+plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
+
+# overlay the training points, coloured by their class label
+plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
+plt.xlabel('Sepal length')
+plt.ylabel('Sepal width')
+
+plt.xlim(xx.min(), xx.max())
+plt.ylim(yy.min(), yy.max())
+plt.xticks(())
+plt.yticks(())
+
+plt.show()
+
+# Logistic function 逻辑回归函数
+# 这个类似于咱们之前讲解 logistic 回归的 Sigmoid 函数，模拟的阶跃函数
+
+'''
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import linear_model
+
+# 这是我们的测试集，它只是一条直线，带有一些高斯噪声。
+xmin, xmax = -5, 5
+n_samples = 100
+np.random.seed(0)
+X = np.random.normal(size=n_samples)
+y = (X > 0).astype(np.float)
+X[X > 0] *= 4
+X += .3 * np.random.normal(size=n_samples)
+
+X = X[:, np.newaxis]
+# 运行分类器
+clf = linear_model.LogisticRegression(C=1e5)
+clf.fit(X, y)
+
+# 并且画出我们的结果
+plt.figure(1, figsize=(4, 3))
+plt.clf()
+plt.scatter(X.ravel(), y, color='black', zorder=20)
+X_test = np.linspace(-5, 10, 300)
+
+
+def model(x):
+    return 1 / (1 + np.exp(-x))
+loss = model(X_test * clf.coef_ + clf.intercept_).ravel()
+plt.plot(X_test, loss, color='red', linewidth=3)
+
+ols = linear_model.LinearRegression()
+ols.fit(X, y)
+plt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1)
+plt.axhline(.5, color='.5')
+
+plt.ylabel('y')
+plt.xlabel('X')
+plt.xticks(range(-5, 10))
+plt.yticks([0, 0.5, 1])
+plt.ylim(-.25, 1.25)
+plt.xlim(-4, 10)
+plt.legend(('Logistic Regression Model', 'Linear Regression Model'),
+           loc="lower right", fontsize='small')
+plt.show()
+'''
+
+
+
diff --git a/src/python/8.Predictive numerical data regression/regression.py b/src/python/8.Predictive numerical data regression/regression.py
index 4e6e0884..59304c5a 100644
--- a/src/python/8.Predictive numerical data regression/regression.py	
+++ b/src/python/8.Predictive numerical data regression/regression.py	
@@ -4,7 +4,7 @@
 '''
 Created on Jan 8, 2011
 Update  on 2017-05-18
-@author: Peter Harrington/ApacheCN-小瑶
+@author: Peter Harrington/小瑶
 《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
 '''
 
@@ -12,89 +12,214 @@ Update  on 2017-05-18
 from numpy import *
 import matplotlib.pylab as plt
 
-def loadDataSet(fileName):                 # 解析以tab键分隔的文件中的浮点数
+def loadDataSet(fileName):
     """ 加载数据
+        Parse a tab-separated file of floating-point numbers.
     Returns：
-        dataMat  feature 对应的数据集
-        labelMat feature 对应的分类标签，即类别标签
+        dataMat:  list of feature rows, one row per line of the file
+        labelMat: list of target values (the last field of each line)
 
     """
-    numFeat = len(open(fileName).readline().split('\t')) - 1 # 获得每一行的输入数据，最后一个代表真实值 
-    dataMat = []; labelMat = []
+    # number of feature columns: fields on the first line minus the trailing target column
+    numFeat = len(open(fileName).readline().split('\t')) - 1  # NOTE(review): this file handle is never closed
+    dataMat = []
+    labelMat = []
     fr = open(fileName)
-    for line in fr.readlines():            # 读取每一行
+    for line in fr.readlines():
+        # handle one line of the file per iteration
         lineArr =[]
-        curLine = line.strip().split('\t') # 删除一行中以tab分隔的数据前后的空白符号
-        for i in range(numFeat):           # 从0到2，不包括2
-            lineArr.append(float(curLine[i]))# 将数据添加到lineArr List中，每一行数据测试数据组成一个行向量
-        dataMat.append(lineArr)            # 将测试数据的输入数据部分存储到dataMat矩阵中
-        labelMat.append(float(curLine[-1]))# 将每一行的最后一个数据，即真实的目标变量存储到labelMat矩阵中
+        # strip surrounding whitespace, then split the line on tabs
+        curLine = line.strip().split('\t')
+        # i runs over the feature columns 0 .. numFeat-1
+        for i in range(numFeat):
+            # convert each feature field to float and collect it into this line's row
+            lineArr.append(float(curLine[i]))
+        # append the finished feature row to the dataMat list
+        dataMat.append(lineArr)
+        # the last field of the line is the target value; store it in the labelMat list
+        labelMat.append(float(curLine[-1]))
     return dataMat,labelMat
 
-def standRegres(xArr,yArr):               # 线性回归
-    xMat = mat(xArr); yMat = mat(yArr).T  # mat()函数将xArr，yArr转换为矩阵
-    xTx = xMat.T*xMat                     # 矩阵乘法的条件是左矩阵的列数等于右矩阵的行数
-    if linalg.det(xTx) == 0.0:            # 因为要用到xTx的逆矩阵，所以事先需要确定计算得到的xTx是否可逆，条件是矩阵的行列式不为0
-        print ("This matrix is singular, cannot do inverse")
+def standRegres(xArr,yArr):
+    '''
+    Description:
+        Ordinary least-squares linear regression.
+    Args:
+        xArr: input samples, one row of feature values per sample
+        yArr: target value of each sample
+    Returns:
+        ws: regression coefficients; None when xTx has a zero determinant
+    '''
+
+    # mat() converts xArr and yArr to matrices; .T transposes yMat
+    xMat = mat(xArr)
+    yMat = mat(yArr).T
+    # matrix product: the left operand's column count must equal the right operand's row count
+    xTx = xMat.T*xMat
+    # xTx is inverted below, so first check that it is invertible:
+    # linalg.det() returns the determinant, and a zero determinant means no inverse exists
+    if linalg.det(xTx) == 0.0:
+        print ("This matrix is singular, cannot do inverse")
         return
     # 最小二乘法
     # http://www.apache.wiki/pages/viewpage.action?pageId=5505133
-    ws = xTx.I * (xMat.T*yMat)            # 书中的公式，求得w的最优解
+    # closed-form least-squares solution for w: (X^T X)^-1 X^T y
+    ws = xTx.I * (xMat.T*yMat)
     return ws
 
-def lwlr(testPoint,xArr,yArr,k=1.0):      # 局部加权线性回归
-    xMat = mat(xArr); yMat = mat(yArr).T
-    m = shape(xMat)[0]                    # 获得xMat矩阵的行数
-    weights = mat(eye((m)))               # eye()返回一个对角线元素为1，其他元素为0的二维数组，创建权重矩阵
-    for j in range(m):                      # 下面两行创建权重矩阵
-        diffMat = testPoint - xMat[j,:]     # 遍历数据集，计算每个样本点对应的权重值
-        weights[j,j] = exp(diffMat*diffMat.T/(-2.0*k**2))#k控制衰减的速度
+# Locally weighted linear regression (LWLR)
+def lwlr(testPoint,xArr,yArr,k=1.0):
+    '''
+        Description:
+            Locally weighted linear regression: weight every training sample by its closeness to testPoint, then solve a weighted least-squares fit.
+        Args:
+            testPoint: the point to predict for (used as a row vector of features)
+            xArr: feature data of the training samples
+            yArr: target value of each training sample
+            k: kernel parameter; controls how fast the weights decay with distance
+        Returns:
+            testPoint * ws: prediction for testPoint from the locally fitted coefficients (None when the weighted xTx has a zero determinant)
+        Notes:
+            Weight of sample i: w(i) = exp(|x(i) - x|^2 / (-2 * k^2)), where x is the prediction point and x(i) a training sample.
+            The closer x(i) is to x, the larger its weight; the farther away, the smaller.
+            In this file the prediction points are taken from the sample points. k is the bandwidth of the bell-shaped kernel, similar to a Gaussian's standard deviation.
+            Procedure: for the given prediction point, loop over all m training samples and compute each sample's weight from its distance to the point;
+            the m weights are stored on the diagonal of an m x m matrix.
+    '''
+    # mat() converts the inputs to matrices; .T transposes yMat
+    xMat = mat(xArr)
+    yMat = mat(yArr).T
+    # number of rows (training samples) in xMat
+    m = shape(xMat)[0]
+    # eye(m) is the m x m identity; it is the initial diagonal weight matrix (one weight per sample)
+    weights = mat(eye((m)))
+    for j in range(m):
+        # difference between testPoint and training sample j
+        # (diffMat*diffMat.T below is the squared length of that difference)
+        diffMat = testPoint - xMat[j,:]
+        # the weight decays exponentially with the squared distance; k sets the decay rate
+        weights[j,j] = exp(diffMat*diffMat.T/(-2.0*k**2))
+    # weighted matrix X^T W X, where W is the diagonal weight matrix built above
     xTx = xMat.T * (weights * xMat)
     if linalg.det(xTx) == 0.0:
         print ("This matrix is singular, cannot do inverse")
         return
-    ws = xTx.I * (xMat.T * (weights * yMat)) # 计算出回归系数的一个估计
+    # estimate of the regression coefficients for this test point
+    ws = xTx.I * (xMat.T * (weights * yMat))
     return testPoint * ws
 
-def lwlrTest(testArr,xArr,yArr,k=1.0):  # 循环所有的数据点，并将lwlr运用于所有的数据点
+def lwlrTest(testArr,xArr,yArr,k=1.0):
+    '''
+        Description:
+            Run locally weighted linear regression for every point of testArr by calling lwlr() on each one.
+        Args:
+            testArr: the points to predict for, one per row
+            xArr: feature data of the training samples
+            yArr: target value of each training sample
+            k: kernel parameter passed through to lwlr()
+        Returns:
+            yHat: array holding one predicted value per row of testArr
+    '''
+    # number of points to predict
     m = shape(testArr)[0]
+    # length-m array of zeros that receives the predictions
     yHat = zeros(m)
+    # apply lwlr() to each test point in turn
     for i in range(m):
         yHat[i] = lwlr(testArr[i],xArr,yArr,k)
+    # return the predictions
     return yHat
 
-def lwlrTestPlot(xArr,yArr,k=1.0):  # 首先将 X 排序，其余的都与lwlrTest相同，这样更容易绘图
-    yHat = zeros(shape(yArr))       
+def lwlrTestPlot(xArr,yArr,k=1.0):
+    '''
+        Description:
+            Same as lwlrTest, except that X is sorted first, which makes the result easier to plot.
+        Args:
+            xArr: feature data of the samples
+            yArr: target (actual) value of each sample
+            k: kernel parameter passed through to lwlr(); defaults to 1.0
+        Return:
+            yHat: predicted value for each row of the sorted copy
+            xCopy: matrix copy of xArr, sorted in place along axis 0
+    '''
+    # zero array with the same shape as yArr; receives the predictions
+    yHat = zeros(shape(yArr))
+    # convert xArr to a matrix
     xCopy = mat(xArr)
+    # sort in place along axis 0
     xCopy.sort(0)
+    # run locally weighted linear regression for every row of the sorted copy
     for i in range(shape(xArr)[0]):
         yHat[i] = lwlr(xCopy[i],xArr,yArr,k)
     return yHat,xCopy
 
-def rssError(yArr,yHatArr): # yArr 和 yHatArr 两者都需要是数组
+def rssError(yArr,yHatArr):
+    '''
+        Desc:
+            Measure the size of the prediction error as a residual sum of squares.
+        Args:
+            yArr: actual target values (the element-wise arithmetic below presumably expects an array -- TODO confirm)
+            yHatArr: predicted values (same assumption as yArr)
+        Returns:
+            Sum of the squared differences between the actual and the predicted values.
+    '''
     return ((yArr-yHatArr)**2).sum()
 
-def ridgeRegres(xMat,yMat,lam=0.2):  # 岭回归
+def ridgeRegres(xMat,yMat,lam=0.2):
+    '''
+        Desc:
+            Solve ridge regression for a given lambda.
+            When the data has more features than samples, plain and locally weighted linear regression can no longer be used, because computing (xTx)^(-1) fails.
+            More features than samples (n > m) means the input matrix x is not of full rank, and inverting a rank-deficient matrix is a problem.
+            Ridge regression addresses this; it is the first shrinkage method covered here.
+        Args:
+            xMat: feature data of the samples
+            yMat: target (actual) value of each sample
+            lam: the lambda value added to the diagonal so that the matrix becomes non-singular
+        Returns:
+            Regression coefficients from the ridge-regression formula (None when the determinant check fails)
+    '''
+
     xTx = xMat.T*xMat
-    denom = xTx + eye(shape(xMat)[1])*lam   # 按照书上的公式计算计算回归系数
-    if linalg.det(denom) == 0.0:            # 检查行列式是否为零，即矩阵是否可逆
+    # ridge regression adds lambda * I to xTx so that xTx + lambda * I can be inverted
+    denom = xTx + eye(shape(xMat)[1])*lam
+    # a zero determinant means the matrix is not invertible; a non-zero one means it is
+    if linalg.det(denom) == 0.0:
         print ("This matrix is singular, cannot do inverse")
         return
     ws = denom.I * (xMat.T*yMat)
     return ws
 
 def ridgeTest(xArr,yArr):
-    xMat = mat(xArr); yMat=mat(yArr).T
-    yMean = mean(yMat,0)    # 计算Y均值
-    yMat = yMat - yMean     # Y的所有的特征减去均值
-                            # 标准化 x
-    xMeans = mean(xMat,0)   # X计算平均值
-    xVar = var(xMat,0)      # 然后计算 X的方差
+    '''
+        Desc:
+            Run ridgeRegres() over a series of lambda values.
+        Args:
+            xArr: feature data of the samples
+            yArr: target (actual) value of each sample
+        Returns:
+            wMat: matrix holding one row of regression coefficients per lambda
+    '''
+
+    xMat = mat(xArr)
+    yMat=mat(yArr).T
+    # mean of Y
+    yMean = mean(yMat,0)
+    # centre Y by subtracting its mean
+    yMat = yMat - yMean
+    # standardise X: first the per-column mean of xMat
+    xMeans = mean(xMat,0)
+    # then the per-column variance of X
+    xVar = var(xMat,0)
+    # subtract each column's mean and divide by its variance
     xMat = (xMat - xMeans)/xVar
+    # ridgeRegres() is called for 30 different lambda values
     numTestPts = 30
-    wMat = zeros((numTestPts,shape(xMat)[1]))# 创建30 * m 的全部数据为0 的矩阵
+    # zero matrix of shape 30 x n, where n is the number of columns of xMat
+    wMat = zeros((numTestPts,shape(xMat)[1]))
     for i in range(numTestPts):
-        ws = ridgeRegres(xMat,yMat,exp(i-10))# exp返回e^x
+        # lambda = exp(i-10), i.e. e^(i-10)
+        ws = ridgeRegres(xMat,yMat,exp(i-10))
         wMat[i,:]=ws.T
     return wMat