From 0c43981c768645542bd9e039deb6163eece0b04b Mon Sep 17 00:00:00 2001
From: yangjifei <1714004716@qq.com>
Date: Sat, 4 Mar 2017 20:38:51 +0800
Subject: [PATCH 1/5] add the regression code of python
---
.idea/MachineLearning.iml | 13 +++
.idea/misc.xml | 4 +
.idea/modules.xml | 8 ++
.idea/vcs.xml | 6 ++
docs/5.Logistic回归.md | 43 ++++++--
.../com/apachecn/logistic/logRegression.py | 97 +++++++++++++++++
.../05.Logistic/test/test_logRegression.py | 49 +++++++++
testData/testSet.txt | 100 ++++++++++++++++++
8 files changed, 311 insertions(+), 9 deletions(-)
create mode 100644 .idea/MachineLearning.iml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/vcs.xml
create mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
create mode 100644 src/python/05.Logistic/test/test_logRegression.py
create mode 100644 testData/testSet.txt
diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml
new file mode 100644
index 00000000..eeeea0a4
--- /dev/null
+++ b/.idea/MachineLearning.iml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 00000000..0974871b
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..a35ae91e
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..94a25f7f
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/5.Logistic回归.md b/docs/5.Logistic回归.md
index e049ec53..a3da78b3 100644
--- a/docs/5.Logistic回归.md
+++ b/docs/5.Logistic回归.md
@@ -1,11 +1,36 @@
-# 1) ع
+# 5) 逻辑回归基础
- * ع(Logistic Regression)
- * 1.1
- * 1.2 ˵ʾ
- * 1.3 ж߽
- * 1.4 ۺ
- * 1.5 ijɱݶ½
- * 1.6 Ż
- * 1.7 ࣺһ
\ No newline at end of file
+ * 逻辑回归(Logistic Regression)
+ * 5.1 分类问题
+ * 在分类问题中,尝试预测的是结果是否属于某一个类(例如正确或错误)。
+ * 分类问题的例子有:
+ * 判断一封电子邮件是否是垃圾邮件;
+ * 判断一次金融交易是否是欺诈等等。
+ * 从二元的分类问题开始讨论:
+ 将因变量(dependent variable)可能属于的两个类分别称为负向类(negative class)和正向类(positive class),则因变量
+ y属于{0,1}
+ 注:其中 0 表示负向类,1 表示正向类。
+ * 5.2 假说表示
+
+ * 5.3 判定边界
+ * 在逻辑回归中,我们预测:
+ 当 hθ 大于等于 0.5 时,预测 y=1
+ 当 hθ 小于 0.5 时,预测 y=0
+ * 根据上面绘制出的 S 形函数图像,我们知道当
+ z=0时 ,g(z)=0.5
+ z>0时 ,g(z)>0.5
+ z<0时 ,g(z)<0.5
+ 又 z=θ^T·X(θ 的转置与 X 的乘积),即:
+ z大于等于0时,预测:y=1
+ z小于0时,预测:y=0
+ * 现在假设我们有一个模型:Hθ(x)=g(θ0+θ1*x1+θ2*x2)
+ 并且参数θ是向量[-3 1 1]。则当-3+x1+x2大于等于0,即x1+x2大于等于3时,模型将预测y=1。
+ 我们可以绘制直线x1+x2=3,这条线便是我们模型的分界线,将预测为1的区域和预测为0的区域分隔开。
+ * 假使我们的数据呈现这样的分布情况,怎样的模型才能适合呢?
+ 因为需要用曲线才能分隔 y=0 的区域和 y=1 的区域,我们需要二次方特征: 假设模型是 Hθ(x)=g(θ0+θ1*x1+θ2*x2+θ3*(x1^2)+θ4*(x2^2)),并且参数θ
+ 是[-1 0 0 1 1],则我们得到的判定边界恰好是圆心在原点且半径为 1 的圆形。可以用非常复杂的模型来适应非常复杂形状的判定边界。
+ * 5.4 代价函数
+ * 5.5 简化的成本函数和梯度下降
+ * 5.6 高级优化
+ * 5.7 多类分类:一个对所有
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
new file mode 100644
index 00000000..e20c6440
--- /dev/null
+++ b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# encoding: utf-8
+from numpy import *
+import matplotlib.pyplot as plt
+import time
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: logRegression01.py
+@time: 2017/3/3 22:03
+@test result:not pass
+"""
+
+# sigmoid函数
+def sigmoid(inX):
+    return 1.0 / (1 + exp(-inX))  # logistic function 1 / (1 + e^(-inX)); exp comes from the numpy star import
+
+def trainLogRegres(train_x, train_y, opts):
+    """Train logistic-regression weights with the optimizer named in opts['optimizeType'].
+
+    train_x / train_y are expected to be numpy matrices (samples x features, samples x 1) so `*` is a
+    matrix product; opts supplies 'alpha' (step size), 'maxIter' (number of passes) and 'optimizeType'
+    ('gradDescent', 'stocGradDescent' or 'smoothStocGradDescent'; anything else raises NameError).
+    Prints the elapsed training time and returns the (numFeatures x 1) weights.
+    """
+    startTime = time.time()
+    numSamples, numFeatures = shape(train_x)
+    alpha = opts['alpha']; maxIter = opts['maxIter']
+    weights = ones((numFeatures, 1))
+    for k in range(maxIter):
+        if opts['optimizeType'] == 'gradDescent':  # batch update over all samples
+            output = sigmoid(train_x * weights)
+            error = train_y - output
+            weights = weights + alpha * train_x.transpose() * error
+        elif opts['optimizeType'] == 'stocGradDescent':  # one update per sample, in row order
+            for i in range(numSamples):
+                output = sigmoid(train_x[i, :] * weights)
+                error = train_y[i, 0] - output
+                weights = weights + alpha * train_x[i, :].transpose() * error
+        elif opts['optimizeType'] == 'smoothStocGradDescent':  # random order, decaying step size
+            dataIndex = list(range(numSamples))  # rows not yet visited in this pass
+            for i in range(numSamples):
+                alpha = 4.0 / (1.0 + k + i) + 0.01  # shrinks over time but never below 0.01
+                # Pop a random *remaining* row so each row is used exactly once per pass.
+                sampleIndex = dataIndex.pop(int(random.uniform(0, len(dataIndex))))
+                output = sigmoid(train_x[sampleIndex, :] * weights)
+                error = train_y[sampleIndex, 0] - output
+                weights = weights + alpha * train_x[sampleIndex, :].transpose() * error
+        else:
+            raise NameError('Not support optimize method type!')
+    print 'Congratulations, training complete! Took %fs!' % (time.time() - startTime)
+    return weights
+
+
+# Measure the accuracy of trained logistic-regression weights on a labelled test set.
+def testLogRegres(weights, test_x, test_y):
+    numSamples, numFeatures = shape(test_x)
+    matchCount = 0
+    for i in xrange(numSamples):
+        predict = sigmoid(test_x[i, :] * weights)[0, 0] > 0.5  # True when the sigmoid output exceeds 0.5
+        if predict == bool(test_y[i, 0]):  # label is compared by truthiness (non-zero counts as class 1)
+            matchCount += 1
+    accuracy = float(matchCount) / numSamples  # fraction of correctly classified rows
+    return accuracy
+
+
+# Plot the samples and the learned decision boundary; only works for 2-D data.
+def showLogRegres(weights, train_x, train_y):
+    # NOTE: train_x and train_y are expected to be numpy mat objects
+    numSamples, numFeatures = shape(train_x)
+    if numFeatures != 3:  # three columns are required; columns 1 and 2 are the ones plotted below
+        print "抱歉! 我不能绘制,因为你的数据的维度不是2!"
+        return 1
+
+    # Plot every sample: label 0 as a red dot, label 1 as a blue dot
+    for i in xrange(numSamples):
+        if int(train_y[i, 0]) == 0:
+            plt.plot(train_x[i, 1], train_x[i, 2], 'or')
+        elif int(train_y[i, 0]) == 1:
+            plt.plot(train_x[i, 1], train_x[i, 2], 'ob')
+
+    # Draw the boundary line weights[0] + weights[1]*x1 + weights[2]*x2 = 0 across the x1 range
+    min_x = min(train_x[:, 1])[0, 0]
+    max_x = max(train_x[:, 1])[0, 0]
+    weights = weights.getA()  # convert the mat to an ndarray
+    y_min_x = float(-weights[0] - weights[1] * min_x) / weights[2]
+    y_max_x = float(-weights[0] - weights[1] * max_x) / weights[2]
+    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
+    plt.xlabel('X1'); plt.ylabel('X2')
+    # Show the figure
+    plt.show()
\ No newline at end of file
diff --git a/src/python/05.Logistic/test/test_logRegression.py b/src/python/05.Logistic/test/test_logRegression.py
new file mode 100644
index 00000000..d6a8f707
--- /dev/null
+++ b/src/python/05.Logistic/test/test_logRegression.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# encoding: utf-8
+import sys
+sys.path.append("C:\Python27")
+
+from numpy import *
+import matplotlib.pyplot as plt
+from core.com.apachcn.logistic import logRegression
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: test_logRegression.py
+@time: 2017/3/3 22:09
+"""
+
+def loadData():
+ train_x = []
+ train_y = []
+ fileIn = open('testData/testSet.txt')
+ for line in fileIn.readlines():
+ lineArr = line.strip().split()
+ train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
+ train_y.append(float(lineArr[2]))
+ return mat(train_x), mat(train_y).transpose()
+
+
+##第一步: 加载数据
+print "step 1: load data..."
+train_x, train_y = loadData()
+test_x = train_x; test_y = train_y
+
+##第二步: 训练数据...
+print "step 2: training..."
+opts = {'alpha': 0.01, 'maxIter': 20, 'optimizeType': 'smoothStocGradDescent'}
+optimalWeights = trainLogRegres(train_x, train_y, opts)
+
+##第三步: 测试
+print "step 3: testing..."
+accuracy = testLogRegres(optimalWeights, test_x, test_y)
+
+##第四步: 显示结果
+print "step 4: show the result..."
+print 'The classify accuracy is: %.3f%%' % (accuracy * 100)
+showLogRegres(optimalWeights, train_x, train_y)
\ No newline at end of file
diff --git a/testData/testSet.txt b/testData/testSet.txt
new file mode 100644
index 00000000..2356ac54
--- /dev/null
+++ b/testData/testSet.txt
@@ -0,0 +1,100 @@
+-0.017612 14.053064 0
+-1.395634 4.662541 1
+-0.752157 6.538620 0
+-1.322371 7.152853 0
+0.423363 11.054677 0
+0.406704 7.067335 1
+0.667394 12.741452 0
+-2.460150 6.866805 1
+0.569411 9.548755 0
+-0.026632 10.427743 0
+0.850433 6.920334 1
+1.347183 13.175500 0
+1.176813 3.167020 1
+-1.781871 9.097953 0
+-0.566606 5.749003 1
+0.931635 1.589505 1
+-0.024205 6.151823 1
+-0.036453 2.690988 1
+-0.196949 0.444165 1
+1.014459 5.754399 1
+1.985298 3.230619 1
+-1.693453 -0.557540 1
+-0.576525 11.778922 0
+-0.346811 -1.678730 1
+-2.124484 2.672471 1
+1.217916 9.597015 0
+-0.733928 9.098687 0
+-3.642001 -1.618087 1
+0.315985 3.523953 1
+1.416614 9.619232 0
+-0.386323 3.989286 1
+0.556921 8.294984 1
+1.224863 11.587360 0
+-1.347803 -2.406051 1
+1.196604 4.951851 1
+0.275221 9.543647 0
+0.470575 9.332488 0
+-1.889567 9.542662 0
+-1.527893 12.150579 0
+-1.185247 11.309318 0
+-0.445678 3.297303 1
+1.042222 6.105155 1
+-0.618787 10.320986 0
+1.152083 0.548467 1
+0.828534 2.676045 1
+-1.237728 10.549033 0
+-0.683565 -2.166125 1
+0.229456 5.921938 1
+-0.959885 11.555336 0
+0.492911 10.993324 0
+0.184992 8.721488 0
+-0.355715 10.325976 0
+-0.397822 8.058397 0
+0.824839 13.730343 0
+1.507278 5.027866 1
+0.099671 6.835839 1
+-0.344008 10.717485 0
+1.785928 7.718645 1
+-0.918801 11.560217 0
+-0.364009 4.747300 1
+-0.841722 4.119083 1
+0.490426 1.960539 1
+-0.007194 9.075792 0
+0.356107 12.447863 0
+0.342578 12.281162 0
+-0.810823 -1.466018 1
+2.530777 6.476801 1
+1.296683 11.607559 0
+0.475487 12.040035 0
+-0.783277 11.009725 0
+0.074798 11.023650 0
+-1.337472 0.468339 1
+-0.102781 13.763651 0
+-0.147324 2.874846 1
+0.518389 9.887035 0
+1.015399 7.571882 0
+-1.658086 -0.027255 1
+1.319944 2.171228 1
+2.056216 5.019981 1
+-0.851633 4.375691 1
+-1.510047 6.061992 0
+-1.076637 -3.181888 1
+1.821096 10.283990 0
+3.010150 8.401766 1
+-1.099458 1.688274 1
+-0.834872 -1.733869 1
+-0.846637 3.849075 1
+1.400102 12.628781 0
+1.752842 5.468166 1
+0.078557 0.059736 1
+0.089392 -0.715300 1
+1.825662 12.693808 0
+0.197445 9.744638 0
+0.126117 0.922311 1
+-0.679797 1.220530 1
+0.677983 2.556666 1
+0.761349 10.693862 0
+-2.168791 0.143632 1
+1.388610 9.341997 0
+0.317029 14.739025 0
\ No newline at end of file
From c10d797c5807995405eeb7e22f0f014e186d41c0 Mon Sep 17 00:00:00 2001
From: yangjifei <1714004716@qq.com>
Date: Sat, 4 Mar 2017 22:21:24 +0800
Subject: [PATCH 2/5] update some code
---
.idea/MachineLearning.iml | 2 +-
src/__init__.py | 28 +++++++++++++++++++
src/python/03.DecisionTree/DTSklearn.py | 6 ++++
src/python/05.Logistic/__init__.py | 28 +++++++++++++++++++
src/python/05.Logistic/core/__init__.py | 28 +++++++++++++++++++
src/python/05.Logistic/core/com/__init__.py | 28 +++++++++++++++++++
.../05.Logistic/core/com/apachecn/__init__.py | 28 +++++++++++++++++++
.../core/com/apachecn/logistic/__init__.py | 28 +++++++++++++++++++
.../com/apachecn/logistic/logRegression.py | 8 +++++-
.../apachecn/logistic}/test_logRegression.py | 18 ++++++++----
src/python/05.Logistic/test/__init__.py | 27 ++++++++++++++++++
src/python/Logistic.py | 2 +-
src/python/__init__.py | 28 +++++++++++++++++++
13 files changed, 251 insertions(+), 8 deletions(-)
create mode 100644 src/__init__.py
create mode 100644 src/python/05.Logistic/__init__.py
create mode 100644 src/python/05.Logistic/core/__init__.py
create mode 100644 src/python/05.Logistic/core/com/__init__.py
create mode 100644 src/python/05.Logistic/core/com/apachecn/__init__.py
create mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
rename src/python/05.Logistic/{test => core/com/apachecn/logistic}/test_logRegression.py (71%)
create mode 100644 src/python/05.Logistic/test/__init__.py
create mode 100644 src/python/__init__.py
diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml
index eeeea0a4..bd0813dc 100644
--- a/.idea/MachineLearning.iml
+++ b/.idea/MachineLearning.iml
@@ -2,7 +2,7 @@
-
+
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 00000000..b69024ca
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:34
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/03.DecisionTree/DTSklearn.py b/src/python/03.DecisionTree/DTSklearn.py
index 5155b214..2629ef73 100644
--- a/src/python/03.DecisionTree/DTSklearn.py
+++ b/src/python/03.DecisionTree/DTSklearn.py
@@ -6,6 +6,12 @@ from sklearn import tree
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
+"""
+需要安装依赖模块:
+pip install scikit_learn-0.18-cp27-cp27m-win_amd64.whl
+非常完整的网址:
+http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy
+"""
def createDataSet():
diff --git a/src/python/05.Logistic/__init__.py b/src/python/05.Logistic/__init__.py
new file mode 100644
index 00000000..b69024ca
--- /dev/null
+++ b/src/python/05.Logistic/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:34
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/__init__.py b/src/python/05.Logistic/core/__init__.py
new file mode 100644
index 00000000..d294a8ba
--- /dev/null
+++ b/src/python/05.Logistic/core/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:28
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/__init__.py b/src/python/05.Logistic/core/com/__init__.py
new file mode 100644
index 00000000..d294a8ba
--- /dev/null
+++ b/src/python/05.Logistic/core/com/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:28
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/__init__.py b/src/python/05.Logistic/core/com/apachecn/__init__.py
new file mode 100644
index 00000000..d294a8ba
--- /dev/null
+++ b/src/python/05.Logistic/core/com/apachecn/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:28
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
new file mode 100644
index 00000000..d294a8ba
--- /dev/null
+++ b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:28
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
index e20c6440..caa026be 100644
--- a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
+++ b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
@@ -3,7 +3,13 @@
from numpy import *
import matplotlib.pyplot as plt
import time
+'''
+1、需要安装模块:pip install matplotlib-1.5.0-cp27-none-win_amd64.whl
+由于直接安装会出现问题,所以建议下载whl包进行安装,下载网址:
+https://pypi.python.org/pypi/matplotlib/1.5.0
+2、可以看见画出的图像
+'''
"""
@version:
@@ -14,7 +20,7 @@ import time
@software: PyCharm
@file: logRegression01.py
@time: 2017/3/3 22:03
-@test result:not pass
+@test result: ok
"""
# sigmoid函数
diff --git a/src/python/05.Logistic/test/test_logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
similarity index 71%
rename from src/python/05.Logistic/test/test_logRegression.py
rename to src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
index d6a8f707..31cf0a57 100644
--- a/src/python/05.Logistic/test/test_logRegression.py
+++ b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
@@ -1,12 +1,14 @@
#!/usr/bin/env python
# encoding: utf-8
+import os
import sys
sys.path.append("C:\Python27")
from numpy import *
-import matplotlib.pyplot as plt
-from core.com.apachcn.logistic import logRegression
+import matplotlib.pyplot as plt
+
+from logRegression import *
"""
@version:
@author: yangjf
@@ -16,12 +18,18 @@ from core.com.apachcn.logistic import logRegression
@software: PyCharm
@file: test_logRegression.py
@time: 2017/3/3 22:09
+@test result: ok
"""
def loadData():
train_x = []
train_y = []
- fileIn = open('testData/testSet.txt')
+ # 获取当前文件所在路径
+ project_dir = os.getcwdu()
+ # 截取字符串至项目名:Test\
+ project_dir = project_dir[:project_dir.find("MachineLearning\\") + 15]
+ print project_dir
+ fileIn = open("%s/testData/testSet.txt" % project_dir)
for line in fileIn.readlines():
lineArr = line.strip().split()
train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
@@ -37,11 +45,11 @@ test_x = train_x; test_y = train_y
##第二步: 训练数据...
print "step 2: training..."
opts = {'alpha': 0.01, 'maxIter': 20, 'optimizeType': 'smoothStocGradDescent'}
-optimalWeights = trainLogRegres(train_x, train_y, opts)
+optimalWeights = trainLogRegres(train_x, train_y, opts)
##第三步: 测试
print "step 3: testing..."
-accuracy = testLogRegres(optimalWeights, test_x, test_y)
+accuracy = testLogRegres(optimalWeights, test_x, test_y)
##第四步: 显示结果
print "step 4: show the result..."
diff --git a/src/python/05.Logistic/test/__init__.py b/src/python/05.Logistic/test/__init__.py
new file mode 100644
index 00000000..4708358a
--- /dev/null
+++ b/src/python/05.Logistic/test/__init__.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:27
+@test result:pass
+"""
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/src/python/Logistic.py b/src/python/Logistic.py
index 82ca465c..dc6bc345 100644
--- a/src/python/Logistic.py
+++ b/src/python/Logistic.py
@@ -117,7 +117,7 @@ def plotBestFit(dataArr, labelMat, weights):
def main():
project_dir = os.path.dirname(os.path.dirname(os.getcwd()))
# 1.收集并准备数据
- dataMat, labelMat = loadDataSet("%s/resources/testSet.txt" % project_dir)
+ dataMat, labelMat = loadDataSet("%s/testData/testSet.txt" % project_dir)
# print dataMat, '---\n', labelMat
# 2.训练模型, f(x)=a1*x1+b2*x2+..+nn*xn中 (a1,b2, .., nn).T的矩阵值
diff --git a/src/python/__init__.py b/src/python/__init__.py
new file mode 100644
index 00000000..b69024ca
--- /dev/null
+++ b/src/python/__init__.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version:
+@author: yangjf
+@license: ApacheCN
+@contact: highfei2011@126.com
+@site: https://github.com/apachecn/MachineLearning
+@software: PyCharm
+@file: __init__.py.py
+@time: 2017/3/4 21:34
+@test result:pass
+"""
+
+
+def func():
+ pass
+
+
+class Main():
+ def __init__(self):
+ pass
+
+
+if __name__ == '__main__':
+ pass
\ No newline at end of file
From 12b89402eeca33a569a7b9d5f1c8960a914039ff Mon Sep 17 00:00:00 2001
From: yangjifei <1714004716@qq.com>
Date: Wed, 8 Mar 2017 18:40:42 +0800
Subject: [PATCH 3/5] add python code
---
.gitignore | 90 -------------------
.idea/MachineLearning.iml | 13 ---
.idea/misc.xml | 4 -
.idea/modules.xml | 8 --
.idea/vcs.xml | 6 --
src/__init__.py | 28 ------
src/python/05.Logistic/__init__.py | 28 ------
src/python/05.Logistic/core/__init__.py | 28 ------
src/python/05.Logistic/core/com/__init__.py | 28 ------
.../05.Logistic/core/com/apachecn/__init__.py | 28 ------
.../core/com/apachecn/logistic/__init__.py | 28 ------
.../apachecn/logistic/test_logRegression.py | 2 +
src/python/05.Logistic/test/__init__.py | 27 ------
src/python/__init__.py | 28 ------
14 files changed, 2 insertions(+), 344 deletions(-)
delete mode 100644 .gitignore
delete mode 100644 .idea/MachineLearning.iml
delete mode 100644 .idea/misc.xml
delete mode 100644 .idea/modules.xml
delete mode 100644 .idea/vcs.xml
delete mode 100644 src/__init__.py
delete mode 100644 src/python/05.Logistic/__init__.py
delete mode 100644 src/python/05.Logistic/core/__init__.py
delete mode 100644 src/python/05.Logistic/core/com/__init__.py
delete mode 100644 src/python/05.Logistic/core/com/apachecn/__init__.py
delete mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
delete mode 100644 src/python/05.Logistic/test/__init__.py
delete mode 100644 src/python/__init__.py
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 6564ff4a..00000000
--- a/.gitignore
+++ /dev/null
@@ -1,90 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*,cover
-.hypothesis/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# IPython Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# dotenv
-.env
-
-# virtualenv
-venv/
-ENV/
-
-# Spyder project settings
-.spyderproject
-
-# Rope project settings
-.ropeproject
-.vscode
diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml
deleted file mode 100644
index bd0813dc..00000000
--- a/.idea/MachineLearning.iml
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 0974871b..00000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index a35ae91e..00000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7f..00000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index b69024ca..00000000
--- a/src/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:34
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/__init__.py b/src/python/05.Logistic/__init__.py
deleted file mode 100644
index b69024ca..00000000
--- a/src/python/05.Logistic/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:34
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/__init__.py b/src/python/05.Logistic/core/__init__.py
deleted file mode 100644
index d294a8ba..00000000
--- a/src/python/05.Logistic/core/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:28
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/__init__.py b/src/python/05.Logistic/core/com/__init__.py
deleted file mode 100644
index d294a8ba..00000000
--- a/src/python/05.Logistic/core/com/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:28
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/__init__.py b/src/python/05.Logistic/core/com/apachecn/__init__.py
deleted file mode 100644
index d294a8ba..00000000
--- a/src/python/05.Logistic/core/com/apachecn/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:28
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
deleted file mode 100644
index d294a8ba..00000000
--- a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:28
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
index 31cf0a57..c40c630e 100644
--- a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
+++ b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
@@ -6,6 +6,8 @@ sys.path.append("C:\Python27")
from numpy import *
+
+
import matplotlib.pyplot as plt
from logRegression import *
diff --git a/src/python/05.Logistic/test/__init__.py b/src/python/05.Logistic/test/__init__.py
deleted file mode 100644
index 4708358a..00000000
--- a/src/python/05.Logistic/test/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:27
-@test result:pass
-"""
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
diff --git a/src/python/__init__.py b/src/python/__init__.py
deleted file mode 100644
index b69024ca..00000000
--- a/src/python/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-
-"""
-@version:
-@author: yangjf
-@license: ApacheCN
-@contact: highfei2011@126.com
-@site: https://github.com/apachecn/MachineLearning
-@software: PyCharm
-@file: __init__.py.py
-@time: 2017/3/4 21:34
-@test result:pass
-"""
-
-
-def func():
- pass
-
-
-class Main():
- def __init__(self):
- pass
-
-
-if __name__ == '__main__':
- pass
\ No newline at end of file
From aad785ee666e95a87501713d2f11e25243caab2d Mon Sep 17 00:00:00 2001
From: yangjifei <1714004716@qq.com>
Date: Thu, 9 Mar 2017 08:55:59 +0800
Subject: [PATCH 4/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=80=BB=E8=BE=91?=
=?UTF-8?q?=E5=9B=9E=E5=BD=92=E6=A2=AF=E5=BA=A6=E4=B8=8B=E9=99=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../logistic/logRegression.py => logRegression01.py} | 0
.../{com/apachecn/logistic => }/test_logRegression.py | 9 ++-------
src/python/Logistic.py | 2 +-
src/python/apriori.py | 2 +-
testData/{testSet.txt => Logistic_testdata.txt} | 0
5 files changed, 4 insertions(+), 9 deletions(-)
rename src/python/05.Logistic/core/{com/apachecn/logistic/logRegression.py => logRegression01.py} (100%)
rename src/python/05.Logistic/core/{com/apachecn/logistic => }/test_logRegression.py (91%)
rename testData/{testSet.txt => Logistic_testdata.txt} (100%)
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/logRegression01.py
similarity index 100%
rename from src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py
rename to src/python/05.Logistic/core/logRegression01.py
diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py b/src/python/05.Logistic/core/test_logRegression.py
similarity index 91%
rename from src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
rename to src/python/05.Logistic/core/test_logRegression.py
index c40c630e..c7d5d50d 100644
--- a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py
+++ b/src/python/05.Logistic/core/test_logRegression.py
@@ -3,14 +3,9 @@
import os
import sys
sys.path.append("C:\Python27")
-
from numpy import *
-
-
-import matplotlib.pyplot as plt
-
-from logRegression import *
+from logRegression01 import *
"""
@version:
@author: yangjf
@@ -31,7 +26,7 @@ def loadData():
# 截取字符串至项目名:Test\
project_dir = project_dir[:project_dir.find("MachineLearning\\") + 15]
print project_dir
- fileIn = open("%s/testData/testSet.txt" % project_dir)
+ fileIn = open("%s/testData/Logistic_testdata.txt" % project_dir)
for line in fileIn.readlines():
lineArr = line.strip().split()
train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
diff --git a/src/python/Logistic.py b/src/python/Logistic.py
index dc6bc345..c59e022d 100644
--- a/src/python/Logistic.py
+++ b/src/python/Logistic.py
@@ -117,7 +117,7 @@ def plotBestFit(dataArr, labelMat, weights):
def main():
project_dir = os.path.dirname(os.path.dirname(os.getcwd()))
# 1.收集并准备数据
- dataMat, labelMat = loadDataSet("%s/testData/testSet.txt" % project_dir)
+ dataMat, labelMat = loadDataSet("%s/testData/Logistic_testdata.txt" % project_dir)
# print dataMat, '---\n', labelMat
# 2.训练模型, f(x)=a1*x1+b2*x2+..+nn*xn中 (a1,b2, .., nn).T的矩阵值
diff --git a/src/python/apriori.py b/src/python/apriori.py
index ee6af908..98112685 100644
--- a/src/python/apriori.py
+++ b/src/python/apriori.py
@@ -73,7 +73,7 @@ def apriori(dataSet, minSupport = 0.5):
def main():
# project_dir = os.path.dirname(os.path.dirname(os.getcwd()))
# 1.收集并准备数据
- # dataMat, labelMat = loadDataSet("%s/resources/testSet.txt" % project_dir)
+ # dataMat, labelMat = loadDataSet("%s/resources/Logistic_testdata.txt" % project_dir)
# 1. 加载数据
diff --git a/testData/testSet.txt b/testData/Logistic_testdata.txt
similarity index 100%
rename from testData/testSet.txt
rename to testData/Logistic_testdata.txt
From 3d21601d2cd4366638e23596ee7e37635ccc1152 Mon Sep 17 00:00:00 2001
From: yangjifei <1714004716@qq.com>
Date: Thu, 9 Mar 2017 09:07:00 +0800
Subject: [PATCH 5/5] update logistic
---
.gitignore | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
create mode 100644 .gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..0de90e72
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,92 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+.vscode
+
+