From 0c43981c768645542bd9e039deb6163eece0b04b Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Sat, 4 Mar 2017 20:38:51 +0800 Subject: [PATCH 1/5] add the regression code of python --- .idea/MachineLearning.iml | 13 +++ .idea/misc.xml | 4 + .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 ++ docs/5.Logistic回归.md | 43 ++++++-- .../com/apachecn/logistic/logRegression.py | 97 +++++++++++++++++ .../05.Logistic/test/test_logRegression.py | 49 +++++++++ testData/testSet.txt | 100 ++++++++++++++++++ 8 files changed, 311 insertions(+), 9 deletions(-) create mode 100644 .idea/MachineLearning.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py create mode 100644 src/python/05.Logistic/test/test_logRegression.py create mode 100644 testData/testSet.txt diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml new file mode 100644 index 00000000..eeeea0a4 --- /dev/null +++ b/.idea/MachineLearning.iml @@ -0,0 +1,13 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..0974871b --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..a35ae91e --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/docs/5.Logistic回归.md b/docs/5.Logistic回归.md index e049ec53..a3da78b3 100644 --- a/docs/5.Logistic回归.md +++ b/docs/5.Logistic回归.md @@ -1,11 +1,36 @@ -# 1) ߼ع +# 5) 逻辑回归基础 - * ߼ع(Logistic Regression) - * 1.1 - * 1.2 ˵ʾ - * 1.3 ж߽ - * 1.4 ۺ - * 1.5 򻯵ijɱݶ½ - * 1.6 ߼Ż - * 1.7 ࣺһ \ No newline at end of file + * 逻辑回归(Logistic Regression) + * 5.1 分类问题 + * 在分类问题中,尝试预测的是结果是否属于某一个类(例如正确或错误)。 + * 分类问题的例子有: + * 判断一封电子邮件是否是垃圾邮件; + * 判断一次金融交易是否是欺诈等等。 + * 从二元的分类问题开始讨论: + 将因变量(dependant variable)可能属于的两个类分别称为负向类(negative class)和正向类(positive class),则因变量 + y属于{0,1} + 注:其中 0 表示负向类,1 表示正向类。 + * 5.2 假说表示 + + * 5.3 判定边界 + * 在逻辑回归中,我们预测: + 当 hθ 大于等于 0.5 时,预测 y=1 + 当 hθ 小于 0.5 时,预测 y=0 + * 根据上面绘制出的 S 形函数图像,我们知道当 + z=0时 ,g(z)=0.5 + z>0时 ,g(z)>0.5 + z<0时 ,g(z)<0.5 + 又z=θ的T次方与X的积,即: + z大于等于0时,预测:y=1 + z小于0时,预测:y=0 + * 现在假设我们有一个模型:Hθ(x)=g(θ0+θ1*x1+θ2*x2) + 并且参数θ是向量[-3 1 1]。则当-3+x1+x2大于等于0,即x1+x2大于等于3时,模型将预测y=1。 + 我们可以绘制直线x1+x2=3,这条线便是我们模型的分界线,将预测为1的区域和预测为0的区域分隔开。 + * 假使我们的数据呈现这样的分布情况,怎样的模型才能适合呢? + 因为需要用曲线才能分隔 y=0 的区域和 y=1 的区域,我们需要二次方特征: 假设参数是Hθ(x)=g(θ0+θ1*x1+θ2*x2+θ3*(x1^2)+θ4*(x2^2)+θ4*(x2^2)) + 是[-1 0 0 1 1],则我们得到的判定边界恰好是圆点在原点且半径为 1 的圆形。可以用非常复杂的模型来适应非常复杂形状的判定边界。 + * 5.4 代价函数 + * 5.5 简化的成本函数和梯度下降 + * 5.6 高级优化 + * 5.7 多类分类:一个对所有 diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py new file mode 100644 index 00000000..e20c6440 --- /dev/null +++ b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# encoding: utf-8 +from numpy import * +import matplotlib.pyplot as plt +import time + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: logRegression01.py +@time: 2017/3/3 22:03 +@test result:not pass +""" + +# sigmoid函数 +def sigmoid(inX): + return 1.0 / (1 + exp(-inX)) + +def trainLogRegres(train_x, train_y, opts): + # 计算训练时间 + startTime = time.time() + + numSamples, numFeatures = shape(train_x) + alpha = opts['alpha']; maxIter = opts['maxIter'] + weights = ones((numFeatures, 1)) + + # 通过梯度下降算法优化 + for k in range(maxIter): + if opts['optimizeType'] == 'gradDescent': # 梯度下降算法 + output = sigmoid(train_x * weights) + error = train_y - output + weights = weights + alpha * train_x.transpose() * error + elif opts['optimizeType'] == 'stocGradDescent': # 随机梯度下降 + for i in range(numSamples): + output = sigmoid(train_x[i, :] * weights) + error = train_y[i, 0] - output + weights = weights + alpha * train_x[i, :].transpose() * error + elif opts['optimizeType'] == 'smoothStocGradDescent': # 光滑随机梯度下降 + # 随机选择样本以优化以减少周期波动 + dataIndex = range(numSamples) + for i in range(numSamples): + alpha = 4.0 / (1.0 + k + i) + 0.01 + randIndex = int(random.uniform(0, len(dataIndex))) + output = sigmoid(train_x[randIndex, :] * weights) + error = train_y[randIndex, 0] - output + weights = weights + alpha * train_x[randIndex, :].transpose() * error + del(dataIndex[randIndex]) # 在一次交互期间,删除优化的样品 + else: + raise NameError('Not support optimize method type!') + + + print 'Congratulations, training complete! Took %fs!' % (time.time() - startTime) + return weights + + +#测试给定测试集的训练Logistic回归模型 +def testLogRegres(weights, test_x, test_y): + numSamples, numFeatures = shape(test_x) + matchCount = 0 + for i in xrange(numSamples): + predict = sigmoid(test_x[i, :] * weights)[0, 0] > 0.5 + if predict == bool(test_y[i, 0]): + matchCount += 1 + accuracy = float(matchCount) / numSamples + return accuracy + + +# 显示你的训练逻辑回归模型只有2-D数据可用 +def showLogRegres(weights, train_x, train_y): + # 注意:train_x和train_y是垫数据类型 + numSamples, numFeatures = shape(train_x) + if numFeatures != 3: + print "抱歉! 我不能绘制,因为你的数据的维度不是2!" + return 1 + + # 画出所有抽样数据 + for i in xrange(numSamples): + if int(train_y[i, 0]) == 0: + plt.plot(train_x[i, 1], train_x[i, 2], 'or') + elif int(train_y[i, 0]) == 1: + plt.plot(train_x[i, 1], train_x[i, 2], 'ob') + + # 画图操作 + min_x = min(train_x[:, 1])[0, 0] + max_x = max(train_x[:, 1])[0, 0] + weights = weights.getA() # 将mat转换为数组 + y_min_x = float(-weights[0] - weights[1] * min_x) / weights[2] + y_max_x = float(-weights[0] - weights[1] * max_x) / weights[2] + plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g') + plt.xlabel('X1'); plt.ylabel('X2') + #显示图像 + plt.show() \ No newline at end of file diff --git a/src/python/05.Logistic/test/test_logRegression.py b/src/python/05.Logistic/test/test_logRegression.py new file mode 100644 index 00000000..d6a8f707 --- /dev/null +++ b/src/python/05.Logistic/test/test_logRegression.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# encoding: utf-8 +import sys +sys.path.append("C:\Python27") + +from numpy import * +import matplotlib.pyplot as plt +from core.com.apachcn.logistic import logRegression + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: test_logRegression.py +@time: 2017/3/3 22:09 +""" + +def loadData(): + train_x = [] + train_y = [] + fileIn = open('testData/testSet.txt') + for line in fileIn.readlines(): + lineArr = line.strip().split() + train_x.append([1.0, float(lineArr[0]), float(lineArr[1])]) + train_y.append(float(lineArr[2])) + return mat(train_x), mat(train_y).transpose() + + +##第一步: 加载数据 +print "step 1: load data..." +train_x, train_y = loadData() +test_x = train_x; test_y = train_y + +##第二步: 训练数据... +print "step 2: training..." +opts = {'alpha': 0.01, 'maxIter': 20, 'optimizeType': 'smoothStocGradDescent'} +optimalWeights = trainLogRegres(train_x, train_y, opts) + +##第三步: 测试 +print "step 3: testing..." +accuracy = testLogRegres(optimalWeights, test_x, test_y) + +##第四步: 显示结果 +print "step 4: show the result..." +print 'The classify accuracy is: %.3f%%' % (accuracy * 100) +showLogRegres(optimalWeights, train_x, train_y) \ No newline at end of file diff --git a/testData/testSet.txt b/testData/testSet.txt new file mode 100644 index 00000000..2356ac54 --- /dev/null +++ b/testData/testSet.txt @@ -0,0 +1,100 @@ +-0.017612 14.053064 0 +-1.395634 4.662541 1 +-0.752157 6.538620 0 +-1.322371 7.152853 0 +0.423363 11.054677 0 +0.406704 7.067335 1 +0.667394 12.741452 0 +-2.460150 6.866805 1 +0.569411 9.548755 0 +-0.026632 10.427743 0 +0.850433 6.920334 1 +1.347183 13.175500 0 +1.176813 3.167020 1 +-1.781871 9.097953 0 +-0.566606 5.749003 1 +0.931635 1.589505 1 +-0.024205 6.151823 1 +-0.036453 2.690988 1 +-0.196949 0.444165 1 +1.014459 5.754399 1 +1.985298 3.230619 1 +-1.693453 -0.557540 1 +-0.576525 11.778922 0 +-0.346811 -1.678730 1 +-2.124484 2.672471 1 +1.217916 9.597015 0 +-0.733928 9.098687 0 +-3.642001 -1.618087 1 +0.315985 3.523953 1 +1.416614 9.619232 0 +-0.386323 3.989286 1 +0.556921 8.294984 1 +1.224863 11.587360 0 +-1.347803 -2.406051 1 +1.196604 4.951851 1 +0.275221 9.543647 0 +0.470575 9.332488 0 +-1.889567 9.542662 0 +-1.527893 12.150579 0 +-1.185247 11.309318 0 +-0.445678 3.297303 1 +1.042222 6.105155 1 +-0.618787 10.320986 0 +1.152083 0.548467 1 +0.828534 2.676045 1 +-1.237728 10.549033 0 +-0.683565 -2.166125 1 +0.229456 5.921938 1 +-0.959885 11.555336 0 +0.492911 10.993324 0 +0.184992 8.721488 0 +-0.355715 10.325976 0 +-0.397822 8.058397 0 +0.824839 13.730343 0 +1.507278 5.027866 1 +0.099671 6.835839 1 +-0.344008 10.717485 0 +1.785928 7.718645 1 +-0.918801 11.560217 0 +-0.364009 4.747300 1 +-0.841722 4.119083 1 +0.490426 1.960539 1 +-0.007194 9.075792 0 +0.356107 12.447863 0 +0.342578 12.281162 0 +-0.810823 -1.466018 1 +2.530777 6.476801 1 +1.296683 11.607559 0 +0.475487 12.040035 0 +-0.783277 11.009725 0 +0.074798 11.023650 0 +-1.337472 0.468339 1 +-0.102781 13.763651 0 +-0.147324 2.874846 1 +0.518389 9.887035 0 +1.015399 7.571882 0 +-1.658086 -0.027255 1 +1.319944 2.171228 1 +2.056216 5.019981 1 +-0.851633 4.375691 1 +-1.510047 6.061992 0 +-1.076637 -3.181888 1 +1.821096 10.283990 0 +3.010150 8.401766 1 +-1.099458 1.688274 1 +-0.834872 -1.733869 1 +-0.846637 3.849075 1 +1.400102 12.628781 0 +1.752842 5.468166 1 +0.078557 0.059736 1 +0.089392 -0.715300 1 +1.825662 12.693808 0 +0.197445 9.744638 0 +0.126117 0.922311 1 +-0.679797 1.220530 1 +0.677983 2.556666 1 +0.761349 10.693862 0 +-2.168791 0.143632 1 +1.388610 9.341997 0 +0.317029 14.739025 0 \ No newline at end of file From c10d797c5807995405eeb7e22f0f014e186d41c0 Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Sat, 4 Mar 2017 22:21:24 +0800 Subject: [PATCH 2/5] update some code --- .idea/MachineLearning.iml | 2 +- src/__init__.py | 28 +++++++++++++++++++ src/python/03.DecisionTree/DTSklearn.py | 6 ++++ src/python/05.Logistic/__init__.py | 28 +++++++++++++++++++ src/python/05.Logistic/core/__init__.py | 28 +++++++++++++++++++ src/python/05.Logistic/core/com/__init__.py | 28 +++++++++++++++++++ .../05.Logistic/core/com/apachecn/__init__.py | 28 +++++++++++++++++++ .../core/com/apachecn/logistic/__init__.py | 28 +++++++++++++++++++ .../com/apachecn/logistic/logRegression.py | 8 +++++- .../apachecn/logistic}/test_logRegression.py | 18 ++++++++---- src/python/05.Logistic/test/__init__.py | 27 ++++++++++++++++++ src/python/Logistic.py | 2 +- src/python/__init__.py | 28 +++++++++++++++++++ 13 files changed, 251 insertions(+), 8 deletions(-) create mode 100644 src/__init__.py create mode 100644 src/python/05.Logistic/__init__.py create mode 100644 src/python/05.Logistic/core/__init__.py create mode 100644 src/python/05.Logistic/core/com/__init__.py create mode 100644 src/python/05.Logistic/core/com/apachecn/__init__.py create mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/__init__.py rename src/python/05.Logistic/{test => core/com/apachecn/logistic}/test_logRegression.py (71%) create mode 100644 src/python/05.Logistic/test/__init__.py create mode 100644 src/python/__init__.py diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml index eeeea0a4..bd0813dc 100644 --- a/.idea/MachineLearning.iml +++ b/.idea/MachineLearning.iml @@ -2,7 +2,7 @@ - + diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..b69024ca --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:34 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/03.DecisionTree/DTSklearn.py b/src/python/03.DecisionTree/DTSklearn.py index 5155b214..2629ef73 100644 --- a/src/python/03.DecisionTree/DTSklearn.py +++ b/src/python/03.DecisionTree/DTSklearn.py @@ -6,6 +6,12 @@ from sklearn import tree from sklearn.metrics import precision_recall_curve from sklearn.metrics import classification_report from sklearn.cross_validation import train_test_split +""" +需要安装依赖模块: +pip install scikit_learn-0.18-cp27-cp27m-win_amd64.whl +非常完整的网址: +http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy +""" def createDataSet(): diff --git a/src/python/05.Logistic/__init__.py b/src/python/05.Logistic/__init__.py new file mode 100644 index 00000000..b69024ca --- /dev/null +++ b/src/python/05.Logistic/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:34 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/__init__.py b/src/python/05.Logistic/core/__init__.py new file mode 100644 index 00000000..d294a8ba --- /dev/null +++ b/src/python/05.Logistic/core/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:28 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/__init__.py b/src/python/05.Logistic/core/com/__init__.py new file mode 100644 index 00000000..d294a8ba --- /dev/null +++ b/src/python/05.Logistic/core/com/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:28 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/__init__.py b/src/python/05.Logistic/core/com/apachecn/__init__.py new file mode 100644 index 00000000..d294a8ba --- /dev/null +++ b/src/python/05.Logistic/core/com/apachecn/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:28 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py new file mode 100644 index 00000000..d294a8ba --- /dev/null +++ b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:28 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py index e20c6440..caa026be 100644 --- a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py +++ b/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py @@ -3,7 +3,13 @@ from numpy import * import matplotlib.pyplot as plt import time +''' +1、需要安装模块:pip install matplotlib-1.5.0-cp27-none-win_amd64.whl +由于直接安装会出现问题,所以建议下载whl包进行安装,下载网址: +https://pypi.python.org/pypi/matplotlib/1.5.0 +2、可以看见画出的图像 +''' """ @version: @@ -14,7 +20,7 @@ import time @software: PyCharm @file: logRegression01.py @time: 2017/3/3 22:03 -@test result:not pass +@test result: ok """ # sigmoid函数 diff --git a/src/python/05.Logistic/test/test_logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py similarity index 71% rename from src/python/05.Logistic/test/test_logRegression.py rename to src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py index d6a8f707..31cf0a57 100644 --- a/src/python/05.Logistic/test/test_logRegression.py +++ b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py @@ -1,12 +1,14 @@ #!/usr/bin/env python # encoding: utf-8 +import os import sys sys.path.append("C:\Python27") from numpy import * -import matplotlib.pyplot as plt -from core.com.apachcn.logistic import logRegression +import matplotlib.pyplot as plt + +from logRegression import * """ @version: @author: yangjf @@ -16,12 +18,18 @@ from core.com.apachcn.logistic import logRegression @software: PyCharm @file: test_logRegression.py @time: 2017/3/3 22:09 +@test result: ok """ def loadData(): train_x = [] train_y = [] - fileIn = open('testData/testSet.txt') + # 获取当前文件所在路径 + project_dir = os.getcwdu() + # 截取字符串至项目名:Test\ + project_dir = project_dir[:project_dir.find("MachineLearning\\") + 15] + print project_dir + fileIn = open("%s/testData/testSet.txt" % project_dir) for line in fileIn.readlines(): lineArr = line.strip().split() train_x.append([1.0, float(lineArr[0]), float(lineArr[1])]) @@ -37,11 +45,11 @@ test_x = train_x; test_y = train_y ##第二步: 训练数据... print "step 2: training..." opts = {'alpha': 0.01, 'maxIter': 20, 'optimizeType': 'smoothStocGradDescent'} -optimalWeights = trainLogRegres(train_x, train_y, opts) +optimalWeights = trainLogRegres(train_x, train_y, opts) ##第三步: 测试 print "step 3: testing..." -accuracy = testLogRegres(optimalWeights, test_x, test_y) +accuracy = testLogRegres(optimalWeights, test_x, test_y) ##第四步: 显示结果 print "step 4: show the result..." diff --git a/src/python/05.Logistic/test/__init__.py b/src/python/05.Logistic/test/__init__.py new file mode 100644 index 00000000..4708358a --- /dev/null +++ b/src/python/05.Logistic/test/__init__.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:27 +@test result:pass +""" + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/src/python/Logistic.py b/src/python/Logistic.py index 82ca465c..dc6bc345 100644 --- a/src/python/Logistic.py +++ b/src/python/Logistic.py @@ -117,7 +117,7 @@ def plotBestFit(dataArr, labelMat, weights): def main(): project_dir = os.path.dirname(os.path.dirname(os.getcwd())) # 1.收集并准备数据 - dataMat, labelMat = loadDataSet("%s/resources/testSet.txt" % project_dir) + dataMat, labelMat = loadDataSet("%s/testData/testSet.txt" % project_dir) # print dataMat, '---\n', labelMat # 2.训练模型, f(x)=a1*x1+b2*x2+..+nn*xn中 (a1,b2, .., nn).T的矩阵值 diff --git a/src/python/__init__.py b/src/python/__init__.py new file mode 100644 index 00000000..b69024ca --- /dev/null +++ b/src/python/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# encoding: utf-8 + + +""" +@version: +@author: yangjf +@license: ApacheCN +@contact: highfei2011@126.com +@site: https://github.com/apachecn/MachineLearning +@software: PyCharm +@file: __init__.py.py +@time: 2017/3/4 21:34 +@test result:pass +""" + + +def func(): + pass + + +class Main(): + def __init__(self): + pass + + +if __name__ == '__main__': + pass \ No newline at end of file From 12b89402eeca33a569a7b9d5f1c8960a914039ff Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Wed, 8 Mar 2017 18:40:42 +0800 Subject: [PATCH 3/5] add python cod --- .gitignore | 90 ------------------- .idea/MachineLearning.iml | 13 --- .idea/misc.xml | 4 - .idea/modules.xml | 8 -- .idea/vcs.xml | 6 -- src/__init__.py | 28 ------ src/python/05.Logistic/__init__.py | 28 ------ src/python/05.Logistic/core/__init__.py | 28 ------ src/python/05.Logistic/core/com/__init__.py | 28 ------ .../05.Logistic/core/com/apachecn/__init__.py | 28 ------ .../core/com/apachecn/logistic/__init__.py | 28 ------ .../apachecn/logistic/test_logRegression.py | 2 + src/python/05.Logistic/test/__init__.py | 27 ------ src/python/__init__.py | 28 ------ 14 files changed, 2 insertions(+), 344 deletions(-) delete mode 100644 .gitignore delete mode 100644 .idea/MachineLearning.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml delete mode 100644 src/__init__.py delete mode 100644 src/python/05.Logistic/__init__.py delete mode 100644 src/python/05.Logistic/core/__init__.py delete mode 100644 src/python/05.Logistic/core/com/__init__.py delete mode 100644 src/python/05.Logistic/core/com/apachecn/__init__.py delete mode 100644 src/python/05.Logistic/core/com/apachecn/logistic/__init__.py delete mode 100644 src/python/05.Logistic/test/__init__.py delete mode 100644 src/python/__init__.py diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6564ff4a..00000000 --- a/.gitignore +++ /dev/null @@ -1,90 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -venv/ -ENV/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject -.vscode diff --git a/.idea/MachineLearning.iml b/.idea/MachineLearning.iml deleted file mode 100644 index bd0813dc..00000000 --- a/.idea/MachineLearning.iml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 0974871b..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index a35ae91e..00000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index b69024ca..00000000 --- a/src/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:34 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/__init__.py b/src/python/05.Logistic/__init__.py deleted file mode 100644 index b69024ca..00000000 --- a/src/python/05.Logistic/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:34 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/__init__.py b/src/python/05.Logistic/core/__init__.py deleted file mode 100644 index d294a8ba..00000000 --- a/src/python/05.Logistic/core/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:28 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/__init__.py b/src/python/05.Logistic/core/com/__init__.py deleted file mode 100644 index d294a8ba..00000000 --- a/src/python/05.Logistic/core/com/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:28 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/__init__.py b/src/python/05.Logistic/core/com/apachecn/__init__.py deleted file mode 100644 index d294a8ba..00000000 --- a/src/python/05.Logistic/core/com/apachecn/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:28 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py b/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py deleted file mode 100644 index d294a8ba..00000000 --- a/src/python/05.Logistic/core/com/apachecn/logistic/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:28 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py index 31cf0a57..c40c630e 100644 --- a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py +++ b/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py @@ -6,6 +6,8 @@ sys.path.append("C:\Python27") from numpy import * + + import matplotlib.pyplot as plt from logRegression import * diff --git a/src/python/05.Logistic/test/__init__.py b/src/python/05.Logistic/test/__init__.py deleted file mode 100644 index 4708358a..00000000 --- a/src/python/05.Logistic/test/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:27 -@test result:pass -""" - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file diff --git a/src/python/__init__.py b/src/python/__init__.py deleted file mode 100644 index b69024ca..00000000 --- a/src/python/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - - -""" -@version: -@author: yangjf -@license: ApacheCN -@contact: highfei2011@126.com -@site: https://github.com/apachecn/MachineLearning -@software: PyCharm -@file: __init__.py.py -@time: 2017/3/4 21:34 -@test result:pass -""" - - -def func(): - pass - - -class Main(): - def __init__(self): - pass - - -if __name__ == '__main__': - pass \ No newline at end of file From aad785ee666e95a87501713d2f11e25243caab2d Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Thu, 9 Mar 2017 08:55:59 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E5=9B=9E=E5=BD=92=E6=A2=AF=E5=BA=A6=E4=B8=8B=E9=99=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../logistic/logRegression.py => logRegression01.py} | 0 .../{com/apachecn/logistic => }/test_logRegression.py | 9 ++------- src/python/Logistic.py | 2 +- src/python/apriori.py | 2 +- testData/{testSet.txt => Logistic_testdata.txt} | 0 5 files changed, 4 insertions(+), 9 deletions(-) rename src/python/05.Logistic/core/{com/apachecn/logistic/logRegression.py => logRegression01.py} (100%) rename src/python/05.Logistic/core/{com/apachecn/logistic => }/test_logRegression.py (91%) rename testData/{testSet.txt => Logistic_testdata.txt} (100%) diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py b/src/python/05.Logistic/core/logRegression01.py similarity index 100% rename from src/python/05.Logistic/core/com/apachecn/logistic/logRegression.py rename to src/python/05.Logistic/core/logRegression01.py diff --git a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py b/src/python/05.Logistic/core/test_logRegression.py similarity index 91% rename from src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py rename to src/python/05.Logistic/core/test_logRegression.py index c40c630e..c7d5d50d 100644 --- a/src/python/05.Logistic/core/com/apachecn/logistic/test_logRegression.py +++ b/src/python/05.Logistic/core/test_logRegression.py @@ -3,14 +3,9 @@ import os import sys sys.path.append("C:\Python27") - from numpy import * - - -import matplotlib.pyplot as plt - -from logRegression import * +from logRegression01 import * """ @version: @author: yangjf @@ -31,7 +26,7 @@ def loadData(): # 截取字符串至项目名:Test\ project_dir = project_dir[:project_dir.find("MachineLearning\\") + 15] print project_dir - fileIn = open("%s/testData/testSet.txt" % project_dir) + fileIn = open("%s/testData/Logistic_testdata.txt" % project_dir) for line in fileIn.readlines(): lineArr = line.strip().split() train_x.append([1.0, float(lineArr[0]), float(lineArr[1])]) diff --git a/src/python/Logistic.py b/src/python/Logistic.py index dc6bc345..c59e022d 100644 --- a/src/python/Logistic.py +++ b/src/python/Logistic.py @@ -117,7 +117,7 @@ def plotBestFit(dataArr, labelMat, weights): def main(): project_dir = os.path.dirname(os.path.dirname(os.getcwd())) # 1.收集并准备数据 - dataMat, labelMat = loadDataSet("%s/testData/testSet.txt" % project_dir) + dataMat, labelMat = loadDataSet("%s/testData/Logistic_testdata.txt" % project_dir) # print dataMat, '---\n', labelMat # 2.训练模型, f(x)=a1*x1+b2*x2+..+nn*xn中 (a1,b2, .., nn).T的矩阵值 diff --git a/src/python/apriori.py b/src/python/apriori.py index ee6af908..98112685 100644 --- a/src/python/apriori.py +++ b/src/python/apriori.py @@ -73,7 +73,7 @@ def apriori(dataSet, minSupport = 0.5): def main(): # project_dir = os.path.dirname(os.path.dirname(os.getcwd())) # 1.收集并准备数据 - # dataMat, labelMat = loadDataSet("%s/resources/testSet.txt" % project_dir) + # dataMat, labelMat = loadDataSet("%s/resources/Logistic_testdata.txt" % project_dir) # 1. 加载数据 diff --git a/testData/testSet.txt b/testData/Logistic_testdata.txt similarity index 100% rename from testData/testSet.txt rename to testData/Logistic_testdata.txt From 3d21601d2cd4366638e23596ee7e37635ccc1152 Mon Sep 17 00:00:00 2001 From: yangjifei <1714004716@qq.com> Date: Thu, 9 Mar 2017 09:07:00 +0800 Subject: [PATCH 5/5] update logistic --- .gitignore | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0de90e72 --- /dev/null +++ b/.gitignore @@ -0,0 +1,92 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject +.vscode + +