更新 15章 代码新格式

This commit is contained in:
jiangzhonglian
2017-09-15 17:03:58 +08:00
parent fc272b1fb4
commit 36127ba24b
2 changed files with 30 additions and 25 deletions

View File

@@ -1,7 +1,10 @@
#!/usr/bin/python
# coding:utf8
'''
Created on Feb 25, 2011
@author: Peter
Created on 2011-02-25
Update on 2017-06-20
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
'''
import numpy
@@ -9,28 +12,28 @@ def map(key, value):
# input key= class for one training example, e.g. "-1.0"
classes = [float(item) for item in key.split(",")] # e.g. [-1.0]
D = numpy.diag(classes)
# input value = feature vector for one training example, e.g. "3.0, 7.0, 2.0"
featurematrix = [float(item) for item in value.split(",")]
A = numpy.matrix(featurematrix)
# create matrix E and vector e
e = numpy.matrix(numpy.ones(len(A)).reshape(len(A),1))
E = numpy.matrix(numpy.append(A,-e,axis=1))
e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
E = numpy.matrix(numpy.append(A, -e, axis=1))
# create a tuple with the values to be used by reducer
# and encode it with base64 to avoid potential trouble with '\t' and '\n' used
# as default separators in Hadoop Streaming
producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e) )
producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))
# note: a single constant key "producedkey" sends to only one reducer
# somewhat "atypical" due to low degree of parallelism on reducer side
print "producedkey\t%s" % (producedvalue)
def reduce(key, values, mu=0.1):
sumETE = None
sumETDe = None
# key isn't used, so ignoring it with _ (underscore).
for _, value in values:
# unpickle values
@@ -39,13 +42,13 @@ def reduce(key, values, mu=0.1):
# create the I/mu with correct dimensions
sumETE = numpy.matrix(numpy.eye(ETE.shape[1])/mu)
sumETE += ETE
if sumETDe == None:
# create sumETDe with correct dimensions
sumETDe = ETDe
else:
sumETDe += ETDe
# note: omega = result[:-1] and gamma = result[-1]
# but printing entire vector as output
result = sumETE.I*sumETDe