mirror of
https://github.com/apachecn/ailearning.git
synced 2026-02-03 18:34:17 +08:00
56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
#!/usr/bin/python
# coding:utf8
'''
Created on 2011-02-25
Update on 2017-06-20
@author: Peter/ApacheCN-xy/片刻
"Machine Learning in Action" (《机器学习实战》) update address: https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy
|
||
|
||
def map(key, value):
    """Emit the partial sums contributed by one training example.

    Builds the row matrix ``E = [A, -e]`` from the feature vector and writes
    the pair ``(E.T * E, E.T * D * e)`` to stdout as one tab-separated
    ``producedkey<TAB>payload`` line, where the payload is the base64-encoded
    pickle of that pair.

    Args:
        key: Class label of the training example as text, e.g. ``"-1.0"``.
            Comma-separated values are parsed, but the matrix shapes below
            only line up for a single label per call.
        value: Comma-separated feature values of the example,
            e.g. ``"3.0, 7.0, 2.0"``.

    Returns:
        None. The record is printed to stdout.

    Raises:
        ValueError: If ``key`` or ``value`` contains a non-numeric item.
    """
    # Diagonal matrix D holding the class label(s), e.g. [[-1.0]].
    labels = [float(item) for item in key.split(",")]
    D = numpy.diag(labels)

    # A is a 1 x n row matrix holding the features of this example.
    features = [float(item) for item in value.split(",")]
    A = numpy.matrix(features)

    # e is a column of ones with one entry per row of A; E appends -e to A.
    rows = len(A)
    e = numpy.matrix(numpy.ones(rows).reshape(rows, 1))
    E = numpy.matrix(numpy.append(A, -e, axis=1))

    # Pickle the pair consumed by the reducer and base64-encode it so the
    # payload cannot contain '\t' or '\n', the default Hadoop Streaming
    # separators. Decoding to text keeps the printed line free of a b'...'
    # wrapper on Python 3.
    payload = pickle.dumps((E.T * E, E.T * D * e))
    producedvalue = base64.b64encode(payload).decode("ascii")

    # NOTE: a single constant key "producedkey" routes every record to one
    # reducer, which is somewhat atypical because it gives a low degree of
    # parallelism on the reducer side.
    print("producedkey\t%s" % producedvalue)
|
||
|
||
def reduce(key, values, mu=0.1):
    """Combine the mapper's partial sums and print the solved weight vector.

    Accumulates ``I/mu + sum(E.T * E)`` and ``sum(E.T * D * e)`` over all
    values, solves the resulting linear system, and prints
    ``key<TAB>result`` where ``result`` is the solution as a nested list.

    Args:
        key: Key to echo in the output line; it is not used in the math.
        values: Iterable of ``(key, payload)`` pairs, each payload being the
            base64-encoded pickle of an ``(ETE, ETDe)`` pair as written by
            the mapper.
        mu: Regularisation parameter; ``I/mu`` is added once to the
            accumulated ``ETE`` sum.

    Returns:
        None. The result is printed to stdout.

    Raises:
        ValueError: If ``values`` yields no items, so there is nothing to
            solve.
    """
    sumETE = None
    sumETDe = None

    # The per-value key isn't used, so it is ignored with _ (underscore).
    for _, value in values:
        # SECURITY: pickle.loads executes arbitrary code embedded in the
        # payload; only feed this reducer output produced by the trusted
        # mapper.
        ETE, ETDe = pickle.loads(base64.b64decode(value))

        # Use an identity check: comparing a numpy matrix with `== None` is
        # elementwise and cannot be used as a truth value.
        if sumETE is None:
            # Start from I/mu with the correct dimensions, then add the first
            # contribution; sumETDe starts as the first contribution itself.
            sumETE = numpy.matrix(numpy.eye(ETE.shape[1]) / mu)
            sumETE += ETE
            sumETDe = ETDe
        else:
            sumETE += ETE
            sumETDe += ETDe

    if sumETE is None:
        raise ValueError("reduce() received no values for key %r" % (key,))

    # Solve sumETE * result = sumETDe directly instead of forming the inverse.
    # note: omega = result[:-1] and gamma = result[-1],
    # but the entire vector is printed as output.
    result = numpy.linalg.solve(sumETE, sumETDe)
    print("%s\t%s" % (key, str(result.tolist())))
|