Files
ailearning/src/python/15.BigData_MapReduce/proximalSVM.py
2017-09-15 17:03:58 +08:00

56 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/python
# coding:utf8
'''
Created on 2011-02-25
Update on 2017-06-20
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址https://github.com/apachecn/MachineLearning
'''
import base64
import pickle

import numpy
def map(key, value):
    """Emit the partial proximal-SVM sums for one training example.

    Builds E = [A, -e] from the feature row A, then prints one
    tab-separated record ``producedkey\\t<payload>`` to stdout, where the
    payload is the base64-encoded pickle of the tuple (E.T*E, E.T*D*e).

    Args:
        key: class label of one training example as text, e.g. "-1.0".
        value: comma-separated feature vector of the same example,
            e.g. "3.0, 7.0, 2.0".
    """
    # D is the diagonal matrix of class labels (1x1 for a single example).
    labels = [float(item) for item in key.split(",")]
    D = numpy.diag(labels)

    # A is the feature matrix: one row per example.
    features = [float(item) for item in value.split(",")]
    A = numpy.matrix(features)

    # e is a column of ones with one entry per row of A; E = [A, -e].
    n_rows = len(A)
    e = numpy.matrix(numpy.ones(n_rows).reshape(n_rows, 1))
    E = numpy.matrix(numpy.append(A, -e, axis=1))

    # Pickle the tuple consumed by the reducer and base64-encode it to avoid
    # potential trouble with '\t' and '\n', the default separators in Hadoop
    # Streaming. b64encode returns bytes on Python 3, so decode to text
    # before formatting (otherwise the record would contain "b'...'").
    payload = pickle.dumps((E.T * E, E.T * D * e))
    producedvalue = base64.b64encode(payload).decode("ascii")

    # note: a single constant key "producedkey" sends to only one reducer,
    # somewhat "atypical" due to low degree of parallelism on reducer side
    print("producedkey\t%s" % producedvalue)
def reduce(key, values, mu=0.1):
    """Sum the mapper's partial matrices and print the solved vector.

    Accumulates I/mu + sum(E.T*E) and sum(E.T*D*e) over all records, then
    prints ``<key>\\t<result>`` to stdout, where result is
    (I/mu + sum E.T*E)^-1 * sum(E.T*D*e) rendered as a nested list.

    Args:
        key: reducer key; only echoed in the output record.
        values: iterable of (key, value) pairs where value is the
            base64-encoded pickle of a tuple (E.T*E, E.T*D*e).
        mu: divisor of the identity matrix that seeds the E.T*E sum.

    Prints nothing when ``values`` is empty.
    """
    sumETE = None
    sumETDe = None

    # The per-record key isn't used, so it is ignored with _ (underscore).
    for _, value in values:
        # NOTE(security): pickle.loads can execute arbitrary code; only feed
        # this reducer with output produced by the trusted mapper.
        ETE, ETDe = pickle.loads(base64.b64decode(value))

        # Use identity checks: `matrix == None` compares element-wise and
        # makes the `if` ambiguous once the accumulator holds a matrix.
        if sumETE is None:
            # create the I/mu with correct dimensions
            sumETE = numpy.matrix(numpy.eye(ETE.shape[1]) / mu)
        sumETE += ETE

        if sumETDe is None:
            # the first record fixes the dimensions of the running sum
            sumETDe = ETDe
        else:
            sumETDe += ETDe

    if sumETE is None:
        # No records were received, so there is nothing to solve or emit.
        return

    # note: omega = result[:-1] and gamma = result[-1]
    # but printing entire vector as output
    result = sumETE.I * sumETDe
    print("%s\t%s" % (key, str(result.tolist())))