ailearning/src/python/15.BigData_MapReduce/proximalSVM.py

#!/usr/bin/python
# coding:utf8
'''
Created on 2011-02-25
Update  on 2017-06-20
@author: Peter/ApacheCN-xy/片刻
《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
'''
import numpy

def map(key, value):
   # input key= class for one training example, e.g. "-1.0"
   classes = [float(item) for item in key.split(",")]   # e.g. [-1.0]
   D = numpy.diag(classes)

   # input value = feature vector for one training example, e.g. "3.0, 7.0, 2.0"
   featurematrix = [float(item) for item in value.split(",")]
   A = numpy.matrix(featurematrix)

   # create matrix E and vector e
   e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
   E = numpy.matrix(numpy.append(A, -e, axis=1))

   # create a tuple with the values to be used by reducer
   # and encode it with base64 to avoid potential trouble with '\t' and '\n' used
   # as default separators in Hadoop Streaming
   producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))

   # note: a single constant key "producedkey" sends to only one reducer
   # somewhat "atypical" due to low degree of parallism on reducer side
   print "producedkey\t%s" % (producedvalue)

def reduce(key, values, mu=0.1):
  sumETE = None
  sumETDe = None

  # key isn't used, so ignoring it with _ (underscore).
  for _, value in values:
    # unpickle values
    ETE, ETDe = pickle.loads(base64.b64decode(value))
    if sumETE == None:
      # create the I/mu with correct dimensions
      sumETE = numpy.matrix(numpy.eye(ETE.shape[1])/mu)
    sumETE += ETE

    if sumETDe == None:
      # create sumETDe with correct dimensions
      sumETDe = ETDe
    else:
      sumETDe += ETDe

    # note: omega = result[:-1] and gamma = result[-1]
    # but printing entire vector as output
    result = sumETE.I*sumETDe
    print "%s\t%s" % (key, str(result.tolist()))