更新 15章代码新格式

2026-02-13 15:26:28 +08:00 · 2017-09-15 17:03:58 +08:00
parent fc272b1fb4
commit 36127ba24b
2 changed files with 30 additions and 25 deletions
--- a/src/python/15.BigData_MapReduce/proximalSVM.py
+++ b/src/python/15.BigData_MapReduce/proximalSVM.py
@@ -1,7 +1,10 @@
+#!/usr/bin/python
+# coding:utf8
 '''
-Created on Feb 25, 2011
-
-@author: Peter
+Created on 2011-02-25
+Update  on 2017-06-20
+@author: Peter/ApacheCN-xy/片刻
+《机器学习实战》更新地址：https://github.com/apachecn/MachineLearning
 '''
 import numpy

@@ -9,28 +12,28 @@ def map(key, value):
   # input key= class for one training example, e.g. "-1.0"
   classes = [float(item) for item in key.split(",")]   # e.g. [-1.0]
   D = numpy.diag(classes)
- 
+
   # input value = feature vector for one training example, e.g. "3.0, 7.0, 2.0"
   featurematrix = [float(item) for item in value.split(",")]
   A = numpy.matrix(featurematrix)
- 
+
   # create matrix E and vector e
-   e = numpy.matrix(numpy.ones(len(A)).reshape(len(A),1))
-   E = numpy.matrix(numpy.append(A,-e,axis=1)) 
- 
+   e = numpy.matrix(numpy.ones(len(A)).reshape(len(A), 1))
+   E = numpy.matrix(numpy.append(A, -e, axis=1)) 
+
   # create a tuple with the values to be used by reducer
   # and encode it with base64 to avoid potential trouble with '\t' and '\n' used
   # as default separators in Hadoop Streaming
-   producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e) )    
- 
+   producedvalue = base64.b64encode(pickle.dumps( (E.T*E, E.T*D*e))    
+
   # note: a single constant key "producedkey" sends to only one reducer
   # somewhat "atypical" due to low degree of parallelism on reducer side
   print "producedkey\t%s" % (producedvalue)
-   
+
 def reduce(key, values, mu=0.1):
  sumETE = None
  sumETDe = None
- 
+
  # key isn't used, so ignoring it with _ (underscore).
  for _, value in values:
    # unpickle values
@@ -39,13 +42,13 @@ def reduce(key, values, mu=0.1):
      # create the I/mu with correct dimensions
      sumETE = numpy.matrix(numpy.eye(ETE.shape[1])/mu)
    sumETE += ETE
- 
+
    if sumETDe == None:
      # create sumETDe with correct dimensions
      sumETDe = ETDe
    else:
      sumETDe += ETDe
- 
+
    # note: omega = result[:-1] and gamma = result[-1]
    # but printing entire vector as output
    result = sumETE.I*sumETDe