#!/usr/bin/python
# coding:utf8
"""Count the characters, words, and lines of the input with an mrjob job."""
from mrjob.job import MRJob


class MRWordCountUtility(MRJob):
    """Job that totals characters, words, and lines across its input.

    The mapper emits nothing per line; it only accumulates running totals
    on the instance.  ``mapper_final`` then emits those totals under the
    keys ``'chars'``, ``'words'`` and ``'lines'``, and the reducer sums the
    values it receives for each key.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``MRJob`` and zero the three counters."""
        super(MRWordCountUtility, self).__init__(*args, **kwargs)
        self.chars = 0
        self.words = 0
        self.lines = 0

    def mapper(self, _, line):
        """Add one input line's statistics to the running totals.

        Args:
            _: Input key; ignored.
            line: One line of input text.

        Yields:
            Nothing.  Totals are emitted later by ``mapper_final``.
        """
        # The unreachable ``yield`` makes this function a generator even
        # though it never produces a value.
        if False:
            yield

        # +1 for the newline, which is not counted by len(line)
        # (presumably stripped before the line reaches the mapper).
        self.chars += len(line) + 1
        # split() with no separator never returns empty or whitespace-only
        # tokens, so its length is already the word count.
        self.words += len(line.split())
        self.lines += 1

    def mapper_final(self):
        """Yield the accumulated totals as ``(name, count)`` pairs."""
        yield 'chars', self.chars
        yield 'words', self.words
        yield 'lines', self.lines

    def reducer(self, key, values):
        """Yield ``key`` paired with the sum of all ``values`` for it."""
        yield key, sum(values)


if __name__ == '__main__':
    MRWordCountUtility.run()