mrMeanMapper 程序清单 15-1
import sys
from numpy import mat, mean, power
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of text lines, e.g. ``sys.stdin`` or a list of str.

    Yields:
        str: The line after ``rstrip()`` (newline and trailing blanks dropped).
    """
    # The loop and the yield must be nested inside the function; with every
    # line flattened to column 0 the definition is a syntax error.
    for line in file:
        yield line.rstrip()
# Parse every non-blank stdin line as one float sample.  Blank lines (e.g. a
# stray empty line at the end of the file) are skipped instead of crashing
# float().  The list is named `values` so the builtin input() is not shadowed.
values = [float(line) for line in read_input(sys.stdin) if line.strip()]
numInputs = len(values)
if numInputs:
    sqValues = power(values, 2)
    # Emit one record: sample count, mean of all values, mean of all squared
    # values.  The fields are TAB-separated because the reducer parses each
    # record with line.split('\t'); print()'s default separator is a space,
    # which would leave the reducer with a single unsplit field.
    print(numInputs, mean(values), mean(sqValues), sep='\t')
# With no samples nothing is emitted: a "0 nan nan" record would turn every
# total accumulated downstream into NaN.
print("report: still alive", file=sys.stderr)
reducer 程序清单 15-2
import sys
from numpy import mat, mean, power
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of text lines, e.g. ``sys.stdin`` or a list of str.

    Yields:
        str: The line after ``rstrip()`` (newline and trailing blanks dropped).
    """
    # The loop and the yield must be nested inside the function; with every
    # line flattened to column 0 the definition is a syntax error.
    for line in file:
        yield line.rstrip()
# Each non-blank stdin line is one mapper record with three TAB-separated
# fields: sample count, mean, mean of squared values.  Blank lines are
# skipped so they do not crash the float() parsing below.
mapperOut = [line.split('\t') for line in read_input(sys.stdin) if line.strip()]

# Accumulate the total number of samples, the overall sum and the overall
# sum of squares across all mapper records.
cumVal = 0.0
cumSumSq = 0.0
cumN = 0.0
for instance in mapperOut:
    nj = float(instance[0])
    cumN += nj
    # Multiplying each partial mean by its sample count recovers the partial
    # sum, so records of different sizes are weighted correctly.
    cumVal += nj * float(instance[1])
    cumSumSq += nj * float(instance[2])

if cumN:
    # Named overallMean (not `mean`) so the imported numpy.mean is not rebound.
    overallMean = cumVal / cumN
    meanSq = cumSumSq / cumN
    # Output size, mean, mean(square values), TAB-separated — the same record
    # layout this script reads on its input.
    print(cumN, overallMean, meanSq, sep='\t')
# With no records (cumN == 0) nothing is printed; dividing would raise
# ZeroDivisionError.
print("report: still alive", file=sys.stderr)
测试方法 linux下
cat inputFile.txt | python mapper.py | sort | python reducer.py > outputFile.txt
用户评论