map
[[email protected] hadoop]# cat mapper.py
#!/usr/bin/env python
"""Hadoop Streaming mapper for word count.

Reads text lines from standard input and writes one ``word<TAB>1`` record
per whitespace-separated word to standard output.
"""
import sys


def map_words(lines):
    """Yield a ``word<TAB>1`` record for every word found in *lines*.

    Args:
        lines: An iterable of text lines (for example ``sys.stdin``).

    Yields:
        One string per word, formatted as the word, a tab character and
        the literal count ``1``, without a trailing newline.
    """
    for line in lines:
        # str.split() with no arguments already ignores leading/trailing
        # whitespace and yields nothing for blank lines, so an explicit
        # strip() is unnecessary.
        for word in line.split():
            yield '%s\t%s' % (word, 1)


def main():
    """Stream records from stdin to stdout, one per line."""
    # sys.stdout.write is used instead of print so the script runs
    # unchanged under both Python 2 and Python 3.
    for record in map_words(sys.stdin):
        sys.stdout.write(record + '\n')


if __name__ == '__main__':
    main()
[[email protected] hadoop]#
reduce
[[email protected] hadoop]# vim reducer.py
#!/usr/bin/env python
"""Hadoop Streaming reducer for word count.

Reads ``word<TAB>count`` records from standard input and writes one
``word<TAB>total`` record per run of identical consecutive words. The
input is expected to arrive grouped by word (Hadoop Streaming sorts the
mapper output by key before it reaches the reducer).
"""
import sys


def reduce_counts(lines):
    """Sum the counts of consecutive records that share the same word.

    Args:
        lines: An iterable of text lines, each ``word<TAB>count``.

    Yields:
        ``(word, total)`` tuples, one per run of identical consecutive
        words, in input order. Lines without a tab separator or with a
        non-integer count are skipped.
    """
    # Start with no active word so the first record opens a new run
    # instead of comparing against an undefined name.
    current_word = None
    current_count = 0

    for line in lines:
        word, sep, raw_count = line.strip().partition('\t')
        if not sep:
            # Malformed record without a tab: ignore it rather than crash.
            continue
        try:
            count = int(raw_count)
        except ValueError:
            # Count is not a number: ignore the record.
            continue

        if word == current_word:
            current_count += count
        else:
            # The word changed: emit the finished run, then start a new one.
            if current_word is not None:
                yield current_word, current_count
            current_word = word
            current_count = count

    # Emit the final run, if any valid record was seen.
    if current_word is not None:
        yield current_word, current_count


def main():
    """Stream aggregated totals from stdin to stdout, one per line."""
    # sys.stdout.write is used instead of print so the script runs
    # unchanged under both Python 2 and Python 3.
    for word, total in reduce_counts(sys.stdin):
        sys.stdout.write('%s\t%s\n' % (word, total))


if __name__ == '__main__':
    main()
时间: 2024-10-28 21:30:59