实现WordCount的MapReduce代码
时间: 2024-06-14 17:05:30 浏览: 105
Hadoop MapReduce 实现 WordCount
以下是一个用纯 Python 在单进程中模拟 MapReduce 流程(map → 合并 → reduce)来实现 WordCount 的例子,它不依赖 Hadoop 运行环境:
```python
# Mapper function
def mapper(text):
    """Map a piece of text to a list of ``(word, 1)`` pairs.

    The text is split on runs of whitespace with ``str.split()``, so an
    empty or whitespace-only string produces an empty list. Words are kept
    exactly as they appear (no case folding or punctuation stripping).

    Args:
        text: The string to tokenize.

    Returns:
        A list of ``(word, 1)`` tuples, one per word, in input order.
    """
    # Each word becomes a (key, value) pair whose value is the count 1.
    return [(word, 1) for word in text.split()]
# Reducer function
def reducer(word_count_list):
    """Sum the counts of identical words.

    Args:
        word_count_list: An iterable of ``(word, count)`` pairs.

    Returns:
        A dict mapping each distinct word to the sum of its counts. Keys
        appear in the order in which each word was first seen.
    """
    word_count_dict = {}
    for word, count in word_count_list:
        # Start from 0 for a word not seen yet, then accumulate its count.
        word_count_dict[word] = word_count_dict.get(word, 0) + count
    return word_count_dict
# MapReduce driver function
def word_count_map_reduce(text_list):
    """Count word occurrences across several texts.

    Runs ``mapper`` on every text, concatenates the resulting
    ``(word, 1)`` pairs into one list, and passes that list to ``reducer``.

    Args:
        text_list: An iterable of strings to count words in.

    Returns:
        Whatever ``reducer`` returns for the combined pairs: a dict mapping
        each word to its total count.
    """
    word_count_list = []
    for text in text_list:
        # Merge the mapping result of every text into a single flat list.
        word_count_list.extend(mapper(text))
    # Reduce the combined pairs into per-word totals.
    return reducer(word_count_list)
# Sample input: three short texts that share some words.
text_list = [
    "Hello world",
    "Hello Python",
    "Python is great",
]
# Run the whole map -> merge -> reduce pipeline on the sample input.
result = word_count_map_reduce(text_list)
# Output: {'Hello': 2, 'world': 1, 'Python': 2, 'is': 1, 'great': 1}
print(result)
```
阅读全文