public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private Map<String, Integer> counts = new HashMap<>(); @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable value : values) { sum += value.get(); } counts.put(key.toString(), sum); } @Override protected void cleanup(Context context) throws IOException, InterruptedException { // 对 counts 中的键值对按照要求排序 List<Map.Entry<String, Integer>> list = new ArrayList<>(counts.entrySet()); Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() { @Override public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) { if (o1.getValue() .equals(o2.getValue()) ) { return o2.getKey().compareTo(o1.getKey()); } else { return o2.getValue().compareTo(o1.getValue()); } } }); // 将排序后的结果写入输出文件中 for (Map.Entry<String, Integer> entry : list) { Text word = new Text(entry.getKey()); IntWritable count = new IntWritable(entry.getValue()); context.write(word, count); } } }将这段代码修改,在原来的基础上去除非字母字符和把大写字母变成小写
时间: 2024-02-10 16:12:14 浏览: 146
可以在 reduce() 方法中对 key 进行处理:先对该 key 的计数求和,再去除 key 中的非字母字符并将大写字母转换成小写字母,最后把处理后的 key 及其计数存入 counts。修改后的代码如下:
```
/**
 * Reducer that sums the counts of each word, normalizes the word (ASCII
 * letters only, lower-cased) and emits all words from {@link #cleanup},
 * sorted by count in descending order and, for equal counts, by word in
 * descending order.
 *
 * <p>Totals are kept in an in-memory map until {@code cleanup} runs, so
 * nothing is written to the context from {@code reduce} itself.
 */
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /** Matches every character that is not an ASCII letter. */
    private static final java.util.regex.Pattern NON_LETTERS =
            java.util.regex.Pattern.compile("[^a-zA-Z]");

    /** Running totals keyed by normalized word; written out in cleanup(). */
    private final Map<String, Integer> counts = new HashMap<>();

    /**
     * Sums {@code values} for {@code key} and adds the sum to the total of
     * the normalized key.
     *
     * @param key     raw word as received by the reducer
     * @param values  partial counts for {@code key}
     * @param context reducer context (not written to here)
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }

        // Strip non-letter characters and lower-case the rest. Locale.ROOT keeps
        // the result independent of the JVM's default locale.
        String processedKey = NON_LETTERS.matcher(key.toString()).replaceAll("")
                .toLowerCase(java.util.Locale.ROOT);

        // A key without any letters normalizes to "", which is not a word.
        if (processedKey.isEmpty()) {
            return;
        }

        // Different raw keys ("Hello", "hello,") can normalize to the same word,
        // so accumulate into the existing total instead of overwriting it.
        Integer previous = counts.get(processedKey);
        counts.put(processedKey, previous == null ? sum : previous + sum);
    }

    /**
     * Sorts the accumulated totals and writes one (word, count) pair per
     * normalized word to {@code context}.
     *
     * @param context reducer context that receives the sorted output
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Sort the entries of counts: higher count first, ties by word descending.
        List<Map.Entry<String, Integer>> list = new ArrayList<>(counts.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                if (o1.getValue().equals(o2.getValue())) {
                    return o2.getKey().compareTo(o1.getKey());
                } else {
                    return o2.getValue().compareTo(o1.getValue());
                }
            }
        });

        // Write the sorted result to the job output.
        for (Map.Entry<String, Integer> entry : list) {
            Text word = new Text(entry.getKey());
            IntWritable count = new IntWritable(entry.getValue());
            context.write(word, count);
        }
    }
}
```
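这样就在 reduce() 中完成了对 key 的预处理:在写入 counts 之前去除了非字母字符并将大写字母转换为小写字母。需要注意,shuffle 阶段是按原始 key 分组的,若希望规范化后相同的单词在进入 reduce() 之前就被归并到一起,应在 Mapper 输出之前做同样的处理。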
阅读全文