A Java example of reading files from multiple HDFS directories with Flink
Sure. Here is a Java example of using Flink to read files from multiple HDFS directories:
```java
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class FlinkHDFSExample {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Register multiple input directories as a comma-separated list.
        Job job = Job.getInstance();
        FileInputFormat.addInputPaths(job, "/path/to/first/directory,/path/to/second/directory");

        // Wrap Hadoop's TextInputFormat so Flink can read (byte offset, line) records.
        HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
                new HadoopInputFormat<>(new TextInputFormat(), LongWritable.class, Text.class, job);

        env.createInput(hadoopInputFormat)
                // Split each line into words and emit (word, 1) pairs.
                .flatMap(new FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(Tuple2<LongWritable, Text> value,
                                        Collector<Tuple2<String, Integer>> out) throws Exception {
                        for (String word : value.f1.toString().split("\\s+")) {
                            out.collect(new Tuple2<>(word, 1));
                        }
                    }
                })
                // Group by word and sum the counts, then print the result.
                .groupBy(0)
                .sum(1)
                .print();
    }
}
```
This example shows how to read files from multiple HDFS directories in Flink and count the words they contain; the directories are passed to `FileInputFormat.addInputPaths` as a single comma-separated string, so further directories can be added to the same list.
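If the Hadoop wrapper is not needed, the same result can be obtained with Flink's built-in text input. The following is a minimal sketch under the same assumptions as the example above (the legacy DataSet API; the `hdfs:///path/to/...` paths are placeholders): each directory is read with `readTextFile` and the resulting data sets are merged with `union` before the word count runs.

```java
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class FlinkHDFSUnionExample {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read each HDFS directory separately; readTextFile enumerates all files under a directory.
        DataSet<String> first = env.readTextFile("hdfs:///path/to/first/directory");
        DataSet<String> second = env.readTextFile("hdfs:///path/to/second/directory");

        // Merge the two data sets and run the same word count as before.
        first.union(second)
             .flatMap(new Tokenizer())
             .groupBy(0)
             .sum(1)
             .print();
    }

    // Splits each line on whitespace and emits (word, 1) pairs.
    public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
            for (String word : line.split("\\s+")) {
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    }
}
```

The trade-off is that the `HadoopInputFormat` version keeps the offset information and any Hadoop-specific configuration, while the `union` version is shorter and stays entirely within Flink's own API.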