package step1;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.*;

/**
 * Spark exercise: aggregate student scores per city key and print the totals.
 */
public class JStudent {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("JStudent");
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<String, Integer>> list = Arrays.asList(
                new Tuple2<>("bj", 88), new Tuple2<>("sh", 67), new Tuple2<>("gz", 92),
                new Tuple2<>("bj", 94), new Tuple2<>("sh", 85), new Tuple2<>("gz", 95),
                new Tuple2<>("bj", 72), new Tuple2<>("sh", 69), new Tuple2<>("gz", 98));
        /********** Begin **********/
        // Step 1: create the RDD. parallelizePairs yields a JavaPairRDD directly,
        // which exposes reduceByKey (a plain parallelize would return a
        // JavaRDD<Tuple2<...>> that does not).
        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(list);
        // Step 2: sum all scores that share the same city key.
        JavaPairRDD<String, Integer> result = rdd.reduceByKey(Integer::sum);
        // Step 3: print each aggregated (city, total) pair.
        result.foreach(t -> System.out.println(t._1 + ": " + t._2));
        /********** End **********/
        sc.stop();
    }
}
时间: 2023-08-07 08:03:42 浏览: 40
下面是完整的代码实现(注意:创建 RDD 时应使用 `parallelizePairs` 而不是 `parallelize`,否则得到的 `JavaRDD` 上没有 `reduceByKey` 方法,无法通过编译),实现了对相同城市的学生分数进行聚合,并输出聚合结果:
```
package step1;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
import java.util.List;
/**
 * Sums student scores per city ("bj", "sh", "gz") with Spark and prints one
 * "city: total" line per key.
 */
public class JStudent {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("JStudent");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Diamond operator instead of raw Tuple2 — avoids unchecked warnings.
        List<Tuple2<String, Integer>> list = Arrays.asList(
                new Tuple2<>("bj", 88), new Tuple2<>("sh", 67), new Tuple2<>("gz", 92),
                new Tuple2<>("bj", 94), new Tuple2<>("sh", 85), new Tuple2<>("gz", 95),
                new Tuple2<>("bj", 72), new Tuple2<>("sh", 69), new Tuple2<>("gz", 98));
        // Create the RDD. BUG FIX: sc.parallelize(list) returns a
        // JavaRDD<Tuple2<String,Integer>>, which has no reduceByKey method and
        // therefore does not compile. parallelizePairs returns a JavaPairRDD.
        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(list);
        // Aggregate: sum the values sharing the same key.
        JavaPairRDD<String, Integer> result = rdd.reduceByKey(Integer::sum);
        // Output each (city, total) pair.
        result.foreach(t -> System.out.println(t._1 + ": " + t._2));
        sc.stop();
    }
}
```
运行结果为:
```
bj: 254
sh: 221
gz: 285
```