[关闭]
@rickyChen 2017-03-03T17:23:40.000000Z 字数 2650 阅读 2723

Spark Java编程使用方法

Java


如何使用map

  1. JavaDStream<String> lines = messages.map(s -> s.substring(0, 5));
  1. // Function[T1, R]
  2. JavaDStream<String> lines = messages.map(new Function<String, String>() {
  3. // call(v1: T1): R
  4. public String call(String s) {
  5. return s.substring(0, 5);
  6. }
  7. });
  1. class GetLength implements Function<String, Integer> {
  2. public Integer call(String s) { return s.length(); }
  3. }
  4. JavaDStream<Integer> lineLengths = lines.map(new GetLength());

如何使用reduce

  1. JavaDStream<Integer> totalLength = lineLengths.reduce((a, b) -> a + b);
  1. // Function2[T1, T2, R]
  2. JavaDStream<Integer> totalLength = lineLengths.reduce(new Function2<Integer, Integer, Integer>() {
  3. public Integer call(Integer a, Integer b) { return a + b; }
  4. });
  1. class Sum implements Function2<Integer, Integer, Integer> {
  2. public Integer call(Integer a, Integer b) { return a + b; }
  3. }
  4. JavaDStream<Integer> totalLength = lineLengths.reduce(new Sum());

如何使用reduceByKey

  1. JavaPairDStream<String, Integer> wordCounts = wordCount.reduceByKey((a, b) -> a + b);
  1. // Function2[T1, T2, R]
  2. JavaPairDStream<String, Integer> wordCounts = wordCount.reduceByKey(
  3. new Function2<Integer, Integer, Integer>() {
  4. @Override
  5. public Integer call(Integer i1, Integer i2) {
  6. return i1 + i2;
  7. }
  8. });

如何使用flatMap

  1. JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(x.split(" ")));
  1. // FlatMapFunction[T, R]
  2. JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
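  3. // call(t: T): Iterator[R] in Spark 2.x; Spark 1.x declares Iterable[R], which is what this listing returns (on 2.x, return the list's iterator())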
  4. public Iterable<String> call(String x) {
  5. return Lists.newArrayList(SPACE.split(x));
  6. }
  7. });

如何使用mapPartitions

  1. JavaDStream<Tuple2<String, String>> mapLines = lines.mapPartitions(parts -> {
  2. List<Tuple2<String, String>> list = new ArrayList<Tuple2<String, String>>();
  3. while(parts.hasNext()){
  4. String msg = parts.next();
  5. String ip = msg.split(" ")[0];
  6. String domain = msg.split(" ")[1];
  7. list.add(new Tuple2<String, String>(ip, domain));
  8. }
  9. return list;
  10. });
  1. // FlatMapFunction[T, R]
  2. JavaDStream<Tuple2<String, String>> mapLines = lines.mapPartitions(
  3. new FlatMapFunction<Iterator<String>, Tuple2<String, String>>() {
  4. List<Tuple2<String, String>> list = new ArrayList<Tuple2<String, String>>();
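  5. // call(t: T): Iterator[R] in Spark 2.x; Spark 1.x declares Iterable[R], which is what this listing returns (on 2.x, return list.iterator())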
  6. public Iterable<Tuple2<String, String>> call(Iterator<String> s){
  7. while(s.hasNext()){
  8. String msg = s.next();
  9. String ip = msg.split(" ")[0];
  10. String domain = msg.split(" ")[1];
  11. list.add(new Tuple2<String, String>(ip, domain));
  12. }
  13. return list;
  14. }
  15. }
  16. );

如何使用mapToPair

  1. JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1));
  1. // PairFunction[T, K, V]
  2. JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
  3. new PairFunction<String, String, Integer>() {
  4. // call(t: T): (K, V)
  5. public Tuple2<String, Integer> call(String s) {
  6. return new Tuple2<String, Integer>(s, 1);
  7. }
  8. });
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注