Common Spark input sources


A roundup of the input sources Spark commonly reads from.


text
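textFile reads a plain text file (a local path or an HDFS URI) line by line; each line becomes one element of the resulting RDD, and the second argument is the minimum number of partitions to split the file into.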

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("text_count");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Each line of the file becomes one RDD element; 3 is the minimum number of partitions.
    JavaRDD<String> tRDD = sc.textFile("/Users/kuiq.wang/Desktop/upload/yd_conver.txt", 3);

    long res = tRDD.count();
    log.info("text_count's result is [{}]", res); // assumes an SLF4J logger, e.g. Lombok's @Slf4j
}
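setMaster is not called in code, so the master URL is expected to be supplied at submit time via spark-submit.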

collect
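parallelize builds an RDD from an in-memory Java collection, which is handy for tests and small datasets: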

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("collectRDD");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Distribute the local list across the cluster as an RDD.
    JavaRDD<String> collectRDD = jsc.parallelize(Arrays.asList("one", "two", "three"));
    long res = collectRDD.count();

    log.info("collect rdd res is [{}]", res);
}

elasticsearch

setup
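Reading from Elasticsearch relies on the elasticsearch-hadoop connector; add its Spark artifact to the project's pom.xml: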

<!--spark-es-->
<dependency>
  <groupId>org.elasticsearch</groupId>
  <artifactId>elasticsearch-spark-20_2.11</artifactId>
  <version>6.7.2</version>
</dependency>
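With the dependency in place, JavaEsSpark.esRDD reads an index as a pair RDD of document IDs to source-field maps. The method below takes the node address (e.g. data1:9200) and index name (e.g. funnylog_test) as parameters instead of hard-coding them: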
import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;

public static void readES(String url, String index) {
    // es.nodes tells the connector where the cluster lives, e.g. "data1:9200".
    SparkConf conf = new SparkConf().setAppName("es_count").set("es.nodes", url);
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // esRDD returns (document id, source map) pairs for the given index, e.g. "funnylog_test".
    JavaPairRDD<String, Map<String, Object>> esRDD = JavaEsSpark.esRDD(jsc, index);
    long res = esRDD.count();
    System.out.println("res is: " + res);
}

hbase

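A minimal sketch of scanning an HBase table as an RDD through the Hadoop InputFormat API, assuming the hbase-server artifact (which provides TableInputFormat) is on the classpath; the ZooKeeper quorum (data1:2181) and table name (test_table) below are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public static void readHBase() {
    SparkConf conf = new SparkConf().setAppName("hbase_count");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Standard HBase client configuration; quorum and table name are placeholders.
    Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.set("hbase.zookeeper.quorum", "data1:2181");
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "test_table");

    // Each row of the table comes back as a (row key, Result) pair.
    JavaPairRDD<ImmutableBytesWritable, Result> hbaseRDD =
            jsc.newAPIHadoopRDD(hbaseConf, TableInputFormat.class,
                    ImmutableBytesWritable.class, Result.class);

    long res = hbaseRDD.count();
    System.out.println("hbase_count's result is: " + res);
}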

kafka
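A minimal sketch of consuming a Kafka topic as a direct DStream, assuming the spark-streaming-kafka-0-10_2.11 artifact is on the classpath; the broker address (data1:9092), topic (test_topic), and group id (spark_demo) are placeholders.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public static void readKafka() throws InterruptedException {
    SparkConf conf = new SparkConf().setAppName("kafka_count");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    // Consumer settings; broker, group id, and offset policy are placeholders.
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "data1:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "spark_demo");
    kafkaParams.put("auto.offset.reset", "latest");

    // Direct stream: each Kafka partition maps to one Spark partition.
    JavaInputDStream<ConsumerRecord<String, String>> stream =
            KafkaUtils.createDirectStream(jssc,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(
                            Collections.singletonList("test_topic"), kafkaParams));

    // Print the number of records in each 5-second micro-batch.
    stream.count().print();

    jssc.start();
    jssc.awaitTermination();
}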

