- Extract the JDK archive
tar -zxf jdk-7u67-linux-x64.tar.gz -C /opt/modules/
- Edit /etc/profile (sudo vi /etc/profile) and append:
#JAVA_HOME
export JAVA_HOME=/opt/modules/jdk1.7.0_67
export PATH=$JAVA_HOME/bin:$PATH
- Reload the profile
source /etc/profile
Verify that the JDK was installed successfully:
> $ java -version
java version "1.7.0_67"
Java(TM) SE Runtime Environment (build 1.7.0_67-b01)
Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
- Extract and install Scala
tar -zxf scala-2.10.4.tgz -C /opt/modules/
- Configure the environment variables
sudo vi /etc/profile
- Append the following, then reload with source /etc/profile:
##SCALA_HOME
export SCALA_HOME=/opt/modules/scala-2.10.4
export PATH=$SCALA_HOME/bin:$PATH
- Check that the installation succeeded
scala -version
Scala code runner version 2.10.4 -- Copyright 2002-2013, LAMP/EPFL
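Beyond scala -version, a quick sanity check in the Scala REPL (not part of the original note) confirms the interpreter works end to end:
// Launch the REPL with `scala`, then evaluate a simple expression.
val words = List("hadoop", "spark", "yarn")
words.map(_.toUpperCase).foreach(println)   // prints HADOOP, SPARK, YARN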
Reference: https://www.zybuluo.com/tsing1226/note/255903
- Extract Spark and set SPARK_HOME in /etc/profile
tar -zxf spark-1.3.0-bin-2.5.0.tar.gz -C /opt/cdh3.5.6/
export SPARK_HOME=/opt/cdh3.5.6/spark-1.3.0-bin-2.5.0
- spark-defaults.conf
spark.master spark://hadoop-senior02.grc.com:7077
- slaves
hadoop-senior02.grc.com
- spark-env.sh
JAVA_HOME=/opt/modules/jdk1.7.0_67
SCALA_HOME=/opt/modules/scala-2.10.4
HADOOP_CONF_DIR=/opt/cdh3.5.6/hadoop-2.5.0-cdh5.3.6/etc/hadoop
SPARK_MASTER_IP=hadoop-senior02.grc.com
SPARK_MASTER_PORT=7077
SPARK_MASTER_WEBUI_PORT=8080
SPARK_WORKER_CORES=2
SPARK_WORKER_MEMORY=2g
SPARK_WORKER_PORT=7078
SPARK_WORKER_WEBUI_PORT=8081
SPARK_WORKER_INSTANCES=1
- Start the master and the workers
> sbin/start-master.sh
> sbin/start-slaves.sh
- Run on the standalone cluster (uses spark.master from spark-defaults.conf)
./bin/spark-shell
- Run in local mode, where K is the number of worker threads (e.g. local[2])
./bin/spark-shell --master local[K]
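Once the shell is up, a quick smoke test (not in the original note) confirms which master you are connected to and that jobs actually run:
sc.master                          // e.g. spark://hadoop-senior02.grc.com:7077, or local[2]
sc.parallelize(1 to 100).sum()     // runs a small job across the workers; returns 5050.0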
- Verify the daemons with jps (Master and Worker should be listed) and open the web UI
> jps
> Web UI: http://hadoop-senior02.grc.com:8080/
- Create a test file whose words are separated by tabs (the job below splits on "\t")
> $ touch wordcount.txt
hadoop mapreduce
yarn spark
Hadoop MapReduce
hello like
- Create the HDFS input directory and upload the file into it
bin/hdfs dfs -mkdir -p spark/wordcount/input
bin/hdfs dfs -put /opt/datas/wordcount.txt spark/wordcount/input
val rdd=sc.textFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/input/wordcount.txt")
val kvrdd=rdd.flatMap(line=>line.split("\t")).map(word=>(word,1)).reduceByKey((a,b)=>(a+b))
kvrdd.saveAsTextFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/output")
bin/hdfs dfs -text spark/wordcount/output/p*
(MapReduce,1)
(mapreduce,1)
(hello,1)
(yarn,1)
(spark,1)
(hadoop,1)
(like,1)
(Hadoop,1)
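To take the RDD API one step further, the counts can be sorted by frequency before saving. This variant is an illustrative sketch; the output-sorted directory name is just an example:
// Sort the (word, count) pairs by descending count.
val sorted = kvrdd.sortBy(pair => pair._2, ascending = false)
sorted.take(3).foreach(println)   // peek at the top three words in the shell
sorted.saveAsTextFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/output-sorted")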
- Create the event-log directory on HDFS
bin/hdfs dfs -mkdir -p /user/grc/spark/logs
- On the server side, add to spark-env.sh
SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/logs"
- On the client side, add to spark-defaults.conf (the same settings can also be set in code; see the sketch after this list)
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/logs
spark.eventLog.compress true
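A minimal sketch of the programmatic equivalent, useful when building a SparkConf for a submitted application (the app name is an arbitrary example):
import org.apache.spark.SparkConf

// Same event-log settings as in spark-defaults.conf, set in code.
val conf = new SparkConf()
  .setAppName("EventLogDemo")   // arbitrary example name
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.dir", "hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/logs")
  .set("spark.eventLog.compress", "true")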
- Start the Spark history server
./sbin/start-history-server.sh
// create the input RDD from the file on HDFS
val rdd = sc.textFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/input/wordcount.txt")
// transform: split each line on tabs, pair every word with 1, sum the counts per word
val kvrdd = rdd.flatMap(line => line.split("\t")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
// save to a fresh directory (the job fails if the output directory already exists)
kvrdd.saveAsTextFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/output2")
// stop the Spark context
sc.stop()
- Submitting an application: a self-contained WordCount application is sketched below
- Run and test it with bin/spark-submit, then check the output directory and the history server UI
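The original note stops short of the submission details, so the following is only a minimal sketch; the object name, jar name, and output path are illustrative. Build it into a jar and launch it with something like bin/spark-submit --master spark://hadoop-senior02.grc.com:7077 --class SparkWordCount wordcount.jar:
import org.apache.spark.{SparkConf, SparkContext}

// Minimal standalone WordCount; object name and paths are examples.
object SparkWordCount {
  def main(args: Array[String]): Unit = {
    // The master URL can come from spark-submit or spark-defaults.conf.
    val conf = new SparkConf().setAppName("SparkWordCount")
    val sc = new SparkContext(conf)

    val rdd = sc.textFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/input/wordcount.txt")
    val counts = rdd.flatMap(line => line.split("\t")).map(word => (word, 1)).reduceByKey(_ + _)
    counts.saveAsTextFile("hdfs://hadoop-senior02.grc.com:8020/user/grc/spark/wordcount/output-submit")

    sc.stop()
  }
}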
> [https://databricks.com/spark/about](https://databricks.com/spark/about)
> [http://spark.apache.org/](http://spark.apache.org/)
> [https://github.com/apache/spark](https://github.com/apache/spark)