Hadoop Ecosystem Setup (3 Nodes) - 10. Spark Configuration

# https://www.scala-lang.org/download/2.12.4.html
# ==================================================================Install Scala

tar -zxvf ~/scala-2.12.4.tgz -C /usr/local
rm -r ~/scala-2.12.4.tgz

# http://archive.apache.org/dist/spark/spark-2.3.0/

# ==================================================================Install Spark

tar -zxf ~/spark-2.3.0-bin-hadoop2.7.tgz -C /usr/local
mv /usr/local/spark-2.3.0-bin-hadoop2.7 /usr/local/spark-2.3.0
rm -r ~/spark-2.3.0-bin-hadoop2.7.tgz

# Environment variables
# ==================================================================node1 node2 node3

vi /etc/profile

# Add the following below the line "export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL"

export JAVA_HOME=/usr/java/jdk1.8.0_111
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.6
export MYSQL_HOME=/usr/local/mysql
export HBASE_HOME=/usr/local/hbase-1.2.4
export HIVE_HOME=/usr/local/hive-2.1.1
export SCALA_HOME=/usr/local/scala-2.12.4
export KAFKA_HOME=/usr/local/kafka_2.12-0.10.2.1
export FLUME_HOME=/usr/local/flume-1.8.0
export SPARK_HOME=/usr/local/spark-2.3.0

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$MYSQL_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SCALA_HOME/bin:$KAFKA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native

# ==================================================================node1

# Make the environment variables take effect
source /etc/profile

# Check the result
echo $SPARK_HOME

# ==================================================================node1

# (docker.properties is only used when running Spark on Mesos with Docker executors; harmless otherwise)
cp $SPARK_HOME/conf/docker.properties.template $SPARK_HOME/conf/docker.properties
vi $SPARK_HOME/conf/docker.properties

spark.mesos.executor.home: /usr/local/spark-2.3.0

cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties

cp $SPARK_HOME/conf/slaves.template $SPARK_HOME/conf/slaves
vi $SPARK_HOME/conf/slaves

node1
node2
node3

cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
vi $SPARK_HOME/conf/spark-defaults.conf

spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://appcluster/spark/eventslog
# Directory monitored by the History Server UI; event logging must be enabled and its directory set first (the two settings above)
spark.history.fs.logDirectory    hdfs://appcluster/spark
spark.eventLog.compress          true

# To let the YARN ResourceManager link to the Spark History Server, add a line such as
# (the value is host:port without a scheme; 18080 is the default History Server port):
# spark.yarn.historyServer.address  node1:18080
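# Optional: the directory in spark.history.fs.logDirectory is read by the Spark History Server,
# which is started separately once HDFS is up; a minimal sketch (default history UI port is 18080):
# $SPARK_HOME/sbin/start-history-server.sh
# http://node1:18080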

cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
vi $SPARK_HOME/conf/spark-env.sh

export SPARK_MASTER_PORT=7077        # Port for submitting jobs; default is 7077
export SPARK_MASTER_WEBUI_PORT=8070  # Master web UI port; default 8080, changed to 8070 here
export SPARK_WORKER_CORES=1          # Number of cores each worker node may use
export SPARK_WORKER_MEMORY=1g        # Amount of memory each worker node may use
export SPARK_WORKER_PORT=7078        # Port of each worker node (optional)
export SPARK_WORKER_WEBUI_PORT=8071  # Web UI port of each worker node (optional)
export SPARK_WORKER_INSTANCES=1      # Number of worker instances per node (optional)

export JAVA_HOME=/usr/java/jdk1.8.0_111
export SCALA_HOME=/usr/local/scala-2.12.4
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.6   # keep consistent with /etc/profile
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_PID_DIR=/usr/local/spark-2.3.0/pids
export SPARK_LOCAL_DIR=/usr/local/spark-2.3.0/tmp
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node1:2181,node2:2181,node3:2181 -Dspark.deploy.zookeeper.dir=/spark"
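
# The SPARK_DAEMON_JAVA_OPTS line enables standalone-master HA: each master registers in ZooKeeper
# under the /spark znode and leader election leaves one master ALIVE while the others stand by.
# Once both masters are running, a quick sanity check (assuming curl is installed; the state is also
# visible on the web UIs at port 8070):
# curl -sL http://node1:8070 | grep -o 'ALIVE\|STANDBY'
# curl -sL http://node2:8070 | grep -o 'ALIVE\|STANDBY'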

vi $SPARK_HOME/sbin/start-master.sh

SPARK_MASTER_WEBUI_PORT=8070

# Copy hdfs-site.xml so Spark can resolve the appcluster HA nameservice
cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $SPARK_HOME/conf/

# Suppress the noisy NativeCodeLoader warning that spark-shell prints at startup
vi $HADOOP_HOME/etc/hadoop/log4j.properties

log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node2:$HADOOP_HOME/etc/hadoop/
scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node3:$HADOOP_HOME/etc/hadoop/

# ==================================================================node1

scp -r $SPARK_HOME node2:/usr/local/
scp -r $SPARK_HOME node3:/usr/local/

# ==================================================================node2 node3

# Make the environment variables take effect
source /etc/profile

# Check the result
echo $SPARK_HOME

# Start

# ==================================================================node1 node2 node3
# Start ZooKeeper and HDFS first
zkServer.sh start
zkServer.sh status

# ==================================================================node1
zkCli.sh
create /spark ""
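
# Optionally verify the znode, then leave the ZooKeeper client:
ls /
quit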

$HADOOP_HOME/sbin/start-all.sh

$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ==================================================================node2
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager

# Start Spark

# ==================================================================node1
$SPARK_HOME/sbin/start-master.sh

$SPARK_HOME/sbin/start-slaves.sh

# ==================================================================node2
$SPARK_HOME/sbin/start-master.sh
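
# Quick check of the Spark processes on each node (jps ships with the JDK); expected alongside the
# Hadoop/ZooKeeper daemons: Master + Worker on node1 and node2, Worker on node3. Since node1's
# master started first it is typically ALIVE and node2's is STANDBY:
jps
# http://node1:8070
# http://node2:8070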

# ==================================================================node1
# Check the safe mode status:
hdfs dfsadmin -safemode get
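# Typical reply when HDFS is ready (exact wording also includes the NameNode address):
# Safe mode is OFF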

# Enter safe mode
# hdfs dfsadmin -safemode enter

# Leave safe mode
# hdfs dfsadmin -safemode leave

hdfs dfs -mkdir -p /spark/eventslog

$SPARK_HOME/bin/spark-shell

# http://node1:4040
# http://node1:8070

> :quit
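
# Optionally, the cluster can also be exercised with the bundled SparkPi example before the
# word-count test below. A sketch, assuming the examples jar shipped with the
# spark-2.3.0-bin-hadoop2.7 distribution (built for Scala 2.11); with HA both masters can be
# listed in the master URL:
$SPARK_HOME/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://node1:7077,node2:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.3.0.jar 100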

# test

# Make sure the output directory does not already exist on HDFS
# hdfs dfs -mkdir -p /spark/output
# hdfs dfs -rmr /spark/output

vi ~/sparkdata.txt

hello man
what are you doing now
my running
hello
kevin
hi man

hdfs dfs -mkdir -p /usr/file/input

hdfs dfs -put ~/sparkdata.txt /usr/file/input
hdfs dfs -ls /usr/file/input

val file1 = sc.textFile("file:///root/sparkdata.txt")
val count1=file1.flatMap(line => line.split(" ")).map(word => (word,1)).reduceByKey(_+_)
count1.saveAsTextFile("hdfs://node1:8020/spark/output1")
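// Note: the file:// variant above is read by the executors, so /root/sparkdata.txt must exist
// on every worker node; the hdfs:// variant below does not have that requirement.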

val file=sc.textFile("hdfs://appcluster/usr/file/input/sparkdata.txt")
val count=file.flatMap(line => line.split(" ")).map(word => (word,1)).reduceByKey(_+_)
count.saveAsTextFile("hdfs://node1:8020/spark/output")

hdfs dfs -ls /spark/output

hdfs dfs -cat /spark/output/part-00000
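
# With the input file above, each output line is a (word,count) tuple; ordering and the split
# across part files may vary, e.g.:
# (hello,2)
# (man,2)
# (what,1)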

# Stop the processes that were started

# ==================================================================node1
$SPARK_HOME/sbin/stop-slaves.sh

$SPARK_HOME/sbin/stop-master.sh

$HADOOP_HOME/sbin/stop-all.sh

# ==================================================================node1 node2 node3
# Stop ZooKeeper
zkServer.sh stop

# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

shutdown -h now
# Take a VM snapshot: spark

Original source: https://www.cnblogs.com/zcf5522/p/9775651.html
