How to install Hadoop 2.7.3 cluster on CentOS 7.3

#############################
#ENV
#spark01 192.168.51.6
#spark02 192.168.51.18
#spark03 192.168.51.19
#spark04 192.168.51.21
#spark05 192.168.51.24
############################
##Raise the open-file and process limits on every node
echo "ulimit -SHn 204800" >> /etc/rc.local
echo "ulimit -SHu 204800" >> /etc/rc.local
cat >> /etc/security/limits.conf << EOF
*          soft   nofile    204800
*          hard   nofile    204800
*          soft   nproc     204800
*          hard   nproc     204800
EOF
##Disable IPv6 and tune kernel parameters on every node
echo 'net.ipv6.conf.all.disable_ipv6 = 1' >> /etc/sysctl.conf
echo 'net.ipv6.conf.default.disable_ipv6 = 1' >> /etc/sysctl.conf
echo 'vm.swappiness = 0' >> /etc/sysctl.conf
sysctl -p
echo 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' >> /etc/rc.local
chmod +x /etc/rc.d/rc.local
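##Optional sanity check: after a re-login (or reboot) confirm the limits and kernel settings took effect
ulimit -Sn && ulimit -Hn
sysctl net.ipv6.conf.all.disable_ipv6 vm.swappiness
cat /sys/kernel/mm/transparent_hugepage/defrag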
#1)Edit the /etc/hosts file on every node
cat >/etc/hosts<<EOF
127.0.0.1   localhost
192.168.51.6    spark01
192.168.51.18   spark02
192.168.51.19   spark03
192.168.51.21   spark04
192.168.51.24   spark05
EOF
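##Optional check: make sure every hostname resolves and is reachable from this node
for h in spark01 spark02 spark03 spark04 spark05; do
  ping -c 1 $h > /dev/null && echo "$h ok" || echo "$h unreachable"
done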
#2)Install the JDK on every node
wget http://god.nongdingbang.net/downloads/auto_jdk.sh
sh auto_jdk.sh
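##Quick verification (auto_jdk.sh is a downloaded helper script, so its exact behavior is assumed here):
##Java should be on the PATH and JAVA_HOME exported after it runs
java -version
echo $JAVA_HOME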
#3)Create the hadoop user on every node
groupadd hadoop -g 700
useradd hadoop -g hadoop -u 700
echo "hadoop123"|passwd --stdin hadoop
echo 'hadoop ALL=(ALL) NOPASSWD: ALL' >>/etc/sudoers
#4)Set ownership of the /opt directory on every node
chown -R hadoop.hadoop /opt/
#5)Set up key-based (passwordless) login:
#Just do it on spark01
su - hadoop
ssh-keygen
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@spark01
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@spark02
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@spark03
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@spark04
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@spark05
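##Optional check: every node should now answer without a password prompt
for h in spark01 spark02 spark03 spark04 spark05; do
  ssh -o BatchMode=yes hadoop@$h hostname
done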
#6)Install Hadoop on spark01 and propagate /opt/hadoop-2.7.3 to the other nodes:
cd /home/tools
sudo wget http://god.nongdingbang.net/downloads/hadoop-2.7.3.tar.gz
sudo tar zxvf hadoop-2.7.3.tar.gz -C /opt/
sudo chown -R hadoop.hadoop /opt/hadoop-2.7.3
scp -r /opt/hadoop-2.7.3 hadoop@spark02:/opt
scp -r /opt/hadoop-2.7.3 hadoop@spark03:/opt
scp -r /opt/hadoop-2.7.3 hadoop@spark04:/opt
scp -r /opt/hadoop-2.7.3 hadoop@spark05:/opt
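##Optional check: confirm the tree arrived on the other nodes and is owned by hadoop
for h in spark02 spark03 spark04 spark05; do
  ssh hadoop@$h "ls -ld /opt/hadoop-2.7.3"
done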
#7)Create /etc/profile.d/hadoop.sh on every node
sudo su -
cat >/etc/profile.d/hadoop.sh <<EOF
export HADOOP_PREFIX=/opt/hadoop-2.7.3
export HADOOP_HOME=\$HADOOP_PREFIX
export HADOOP_COMMON_HOME=\$HADOOP_PREFIX
export HADOOP_CONF_DIR=\$HADOOP_PREFIX/etc/hadoop
export HADOOP_HDFS_HOME=\$HADOOP_PREFIX
export HADOOP_MAPRED_HOME=\$HADOOP_PREFIX
export HADOOP_YARN_HOME=\$HADOOP_PREFIX
export PATH=\$PATH:\$HADOOP_PREFIX/sbin:\$HADOOP_PREFIX/bin
EOF
source /etc/profile.d/hadoop.sh
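##Quick check that the environment is picked up (assumes JAVA_HOME was set by the JDK step)
hadoop version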
#8)Edit /opt/hadoop-2.7.3/etc/hadoop/core-site.xml
##set up NameNode URI on every node:
######################################################
cat >/opt/hadoop-2.7.3/etc/hadoop/core-site.xml<<EOF
<configuration>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://spark01:9000</value>
</property>
<property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-2.7.3/tmp/</value>
</property>
<property>
   <name>hadoop.proxyuser.hadoop.hosts</name>
   <value>*</value>
</property>
<property>  
   <name>hadoop.proxyuser.hadoop.groups</name>
   <value>*</value>
</property>
</configuration>
EOF
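##Optional extra step: hadoop.tmp.dir above points at /opt/hadoop-2.7.3/tmp, which does not exist yet;
##creating it up front (as the hadoop user, on every node) avoids relying on the daemons to create it
mkdir -p /opt/hadoop-2.7.3/tmp
chown -R hadoop.hadoop /opt/hadoop-2.7.3/tmp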
#9)Create the HDFS NameNode and DataNode data dirs on every node and change ownership
mkdir -p /opt/storage/{datanode,namenode}
chown -R hadoop.hadoop /opt/storage
#10)Edit /opt/hadoop-2.7.3/etc/hadoop/hdfs-site.xml on every node - set up HDFS storage:
###############################################
cat >/opt/hadoop-2.7.3/etc/hadoop/hdfs-site.xml<<EOF
<configuration>
<property>
  <name>dfs.replication</name>
  <value>3</value>
</property>
<property>
  <name>dfs.permissions.enabled</name>
  <value>false</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/opt/storage/datanode</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/opt/storage/namenode</value>
</property>
<property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>spark01:50090</value>
</property>
<property>
  <name>dfs.namenode.http-address</name>
  <value>spark01:50070</value>
</property>
<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>
</configuration>
EOF
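##Optional sanity check: hdfs getconf reads the config without starting any daemon,
##so typos in property names or values show up early
hdfs getconf -confKey fs.defaultFS
hdfs getconf -confKey dfs.replication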
#11)Edit /opt/hadoop-2.7.3/etc/hadoop/mapred-site.xml on spark01.
#################################################################
cat > /opt/hadoop-2.7.3/etc/hadoop/mapred-site.xml <<EOF
<configuration>
 <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>spark01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>spark01:19888</value>
  </property>
</configuration>
EOF
#12)Set up the ResourceManager on spark01 and NodeManagers on spark02-05 (yarn-site.xml)
#########################################################################
cat >/opt/hadoop-2.7.3/etc/hadoop/yarn-site.xml<<EOF
<configuration>
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>spark01</value>
</property>
<!-- The NodeManager hosts (spark02-05) come from etc/hadoop/slaves in step 13 -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
</configuration>
EOF
#13)Edit /opt/hadoop-2.7.3/etc/hadoop/slaves on spark01
##(so that the master can start all necessary services on the slaves automatically):
###############################################################
cat >/opt/hadoop-2.7.3/etc/hadoop/slaves<<EOF
spark02
spark03
spark04
spark05
EOF
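##The files in steps 11-13 were edited on spark01 only; if the other nodes should share the same
##mapred-site.xml, yarn-site.xml and slaves (the NodeManagers need the mapreduce_shuffle setting),
##a loop like this, run as the hadoop user on spark01, can copy them over:
for h in spark02 spark03 spark04 spark05; do
  scp /opt/hadoop-2.7.3/etc/hadoop/{mapred-site.xml,yarn-site.xml,slaves} hadoop@$h:/opt/hadoop-2.7.3/etc/hadoop/
done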
#14)Format NameNode
##Just do it on spark01
su - hadoop
hdfs namenode -format
#15)Start HDFS on spark01 (as the hadoop user):
start-dfs.sh
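##Optional check: jps should show NameNode and SecondaryNameNode on spark01 and a DataNode on spark02-05;
##dfsadmin should report four live datanodes
jps
hdfs dfsadmin -report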
#16)Also try accessing the NameNode web UI at http://spark01:50070/
#Then start YARN on spark01 (as the hadoop user):
start-yarn.sh 
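##Optional check: list the registered NodeManagers and run the bundled pi example as an end-to-end
##smoke test (the jar path assumes the stock 2.7.3 tarball layout)
yarn node -list
hadoop jar /opt/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 5 10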
################################################################################
#Install Hue
#################################################################################
##17) Install Hue on spark01
##http://archive.cloudera.com/cdh5/cdh/5/
cd /home/tools
yum -y install apache-maven ant python-simplejson gmp-devel libffi-devel asciidoc cyrus-sasl-devel rsync cyrus-sasl-gssapi gcc gcc-c++ krb5-devel libxml2-devel libxslt-devel make openssl-devel openldap-devel python-devel sqlite-devel 
wget http://god.nongdingbang.net/downloads/hue-latest.tar.gz
tar zxvf hue-latest.tar.gz
cd /home/tools/hue-3.9.0-cdh5.12.0 && PREFIX=/opt/ make install
sed -i '21s/secret_key=/secret_key=jerry2049205020512052,with==nothing/' /opt/hue/desktop/conf/hue.ini
sed -i 's#America/Los_Angeles#Asia/Shanghai#' /opt/hue/desktop/conf/hue.ini
sed -i '62s/## server_user=hue/server_user=hadoop/' /opt/hue/desktop/conf/hue.ini
sed -i '63s/## server_group=hue/server_group=hadoop/' /opt/hue/desktop/conf/hue.ini
sed -i '69s/## default_hdfs_superuser=hdfs/default_hdfs_superuser=hadoop/' /opt/hue/desktop/conf/hue.ini
sed -i '66s/## default_user=hue/default_user=hadoop/' /opt/hue/desktop/conf/hue.ini
sed -i '837s#hdfs://localhost:8020#hdfs://spark01:9000#' /opt/hue/desktop/conf/hue.ini
sed -i '845s#http://localhost:50070#http://spark01:50070#' /opt/hue/desktop/conf/hue.ini
sed -i '845s/## webhdfs_url/webhdfs_url/' /opt/hue/desktop/conf/hue.ini
chown -R hadoop.hadoop /opt/hue/
##18)Start Hue on spark01 as the hadoop user (web port: 8888)
su - hadoop
nohup /opt/hue/build/env/bin/supervisor &
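##Optional check: give supervisor a few seconds to come up, then confirm the Hue web UI answers
##on port 8888 (assumes curl is available on spark01)
sleep 10
curl -s -o /dev/null -w "%{http_code}\n" http://spark01:8888/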