hadoop生态搭建(3节点)-17.sqoop配置_单节点

# ==================================================================安装 sqoop

tar -zxvf ~/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
mv ~/sqoop-1.4.7.bin__hadoop-2.6.0 /usr/local/sqoop-1.4.7

# 环境变量

# ==================================================================node1

vi /etc/profile

# 在export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL下添加

export JAVA_HOME=/usr/java/jdk1.8.0_111
export JRE_HOME=/usr/java/jdk1.8.0_111/jre
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop-2.7.6
export MYSQL_HOME=/usr/local/mysql
export HBASE_HOME=/usr/local/hbase-1.2.4
export HIVE_HOME=/usr/local/hive-2.1.1
export SCALA_HOME=/usr/local/scala-2.12.4
export KAFKA_HOME=/usr/local/kafka_2.12-0.10.2.1
export FLUME_HOME=/usr/local/flume-1.8.0
export SPARK_HOME=/usr/local/spark-2.3.0
export STORM_HOME=/usr/local/storm-1.1.0
export REDIS_HOME=/usr/local/redis-4.0.2
export ERLANG_HOME=/usr/local/erlang
export RABBITMQ_HOME=/usr/local/rabbitmq_server-3.7.5
export MONGODB_HOME=/usr/local/mongodb-3.4.5
export NGINX_HOME=/usr/local/nginx
export CATALINA_BASE=/usr/local/tomcat
export CATALINA_HOME=/usr/local/tomcat
export TOMCAT_HOME=/usr/local/tomcat
export KEEPALIVED_HOME=/usr/local/keepalived
export ELASTICSEARCH_HOME=/usr/local/elasticsearch-6.2.4
export LOGSTASH_HOME=/usr/local/logstash-6.2.4
export KIBANA_HOME=/usr/local/kibana-6.2.4
export SQOOP_HOME=/usr/local/sqoop-1.4.7

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$MYSQL_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SCALA_HOME/bin:$KAFKA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin:$STORM_HOME/bin:$REDIS_HOME/bin:$ERLANG_HOME/bin:$RABBITMQ_HOME/ebin:$RABBITMQ_HOME/sbin:$MONGODB_HOME/bin:$NGINX_HOME/sbin:$CATALINA_HOME/bin:$KEEPALIVED_HOME/sbin:$ELASTICSEARCH_HOME/bin:$LOGSTASH_HOME/bin:$KIBANA_HOME/bin:$SQOOP_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# 使环境变量生效
source /etc/profile

# 查看配置结果
echo $SQOOP_HOME
cp -a $SQOOP_HOME/conf/sqoop-env-template.sh $SQOOP_HOME/conf/sqoop-env.sh
vi $SQOOP_HOME/conf/sqoop-env.sh 

#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=${HADOOP_HOME}

#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=${HADOOP_HOME}

#set the path to where bin/hbase is available
#export HBASE_HOME=${HBASE_HOME}

#Set the path to where bin/hive is available
export HIVE_HOME=${HIVE_HOME}

#Set the path for where zookeper config dir is
#export ZOOCFGDIR=

cp -a ~/mysql-connector-java-5.1.46.jar $SQOOP_HOME/lib/
cp -a $HIVE_HOME/lib/hive-exec-2.1.1.jar $SQOOP_HOME/lib/

sqoop-version
mysql -u root -p

> create database sqooptest character set utf8 ;
> create user ‘sqoop‘@‘%‘ identified by ‘Sqoop-123‘;
> grant all privileges on *.* to ‘sqoop‘@‘%‘;
> flush privileges;

> show databases;

> quit;

# 用户 sqoop 登录 mysql

mysql -u sqoop -p

Enter password: Sqoop-123

> use sqooptest;

> create table emp(id INT NOT NULL PRIMARY KEY, name VARCHAR(20), age INT);

> insert into emp(id, name, age)values(1, ‘zhangsan‘, 11);
  insert into emp(id, name, age)values(2, ‘lisi‘, 12);
  insert into emp(id, name, age)values(3, ‘王五‘, 13);

> create table emp_add(id INT NOT NULL PRIMARY KEY, name VARCHAR(20), age INT, sex VARCHAR(20));

> insert into emp_add(id, name, age, sex)values(1, ‘zhangsan‘, 11, ‘男‘);
  insert into emp_add(id, name, age, sex)values(2, ‘lisi‘, 12, ‘男‘);
  insert into emp_add(id, name, age, sex)values(3, ‘王五‘, 13, ‘女‘);
  insert into emp_add(id, name, age, sex)values(4,‘liuliu‘, 11, ‘男‘);

> show tables;

> select * from emp;
  select * from emp_add;

> quit;

# 启动 hadoop

# ==================================================================node1 node2 node3
zkServer.sh start

# ==================================================================node1
# 启动hadoop所有进程
$HADOOP_HOME/sbin/start-all.sh

$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

hadoop dfsadmin -safemode get

# 命令强制离开
# hadoop dfsadmin -safemode leave

# 网页访问
# http://node1:50070?user.name=hadoop
# http://node2:50070?user.name=hadoop
# http://node1:8088?user.name=hadoop/cluster/nodes

# 导入表数据到HDFS

# ==================================================================node1

sqoop import   --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop   --password Sqoop-123   --table emp   --m 1

# 如果包下面的错误请将 $HADOOP_HOME/etc/hadoop/yarn-site.xml 文件进行调整
# ERROR tool.ImportTool: Import failed: java.io.IOException: org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException:
# Invalid resource request, requested memory < 0, or requested memory > max configured, requestedMemory=1536, maxMemory=1024

# ==================================================================node1 node2 node3
vi $HADOOP_HOME/etc/hadoop/yarn-site.xml

<property>
	<name>yarn.nodemanager.resource.memory-mb</name>
	<value>2048</value>
</property>
<property>
	<name>yarn.scheduler.maximum-allocation-mb</name>
	<value>2048</value>
</property>

# 重启Yarn

# ==================================================================node1
$HADOOP_HOME/sbin/start-yarn.sh
# 导入表数据到HDFS
sqoop import   --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop   --password Sqoop-123   --table emp   --m 1

# 查看导入的数据
hadoop fs -ls /user/root/emp

hadoop fs -cat /user/root/emp/part-m-00000

# 导入表到HDFS指定目录
sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --target-dir /sqooptest/table_emp/queryresult   --table emp --num-mappers 1

# 查看导入的数据
hadoop fs -ls /sqooptest/table_emp/queryresult

hadoop fs -cat /sqooptest/table_emp/queryresult/part-m-00000

# 导入关系表到HIVE

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp --hive-import   --num-mappers 1

# 如报下面的错误,请先删除 /user/root/emp

# ERROR tool.ImportTool: Import failed: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://appcluster/user/root/emp already exists

hadoop fs -rmr /user/root/emp

# 重新导入关系表到HIVE

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp --hive-import   --num-mappers 1

# 启动 hive

hive
# 查看导入的数据
> show tables;

> select * from emp;

# 导入表到HIVE指定库指定表

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp   --delete-target-dir   --fields-terminated-by ‘\t‘   --hive-import   --hive-database sqooptest   --hive-table hive_emp   --num-mappers 1 

# 报错 hive的库sqooptest,必须先建立。否则会报:FAILED: SemanticException [Error 10072]: Database does not exist: sqooptest

> create database sqooptest;

> show databases;

# 导入表到HIVE指定库指定表

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp   --delete-target-dir   --fields-terminated-by ‘\t‘   --hive-import   --hive-database sqooptest   --hive-table hive_emp   --num-mappers 1

> use sqooptest;

> show tables;

> select * from hive_emp;

# 导入表数据子集 where子句的导入

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp_add   --where "age =11"   --target-dir /sqooptest/table_emp/queryresult2   --num-mappers 1

# 查看导入的数据
hadoop fs -ls /sqooptest/table_emp/queryresult2

hadoop fs -cat /sqooptest/table_emp/queryresult2/part-m-00000

# query按需导入

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --query ‘select id,name,age from emp WHERE  id>=2 and $CONDITIONS‘   --split-by id   --fields-terminated-by ‘\t‘   --target-dir /sqooptest/table_emp/queryresult3   --num-mappers 1

# 查看导入的数据
hadoop fs -cat /sqooptest/table_emp/queryresult3/part-m-00000

# 增量导入

sqoop import --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table emp   --incremental append   --check-column id   --last-value 2   --fields-terminated-by ‘\t‘   --target-dir /sqooptest/table_emp/queryresult4   --num-mappers 1

# 查看导入的数据
hadoop fs -ls /sqooptest/table_emp/queryresult4/

hadoop fs -cat /sqooptest/table_emp/queryresult4/part-m-00000

# Sqoop的数据导出

# 查看 hdfs 数据

hadoop fs -ls /sqooptest/table_emp/queryresult
hadoop fs -cat /sqooptest/table_emp/queryresult/part-m-00000

# 1、首先需要手动创建mysql中的目标表

mysql -u sqoop -p

Enter password: Sqoop-123

> use sqooptest;

> CREATE TABLE employee(id INT NOT NULL PRIMARY KEY,name VARCHAR(20),age INT);

> show tables;

> select * from employee;

# 2、然后执行导出命令

sqoop export   --connect jdbc:mysql://node1:3306/sqooptest   --username sqoop --password Sqoop-123   --table employee   --export-dir /sqooptest/table_emp/queryresult/

# 验证表mysql命令行
> select * from employee;

> quit;

shutdown -h now
# sqoop

原文地址:https://www.cnblogs.com/zcf5522/p/9951080.html

时间: 2024-08-13 17:23:44

hadoop生态搭建(3节点)-17.sqoop配置_单节点的相关文章

hadoop生态搭建(3节点)-05.mysql配置_单节点

# ==================================================================node1 # ==================================================================安装 mysql # 查看当前安装的mariadb包 rpm -qa | grep mariadb # 有就将它们统统强制性卸载掉: rpm -e --nodeps mariadb-libs-5.5.52-1.el7

hadoop生态搭建(3节点)-13.mongodb配置

# 13.mongodb配置_副本集_认证授权# ==================================================================安装 mongodb tar -zxvf ~/mongodb-linux-x86_64-rhel70-3.4.5.tgz -C /usr/local mv /usr/local/mongodb-linux-x86_64-rhel70-3.4.5 /usr/local/mongodb-3.4.5 rm -r ~/mon

hadoop生态搭建(3节点)-12.rabbitmq配置

# 安装 需要相关包# ==================================================================node1 node2 node3 yum install -y gcc gcc-c++ zlib zlin-devel perl ncurses-devel # 安装 openssl# ==================================================================node1 scp -r

js 节点 document html css 表单节点操作

节点操作:访问.属性.创建 (1)节点的访问:firstChild.lastChild.childNodes.parentChild(父子节) 可以使用元素对象的方法进行代替:getElementById().getElementsByTagName() (2)节点属性的操作:setAttribute().removeAttribute().getAttribute() (3)节点的创建.删除.追加: 创建节点:document.createElement(tagName) 删除节点(必须父节点

hadoop生态搭建(3节点)-04.hadoop配置

如果之前没有安装jdk和zookeeper,安装了的请直接跳过 # https://www.oracle.com/technetwork/java/javase/downloads/java-archive-javase8-2177648.html # ==================================================================安装 jdk mkdir -p /usr/java tar -zxvf ~/jdk-8u111-linux-x64

hadoop生态搭建(3节点)-10.spark配置

# https://www.scala-lang.org/download/2.12.4.html# ==================================================================安装 scala tar -zxvf ~/scala-2.12.4.tgz -C /usr/local rm –r ~/scala-2.12.4.tgz # http://archive.apache.org/dist/spark/spark-2.3.0/ # ==

hadoop生态搭建(3节点)-06.hbase配置

# http://archive.apache.org/dist/hbase/1.2.4/ # ==================================================================安装 hbase tar -zxvf ~/hbase-1.2.4-bin.tar.gz -C /usr/local rm –r ~/hbase-1.2.4-bin.tar.gz # 配置环境变量# =====================================

hadoop生态搭建(3节点)-07.hive配置

# http://archive.apache.org/dist/hive/hive-2.1.1/ # ==================================================================安装 hive tar -zxvf apache-hive-2.1.1-bin.tar.gz -C /usr/local mv /usr/local/apache-hive-2.1.1-bin /usr/local/hive-2.1.1 rm –r ~/apach

hadoop生态搭建(3节点)-01.基础配置

# 基础配置# ==================================================================node1 vi /etc/hostname node1 vi /etc/sysconfig/network-scripts/ifcfg-ens33 # BOOTPROTO=dhcp BOOTPROTO=static # ONBOOT=no ONBOOT=yes IPADDR=192.168.6.131 NETMASK=255.255.255.0 G