一、hive安装
1.下载hive-2.1.0.tar.gz
2.解压
tar -zxf hive-2.1.0.tar.gz -C /soft
cd /soft
ln -s hive-2.1.0 hive
3.配置环境变量
[/etc/profile]
export HIVE_HOME=/soft/hive
export PATH=$PATH:$HIVE_HOME/bin
修改后执行 source /etc/profile 使环境变量生效。
测试:
hive --version
hive --help
4.复制mysql驱动程序到hive的lib目录下。
5.配置hive
1)复制hive-default.xml.template为hive-site.xml
2)将${system:java.io.tmpdir}、${system:user.name}等${system:...}字样替换成具体路径。
3)修改以下<property>的值(属性名 -> 值):
javax.jdo.option.ConnectionPassword -> mysql
javax.jdo.option.ConnectionUserName -> root
javax.jdo.option.ConnectionURL -> jdbc:mysql://192.168.137.101:3306/hive
javax.jdo.option.ConnectionDriverName -> com.mysql.jdbc.Driver
hive.server2.enable.doAs -> false
6.在mysql中创建存放hive信息的数据库
mysql>create database hive;
7.初始化hive的元数据(表结构)到mysql中。
cd /soft/hive/bin
schematool -dbType mysql -initSchema
二、hive命令行操作
1.创建hive的数据库
$hive>create database mydb ; //默认存储路径/user/hive/warehouse
$hive>show databases ;
$hive>use mydb ;
$hive>create table t(id int,name string,age int);
$hive>insert into t(id,name,age) values(1,'tom',12);
$hive>select * from t ;
$hive>drop table t ;
$hive>exit ;
2.hive中表可以在mysql中查看
select * from DBS;
select * from TBLS;
3.内部表外部表
1).managed table
删除表时,数据也删除了。
2).external table
删除表时,数据不删。
三、启动hiveserver2服务
1. hive == hive --service cli
2.启动hiveserver2
1).启动hiveserver2服务器,监听端口10000
hive --service hiveserver2 &
2).通过beeline命令行连接到hiveserver2
$>beeline //进入beeline命令行(等价于hive --service beeline)
$beeline>!help //查看帮助
$beeline>!quit //退出
$beeline>!connect jdbc:hive2://localhost:10000/mydb2 //连接到hive数据库
$beeline>show databases ;
$beeline>use mydb2 ;
$beeline>show tables; //显示表
四、java api方式访问hiveserver2
1.创建java模块
2.引入maven
3.添加hive-jdbc依赖
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>2.1.0</version>
</dependency>
4.App
使用jdbc方式连接到hive数据仓库,数据仓库需要开启hiveserver2服务。
/**
 * Minimal JDBC client that queries a Hive table through HiveServer2.
 *
 * Loads the Hive JDBC driver, opens a connection to the HiveServer2 endpoint,
 * runs a fixed SELECT against table {@code t}, and prints the first two
 * columns of every row as {@code id,name} on standard output.
 */
public class App {
    /**
     * Entry point.
     *
     * @param args unused
     * @throws Exception if the driver class cannot be loaded or any JDBC call fails
     */
    public static void main(String[] args) throws Exception {
        // Explicit driver registration, kept from the original.
        Class.forName("org.apache.hive.jdbc.HiveDriver");

        // try-with-resources closes the result set, statement and connection
        // (in that order) even when the query or row iteration throws.
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://192.168.137.101:10000/testdb1");
             Statement st = conn.createStatement();
             ResultSet rs = st.executeQuery("select id , name ,age from t")) {
            while (rs.next()) {
                // Only id and name are printed; age is selected but not shown.
                System.out.println(rs.getInt(1) + "," + rs.getString(2));
            }
        }
    }
}
五、hive命令
1.创建表,external 外部表
$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int) COMMENT 'xx' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ;
2.查看表结构
$hive>desc t2 ;
$hive>desc formatted t2 ;
3.加载数据到hive表
$hive>load data local inpath '/home/centos/customers.txt' into table t2 ; //local上传文件
$hive>load data inpath '/user/centos/customers.txt' [overwrite] into table t2 ; //移动文件
4.复制表
mysql>create table tt as select * from users ; //携带数据和表结构
mysql>create table tt like users ; //不带数据,只有表结构
hive>create table tt as select * from users ;
hive>create table tt like users ;
5.count()查询要转成mr
$hive>select count(*) from t2 ;
$hive>select id,name from t2 ;
$hive>select * from t2 order by id desc ; //MR
6.启用/禁用表
$hive>ALTER TABLE t2 ENABLE NO_DROP; //不允许删除
$hive>ALTER TABLE t2 DISABLE NO_DROP; //允许删除
注意:NO_DROP保护模式自Hive 2.0.0起已被移除,在本文安装的hive 2.1.0中上述语句不可用,需改用Hive的授权机制来控制删除权限。
7.分区表,优化手段之一,从目录的层面控制搜索数据的范围。
创建分区表.
CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
显示表的分区信息
SHOW PARTITIONS t3;
添加分区,创建目录
alter table t3 add partition (year=2014, month=12);
删除分区
ALTER TABLE t3 DROP IF EXISTS PARTITION (year=2014, month=11);
加载数据到分区表
load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);
8.创建桶表
CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
加载数据不会进行分桶操作
load data local inpath '/home/centos/customers.txt' into table t4 ;
查询t3表数据插入到t4中。
insert into t4 select id,name,age from t3 ;
评估数据量,保证每个桶的数据量约为block大小的2倍。
9.连接查询
$hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
$hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
加载数据到表
内连接查询
hive>select a.*,b.* from customers a , orders b where a.id = b.cid ;
左外连接
hive>select a.*,b.* from customers a left outer join orders b on a.id = b.cid ;
右外连接
hive>select a.*,b.* from customers a right outer join orders b on a.id = b.cid ;
全外连接
hive>select a.*,b.* from customers a full outer join orders b on a.id = b.cid ;
原文地址:https://www.cnblogs.com/water-sky/p/10221238.html