
-- 向数据表中加载文件,不会对数据进行任何转换,load操作只是复制(本地到hdfs)、移动(hdfs上的文件)
-- 如果表包含分区,则必须指定每个分区的分区名,filepath可以引用一个文件或一个目录
-- 如果数据在hdfs上,且filepath不是绝对路径,则hive会相对于/user/<username>进行解释,文件会被移动(记住)
-- 如果指定了overwrite,会先删除目标表(或分区)中已有的内容,再移动数据。如果没有且有文件名冲突,那么现有的文件会被新文件替换。
load data [local] inpath 'filepath' [overwrite]
 into table tablename
 [partition (p1=val1,p2=val2...)]

-- 将查询结果插入到hive表中
insert [overwrite] table table_name1 [partition (part1=val1,part2=val2)]
select * from table_name2

-- extension
from from_statement
insert [overwrite] table table_name1 [partition (part1=val1,part2=val2)]
insert [overwrite] table table_name2 [partition (part1=val1,part2=val2)]

-- 将查询结果写入文件系统
insert [overwrite] [local] directory directory1 select ... from ...
-- extension
from from_statement
insert [overwrite] [local] directory directory1 select_statement1
insert [overwrite] [local] directory directory2 select_statement2

-- select语句
select [all | distinct] c1,c2...
from table_name
[where where_condition]
[group by col_list]
[ cluster by col_list
  | [distribute by col_list] [sort by col_list]
]
[limit number];

select * from t1;
select * from sales where amount >10 and region = 'cs';

select distinct col1,col2 from t1;
select distinct col1 from t1;

select * from t1 limit 5;

-- 输出top-k,5
set mapred.reduce.tasks = 1;
select * from sales sort by amount desc limit 5;

-- 基于分区的查询
select p.*
 from page_view p
 where p.date >= '20140212' and p.date <= '20140212';
-- having:hive 0.7.0之前不支持(0.7.0起可直接使用),旧版本可以通过子查询实现
select col1 from t1 group by col1 having sum(col2) > 10;
select col1 from (
 select col1,sum(col2) as col2sum from t1 group by col1
) t2
where t2.col2sum > 10;

-- 查询结果写入到目录
insert overwrite [local] directory '/user/output'
select a.* from invites a where a.ds = '20120101';

-- group by
from invites a
insert overwrite table events
select a.bar,count(*) where a.foo > 0 group by a.bar;

insert overwrite table events
select a.bar,count(*) from invites a where a.foo > 0 group by a.bar;

-- join
from pokes t1 join invites t2 on (t1.bar = t2.bar)
insert overwrite table events
select t1.bar, t1.foo, t2.foo;

-- 多表insert
from src
insert overwrite table dest1 select src.* where src.key < 100
insert overwrite table dest2 select src.key,src.value where src.key > 100 and src.key < 200

-- streaming 不解
from invites a
insert overwrite table events
select transform(a.foo, a.bar) as (oof,rab) using '/bin/cat' where a.ds='20120212';

-- hive sql 使用实例
create table u_data(
 userid int,
 movieid int,
 rating int,
 unixtime string
)
row format delimited
fields terminated by '\t'
stored as textfile;

load data local inpath '<path>/u.data'
overwrite into table u_data;

select count(1) from u_data;
-- weekday_mapper.py(python脚本,不是hive语句;供下面的transform调用,把unixtime转换为星期几)
import sys
import datetime
for line in sys.stdin:
 line = line.strip()
 userid,movieid,rating,unixtime = line.split('\t')
 weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
 print('\t'.join([userid,movieid,rating,str(weekday)]))

create table u_data_new(
 userid int,
 movieid int,
 rating int,
 weekday int
)
row format delimited
fields terminated by '\t';
-- 增加hive要使用的文件(transform调用的python脚本)
add file weekday_mapper.py;

insert overwrite table u_data_new
select transform(userid,movieid,rating,unixtime) using 'python weekday_mapper.py' as (userid,movieid,rating,weekday)
from u_data;

select weekday,count(*) from u_data_new group by weekday;

-- 将jar增加进hive的class路径
add jar ...hive_contrib.jar;
create table apachelog(
 host string,
 identity string,
 user string,
 time string,
 request string,
 status string,
 size string,
 referer string,
 agent string
)
row format serde '.....RegexSerDe'
with serdeproperties(
 -- 原文此处的正则表达式内容缺失,以...占位
 "input.regex" = "..."
)
stored as textfile;

hive 网络接口
hive --service hwi 开启服务

hive jdbc 操作
hive --service hiveserver


【原】hive 操作笔记

1.建表: hive> CREATE TABLE pokes (foo INT, bar STRING);hive> CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING);由于很多数据在hadoop平台,当从hadoop平台的数据迁移到hive目录下时,由于hive默认的分隔符是\u0001,为了平滑迁移,需要在创建表格时指定数据的分隔符号,语法如下:create table ooo(uid string,n

Python数据库操作 DML操作-数据的增删改#学习猿地

# MySQL 数据操作 DML > 数据的DML操作:添加数据,修改数据,删除数据 ## 添加数据 > 格式: insert into 表名[(字段列表)] values(值列表...); ```sql --标准添加(指定所有字段,给定所有的值) mysql> insert into stu(id,name,age,sex,classid) values(1,'zhangsan',20,'m','lamp138'); Query OK, 1 row affected (0.13 sec


一.CLI操作: 1.普通设置: set hive.cli.print.header=true;    // 打印查询头,需要显示设置 set hive.mapred.mode=strict;     // hive strict模式,分区表必须在where中分区过滤,不允许笛卡尔积查询 set hive.mapred.mode=nostrict;    // 与上一行相反  // 分区已经对应目录结构,因此在数据文件就可以不用存分区的数据了; set hive.exec.dynamic.par


#创建表人信息表  person(String name,int age) hive> create table person(name STRING,age INT)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\\' STORED AS TEXTFILE; OK Time taken: 0.541 seconds#创建表票价信息表 ticket(int age,float price) hive> create tab


HQL操作1.Distribute by    distribute by col按照col列把数据分散到不同的reduce    sort    sort by col 按照col列把数据排序    select col,co2 from table_name distribute by col1 sort by col1    asc,col2 desc;    两者结合出现,确保每个reduce的输出都是有序的     应用场景:    * map输出的文件大小不均    * reduce


学习DDL语句 创建对象的语句 Create/Drop/Alter Database Create Database CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name [COMMENT database_comment] [LOCATION hdfs_path] [WITH DBPROPERTIES (property_name=property_value, ...)]; Drop Database DROP (DATABASE|SC

通过JDBC驱动连接Hive操作实例 笔记

package com.yuxipacific; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; public class Hive {     public static void main(String[] args) throws Exception {                  Class.forName


hive库清表,删除数据 insert overwrite table lorry.bigdata select * from lorry.bigdata where 1=0


hive的具体练习以下4个目标1. 第一普通的hdfs文件能导入到hive中以供我们查询. create table dept(deptID int,deptName string,address string); load data local inpath '/home/dyq/Documents/dept' overwrite into table dept; select * from dept; hive> select * from dept; OK NULL NULL NULL N