1数据导入--------------
CREATE EXTERNAL TABLE wizad_mdm_dev_lmj_edition_20141120 (
cookie_id STRING,
guid STRING
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ‘,‘
LINES TERMINATED BY ‘\n‘
stored as textfile
LOCATION ‘/user/wizad/test/lmj/edition_compare/‘;
2join(只接受等值)后的结果入表------------------------------------
CREATE TABLE wizad_mdm_dev_lmj_edition_result as
select *
from wizad_mdm_dev_lmj_20141120 as w
JOIN wizad_mdm_main as a ON (a.rowkey = w.guid);
======================================================================
3:hbase上数据导入失败(原因:)
下载到本地处理。设置scan时间加大,大表放最后。
结果导出到本地‘/home/wizad/lmj/‘:
SET mapred.job.queue.name=queue3;
SET hbase.client.scanner.caching=5000;
SET hbase.zookeeper.quorum=datanode06,datanode07,datanode08;
SET zookeeper.znode.parent=/hbase;
set hbase.regionserver.lease.period=180000;
INSERT OVERWRITE LOCAL DIRECTORY ‘/home/wizad/lmj/‘
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,‘
select cookie_id,rowkey,fixeddim_map
from wizad_mdm_main_from_hbase as a JOIN wizad_mdm_dev_lmj_edition_20141120 as w
ON (w.guid = a.rowkey);
---------------------------------------------------------
SET mapred.job.queue.name=queue3;
SET hbase.client.scanner.caching=5000;
SET hbase.zookeeper.quorum=datanode06,datanode07,datanode08;
SET zookeeper.znode.parent=/hbase;
set hbase.regionserver.lease.period=180000;
CREATE TABLE wizad_mdm_dev_lmj_edition_result as
select cookie_id,rowkey,fixeddim_map
from wizad_mdm_main_from_hbase as a JOIN wizad_mdm_dev_lmj_edition_20141120 as w
ON (w.guid = a.rowkey);
========================================
select count(*) from wizad_mdm_dev_lmj_edition_result;14638
create TABLE wizad_mdm_dev_lmj_edition_insterest as
select cookie_id,rowkey,fixeddim_key,fixeddim_value
from wizad_mdm_dev_lmj_edition_result
LATERAL VIEW explode(fixeddim_map) my_t1 AS fixeddim_key,fixeddim_value
WHERE (fixeddim_key like ‘interest%‘);
-----------------------------------------
select count(*) from wizad_mdm_dev_lmj_edition_insterest;15331
INSERT OVERWRITE table wizad_mdm_dev_lmj_edition_insterest
select cookie_id,rowkey,fixeddim_key,fixeddim_value
from wizad_mdm_dev_lmj_edition_result
LATERAL VIEW explode(fixeddim_map) my_t1 AS fixeddim_key,fixeddim_value
WHERE (fixeddim_key like ‘interest%‘);
输出到hdfs上(本地用local DIRECTORY),不能指定列的分隔符,语句ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,‘ 报错不能识别。
INSERT OVERWRITE DIRECTORY ‘/user/wizad/tmp/mytest‘
select * from wizad_mdm_dev_lmj_edition_insterest
hive在hdfs上,默认使用^A作为分隔符,其对应着001。这样的数据在pig中无法读入,用‘\001‘或者‘\\001‘或者‘^A‘都无法读入。
解决办法:作为一个列整体读入后,在用STRSPLIT分隔按‘\\001‘(使用‘^A‘无效),可以返回一个元组类似((a,b)),如下pig
%default interestFlie /user/wizad/tmp/mytest/*
--无效interest_data = LOAD ‘$interestFlie‘ USING PigStorage(‘\\001‘)
--无效interest_data = LOAD ‘$interestFlie‘ USING PigStorage(‘^A‘)
interest_data = LOAD ‘$interestFlie‘
AS(cookie_id:chararray
-- guid:chararray,
-- dimkey :chararray,
-- dimvalue:chararray
);
test2 = foreach interest_data generate STRSPLIT(cookie_id,‘\\001‘);
DUMP res;
describe res;
结果:result结构: {(null)}
((B2BEF56E09EC115456E221,352751019523267,interest_11,161))
((B2BEF56E09EC115456E221,352751019523267,interest_13,102))
((EC15860E09D5EA545700E9,352751019523267,interest_11,161))
((EC15860E09D5EA545700E9,352751019523267,interest_13,102))
((daca0487-5424ef1a0dc3fa-11535481,352751019523267,interest_4,61))
((daca0487-5424ef1a0dc3fa-11535481,352751019523267,interest_21,21))
((daca0487-5424ef1a0dc3fa-11535481,352751019523267,interest_11,161))
((daca0487-5424ef1a0dc3fa-11535481,352751019523267,interest_13,102))
全路径也不行。
INSERT OVERWRITE DIRECTORY ‘hdfs://namenode/user/wizad/tmp/interest2/‘
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,‘
select * from wizad_mdm_dev_lmj_edition_insterest
但导出到本地,可以指定列分隔符
INSERT OVERWRITE local DIRECTORY ‘/home/wizad/lmj/inserest2‘
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,‘
select * from wizad_mdm_dev_lmj_edition_insterest
hive -e ‘use wizad;
select * from wizad_mdm_dev_lmj_edition_insterest;‘>> mytest
查询结果使用\t作为列分隔符,mytest中
3531 3631 3730 3631 3931 3635 3436 0969 51617061916546.i
vim中16进制(%!xxd)两位对应一个字符,看到"."对应的09,在asii码表中09,就是tab制表符