-- Session-level Hive and Hadoop settings applied before the statements in this script run.
-- CLI output: print column headers.
set hive.cli.print.header=true;
-- Input split sizing: the max split size and the per-node and per-rack minimums
-- are all set to the same value (256000000).
set mapred.max.split.size=256000000;
set mapred.min.split.size.per.node=256000000;
set mapred.min.split.size.per.rack=256000000;
-- Reducer sizing: upper bound on the reducer count and the bytes-per-reducer setting.
set hive.exec.reducers.max=200;
set hive.exec.reducers.bytes.per.reducer=1000000000;
-- Compression: final output is left uncompressed, intermediate data is compressed.
set hive.exec.compress.output=false;
set hive.exec.compress.intermediate=true;
-- Parallel execution is turned on with a thread number of 8.
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;
-- Group-by tuning: map-side aggregation on, with its check interval,
-- and the skew-in-data option for group-by turned on.
set hive.map.aggr=true;
set hive.groupby.mapaggr.checkinterval=100000;
set hive.groupby.skewindata=true;
-- Warehouse subdirectories do not inherit permissions.
set hive.warehouse.subdir.inherit.perms=false;
-- Skew-join optimization is turned on, with the skew join key setting at 100000.
set hive.skewjoin.key=100000;
set hive.optimize.skewjoin=true;
-- Automatic join conversion and vectorized execution are both turned off.
set hive.auto.convert.join=false;
set hive.vectorized.execution.enabled=false;
-- Switch the current database to parsedb.
use parsedb;
-- Stage 1: extract page-view fields from the raw JSON column of ods_db.ber_o2o_pv
-- and overwrite FACT_BI_BROWSER_T_00 with them. Every row is tagged with the
-- constant channel value o2o.
-- Row filter:
--   * session_id must be present and non-empty
--   * the extend field must be absent (null)
--   * log_time must fall in the half-open range from v_fm_tm (inclusive) to
--     v_to_tm (exclusive), both supplied through variable substitution
--     (log_time is compared as a string here)
-- String literals are delimited with plain ASCII single-quote characters.
-- Typographic quote characters are not valid HiveQL string delimiters and
-- make the statement fail to parse.
-- NOTE(review): app_id is populated from the user_id JSON field. Confirm that
-- this mapping is intended.
insert overwrite table FACT_BI_BROWSER_T_00
select
    'o2o' as channel,
    get_json_object(t.json, '$.session_id') as session_id,
    get_json_object(t.json, '$.user_id') as app_id,
    get_json_object(t.json, '$.ip') as ip,
    get_json_object(t.json, '$.ref_url') as ref_url,
    get_json_object(t.json, '$.url') as url,
    get_json_object(t.json, '$.stay_time') as stay_time,
    get_json_object(t.json, '$.log_time') as log_time,
    get_json_object(t.json, '$.browser') as browser
from ods_db.ber_o2o_pv t
where (
        get_json_object(t.json, '$.session_id') is not null
        and get_json_object(t.json, '$.session_id') <> ''
    )
    and get_json_object(t.json, '$.extend') is null
    and get_json_object(t.json, '$.log_time') >= '${v_fm_tm}'
    and get_json_object(t.json, '$.log_time') < '${v_to_tm}';
--insert overwrite table FACT_BI_BROWSER_T_00
--select
--t.channel,
--t.session_id,
--t.app_id,
--t.ip,
--t.ref_url,
--t.url,
--t.stay_time,
--t.log_time,
--t.browser
--from FACT_BI_BROWSER_T_00 t;
-- 2. Page views (PV)
-- Overwrites FACT_BI_BROWSER_T_01 with one row per channel, browser and
-- calendar date of log_time. BI_VALUE is the number of rows in that group
-- whose url is not null.
insert overwrite table FACT_BI_BROWSER_T_01
select
    pv.browser           as BR_NAME,
    count(pv.url)        as BI_VALUE,
    to_date(pv.log_time) as BI_DATETIME,
    pv.channel           as BI_CHANNEL
from FACT_BI_BROWSER_T_00 pv
group by
    pv.channel,
    pv.browser,
    to_date(pv.log_time);
-- Stage 3: overwrite one year/month/day partition of FACT_BI_BROWSER_T with
-- the aggregated rows from FACT_BI_BROWSER_T_01. The partition values come
-- from the year, month and day substitution variables.
-- Output columns:
--   ID          random UUID from java.util.UUID.randomUUID with the dashes removed
--   BI_TYPE     constant 1 (presumably the code for the PV metric, TODO confirm)
--   BI_DATETIME the stage-2 date with a midnight time suffix appended
-- String literals are delimited with plain ASCII quote characters.
-- Typographic quote characters are not valid HiveQL string delimiters and
-- make the statement fail to parse.
insert overwrite table FACT_BI_BROWSER_T partition (year='${year}', month='${month}', day='${day}')
select
    regexp_replace(reflect("java.util.UUID", "randomUUID"), "-", "") as ID,
    1 as BI_TYPE,
    t.BR_NAME,
    t.BI_VALUE,
    concat(t.BI_DATETIME, ' 00:00:00') as BI_DATETIME,
    t.BI_CHANNEL
from FACT_BI_BROWSER_T_01 t;
-- Original article: https://www.cnblogs.com/heguoxiu/p/10120644.html