Hive 分析脚本 p_fact_bi_browser_t.sql

-- Session-level Hive/Hadoop settings for this job. Each comment sits on its
-- own line because a trailing comment would become part of the set value.

-- Print column headers in CLI query output.
set hive.cli.print.header=true;
-- Input split sizing, in bytes (256000000 is roughly 256 MB per split).
set mapred.max.split.size=256000000;
set mapred.min.split.size.per.node=256000000;
set mapred.min.split.size.per.rack=256000000;
-- Reducer planning: at most 200 reducers, about 1 GB of input per reducer.
set hive.exec.reducers.max=200;
set hive.exec.reducers.bytes.per.reducer=1000000000;
-- Final output is written uncompressed, intermediate stage data is compressed.
set hive.exec.compress.output=false;
set hive.exec.compress.intermediate=true;
-- Allow independent stages of a query to run concurrently (up to 8 at once).
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;
-- Map-side partial aggregation for GROUP BY, checked every 100000 rows.
set hive.map.aggr=true;
set hive.groupby.mapaggr.checkinterval=100000;
-- Skew handling for GROUP BY (adds an extra job that spreads hot keys).
set hive.groupby.skewindata=true;
-- Do not inherit parent directory permissions for new warehouse subdirectories.
set hive.warehouse.subdir.inherit.perms=false;
-- Skew-join optimization: keys with more than 100000 rows are treated as skewed.
set hive.skewjoin.key=100000;
set hive.optimize.skewjoin=true;
-- Automatic map-join conversion and vectorized execution are both disabled.
-- NOTE(review): the reason for disabling them is not visible in this script.
set hive.auto.convert.join=false;
set hive.vectorized.execution.enabled=false;

-- Unqualified table names below resolve against the parsedb database.
use parsedb;

-- Step 1: flatten raw o2o page-view JSON records from ods_db.ber_o2o_pv into
-- the staging table FACT_BI_BROWSER_T_00, overwriting its previous contents.
--
-- Every row is tagged with the constant channel o2o. The app_id column is
-- filled from the JSON user_id field. Rows are kept only when session_id is
-- present and non-empty, the JSON has no extend field, and log_time falls in
-- the half-open window from v_fm_tm (inclusive) to v_to_tm (exclusive).
-- v_fm_tm and v_to_tm are substitution variables supplied by the caller.
-- NOTE(review): the window is a string comparison, so it presumably relies on
-- log_time and both variables sharing one sortable timestamp format - confirm.
--
-- All string literals use plain ASCII single quotes. Typographic quotes are
-- not string delimiters in HiveQL and make the statement fail to parse.
insert overwrite table FACT_BI_BROWSER_T_00
select
    'o2o' as channel,
    get_json_object(t.json, '$.session_id') as session_id,
    get_json_object(t.json, '$.user_id') as app_id,
    get_json_object(t.json, '$.ip') as ip,
    get_json_object(t.json, '$.ref_url') as ref_url,
    get_json_object(t.json, '$.url') as url,
    get_json_object(t.json, '$.stay_time') as stay_time,
    get_json_object(t.json, '$.log_time') as log_time,
    get_json_object(t.json, '$.browser') as browser
from ods_db.ber_o2o_pv t
where (
        get_json_object(t.json, '$.session_id') is not null
        and get_json_object(t.json, '$.session_id') <> ''
    )
    and get_json_object(t.json, '$.extend') is null
    and get_json_object(t.json, '$.log_time') >= '${v_fm_tm}'
    and get_json_object(t.json, '$.log_time') < '${v_to_tm}';

--insert overwrite table FACT_BI_BROWSER_T_00
--select
--t.channel,
--t.session_id,
--t.app_id,
--t.ip,
--t.ref_url,
--t.url,
--t.stay_time,
--t.log_time,
--t.browser
--from FACT_BI_BROWSER_T_00 t;

-- Step 2: page views (PV).
-- Counts non-null url values per channel, browser and calendar day of
-- log_time, and overwrites FACT_BI_BROWSER_T_01 with the result.
insert overwrite table FACT_BI_BROWSER_T_01
select
    pv.browser    as BR_NAME,
    count(pv.url) as BI_VALUE,
    pv.log_date   as BI_DATETIME,
    pv.channel    as BI_CHANNEL
from (
    -- Derive the day once so the outer query can group on a plain column.
    select
        src.channel,
        src.browser,
        src.url,
        to_date(src.log_time) as log_date
    from FACT_BI_BROWSER_T_00 src
) pv
group by
    pv.channel,
    pv.browser,
    pv.log_date;

-- Step 3: publish the daily PV figures into one static partition of
-- FACT_BI_BROWSER_T, overwriting that partition. The partition is chosen by
-- the year, month and day substitution variables supplied by the caller.
--
-- ID is a random UUID with the dashes stripped, so it changes on every run.
-- BI_TYPE is the constant 1.
-- NOTE(review): 1 presumably identifies the PV metric - confirm.
-- BI_DATETIME is the day from step 2 with a midnight time suffix appended.
--
-- The partition values and the time suffix use plain ASCII single quotes.
-- Typographic quotes are not string delimiters in HiveQL and make the
-- statement fail to parse.
insert overwrite table FACT_BI_BROWSER_T partition (year='${year}',month='${month}',day='${day}')
select
    regexp_replace(reflect("java.util.UUID", "randomUUID"), "-", "") as ID,
    1 as BI_TYPE,
    t.BR_NAME,
    t.BI_VALUE,
    CONCAT(t.BI_DATETIME, ' 00:00:00') AS BI_DATETIME,
    t.BI_CHANNEL
from FACT_BI_BROWSER_T_01 t;

原文地址：https://www.cnblogs.com/heguoxiu/p/10120644.html

时间： 2024-10-05 20:00:19

Hive 分析脚本 p_fact_bi_browser_t.sql

Hive 分析脚本 p_fact_bi_browser_t.sql 的相关文章

SQL Standard Based Hive Authorization（基于SQL标准的Hive授权）

应用系统如何分析和获取SQL语句的执行代码

Hive分析窗口函数

使用Hive或Impala执行SQL语句，对存储在HBase中的数据操作

使用hive分析nginx访问日志方法

同事总结的 Hive SQL 优化：Hive 是将符合 SQL 语法的字符串解析生成可以在 Hadoop 上执行的 MapReduce 任务的工具

Hive分析窗口函数(二) NTILE,ROW_NUMBER,RANK,DENSE_RANK

Hive分析窗口函数(一) SUM,AVG,MIN,MAX

Hive 调度脚本 p_fact_bi_browser_t_job.sh