Full load: the file name carries today's date, while the data in it is yesterday's.
dayid=`date -d "1 day ago" +%Y%m%d`
##############################################
# Function: build the date string obtained by adding an
#           offset (in days) to a start date
# Args:     start date, yyyymmdd
#           offset in days
# Returns:  start date plus the offset, yyyymmdd
###############################################
function toDate()
{
startdate=$1;
days=$2;
timestamp_startdate=`date -d ${startdate} +%s`                         # start date as epoch seconds
timestamp_resultdate=`expr ${timestamp_startdate} + ${days} \* 86400`  # add the offset in days (expr needs \*)
resultdate=`date -d @${timestamp_resultdate} +%Y%m%d`                  # back to yyyymmdd
echo $resultdate
}
filedayid=`toDate $dayid 1`
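As a quick illustration (hypothetical dates, not from a real run): with dayid=20170713, the call above yields filedayid=20170714, so the file wildcard below matches files named for the day after the data day:

$ toDate 20170713 1
20170714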
spark-submit --class="com.zyuc.stat.iot.etl.UserInfoETL" \
--master yarn-client \
--name UserInfoETL \
--conf "spark.app.appName=UserInfoETL" \
--conf "spark.app.dataDayid=${dayid}" \
--conf "spark.app.userTable=iot_customer_userinfo" \
--conf "spark.app.syncType=full" \
--conf "spark.app.inputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/" \
--conf "spark.app.outputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/" \
--conf "spark.app.fileWildcard=all_userinfo_qureyes_${filedayid}*" \
--conf "spark.app.vpdnInput=/hadoop/IOT/ANALY_PLATFORM/BasicData/VPDNProvince/" \
--conf "spark.app.vpdnWildcard=vpdninfo.txt" \
--conf spark.yarn.executor.memoryOverhead=700 \
--executor-memory 2G \
--executor-cores 1 \
--num-executors 6 \
/slview/test/zcw/jars/userETL.jar
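After the full load finishes, the ORC output can be sanity-checked directly on HDFS (a minimal check, assuming an HDFS client is available on the submitting host):

hdfs dfs -ls /hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/data/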
Incremental load: the preamble (the dayid assignment and the toDate function) is identical to the full-load script above; only syncType and the file wildcard change in the spark-submit:
spark-submit --class="com.zyuc.stat.iot.etl.UserInfoETL" \
--master yarn-client \
--name UserInfoETL \
--conf "spark.app.appName=UserInfoETL" \
--conf "spark.app.dataDayid=${dayid}" \
--conf "spark.app.userTable=iot_customer_userinfo" \
--conf "spark.app.syncType=incr" \
--conf "spark.app.inputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/" \
--conf "spark.app.outputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/" \
--conf "spark.app.fileWildcard=incr_userinfo_qureyes_${filedayid}*" \
--conf "spark.app.vpdnInput=/hadoop/IOT/ANALY_PLATFORM/BasicData/VPDNProvince/" \
--conf "spark.app.vpdnWildcard=vpdninfo.txt" \
--conf spark.yarn.executor.memoryOverhead=700 \
--executor-memory 2G \
--executor-cores 1 \
--num-executors 6 \
/slview/test/zcw/jars/userETL.jar
--conf "spark.app.fileWildcard= 如下:
all_userinfo_qureyes_20170714*
incr_userinfo_qureyes_20170715*
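The matching input files for a given day can be previewed with a wildcard listing (quoting keeps the glob for HDFS to expand; the date is the example above):

hdfs dfs -ls '/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/incr_userinfo_qureyes_20170715*'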
create table iot_customer_userinfo(
    vpdncompanycode string, mdn string, imsicdma string, imsilte string,
    iccid string, imei string, company string, nettype string,
    vpdndomain string, isvpdn string, subscribetimeaaa string,
    subscribetimehlr string, subscribetimehss string, subscribetimepcrf string,
    firstactivetime string, userstatus string, atrbprovince string,
    userprovince string, crttime string, custProvince string)
partitioned by (d int)
stored as orc
location '/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/data/';
alter table iot_customer_userinfo add IF NOT EXISTS partition(d='20170714');
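With the table created and the partition registered, a quick check from the Hive CLI confirms the load (a sketch; run it against your own metastore):

hive> show partitions iot_customer_userinfo;
hive> select count(*) from iot_customer_userinfo where d=20170714;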
The scheduled shell script (incremental data):
$ cat userETL.sh
dayid=$1
if [ -z "$dayid" ]; then
    dayid=`date -d "1 day ago" "+%Y%m%d"`
fi
##############################################
# Function: build the date string obtained by adding an
#           offset (in days) to a start date
# Args:     start date, yyyymmdd
#           offset in days
# Returns:  start date plus the offset, yyyymmdd
###############################################
function toDate()
{
startdate=$1;
days=$2;
timestamp_startdate=`date -d ${startdate} +%s`
timestamp_resultdate=`expr ${timestamp_startdate} + ${days} \* 86400`
resultdate=`date -d @${timestamp_resultdate} +%Y%m%d`
echo $resultdate
}
filedayid=`toDate $dayid 1`
spark-submit --class="com.zyuc.stat.iot.etl.UserInfoETL" \
--master yarn-client \
--name UserInfoETL \
--conf "spark.app.appName=UserInfoETL" \
--conf "spark.app.dataDayid=${dayid}" \
--conf "spark.app.userTable=iot_customer_userinfo" \
--conf "spark.app.syncType=incr" \
--conf "spark.app.inputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/UserInfo/" \
--conf "spark.app.outputPath=/hadoop/IOT/ANALY_PLATFORM/BasicData/output/UserInfo/" \
--conf "spark.app.fileWildcard=incr_userinfo_qureyes_${filedayid}*" \
--conf "spark.app.vpdnInput=/hadoop/IOT/ANALY_PLATFORM/BasicData/VPDNProvince/" \
--conf "spark.app.vpdnWildcard=vpdninfo.txt" \
--conf spark.yarn.executor.memoryOverhead=700 \
--executor-memory 2G \
--executor-cores 1 \
--num-executors 6 \
/slview/test/zcw/shell/userinfo/jars/userETL.jar \
>/slview/test/zcw/shell/userinfo/logs/${dayid}.log 2>&1
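To run this daily, a crontab entry along the following lines works; the 02:00 start time and the script's install path are assumptions, not part of the original setup:

# run the incremental user-info ETL every day at 02:00
0 2 * * * /bin/bash /slview/test/zcw/shell/userinfo/userETL.sh >> /slview/test/zcw/shell/userinfo/logs/cron.log 2>&1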