linux_data_mining

#!/usr/bin/env bash

#打包
zip -r out.zip ./*

#日期循环
date="2014-10-27";
for((i=0;i<11;i++))
do
./run.sh `date -d"$date +$((i*7)) day" +%F`;
done

#差某一时候的s
date +%s -d "2015-06-01 21:30:00"

#ddb日期格式+000
from=`date -d $1 +%s000`
to=`date -d $2 +%s000`
ddb -s "select sum(buy_num) from one_order_buy_detail where order_status=1 and buy_time>=$from and buy_time<$to"

---------------awk专区--------------------------------------
#awk 根据第一列去重,第二列累加
awk -F"," '{ ary[$1]+=$2 } END{for(key in ary) print ary[key] "," key}'
#和、均、max、min
cat data|awk '{sum+=$1} END {print "Sum = ", sum}'
cat data|awk '{sum+=$1} END {print "Average = ", sum/NR}'
cat data|awk 'BEGIN {max = 0} {if ($1>max) max=$1} END {print "Max=", max}'
awk 'BEGIN {min = 1999999} {if ($1<min) min=$1} END {print "Min=", min}'

awk '$1 == 5' file
awk '$1 == "CT"' file 注意必须带双引号
awk '$1 * $2 >100 ' file
awk '$2 >5 && $2<=15' file

------------------------------------------------------------

#后台运行
nohup sh -x hive_difference.sh uniq_result.txt uid_six &
nohup sh uid_youbian_address.sh &

#日期运行(指定日期范围 自动运行)
beg_s=`date -d "2015-05-19" +%s`
end_s=`date -d "2015-06-01" +%s`
while [ "$beg_s" -le "$end_s" ]
do
DATE=`date "+%F" -d @$beg_s `
beg_s=$((beg_s+86400))
done

cat result_kaijiang_2015-08-12.txt result_liushui_2015-08-12.txt| mailsender -S "一元夺宝每日流水组成-商品分布数据统计需求" -to [email protected]
echo "Test" |mailsender -S "test" -to "[email protected] [email protected]"
mailsender -to [email protected] -S "test" <all_items.txt

awk -F "\t" '{printf "%d\t%s\t%d\t%.4f%%\n", $1,$2,$3,$4*100;}' a

#文件分割 命令
split -d -l 2500000 a z

#编译java文件 导入包名格式
javac -cp .:javamail-1.4.3.jar sendmail_tongji.java

检查文件编码
file XX

#行变列
awk 'BEGIN{OFS="\t";}{printf "%s\t",$2}END{print " "}' z
#列变行
cat file | paste -d "," -s

#判断参数是否为空(两个位置参数都未传入)
if [ ! -n "$1" ] && [ ! -n "$2" ]

#合并列,【渠道、pv、uv】
awk 'NR==FNR{a[$1]=$2} NR>FNR&&a[$1]{print $0,a[$1]}' ${DATE}_qudao_uv ${DATE}_qudao_pv > jieguo

awk '{CMD[$1]+=$2;}END{for(k in CMD){print k,CMD[k];}}' a
mysql -utongji -ptongji -h10.110.9.109 -Dxparkone --default-character-set='gbk' -e "select id,prize_id, goods_name from one_goods_dict order by id;" |sed 1d|awk -F '\t' 'BEGIN{OFS="\t";}{print $2,$1,$3}' |more

awk -F '\t' 'NR==FNR{a[$1]=1}NR>FNR&&a[$3]==1{print $0}' kaola_dict_id xiaofei.08.16-31
awk 'NR==FNR{a[$1]=$2} NR>FNR&&a[$1]{print $0,a[$1]}' all_list kaola_prize_id|awk '{print $2}'>kaola_dict_id

SELECT `name`,SUM(`cost_money`) FROM `table` group by `name` having SUM(`cost_money`)>1000

select winner_uid,sum(price) from one_goods_history

ddb -s "select from one_order_buy_detail where buy_time>=1440864000000 and buy_time<1440950400000 group by uid having sum(buy_num)>=100"

cat zj_pid.xls no_zj_pid.xls |sort -n -k 5|head -1000| awk '$4==0'|wc -l

while read line;do echo $line;done<tuhao_dy_10000_pid

while read line;do echo $line;sh ddb_190442 -s "select pid,create_time,city from one_player_base_info where pid='$line';" >>tuhao_1-2;done<tuhao_dy_10000_pid

sh ddb_190442 -s "select pid,city,max(buy_time) from one_order_buy_detail group by pid limit 10;"

perl -ne 'if($_ =~ /province\":\"(\W+)\",.*zonename\":\"(\W+)\",.*city\":\"(\W+)\"}/){print "$1 $3 $2\n";}'
curl 'http://192.168.200.52/zoneinfo/zone.do?id=445202'

select addrid from prize_log where aid=3 and uid=? order by exchangetime desc;

while read line;do echo $line;sh ddb_190442 -s "select pid,uid,mobile,create_time,city from one_player_base_info where pid='$line';" >>tuhao_757_detail;done<tuhao_sy_757

#转化运行时间为秒
awk '{printf $1" ";cmd="convert_elapsed_seconds "$3;system(cmd);}' origin-list >tmp-list

while read line;do curl 'http://192.168.200.52/zoneinfo/zone.do?id='$line >>you_bian_address ;done<you_bian

#左连接(给第二个文件的每行拼接地址列;并非严格意义的并集)
awk -F '\t' 'NR==FNR{a[$1]=$2} NR>FNR{print $0,a[$1];}' 10000_uid_address_GB.xls tuhao_dy_10000_uid |wc -l

大文件查找与替换
awk -F "|" '{if($1=="61.153.0") print NR,$0}' *_20150913
awk -F "|" '{if(NR==932973) print $0}' combine_gzhzbj_ipdb_chunzhen_tjf_tcey_cne_20150913
sed '6c 100.100.12000|CN|XXX|镇江市|t|浙江省|t|undefined' zz

awk -v a=$sum 'BEGIN{FS="\t";}{printf "%d\t%s\t%d\t%.2f%%\n", NR,$1,$2,$NF/a*100}END{print " ";}'
awk '{printf "%s\t%d\t%.2f\t%d\t%.2f\t%.2f%%\t%.2f%%\n", $1,$2,$3,$4,$5,$4/$2*100,$5/$3*100}' z

时间: 2024-08-05 12:36:43

linux_data_mining的相关文章