DPI流程交接.cmd-1.cp (DPI process handover)

-rw-rw-r-- 1 hzadmin hzadmin 1258939838 Jul 10 08:53 20160709001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1259011023 Jul 10 08:53 20160709002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258893627 Jul 10 08:53 20160709003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258825284 Jul 10 08:53 20160709004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258902330 Jul 10 08:53 20160709005.txt
-rw-rw-r-- 1 hzadmin hzadmin   12662944 Jul 10 08:53 20160709006.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258267725 Jul 11 08:48 20160710001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258199674 Jul 11 08:48 20160710002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258244177 Jul 11 08:48 20160710003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258312191 Jul 11 08:48 20160710004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1131040166 Jul 11 08:48 20160710005.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257713262 Jul 12 09:20 20160711001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257638353 Jul 12 09:20 20160711002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257634023 Jul 12 09:20 20160711003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257516561 Jul 12 09:20 20160711004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257627299 Jul 12 09:21 20160711005.txt
-rw-rw-r-- 1 hzadmin hzadmin  553070932 Jul 12 09:21 20160711006.txt
[hzadmin@… bj_data]$
[hzadmin@… bj_data]$
[hzadmin@… bj_data]$ pwd
/dfs/ftp/hzadmin/bj_data
[hzadmin@… bj_data]$ history
   23  hdfs dfs -ls /home/hzadmin/bj_ggsn/start.log
   24  hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/
   25  hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160509
   26  cd ..
   27  ll
   28  crontab -l
   29  sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log &
   30  tail -f /home/hzadmin/bj_ggsn/start.log
   31  sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log &
   32  ll
   33  cd /home/hzadmin
   34  ll
   35  pwd
   36  cd bj_ggsn/
   37  ll
   38  pwd
   39  cd ../
   40  ll
   41  cd urlAPP/
   42  ll
   43  pwd
   44  crobtab -l
   45  crontab -l
   46  cd ../bj_ggsn/
   47  ll
   48  cd
   49  ll
   50  cd test
   51  ll
   52  cd ../bj_data/
   53  ll
   54  crontab -l
   55  ps -ef|grep start1
   56  tail -f /home/hzadmin/bj_ggsn/start.log
   57  more /home/hzadmin/bj_ggsn/start.log
   58  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/20160509
   59  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/
   60  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/20160509
   61  hdfs dfs -ls /share/external_table/ssa/DPI_MBL_4G/ALL/20160509
   62  crontab -l
   63  cd ..
   64  ll
   65  cd /home/hzadmin
   66  ll
   67  cd bj_ggsn/
   68  ll
   69  cd ../urlAPP/
   70  ll
   71  cd URLAppProgram_sf
   72  ll
   73  more public.cfg
   74  ftp 132.63.10.7
   75  ll
   76  crontab -l
   77  cd trydemo
   78  ll
   79  pwd
   80  cd bj_ggsn
   81  ll
   82  cd ..
   83  ll
   84  cd  /home/hzadmin/bj_ggsn/
   85  ll
   86  pwd
   87  vi /home/hzadmin/bj_ggsn/hours.txt
   88  cd ..
   89  ll
   90  cd urlAPP
   91  ll
   92  cd ..
   93  ;ll
   94  ll
   95  pwd
   96  cd  urlAPP
   97  ll
   98  cd /home/hzadmin/urlAPP/URLAppProgram_sf
   99  ll
  100  cd ..
  101  ll
  102  cd ..
  103  ll
  104  cd  urlAPP
  105  ll
  106  cd URLAppProgram_sf
  107  ll
  108  pwd
  109  ll
  110  pwd
  111  ll
  112  vi  public.cfg
  113  cd  /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  114  ll
  115  vi get_file_list.sh
  116  cd  /home/hzadmin/urlAPP/URLAppProgram_sf
  117  ll
  118  vi get_uacds.sh
  119  cd ProgramByDay
  120  ll
  121  vi  get_uacds.sh
  122  ll
  123  cd /home/hzadmin/urlAPP/URLAppProgram_sf
  124  ll
  125  vi public.cfg
  126  ll
  127  cd bj_data/
  128  ll
  129  cd ..
  130  ll
  131  cd /home/hzadmin
  132  ll
  133  cd bj_ggsn/
  134  ll
  135  more start
  136  more start.log
  137  ps -ef|grep start1.sh
  138  ps -ef|grep start3.sh
  139  kill -9 178805
  140  kill -9 221082
  141  ps -ef|grep start1.sh
  142  ll
  143  cd bj_data/
  144  ll
  145  cd /home/hzadmin
  146  ll
  147  cd bj_ggsn/
  148  ll
  149  crontab -l
  150  ps -ef|grep start1.sh
  151  sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log &
  152  tail -f /home/hzadmin/bj_ggsn/start.log
  153  kill -9 14886
  154  ll
  155  sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log
  156  ps -ef|grep start2
  157  sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log &
  158  ps -ef|grep start2
  159  ps -ef|grep start1
  160  ps -ef|grep start2
  161  ps -ef|grep start3
  162  cd /home/hzadmin
  163  ll
  164  cd bj_ggsn/
  165  ll
  166  more select1.sh
  167  more start1.sh
  168  ll
  169  cd bj_data/
  170  ll
  171  cd /home/hzadmin
  172  ll
  173  cd bj_ggsn/
  174  ll
  175  tail -f start
  176  tail -f start.log
  177  ll
  178  cd log
  179  ll
  180  tail -f 20160509_1.log
  181  hive
  182  hdfs dfs -ls /usr/local
  183  hdfs dfs -ls /
  184  hdfs dfs -chown /usr/local
  185  hdfs dfs -chown hdfs /usr/local
  186  ll
  187  cd bj_data
  188  ll
  189  cd ..
  190  ll
  191  cd test
  192  ll
  193  cd /home/hzadmin
  194  ll
  195  cd bj_ggsn/
  196  ll
  197  crontab -l
  198  sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log &
  199  hive
  200  ll
  201  cd bj_data/
  202  ll
  203  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G
  204  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/
  205  hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/
  206  ll
  207  ll
  208  cd urlAPP
  209  ll
  210  crontab -l
  211  cd /dfs/ftp/hzadmin
  212  ll
  213  cd bj_data
  214  ll
  215  vi  20160509007.txt
  216  cd ..
  217  ll
  218  vi log.txt
  219  cd t_user
  220  ll
  221  vi  phone_number.dat
  222  cd  /home/hzadmin/bj_ggsn
  223  ll
  224  vi  select2.sh
  225  vi /home/hzadmin/urlAPP/BoncRun.sh
  226  cd  /home/hzadmin/urlAPP
  227  ll
  228  cd URLAppProgram_sf
  229  ll
  230  vi  common.cfg
  231  df
  232  cd   /home/hzadmin/urlAPP/URLAppProgram_sf
  233  ll
  234  vi run.sh
  235  ll
  236  cd ProgramByDay
  237  ll
  238  vi report_summary.sh
  239  ll
  240  cd ..
  241  ll
  242  vi  match.cfg
  243  cd  ProgramByHour
  244  ll
  245  cd ..
  246  ll
  247  cd  ProgramByDay
  248  ll
  249  pwd
  250  cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  251  ll
  252  sh ftp_getfilelist.sh
  253  cd ..
  254  ll
  255  cd ProgramByDay
  256  ll
  257  cd ..
  258  ll
  259  cd ProgramByHour
  260  ll
  261  pwd
  262  cd ..
  263  ll
  264  vi match.cfg
  265  cd ProgramByHour
  266  ll
  267  cd ..
  268  ll
  269  cd ..
  270  ll
  271  cd ResultMatch
  272  ll
  273  crontab -l
  274  exit
  275  cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  276  ll
  277  vi get_uacds.sh
  278  cd /home/hzadmin/urlAPP/URLAppProgram_sf
  279  ll
  280  cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  281  ll
  282  vi  get_file_list.sh
  283  get_uacds.sh
  284  cd /data3/ftp000/URLAppProgram
  285  cd ..
  286  ll
  287  cd  ProgramByDay
  288  ll
  289  cd ..
  290  ll
  291  cd ..
  292  ll
  293  cd logs
  294  ll
  295  vi  hive__20160320.log
  296  ll
  297  cd ..
  298  ll
  299  cd  /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  300  ll
  301  cd ..
  302  ll
  303  vi R_URL_TYPE_20160510_00.txt
  304  df
  305  cd ProgramByDay;
  306  ll
  307  cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  308  ll
  309  cd /home/hzadmin/urlAPP/URLAppProgram_sf
  310  ll
  311  ping 132.63.10.7
  312  ls -lt /dfs/ftp/hzadmin/urlAPP/ResultMatch/data
  313  df
  314  cd   /home/hzadmin/urlAPP/
  315  ll
  316  vi  hive.sh
  317  cd /home/hzadmin/bj_ggsn/
  318  ll
  319  vi  delete.sh
  320  pwd
  321  cd  /home/hzadmin/urlAPP/URLAppProgram_sf
  322  ll
  323  cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
  324  ll
  325  vi match.sh
  326  pwd
  327  ll
  328  cd /dfs/data/ugftp/ccg/
  329  ll
  330  cd /dfs/ftp/hzadmin
  331  ll
  332  cd bj_data
  333  ll
  334  pwd
  335  cd ..
  336  ll
  337  cd  urlAPP
  338  ll
  339  cd ResultMatch
  340  ll
  341  cd data
  342  ll
  343  cd ..
  344  ll
  345  cd ..
  346  ll
  347  cd ..
  348  ll
  349  cd bj_data
  350  ll
  351  cd ..
  352  ll
  353  du -sh   bj_data
  354  df
  355  df -h
  356  cd ..
  357  ll
  358  cd ..
  359  ll
  360  df
  361  ll
  362  cd /dfs/ftp/hzadmin
  363  ll
  364  cd t_user/
  365  ll
  366  cd ..
  367  ll
  368  cd /dfs/ftp/hzadmin/
  369  ll
  370  cd /home/hzadmin/
  371  ll
  372  cd bj_ggsn/
  373  ll
  374  more start1.sh
  375  more select1.sh
  376  cd /home/hzadmin
  377  ll
  378  cd
  379  ll
  380  cd bj_data/
  381  ll
  382  pwd
  383  cd ..
  384  ll
  385  cd t_user/
  386  ll
  387  cd ..
  388  ll
  389  cd urlAPP/
  390  ll
  391  cd ResultMatch/
  392  ll
  393  cd data
  394  ll
  395  cd 201605
  396  ll
  397  cd 20160530
  398  ll
  399  cd ..
  400  ll
  401  cd
  402  ll
  403  hdfs
  404  hadoop
  405  hadoop version
  406  ll
  407  cd bj_data/
  408  ll
  409  cd ..
  410  cd /home/hzadmin/
  411  ll
  412  cd bj_ggsn/
  413  ll
  414  vim start1.sh
  415  vim select1.sh
  416  vim delete.sh
  417  more start1.sh
  418  vim /home/hzadmin/urlAPP/hive.sh
  419  cd
  420  cd bj_data/
  421  ll
  422  exit
  423  ll
  424  pwd
  425  cd home
  426  cd bj_data
  427  ll
  428  cd ../
  429  ll
  430  pwd
  431  cd /home
  432  ll
  433  cd hzadmin
  434  ll
  435  cd urlAPP
  436  ll
  437  cd ..
  438  ll
  439  cd bj_data
  440  ll
  441  cd ..
  442  ll
  443  cd /home/hzadmin/bj_ggsn
  444  ll
  445  cd ..
  446  ll
  447  pwd
  448  cd bj_ggsn
  449  ll
  450  cd jar
  451  ll
  452  cd ..
  453  ll
  454  cd ..
  455  ll
  456  cd urlAPP
  457  ll
  458  vi ResultMatch
  459  cd URLAppProgram
  460  ll
  461  cd ..
  462  ll
  463  cd URLAppProgram_sf
  464  ll
  465  vi public.cfg
  466  vi run.sh
  467  ll
  468  cd  urlAPP
  469  ll
  470  cd ..
  471  cd  bj_data
  472  ll
  473  vi 20160607006.txt
  474  cat  20160607006.txt
  475  ll
  476  cd /home
  477  ll
  478  cd /home/hzadmin/bj_ggsn/start.log
  479  cd /home/hzadmin/bj_ggsn
  480  ll
  481  cat start.log
  482  cd ../
  483  ll
  484  cat /dfs/ftp/hzadmin/trydemo/log.txt
  485  crontab -l
  486  ll
  487  cd  /dfs/ftp/hzadmin/bj_data
  488  ll
  489  cat 20160607006.txt
  490  ll
  491  cd bj_data
  492  ll
  493  crontab -l
  494  cd ~
  495  ls
  496  cd /home/hzadmin/
  497  ls
  498  pwd
  499  cd ~
  500  pwd
  501  cd /home/hzadmin/
  502  cd bj_ggsn/
  503  ls
  504  vim start1.sh
  505  cd ..
  506  ls
  507  cd urlAPP/
  508  ls
  509  vim hive.sh
  510  ls
  511  cd ..
  512  ls
  513  cd urlAPP/
  514  ls
  515  cd logs
  516  ls
  517  ll
  518  cd 20160615
  519  ls
  520  ll
  521  more match_20160615_20160614.log
  522  ls
  523  more report_20160615_20160614.log
  524  cd ..
  525  ls
  526  cd ..
  527  ls
  528  cd URLAppProgram_sf/
  529  ls
  530  vim run.sh
  531  ls
  532  cd ProgramByDay/
  533  ls
  534  vim alter_table.sh
  535  ls
  536  vim create_table.sh
  537  ls
  538  vim match1.sh
  539  ls
  540  vim match.sh
  541  hive
  542  ll
  543  cd bj_data/
  544  ll
  545  rm -f ./201605*.txt
  546  ll
  547  ll|grep 201604
  548  rm -f ./201604*.txt
  549  ll
  550  ls -lt
  551  rm -f ./2015*.txt
  552  ll
  553  ls -lt
  554  rm -f ./2015*.tx
  555  ll
  556  ls -lrt
  557  ls -lt
  558  debugfs
  559  exit
  560  ll
  561  cd urlAPP/
  562  ll
  563  cd /dfs/ftp
  564  ll
  565  cd /dfs/ftp/hzadmin
  566  ll
  567  cd urlAPP/
  568  ll
  569  cd URLAppReport/
  570  LL
  571  ll
  572  cd ..
  573  ll
  574  cd UnMatchTop1000/
  575  ll
  576  cd ..
  577  ll
  578  cd ResultMatch/
  579  ll
  580  cd data/
  581  ll
  582  cd ../..
  583  l
  584  cd ..
  585  ll
  586  cd
  587  ll
  588  cd /dfs/ftp/hzadmin/
  589  ll
  590  cd /home/hzadmin/
  591  ll
  592  cd bj_ggsn/
  593  ll
  594  cd ..
  595  ll
  596  cd urlAPP/
  597  ll
  598  cd URLAppProgram
  599  cd URLAppProgram_sf
  600  cd ../URLAppProgram_sf
  601  ll
  602  cd bin
  603  ll
  604  cd ..
  605  ll
  606  pwd
  607  find .. -name "*match*"
  608  find .. -name "*match.sh"
  609  cd ../URLAppProgram_sf/ProgramByDay/match.sh
  610  cd ../URLAppProgram_sf/ProgramByDay/
  611  ll
  612  pwd
  613  ll
  614  cd bj_data/
  615  ll
  616  exit
  617  ll
  618  exit
  619  ll
  620  cd bj_data/
  621  ll
  622  cd /home/hzadmin
  623  ll
  624  cd bj_ggsn/
  625  ll
  626  sh start2.sh 20160625
  627  sh start2.sh 20160625 > start.log 2>&1 &
  628  tail -f start.log
  629  cd
  630  ll
  631  cd bj_data/
  632  ll
  633  cd /ap
  634  cd /app
  635  ll
  636  cd hadoop/con
  637  cd hadoop/etc/hadoop/
  638  ll
  639  more core-site.xml
  640  ll
  641  ll
  642  cd /home/hzadmin
  643  ll
  644  cd bj_ggsn/
  645  ll
  646  more start2.sh
  647  sh start2.sh 20160625
  648  ll
  649  sh start2.sh 20160625 > start.log 2>&1 &
  650  tail -f start.log
  651  ll
  652  more start1.sh
  653  more start2.sh
  654  ll
  655  more start.log
  656  cd /dfs/ftp/hzadmin/test/
  657  tail  start.log
  658  cd -
  659  tail -n 200 start.log
  660  ll
  661  more start3.sh
  662  sh ./start2.sh 20160625 > start.log 2>&1 &
  663  tail -f start.log
  664  cd
  665  cd test/
  666  ll
  667  cd ..
  668  ll
  669  cd bj_data/
  670  ll
  671  cd
  672  cd /home/hzadmin/
  673  ll
  674  cd bj_ggsn/
  675  ll
  676  vim start2.sh
  677  sh ./start2.sh 20160625 > start.log 2>&1 &
  678  df -h
  679  tail -f start.log
  680  ll
  681  cd bj_data/
  682  ll
  683  cd ..
  684  ll
  685  cd /home/hzadmin
  686  ll
  687  cd bj_ggsn/
  688  ll
  689  sh start2.sh 20160624 > start.log 2>&1 &
  690  ll /dfs/ftp/hzadmin/bj_data/
  691  cd
  692  ll
  693  cd bj_data/
  694  ll
  695  cd -
  696  ll
  697  cd -
  698  ll
  699  cd -
  700  cd /home/hzadmin/
  701  ll
  702  cd bj_ggsn/
  703  ll
  704  tail -f start.log
  705  ll /dfs/ftp/hzadmin/bj_data/
  706  sh start2.sh 20160625 > start.log 2>&1 &
  707  ftp 10.62.242.124
  708  ll /dfs/ftp/hzadmin/bj_data/
  709  tail -f start.log
  710  ll /dfs/ftp/hzadmin/bj_data/
  711  tail -f start.log
  712  ll
  713  ps -ef |grep start2.sh
  714  ll
  715  ll /dfs/ftp/hzadmin/bj_data/
  716  tail -f -n 100 start.log
  717  ll
  718  cd bj_data/
  719  ll
  720  cd /home/hzadmin
  721  ll
  722  cd bj_ggsn/
  723  ll
  724  sh start2.sh 20160626 > start.log 2>&1 &
  725  hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/match
  726  hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/
  727  hadoop fs -ls /
  728  hadoop fs -ls /user
  729  hadoop fs -ls /user/hzadmin
  730  hadoop fs -mkdir /user/hzadmin/extract
  731  hadoop fs -ls /user/hzadmin/
  732  exit
  733  cd ~
  734  ls
  735  cd /home/hzadmin
  736  ls
  737  spark-submit
  738  exit
  739  ls
  740  exit
  741  hadoop fs -ls /home/hzadmin
  742  hadoop fs -ls /user/hzadmin
  743  hadoop fs -rm -r /user/hzadmin/extract
  744  hadoop fs -ls /user/hzadmin
  745  exit
  746  ll
  747  cd bj_data/
  748  ll
  749  /home/spark/spark-1.2.2-bin-hadoop2.4/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  750  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  751  ll /home/spark/spark-1.2.2-bin-hadoop2.4/bin
  752  exit
  753  ll /home/spark/spark-1.2.2-bin-hadoop2.4/bin
  754  ll /home/spark/spark-1.2.2-bin-hadoop2.4/
  755  ll /home/spark/
  756  exit
  757  ll /home/spark/
  758  ll /home/spark/spark-1.2.2-bin-hadoop2.4/
  759   /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit
  760  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  761  exit
  762  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  763  yarn application -list
  764  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  765  yarn application -list
  766  yarn application -kill application_1464150086810_7363
  767  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  768  hadoop fs -ls /user/hzadmin
  769  hadoop fs -rm -r /user/hzadmin/extract
  770  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
  771  hadoop fs -ls /user/hzadmin/extract
  772  hadoop fs -du -h /user/hzadmin/extract
  773  hadoop fs -du -h /user/hzadmin/
  774  ls
  775  exit
  776  hadoop fs -ls /user/hzadmin
  777  hadoop fs -ls /user/hzadmin/.sparkStaging
  778  hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_7363
  779  ls
  780  mkdir extract
  781  ls
  782  hadoop fs -get /user/hzadmin/extract/* /home/hzadmin/extract/
  783  ls
  784  ll -h
  785  ll extract/
  786  ls
  787  tar -zcvf extract.tar.gz extract
  788  ls
  789  ll -h
  790  exit
  791  ll
  792  cd bj_data/
  793  ll
  794  ll -h
  795  cd ..
  796  ll
  797  mkdir 6y
  798  ll
  799  cd bj_data/
  800  ll
  801  cp 201606* ../6y/
  802  ll
  803  cd ..
  804  ll
  805  rm -rf 6y
  806  ll
  807  cd 6y/
  808  ll
  809  df -h
  810  ll
  811  cd ..
  812  ll
  813  cd bj_data/
  814  ll
  815  ls |grep 201606
  816  ls |grep 201606|xargs du -h
  817  ls |grep 201606|xargs du -cb
  818  ls |grep 201606|xargs du -h
  819  ls |grep 201606|xargs du -cb
  820  ls |grep 201606|xargs du -cbh
  821  ls |grep 201603|xargs du -cbh
  822  hadoop fs -ls /user/hzadmin
  823  hadoop fs -ls /user/hzadmin/extract
  824  hadoop fs -rm -r /user/hzadmin/extract
  825  hadoop fs -ls /user/hzadmin
  826  hadoop fs -ls /user/hzadmin/.sparkStaging
  827  hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_9663
  828  hadoop fs -ls /user/hzadmin/.sparkStaging/.staging
  829  hadoop fs -ls /user/hzadmin/.staging
  830  hadoop fs -ls /
  831  hadoop fs -ls /app-logs
  832  hadoop fs -ls /app-logs/hzadmin
  833  hadoop fs -ls /app-logs/hzadmin/logs
  834  hadoop fs -ls /app-logs/hzadmin/logs/application_1464150086810_9663
  835  cd ~
  836  ls
  837  cd /home/hzadmin/
  838  ls
  839  hadoop fs -get /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454
  840  ls
  841  more BD18.bd.bjtel_45454
  842  hadoop fs -tail /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454
  843  exit
  844  hadoop fs -ls /user/hzadmin
  845  hadoop fs -ls /user/hzadmin/extract
  846  hadoop fs -rm -r /user/hzadmin/extract
  847  hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
  848  hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
  849  hadoop fs -ls /user/hzadmin/extract
  850  hadoop fs -du -h /user/hzadmin/extract
  851  hadoop fs -du -h /user/hzadmin/
  852  hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
  853  hadoop fs -du -h /user/hzadmin/
  854  hadoop fs -du -h /user/hzadmin/extract2
  855  cd /home/hzadmin
  856  ls
  857  hadoop fs -get /user/hzadmin/extract
  858  ls
  859  ls extract/
  860  hadoop fs -get /user/hzadmin/extract2
  861  ls
  862  man gz
  863  man tar
  864  ls
  865  tar -cf extract
  866  tar zcvf extract.tar.gz extract
  867  ls
  868  tar zcvf extract2.tar.gz extract2
  869  ls
  870  exit
  871  hadoop fs -ls /user/hzadmin
  872  hadoop fs -ls /user/hzadmin/extract
  873  hadoop fs -rm -r /user/hzadmin/extract
  874  hadoop fs -ls /user/hzadmin/
  875  ls
  876  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 8g --executor-cores 4 --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
  877  yarn application -list
  878  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
  879  hadoop fs -ls /user/hzadmin
  880  hadoop fs -ls /user/hzadmin/extract
  881  hadoop fs -rm -r /user/hzadmin/extract
  882  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
  883  yarn application -list
  884  yarn application -kill application_1464150086810_9170
  885  yarn application -list
  886  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 20 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  887  yarn application -list
  888  yarn application -kill application_1464150086810_9256
  889  history
  890  hadoop fs -ls /user/hzadmin
  891  hadoop fs -ls /user/hzadmin
  892  hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match
  893  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  894  yarn application -list
  895  yarn application -kill application_1464150086810_9293
  896  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  897  ls /home/spark/
  898  find /home/spark/ -name *example*jar
  899  jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i pi
  900  jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i Pi
  901  jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i SparkPi
  902  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi  --deploy-mode cluster --executor-memory 4G --num-executors 40 10
  903  find /home/spark/ -name *example*jar
  904  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi  --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  905  yarn
  906  yarn application
  907  yarn application -list
  908  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi  --deploy-mode cluster --executor-memory 4G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  909  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi  --deploy-mode cluster --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  910  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  911  hdfs dfs -ls /user/hzadmin/
  912  hdfs dfs -ls /user/hzadmin/extract
  913  hdfs dfs -rmr /user/hzadmin/extract
  914  hdfs dfs -ls /user/hzadmin/extract
  915  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  916  yarn application -list
  917  yarn application -kill application_1464150086810_9459
  918  yarn application -list
  919  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  920  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  921  yarn application -list
  922  yarn application -kill application_1464150086810_9476
  923  hadoop fs -ls /user/hzadmin
  924  hadoop fs -rm -r /user/hzadmin/extract
  925  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  926  yarn application -list
  927  yarn application -kill application_1464150086810_9481
  928  pwd
  929  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  930  jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep sparkPi
  931  jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i sparkpi
  932  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
  933  diagnostics: Application application_1464150086810_9496 failed 2 times due to AM Container for appattempt_1464150086810_9496_000002 exited with  exitCode: 10 due to: Exception from container-launch: org.apache.hadoop.util.Shell$ExitCodeException:
  934  org.apache.hadoop.util.Shell$ExitCodeException:
  935          at org.apache.hadoop.util.Shell.runCommand(Shell.java:505)
  936          at org.apache.hadoop.util.Shell.run(Shell.java:418)
  937          at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
  938          at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
  939          at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300)
  940          at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81)
  941          at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
  942          at java.util.concurrent.FutureTask.run(FutureTask.java:138)
  943          at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
  944          at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
  945          at java.lang.Thread.run(Thread.java:662)
  946  Container exited with a non-zero exit code 10
  947  .Failing this attempt.. Failing the application.
  948           ApplicationMaster host: N/A
  949           ApplicationMaster RPC port: -1
  950           queue: default
  951           start time: 1467966689710
  952           final status: FAILED
  953           tracking URL: BD01.bd.bjtel:8088/cluster/app/application_1464150086810_9496
  954           user: hzadmin
  955  Exception in thread "main" org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
  956          at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:118)
  957          at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:59)
  958          at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:140)
  959          at org.apache.spark.SparkContext.<init>(SparkContext.scala:348)
  960          at org.apache.spark.examples.SparkPi$.main(SparkPi.scala:28)
  961          at org.apache.spark.examples.SparkPi.main(SparkPi.scala)
  962          at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  963          at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  964          at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  965          at java.lang.reflect.Method.invoke(Method.java:606)
  966          at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358)
  967          at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
  968          at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
   969  [hzadmin@… hzadmin]$
  970  yarn application -list
  971  hadoop fs -ls /user/hzadmin
  972  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  973  yarn application -list
  974  yarn application -kill application_1464150086810_9663
  975  hadoop fs -ls /user/hzadmin
  976  hadoop fs -ls /user/hzadmin/extract
  977  hadoop fs -rm -r /user/hzadmin/extract
  978  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  979  yarn application -lsit
  980  yarn application -lis
  981  yarn application -kill application_1464150086810_9732
  982  hadoop fs -rm -r /user/hzadmin/extract
  983  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 16G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  984  yarn application -kill  application_1464150086810_9733
  985  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 20G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
  986  /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160707/match /user/hzadmin/extract2
  987  ls
  988  ls extract/
  989  rm -rf extract
  990  ls
  991  top
  992  ls
  993  top
  994  ll
  995  [wd
  996  pwd
  997  cd /home/hzadmin
  998  ll
  999  cd bj_ggsn/
 1000  ll
 1001  crontab -l
 1002  more start1.sh
 1003  more start2.sh
 1004  ~/bj_data/
 1005  cd ~/bj_data/
 1006  ll
 1007  cd -
 1008  ll
 1009  more start2.sh
 1010  more start1.sh
 1011  ll
 1012  cd ..
 1013  cd urlAPP/
 1014  ll
 1015  cd
 1016  ll
 1017  cd /dfs/ftp/hzadmin/
 1018  ll
 1019  cd bj_data/
 1020  ll
 1021  pwd
 1022  history
[hzadmin@… bj_data]$

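The last stretch of the history above is an ad-hoc Spark extraction: process_2.10-1.0.jar (class Extract) is submitted to YARN against a day's /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/<date>/match directory and writes to /user/hzadmin/extract, which is then pulled to local disk and packed. The jar's internals are not captured here, and the resource settings were tuned between attempts (several runs were killed), so the sequence below is just one combination taken from the history, not a fixed recipe:

    # clear any previous output first (the history removes it before every re-run)
    hadoop fs -rm -r /user/hzadmin/extract
    /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster \
        --executor-memory 4G --num-executors 40 --queue datagather \
        /home/hzadmin/process_2.10-1.0.jar \
        /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match \
        /user/hzadmin/extract
    yarn application -list                                     # monitor, or find the id for yarn application -kill
    cd /home/hzadmin && hadoop fs -get /user/hzadmin/extract   # copy the result down
    tar zcvf extract.tar.gz extract                            # pack it for transfer

A second output, /user/hzadmin/extract2 (for 20160707), was produced the same way.
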
--------------------------------------------------------------------------------------
[hzadmin@… home]$ cd hzadmin/
[hzadmin@… hzadmin]$
[hzadmin@… hzadmin]$ ll
total 28
drwxrwxr-x 3 hzadmin hzadmin 4096 Aug  5  2015 bak
drwxr-xr-x 4 hzadmin hzadmin 4096 Jun 26 19:31 bj_ggsn
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:48 extract
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:55 extract2
-rw-r--r-- 1 root    root    5485 Jun 29 10:46 process_2.10-1.0.jar
drwxrwxr-x 8 hzadmin hzadmin 4096 Jun 17 11:09 urlAPP
[hzadmin@… hzadmin]$ pwd
/home/hzadmin
[hzadmin@… hzadmin]$ cd bj_ggsn/
[hzadmin@… bj_ggsn]$ ll
total 136
-rwxr-xr-x 1 hzadmin hzadmin   433 Feb 10 20:39 delete.sh
-rw-r--r-- 1 hzadmin hzadmin    71 Apr 30  2015 hours.txt
drwxr-xr-x 2 root    root     4096 Aug  5  2015 jar
drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 12 03:19 log
-rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out
-rwxr-xr-x 1 hzadmin hzadmin  1845 Sep 23  2015 select1.sh
-rwxr-xr-x 1 hzadmin hzadmin   454 Oct 12  2015 select2bak.sh
-rwxr-xr-x 1 hzadmin hzadmin  1367 Oct 12  2015 select2.sh
-rwxr-xr-x 1 hzadmin hzadmin  1344 Jun 18  2015 select.sh
-rwxr-xr-x 1 hzadmin hzadmin  1337 May  4  2015 select.shbak
-rwxr-xr-x 1 hzadmin hzadmin   628 Oct 28  2015 start1.sh
-rwxr-xr-x 1 hzadmin hzadmin   692 Jun 26 19:31 start2.sh
-rwxr-xr-x 1 hzadmin hzadmin   636 May 10 14:22 start3.sh
-rwxr-xr-x 1 hzadmin hzadmin   631 Mar  5 13:27 startbak1.sh
-rw-r--r-- 1 hzadmin hzadmin 16658 Jul 12 09:21 start.log
[hzadmin@… bj_ggsn]$

[hzadmin@… bj_ggsn]$ crontab -l
00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log
00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt
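
The two cron entries above drive the whole flow: start1.sh runs the daily DPI load and matching chain at 03:00 (overwriting start.log each night), and dailycheckdemo.sh appends a daily check to /dfs/ftp/hzadmin/trydemo/log.txt at 13:00. When the 03:00 run has to be repeated by hand, the history shows it launched exactly as cron does:

    # manual re-run of the nightly job, same redirection as the cron entry
    sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log &
    tail -f /home/hzadmin/bj_ggsn/start.log    # watch progress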
[hzadmin@… bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@… bj_ggsn]$
[hzadmin@… bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@… bj_ggsn]$ cat start1.sh
#!/bin/sh
source ~/.bash_profile
datetime=$(date --date "1 days ago" +%Y%m%d)
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime  >> log/${datetime}_1.log 2>&1
sh /home/hzadmin/bj_ggsn/select2.sh $datetime  >> log/${datetime}_2.log 2>&1
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun.sh
sh /home/hzadmin/urlAPP/hive.sh $datetime
sh /home/hzadmin/bj_ggsn/delete.sh
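
start1.sh is the nightly driver: it computes yesterday's date, runs select1.sh (3G) and select2.sh (4G) with per-day logs under bj_ggsn/log/, moves the finished bj_ggsn_mobile partition out of the Hive warehouse to /share/hzadmin/external_table/DMP_SSA/DPI/<date>/, then kicks off the urlAPP chain (get_uacds.sh, BoncRun.sh, hive.sh) and finally delete.sh for cleanup. For backfilling a specific day the history uses start2.sh instead (its content is not captured here, but the history shows it takes the date as an argument), for example:

    # backfill one day; start1.sh itself always processes "yesterday"
    cd /home/hzadmin/bj_ggsn
    sh start2.sh 20160625 > start.log 2>&1 &
    tail -f start.log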
[hzadmin@… bj_ggsn]$ cat select1.sh
#!/bin/bash
datetime=$1
hours=/home/hzadmin/bj_ggsn/hours.txt
s=`du -k /dfs/ftp/hzadmin/t_user/phone_number.dat|awk '{print $1}'`
datetime2=`date -d -2day +%Y%m%d`

hive -e"use dpi;alter table t_user add IF NOT EXISTS partition(receive_day='$datetime');"
if [ $s -ge 4000 ];
then
    hadoop fs -put /dfs/ftp/hzadmin/t_user/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
else
    hadoop fs -mv /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime2/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
fi
while read LINE
do
   hadoop fs -test -e /share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}
   if [ $? -eq 0 ]; then
        hive -e "use dpi;alter table bj_ggsn add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
   else
        echo 'not exist'
   fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn t
 on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
 insert overwrite table bj_ggsn_mobile
  partition (receive_day = '${datetime}')
    select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
         t.LAC,
         t.CI,
         t.IMEI,
         t.BUSITYPE,
         t.CAPTURETIME,
         t.ENDTIME,
         t.DURATION,
         t.FLOWUP,
         t.FLOWDOWN,
         t.FLOWALL,
         t.RATTYPE,
         t.TERMIANL_IP,
         t.DESTIP,
         t.STATUSCODE,
         t.USERAGENT,
         t.APN,
         t.IMSI,
         t.SGSNIP,
         t.GGSNIP,
         t.CONTENTTYPE,
         t.SOURCEPORT,
         t.DESTPORT,
         t.LOGOCODE,
         t.URL,
         t.RESULT,
         t.HOST,
         '3G',
         t.YULIU2,
         t.YULIU3;
"
[hzadmin@… bj_ggsn]$ cat select2.sh
hours=/home/hzadmin/bj_ggsn/hours.txt
datetime=$1
while read LINE
do
   hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}
   if [ $? -eq 0 ]; then
        hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
   else
        echo 'not exist'
   fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn_4g t
 on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
 insert into table bj_ggsn_mobile
  partition (receive_day = '${datetime}')
    select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
         t.LAC,
         t.CI,
         t.IMEI,
         t.BUSITYPE,
         t.CAPTURETIME,
         t.ENDTIME,
         t.DURATION,
         t.FLOWUP,
         t.FLOWDOWN,
         t.FLOWALL,
         t.RATTYPE,
         t.TERMIANL_IP,
         t.DESTIP,
         t.STATUSCODE,
         t.USERAGENT,
         t.APN,
         t.IMSI,
         t.SGSNIP,
         t.GGSNIP,
         t.CONTENTTYPE,
         t.SOURCEPORT,
         t.DESTPORT,
         t.LOGOCODE,
         t.URL,
         t.RESULT,
         t.HOST,
         '4G',
         t.YULIU2,
         t.YULIU3;
"
[hzadmin@… bj_ggsn]$ cat /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
#!/bin/bash

cd `dirname $0`
cd ProgramByDay/
./get_file_list.sh
./get_uacds.sh
[hzadmin@… bj_ggsn]$ cd /home/hzadmin/urlAPP/URLAppProgram_sf/
[hzadmin@… URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 bin
-rwxr-xr-x 1 hzadmin hzadmin     3017 Sep 28  2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin      200 Nov  7  2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin       80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin       33 Jul 11 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin     4096 Jul 12 04:35 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin      741 Jul 14  2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin   721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin      728 Nov  7  2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin     4705 May  6  2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin     4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin  1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin     6966 Jun 15  2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin     6301 Sep 28  2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin     6291 May  7  2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin  1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar
[hzadmin@… URLAppProgram_sf]$ cd ProgramByDay/
[hzadmin@… ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin  1846 May 11  2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20  2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun  8  2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin  1280 Jun 16  2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin   291 Jul 14  2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin  2279 Jul 14  2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin  4389 May  7  2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin   604 Nov  7  2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin  1011 Nov  7  2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin  2829 Aug  4  2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin  2908 Sep 28  2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin  2820 May  6  2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin  6788 Jun  8  2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin  2060 May  6  2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   146 Jul 16  2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   151 Jul 16  2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin  4691 Nov  7  2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin   166 Jul 16  2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@… ProgramByDay]$ cat get_file_list.sh
#!/bin/bash

cd `dirname $0`

eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)

ftp -n<<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
cd $FTP_DATA_PATH
mdir $FTP_DATA_PATH ftp_con.txt
bye
!
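
get_file_list.sh pulls the FTP settings out of ../public.cfg with grep + eval and writes a long-format listing of the front-end package directory into ftp_con.txt (the ftp mdir command). The file is transient: get_uacds.sh consumes it and deletes it at the end of its run, which is why the cat ftp_con.txt a little further down reports "No such file or directory". To regenerate it by hand:

    cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
    ./get_file_list.sh     # rewrites ftp_con.txt from the FTP listing
    head ftp_con.txt       # ls -l style lines; get_uacds.sh takes the file name from column 9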
[hzadmin@… ProgramByDay]$ cat ../public.cfg
# Path on the front-end (staging) server where the classification/matching program packages are stored
FTP_DATA_PATH=/data3/ftp000/URLAppProgram;

# Local path where the classification/matching program packages are stored
DATA_HOME=/home/hzadmin/urlAPP/URLAppProgram;

# IP of the group front-end server that hosts the classification/matching program packages
FTP_IP=132.63.10.7;

# FTP username for the group front-end server
FTP_USERNAME=ftp811;

# FTP password for the group front-end server
FTP_PWD=ftp811!123;

# Path for the Top1000 unmatched-record files
UnMatchTop1000=/home/hzadmin/urlAPP/UnMatchTop1000
# Path for the urlApp statistics report files
URLAppReport=/home/hzadmin/urlAPP/URLAppReport

# Maximum number of matching-program packages to keep
ZIP_LIMIT=10;

# Retention limit for the Top1000 and urlApp statistics reports
REPORT_LIMIT=10;

# Retention limit (days) for files kept on Hadoop
DELETE_DAY=4;

# Retention limit (days) for the summarised match files
SUMMARY_DAY=7

[hzadmin@… ProgramByDay]$ cat ftp_con.txt
cat: ftp_con.txt: No such file or directory
[hzadmin@… ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin  1846 May 11  2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20  2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun  8  2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin  1280 Jun 16  2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin   291 Jul 14  2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin  2279 Jul 14  2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin  4389 May  7  2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin   604 Nov  7  2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin  1011 Nov  7  2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin  2829 Aug  4  2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin  2908 Sep 28  2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin  2820 May  6  2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin  6788 Jun  8  2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin  2060 May  6  2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   146 Jul 16  2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   151 Jul 16  2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin  4691 Nov  7  2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin   166 Jul 16  2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@… ProgramByDay]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
[hzadmin@… ProgramByDay]$ grep FTP_PWD ../public.cfg
FTP_PWD=ftp811!123;
[hzadmin@… ProgramByDay]$ id
uid=526(hzadmin) gid=526(hzadmin) groups=526(hzadmin),547(spark)
[hzadmin@… ProgramByDay]$

[hzadmin@… URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[hzadmin@… URLAppProgram_sf]$ cd ProgramByDay/
[hzadmin@… ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin  1846 May 11  2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20  2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun  8  2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin  1280 Jun 16  2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin   291 Jul 14  2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin  2279 Jul 14  2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin  4389 May  7  2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin   604 Nov  7  2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin  1011 Nov  7  2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin  2829 Aug  4  2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin  2908 Sep 28  2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin  2820 May  6  2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin  6788 Jun  8  2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin  2060 May  6  2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   146 Jul 16  2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   151 Jul 16  2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin  4691 Nov  7  2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin   166 Jul 16  2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@… ProgramByDay]$ cat get_uacds.sh
#!/bin/bash
#===========================================
#*
#* Copyright BONC
#* All rights reserved.
#* Abstract: Download file from the group to local
#* FileName: get_uacds.sh
#* Author: LiangWei
#* Create Time: 2014-02-22
#* Mender:
#* Mender Time:
#* Modify content:
#*
#============================================
cd `dirname $0`
eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep DATA_HOME ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)
LOCAL=`pwd`
$LOCAL/get_file_list.sh

YY=`date +%Y`
MM=`date +%m`
DD=`date +%d`
DATE=$YY$MM$DD

cd $LOCAL

awk '{print $9}' ftp_con.txt > grep.txt
e=`wc -l grep.txt | cut -d ' ' -f 1`
for ((m=1;m<=e;m++))
do
 grepstr='sed -n '$m'p grep.txt'
 greps=`$grepstr`
greps2=`expr substr $greps 1 8`

greps3=`expr substr $greps 18 7`

if [[ "$greps2" = 'UACDS_20' && "$greps3" = '.tar.gz' ]]; then
  echo $greps >> grep2
fi
done

cut -d '_' -f 2 grep2 | uniq | sort -r > day
a=`wc -l day | cut -d ' ' -f 1` 

sort -r grep2 > mu

  strd='sed -n 1p day'
  str=`$strd`
  b=`wc -l mu | cut -d ' ' -f 1`
  for ((j=1;j<=b;j++))
   do
    str1='sed -n '$j'p mu '
    echo `$str1` > str2
   str2='str2'
    str3=`cut -d '_' -f 2 str2`
     if [ "$str"x = "$str3"x ];
       then
       cat $str2 >> files  #del
      break
     fi
  done

c=`wc -l files | cut -d ' ' -f 1`
for ((k=1;k<=c;k++))
 do
  mystr='sed -n '$k'p files'
  myFile=`$mystr`
  cd $DATA_HOME
   if [ -f $myFile ]; then
   echo $myFile
   else
   cd $LOCAL
   echo $myFile > files2.txt #del
  fi
done

logDir=${DATA_HOME}/logs
logFile=${DATA_HOME}/'logs'/${DATE}.log

mkdir -p $logDir

d=`wc -l files2.txt | cut -d ' ' -f 1`

for ((l=1;l<=d;l++))
do

echo "Begin at: `date` ;" | tee -a $logFile
time_begin=$(date +%s )
str4='sed -n '$l'p files2.txt'
DATAFILES=`$str4`
ftp -n<<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
bin
prompt
cd $FTP_DATA_PATH
lcd $DATA_HOME
mget $DATAFILES
bye
!
time_end=$(date +%s )
time_total=`expr ${time_end} - ${time_begin}`
echo "End at: `date`;" | tee -a $logFile
echo "DownLoadfilename: $DATAFILES  total time=${time_total} s ;" | tee -a $logFile
done

cd $LOCAL
rm day
rm files
rm ftp_con.txt
rm grep.txt
rm grep2
rm mu
rm str2
echo "$LOCAL";
sh $LOCAL/load_data.sh
[hzadmin@… ProgramByDay]$

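get_uacds.sh is the rule-package refresh: it pulls the FTP listing (get_file_list.sh), picks the newest UACDS_YYYYMMDD_NN.tar.gz named in it, downloads that package with ftp mget into DATA_HOME if it is not already there (logging the transfer time under DATA_HOME/logs/), then removes its temporary files and hands over to load_data.sh. The selection logic is spread across several temp files (grep.txt, grep2, day, mu, files, files2.txt); a minimal restatement of what it amounts to, assuming the remote names keep the UACDS_YYYYMMDD_NN.tar.gz pattern:

    # sketch only; the production logic is the temp-file sequence in get_uacds.sh above
    # DATA_HOME is read from ../public.cfg
    awk '{print $9}' ftp_con.txt | grep '^UACDS_20' | grep '\.tar\.gz$' | sort -r > candidates
    newest=$(head -n 1 candidates)              # newest package (the date is embedded in the name)
    if [ ! -f "${DATA_HOME}/${newest}" ]; then
        echo "fetch ${newest}"                  # get_uacds.sh downloads it with ftp mget
    fi
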
[hzadmin@… ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin  1846 May 11  2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20  2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun  8  2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin  1280 Jun 16  2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin   291 Jul 14  2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin  2279 Jul 14  2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin  4389 May  7  2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin   604 Nov  7  2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin  1011 Nov  7  2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin  2829 Aug  4  2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin  2908 Sep 28  2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin  2820 May  6  2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin  6788 Jun  8  2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin  2060 May  6  2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   144 Jul 16  2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   146 Jul 16  2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   151 Jul 16  2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin   147 Jul 16  2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin  4691 Nov  7  2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin   166 Jul 16  2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$
[hzadmin@… ProgramByDay]$ cat load_data.sh
#!/bin/bash

#*=================================================
#*
#* FileName  : load_data.sh
#* CreateDate: 2014-02-25
#* Abstract  : Unzip the file 'UACDS_YYYYMMDD_**.tar.gz'
#* Author    : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
PWDNOW=`pwd`
eval $(grep DATA_HOME ../public.cfg)
cd ..
LOC=`pwd`
cd ${PWDNOW}

a=`wc -l files2.txt | cut -d ' ' -f 1`

if [ "$a" == 1 ]; then

str=`sed -n 1p files2.txt`

rm -f ${LOC}/match.cfg
rm -f ${LOC}/UACDS*.jar
rm -f ${LOC}/R_*_TYPE*.txt

tar xzvf $DATA_HOME/$str -C ${LOC}
fi
rm files2.txt
[hzadmin@… ProgramByDay]$

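load_data.sh finishes the refresh: only when exactly one new package name ended up in files2.txt does it delete the current match.cfg, UACDS*.jar and R_*_TYPE*.txt from the URLAppProgram_sf directory and untar the freshly downloaded package there in their place; otherwise it only removes files2.txt and the existing rule set stays as it is. That matches the directory listing below, where match.cfg, the R_*_TYPE_20160711_00.txt rule files and the UACDS_20160711_* jars all carry the same 21:00/21:01 timestamp from the last refresh.
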
[hzadmin@… URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[hzadmin@… URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 bin
-rwxr-xr-x 1 hzadmin hzadmin     3017 Sep 28  2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin      200 Nov  7  2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin       80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin       33 Jul 11 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin     4096 Jul 12 04:35 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin      741 Jul 14  2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin   721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin      728 Nov  7  2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin     4705 May  6  2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin     4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin  1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin     6966 Jun 15  2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin     6301 Sep 28  2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin     6291 May  7  2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin  1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar
[hzadmin@… URLAppProgram_sf]$ cat run.sh
#!/bin/bash
#run.sh
#*=================================================
#*
#* FileName : run.sh
#* CreateDate: 2014-04-03
#* Abstract : Overall deployment schedule
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================

cd `dirname $0`

eval $(grep DEFAULT_TIME common.cfg)
eval $(grep LOC_DIR common.cfg)
eval $(grep DELAY_DAY common.cfg)
eval $(grep DELAY_HOUR common.cfg)
eval $(grep IS_LTBAL common.cfg)

dataday=`date -d -${DELAY_DAY}days-${DELAY_HOUR}hours +%Y%m%d`
datahour=`date -d -${DELAY_HOUR}hours +%H`

if [ $# -eq 2 ] ; then

		if [ ${DEFAULT_TIME} = 'day' ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
    else
    		dataday=$1;
				datahour=$2;
		fi;
elif [ $# -eq 1 ] ; then

		if [ ${DEFAULT_TIME} = 'hour' ] ; then
        echo "Input parameter error : there should be 2 parameters";
        exit 1;
    else
    		dataday=$1;
		fi;
fi;

DAY=`date +%Y%m%d`

if [ ! -d "${LOC_DIR}/logs/${DAY}" ] ; then
        mkdir -p "${LOC_DIR}/logs/${DAY}"
fi;
DEL_DAY=`date -d -10days +%Y%m%d`
if [ -d "${LOC_DIR}/logs/${DEL_DAY}" ] ; then
        rm -rf "${LOC_DIR}/logs/${DEL_DAY}"
fi;

if [ ${DEFAULT_TIME} = 'day' ] ; then

echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday};
echo "===========================================================================";

cd `dirname $0`

echo "========== Step 1 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log ";
./ProgramByDay/match.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log 2>&1

echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log ";
./ProgramByDay/report_summary.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday".log 2>&1
./ProgramByDay/report.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday".log 2>&1

if [ ${IS_LTBAL} = '1' ] ; then
./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1
fi

echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log ";
./ProgramByDay/upload.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log 2>&1

./ProgramByDay/logupload.sh ${DAY}

exit 0;
fi;

if [[ ${datahour} = '00' ]] ; then

echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";

echo "========== Step 1 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByDay/ftp_getfilelist.sh > ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log 2>&1

echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1

echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1

if [[ ${datahour} = '23' ]] ; then

echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";

echo "========== Step 1 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1

echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1

echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log ";

if [ ${IS_LTBAL} = '1' ] ; then
./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1
fi

./ProgramByHour/upload_H.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log 2>&1

else

echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";

echo "========== Step 1 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1

echo "========== Step 2 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";

./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1

fi;
./remove_files.sh > ${LOC_DIR}/logs/${DAY}/remove_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByDay/logupload.sh ${DAY}
[[email protected] URLAppProgram_sf]$
[[email protected] URLAppProgram_sf]$
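# How run.sh is typically invoked (the crontab entry itself is not captured in this transcript, so these calls are illustrative):
# day mode (DEFAULT_TIME=day) - no arguments, the data date is derived from DELAY_DAY/DELAY_HOUR in common.cfg
cd /home/hzadmin/urlAPP/URLAppProgram_sf && ./run.sh
# manual backfill of a single data date in day mode
./run.sh 20160712
# hour mode (DEFAULT_TIME=hour) takes the data date and the hour
./run.sh 20160712 07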

[[email protected] ProgramByDay]$ cat match.sh
#!/bin/bash
# match.sh
######################################################################################################
# function : run the matching job and produce the statistical report data
# date: 2014/02/10
# author: SPP
# param: day (data date)
######################################################################################################

cd `dirname $0`
cd ..
PWDNOW=`pwd`
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg)
eval $(grep URL_MATCH ${PWDNOW}/common.cfg)
eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg)
eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg)
eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg)
eval $(grep HIVE_USER ${PWDNOW}/common.cfg)
eval $(grep LOC_DIR ${PWDNOW}/common.cfg)
eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg)
eval $(grep MR_VERSION ${PWDNOW}/common.cfg)
eval $(grep PARA_JAR ${PWDNOW}/common.cfg)
eval $(grep PARA_HIVE ${PWDNOW}/common.cfg)
eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg)
# check that the required parameter was supplied
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;

day=$1

hadoop fs -ls ${URL_INPUT_PATH}${day} > exist_test ;

x=`wc -l exist_test | cut -d ' ' -f 1`;

if  [ ${x} = 0 ] ; then
echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day} file is not exist !"
rm -f exist_test
exit 1;
fi;
rm -f exist_test

hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt
hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt
hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt
hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt

hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE}
hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE}
hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE}
hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE}

echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar";
hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day} ${URL_MATCH}${day} $PWDNOW/${DPI_CONF_PATH}

#hadoop fs -rm ${url_match}${day}/part-m-*.gz

hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
set dpi.encode.license=${HIVE_LICENSE};
${PARA_HIVE}

set mapred.job.name=CMSS-COUNT;
alter table  dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_match add partition (receive_day='${day}') location '${day}/match';
alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}') location '${day}/noise';
alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}') location '${day}/unmatch';
"

[[email protected] ProgramByDay]$ 
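# Quick check after match.sh: confirm the day's output landed under URL_MATCH and the partition is registered.
# A sketch using the paths/table names shown in the scripts above; the date is only an example:
hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160712
hive -e "use dpi; show partitions dpi_http_dtl_mark_match;"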

[[email protected] URLAppProgram_sf]$ cat match.cfg
RULE_PROV_VERNO=UACDS_20160711_00
[[email protected] URLAppProgram_sf]$
[[email protected] URLAppProgram_sf]$ cat common.cfg
## Output path of the matching program; also the HDFS location backing the Hive match, noise and unmatched tables
URL_MATCH=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/

## Hive summary (aggregation) tables
MATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/
UNMATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_unmatch_summary/

## HDFS paths of the Hive report tables and the top1000 tables
URL_SUMMARY=         /share/hzadmin/urlapp/spp/dpi_http_mark_summary/
URL_RULE=            /share/hzadmin/urlapp/spp/dpi_http_mark_rule/
URL_TYPECODE=        /share/hzadmin/urlapp/spp/dpi_http_mark_type/
URL_UNMATCH_TOP1000= /share/hzadmin/urlapp/spp/dpi_http_mark_top1000/
SITE_RULE=           /share/hzadmin/urlapp/spp/dpi_site_mark_rule/
SITE_TYPECODE=       /share/hzadmin/urlapp/spp/dpi_site_mark_type/
SITE_UNMATCH_TOP1000=/share/hzadmin/urlapp/spp/dpi_site_mark_top1000/

## HDFS paths of the classification rule (standard library) files
R_URL_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/url_type/
R_APP_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/app_type/
R_NOISE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/noise_type/
R_SITE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/site_type/

## HDFS paths of the intermediate (temp) tables
TEMP_DPI_MATCH=/share/hzadmin/urlapp/spp/temp_dpi_match
TEMP_DPI_NOISE=/share/hzadmin/urlapp/spp/temp_dpi_noise
TEMP_DPI_UNMATCH=/share/hzadmin/urlapp/spp/temp_dpi_unmatch
TEMP_DPI_URL=/share/hzadmin/urlapp/spp/temp_dpi_url
TEMP_DPI_APP=/share/hzadmin/urlapp/spp/temp_dpi_app
TEMP_DPI_SITE=/share/hzadmin/urlapp/spp/temp_dpi_site

## Paths of the customer label tables (may be left unconfigured in provinces without customer labels)
LTBAL=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/
LTBAL_tmp=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D_tmp/

## HDFS path of the raw DPI files, i.e. the input path of the matching program
URL_INPUT_PATH=/share/hzadmin/external_table/DMP_SSA/DPI/
#URL_INPUT_PATH="/apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day"="

## Path of the DPI field configuration file (a local Linux path)
DPI_CONF_PATH=rule.xml

## Province code of this province, e.g. Shanghai is 831
AREA_NO=811

## Re-upload (retransmission) count
REUPLOAD_COUNT=00

## Paths where the generated upload files are written
UNMATCHTOP1000=/dfs/ftp/hzadmin/urlAPP/UnMatchTop1000
URLAPPREPORT=/dfs/ftp/hzadmin/urlAPP/URLAppReport

# Local directory where the program is deployed
LOC_DIR=/home/hzadmin/urlAPP

# Hive database to use: default if not specified, otherwise the database (user) name
HIVE_USER=dpi
# MapReduce framework version suffix
MR_VERSION=_02_1
# Whether the match output is encrypted: 0 = no, 1 = yes
ISENCODE=0
# Encryption key for Hive files
HIVE_LICENSE=DpiBonc
# Extra parameters when submitting the JAR
PARA_JAR='-D mapred.job.queue.name=thirdpart1'
# Extra parameters when running Hive
PARA_HIVE='set hive.auto.convert.join=false;set mapreduce.job.queuename=thirdpart1;'

# Processing mode for the raw data: hourly runs = hour, daily runs = day
DEFAULT_TIME=day
# Number of days the DPI data lags behind the run time
DELAY_DAY=1
# In hour mode, the number of hours the DPI data lags behind the run time
DELAY_HOUR=0

## Whether the customer label program is configured in this province: 1 = yes, 0 = no
IS_LTBAL=0

[[email protected] URLAppProgram_sf]$ 
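# Note on the eval $(grep KEY ...) lines used throughout the scripts: each one copies a single KEY=VALUE pair
# from the cfg file into the current shell. Minimal illustration of the same mechanism (CFG_DEMO is a made-up key):
echo 'CFG_DEMO=/tmp/demo' > /tmp/demo.cfg
eval $(grep CFG_DEMO /tmp/demo.cfg)
echo ${CFG_DEMO}    # prints /tmp/demo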

[[email protected] ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160711
Found 706 items
-rw-r--r--   2 hzadmin hdfs  852456797 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0.snappy
-rw-r--r--   2 hzadmin hdfs 1025069938 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  816682614 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0.snappy
-rw-r--r--   2 hzadmin hdfs 1030962113 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  922216071 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0.snappy
-rw-r--r--   2 hzadmin hdfs 1018908652 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  873706406 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0.snappy
-rw-r--r--   2 hzadmin hdfs 1025021048 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  876314487 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0.snappy
-rw-r--r--   2 hzadmin hdfs 1007005145 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  886130165 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0.snappy
-rw-r--r--   2 hzadmin hdfs 1017040305 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  870758798 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0.snappy
-rw-r--r--   2 hzadmin hdfs 1075565204 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  853730203 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0.snappy
-rw-r--r--   2 hzadmin hdfs  990414241 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  836870377 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0.snappy
-rw-r--r--   2 hzadmin hdfs 1003709447 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  849027164 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0.snappy
-rw-r--r--   2 hzadmin hdfs 1002311449 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  919945440 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0.snappy
-rw-r--r--   2 hzadmin hdfs  985896710 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  853679744 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0.snappy
-rw-r--r--   2 hzadmin hdfs  978387233 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  857535482 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0.snappy
-rw-r--r--   2 hzadmin hdfs 1013308316 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  839661882 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0.snappy
-rw-r--r--   2 hzadmin hdfs  963660717 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  853614174 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0.snappy
-rw-r--r--   2 hzadmin hdfs  923638015 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  833865334 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0.snappy
-rw-r--r--   2 hzadmin hdfs  993313562 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  863788726 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0.snappy
-rw-r--r--   2 hzadmin hdfs 1055935913 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  938640887 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0.snappy
-rw-r--r--   2 hzadmin hdfs  992664370 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  836728758 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0.snappy
-rw-r--r--   2 hzadmin hdfs  958110493 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0_copy_1.snappy
-rw-r--r--   2 hzadmin hdfs  878931625 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000019_0.snappy
-rw-r--r--   2 hzadmin hdfs  962405868 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000586_0.snappy
-rw-r--r--   2 hzadmin hdfs 1051757492 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000587_0.snappy
-rw-r--r--   2 hzadmin hdfs 1016882930 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000588_0.snappy
-rw-r--r--   2 hzadmin hdfs 1039885210 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000589_0.snappy
-rw-r--r--   2 hzadmin hdfs  941194043 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000590_0.snappy
-rw-r--r--   2 hzadmin hdfs 1008122362 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000591_0.snappy
-rw-r--r--   2 hzadmin hdfs 1047224089 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000592_0.snappy
-rw-r--r--   2 hzadmin hdfs  982080269 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000593_0.snappy
-rw-r--r--   2 hzadmin hdfs 1023890503 2016-07-12 04:33 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000594_0.snappy
-rw-r--r--   2 hzadmin hdfs  986968252 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000595_0.snappy
-rw-r--r--   2 hzadmin hdfs  987693087 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000596_0.snappy
-rw-r--r--   2 hzadmin hdfs 1011458249 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000597_0.snappy
-rw-r--r--   2 hzadmin hdfs 1009166057 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000598_0.snappy
-rw-r--r--   2 hzadmin hdfs  985772040 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000599_0.snappy
-rw-r--r--   2 hzadmin hdfs  965906316 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000600_0.snappy
-rw-r--r--   2 hzadmin hdfs  955717905 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000601_0.snappy
-rw-r--r--   2 hzadmin hdfs  968491437 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000602_0.snappy
-rw-r--r--   2 hzadmin hdfs  981757576 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000603_0.snappy
[[email protected] ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/
Found 6 items
drwxr-xr-x   - hzadmin hdfs          0 2016-07-07 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160706
drwxr-xr-x   - hzadmin hdfs          0 2016-07-08 03:59 /share/hzadmin/external_table/DMP_SSA/DPI/20160707
drwxr-xr-x   - hzadmin hdfs          0 2016-07-09 04:21 /share/hzadmin/external_table/DMP_SSA/DPI/20160708
drwxr-xr-x   - hzadmin hdfs          0 2016-07-10 04:30 /share/hzadmin/external_table/DMP_SSA/DPI/20160709
drwxr-xr-x   - hzadmin hdfs          0 2016-07-11 04:26 /share/hzadmin/external_table/DMP_SSA/DPI/20160710
drwxr-xr-x   - hzadmin hdfs          0 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711
[[email protected] ~]#
[[email protected] ~]# hdfs dfs -ls /share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/
[[email protected] ~]# 
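# Before a run, the day's raw DPI partition can be sanity-checked against previous days (path as in the listing above):
hdfs dfs -du -s -h /share/hzadmin/external_table/DMP_SSA/DPI/20160711
hdfs dfs -count /share/hzadmin/external_table/DMP_SSA/DPI/20160711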

# Cron job execution log
[[email protected] bj_ggsn]$ cat /home/hzadmin/bj_ggsn/start.log
output to local-file: ftp_con.txt? output to local-file: ftp_con.txt? Begin at: Wed Jul 13 04:39:05 CST 2016 ;
Interactive mode off.
Local directory now /home/hzadmin/urlAPP/URLAppProgram
End at: Wed Jul 13 04:39:06 CST 2016;
DownLoadfilename: UACDS_20160712_00.tar.gz  total time=1 s ;
/home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
match.cfg
R_APP_TYPE_20160712_00.txt
R_NOISE_TYPE_20160712_00.txt
R_SITE_TYPE_20160712_00.txt
R_URL_TYPE_20160712_00.txt
UACDS_20160712_00_01_1.jar
UACDS_20160712_00_01.jar
UACDS_20160712_00_02_1.jar
UACDS_20160712_00_02.jar
===========================================================================
========== The program is running , please keep the network flow ...
========== running model day : 20160712
===========================================================================
========== Step 1 of 2 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/match_20160713_20160712.log
========== Step 2 of 3 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log
========== Step 3 of 3 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log
not input days so day=today
create_day: 20160713
day: 20160712
/home/hzadmin/urlAPP/ResultMatch/remove_details.sh: line 1: ?#!/bin/bash: No such file or directory
delete file: 20160706
delete file: /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705
16/07/13 09:07:26 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current

Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties
OK
Time taken: 2.58 seconds
Added /home/hzadmin/bj_ggsn/jar/Decode.jar to class path
Added resource: /home/hzadmin/bj_ggsn/jar/Decode.jar
OK
Time taken: 1.676 seconds
Query ID = hzadmin_20160713090707_053181f1-eb59-4188-be6a-a983081c9f5f
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 231
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11516, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11516/
Kill Command = /app/hadoop/bin/hadoop job  -kill job_1464150086810_11516
Hadoop job information for Stage-1: number of mappers: 661; number of reducers: 231
2016-07-13 09:08:01,998 Stage-1 map = 0%,  reduce = 0%
2016-07-13 09:08:50,489 Stage-1 map = 1%,  reduce = 0%, Cumulative CPU 5197.54 sec
2016-07-13 09:09:03,046 Stage-1 map = 2%,  reduce = 0%, Cumulative CPU 7764.25 sec
2016-07-13 09:09:07,341 Stage-1 map = 3%,  reduce = 0%, Cumulative CPU 8663.68 sec
2016-07-13 09:09:12,666 Stage-1 map = 4%,  reduce = 0%, Cumulative CPU 9852.68 sec
2016-07-13 09:09:21,205 Stage-1 map = 5%,  reduce = 0%, Cumulative CPU 11010.68 sec
2016-07-13 09:09:25,392 Stage-1 map = 6%,  reduce = 0%, Cumulative CPU 11844.46 sec
2016-07-13 09:09:27,450 Stage-1 map = 7%,  reduce = 0%, Cumulative CPU 12290.34 sec
2016-07-13 09:09:29,554 Stage-1 map = 8%,  reduce = 0%, Cumulative CPU 12730.05 sec
2016-07-13 09:09:32,076 Stage-1 map = 9%,  reduce = 0%, Cumulative CPU 13393.25 sec
2016-07-13 09:09:33,453 Stage-1 map = 10%,  reduce = 0%, Cumulative CPU 13535.78 sec
2016-07-13 09:09:35,612 Stage-1 map = 11%,  reduce = 0%, Cumulative CPU 13797.18 sec
2016-07-13 09:09:40,782 Stage-1 map = 12%,  reduce = 0%, Cumulative CPU 14445.45 sec
2016-07-13 09:09:47,771 Stage-1 map = 13%,  reduce = 0%, Cumulative CPU 15535.53 sec
2016-07-13 09:09:52,221 Stage-1 map = 14%,  reduce = 0%, Cumulative CPU 16179.99 sec
2016-07-13 09:09:56,108 Stage-1 map = 15%,  reduce = 0%, Cumulative CPU 16602.82 sec
2016-07-13 09:10:05,058 Stage-1 map = 16%,  reduce = 0%, Cumulative CPU 18181.36 sec
2016-07-13 09:10:08,191 Stage-1 map = 17%,  reduce = 0%, Cumulative CPU 18632.7 sec
2016-07-13 09:10:12,436 Stage-1 map = 18%,  reduce = 0%, Cumulative CPU 19106.37 sec
2016-07-13 09:10:15,600 Stage-1 map = 19%,  reduce = 0%, Cumulative CPU 19617.38 sec
2016-07-13 09:10:18,757 Stage-1 map = 20%,  reduce = 0%, Cumulative CPU 19945.72 sec
2016-07-13 09:10:27,207 Stage-1 map = 21%,  reduce = 1%, Cumulative CPU 21447.84 sec
2016-07-13 09:10:32,471 Stage-1 map = 22%,  reduce = 1%, Cumulative CPU 22085.79 sec
2016-07-13 09:10:36,684 Stage-1 map = 23%,  reduce = 1%, Cumulative CPU 22729.29 sec
2016-07-13 09:10:41,143 Stage-1 map = 24%,  reduce = 1%, Cumulative CPU 23253.89 sec
2016-07-13 09:10:43,254 Stage-1 map = 25%,  reduce = 1%, Cumulative CPU 23457.85 sec
2016-07-13 09:10:45,362 Stage-1 map = 26%,  reduce = 1%, Cumulative CPU 23678.34 sec
2016-07-13 09:10:46,404 Stage-1 map = 27%,  reduce = 1%, Cumulative CPU 23774.85 sec
2016-07-13 09:10:50,629 Stage-1 map = 28%,  reduce = 1%, Cumulative CPU 24180.11 sec
2016-07-13 09:10:57,034 Stage-1 map = 29%,  reduce = 1%, Cumulative CPU 24839.33 sec
2016-07-13 09:11:01,255 Stage-1 map = 30%,  reduce = 1%, Cumulative CPU 25307.08 sec
2016-07-13 09:11:03,958 Stage-1 map = 31%,  reduce = 1%, Cumulative CPU 25763.26 sec
2016-07-13 09:11:08,528 Stage-1 map = 32%,  reduce = 2%, Cumulative CPU 26300.39 sec
2016-07-13 09:11:14,875 Stage-1 map = 33%,  reduce = 2%, Cumulative CPU 27264.86 sec
2016-07-13 09:11:24,386 Stage-1 map = 34%,  reduce = 2%, Cumulative CPU 28419.61 sec
2016-07-13 09:11:31,847 Stage-1 map = 35%,  reduce = 2%, Cumulative CPU 29402.95 sec
2016-07-13 09:11:36,334 Stage-1 map = 36%,  reduce = 2%, Cumulative CPU 29849.6 sec
2016-07-13 09:11:43,729 Stage-1 map = 37%,  reduce = 2%, Cumulative CPU 30704.56 sec
2016-07-13 09:11:47,933 Stage-1 map = 38%,  reduce = 2%, Cumulative CPU 31181.2 sec
2016-07-13 09:11:50,068 Stage-1 map = 39%,  reduce = 2%, Cumulative CPU 31369.04 sec
2016-07-13 09:11:55,367 Stage-1 map = 40%,  reduce = 2%, Cumulative CPU 31936.9 sec
2016-07-13 09:11:57,473 Stage-1 map = 40%,  reduce = 3%, Cumulative CPU 32240.58 sec
2016-07-13 09:11:59,545 Stage-1 map = 41%,  reduce = 3%, Cumulative CPU 32428.94 sec
2016-07-13 09:12:01,649 Stage-1 map = 42%,  reduce = 3%, Cumulative CPU 32744.61 sec
2016-07-13 09:12:07,942 Stage-1 map = 43%,  reduce = 3%, Cumulative CPU 33410.43 sec
2016-07-13 09:12:12,113 Stage-1 map = 44%,  reduce = 3%, Cumulative CPU 33952.16 sec
2016-07-13 09:12:15,242 Stage-1 map = 45%,  reduce = 3%, Cumulative CPU 34206.1 sec
2016-07-13 09:12:21,495 Stage-1 map = 46%,  reduce = 3%, Cumulative CPU 34844.34 sec
2016-07-13 09:12:30,872 Stage-1 map = 46%,  reduce = 4%, Cumulative CPU 35728.26 sec
2016-07-13 09:12:34,036 Stage-1 map = 47%,  reduce = 4%, Cumulative CPU 36057.98 sec
2016-07-13 09:12:40,293 Stage-1 map = 48%,  reduce = 4%, Cumulative CPU 36777.14 sec
2016-07-13 09:12:50,802 Stage-1 map = 49%,  reduce = 4%, Cumulative CPU 38085.11 sec
2016-07-13 09:12:58,132 Stage-1 map = 50%,  reduce = 4%, Cumulative CPU 38899.54 sec
2016-07-13 09:13:00,323 Stage-1 map = 51%,  reduce = 4%, Cumulative CPU 39107.81 sec
2016-07-13 09:13:04,948 Stage-1 map = 52%,  reduce = 4%, Cumulative CPU 39535.16 sec
2016-07-13 09:13:09,327 Stage-1 map = 53%,  reduce = 4%, Cumulative CPU 39903.93 sec
2016-07-13 09:13:13,553 Stage-1 map = 53%,  reduce = 5%, Cumulative CPU 40394.49 sec
2016-07-13 09:13:15,702 Stage-1 map = 54%,  reduce = 5%, Cumulative CPU 40552.41 sec
2016-07-13 09:13:24,136 Stage-1 map = 55%,  reduce = 5%, Cumulative CPU 41410.8 sec
2016-07-13 09:13:28,720 Stage-1 map = 56%,  reduce = 5%, Cumulative CPU 41846.2 sec
2016-07-13 09:13:33,006 Stage-1 map = 57%,  reduce = 5%, Cumulative CPU 42176.72 sec
2016-07-13 09:13:43,675 Stage-1 map = 58%,  reduce = 5%, Cumulative CPU 43068.84 sec
2016-07-13 09:13:52,139 Stage-1 map = 59%,  reduce = 5%, Cumulative CPU 43853.31 sec
2016-07-13 09:13:59,475 Stage-1 map = 60%,  reduce = 5%, Cumulative CPU 44538.15 sec
2016-07-13 09:14:03,638 Stage-1 map = 61%,  reduce = 5%, Cumulative CPU 44855.9 sec
2016-07-13 09:14:18,548 Stage-1 map = 62%,  reduce = 5%, Cumulative CPU 46195.74 sec
2016-07-13 09:14:24,862 Stage-1 map = 63%,  reduce = 5%, Cumulative CPU 46731.81 sec
2016-07-13 09:14:33,309 Stage-1 map = 64%,  reduce = 5%, Cumulative CPU 47588.96 sec
2016-07-13 09:14:36,458 Stage-1 map = 64%,  reduce = 6%, Cumulative CPU 48099.49 sec
2016-07-13 09:14:42,805 Stage-1 map = 65%,  reduce = 6%, Cumulative CPU 48679.78 sec
2016-07-13 09:14:49,239 Stage-1 map = 66%,  reduce = 6%, Cumulative CPU 49262.33 sec
2016-07-13 09:14:55,665 Stage-1 map = 67%,  reduce = 6%, Cumulative CPU 49899.25 sec
2016-07-13 09:15:03,040 Stage-1 map = 68%,  reduce = 6%, Cumulative CPU 50497.22 sec
2016-07-13 09:15:12,510 Stage-1 map = 69%,  reduce = 6%, Cumulative CPU 51435.08 sec
2016-07-13 09:15:24,174 Stage-1 map = 70%,  reduce = 6%, Cumulative CPU 52788.92 sec
2016-07-13 09:15:32,711 Stage-1 map = 71%,  reduce = 6%, Cumulative CPU 53665.99 sec
2016-07-13 09:15:42,103 Stage-1 map = 72%,  reduce = 6%, Cumulative CPU 54553.65 sec
2016-07-13 09:15:47,333 Stage-1 map = 73%,  reduce = 6%, Cumulative CPU 54961.75 sec
2016-07-13 09:15:54,771 Stage-1 map = 74%,  reduce = 6%, Cumulative CPU 55748.89 sec
2016-07-13 09:16:02,233 Stage-1 map = 75%,  reduce = 6%, Cumulative CPU 56317.34 sec
2016-07-13 09:16:06,637 Stage-1 map = 76%,  reduce = 6%, Cumulative CPU 56664.01 sec
2016-07-13 09:16:07,687 Stage-1 map = 76%,  reduce = 7%, Cumulative CPU 56726.21 sec
2016-07-13 09:16:15,012 Stage-1 map = 77%,  reduce = 7%, Cumulative CPU 57229.48 sec
2016-07-13 09:16:28,460 Stage-1 map = 78%,  reduce = 7%, Cumulative CPU 58587.28 sec
2016-07-13 09:16:32,644 Stage-1 map = 79%,  reduce = 7%, Cumulative CPU 58893.37 sec
2016-07-13 09:16:43,193 Stage-1 map = 80%,  reduce = 7%, Cumulative CPU 59747.56 sec
2016-07-13 09:16:51,545 Stage-1 map = 81%,  reduce = 7%, Cumulative CPU 60503.95 sec
2016-07-13 09:16:56,784 Stage-1 map = 82%,  reduce = 7%, Cumulative CPU 60890.77 sec
2016-07-13 09:17:08,285 Stage-1 map = 83%,  reduce = 7%, Cumulative CPU 61747.42 sec
2016-07-13 09:17:14,555 Stage-1 map = 84%,  reduce = 7%, Cumulative CPU 62244.9 sec
2016-07-13 09:17:19,774 Stage-1 map = 85%,  reduce = 7%, Cumulative CPU 62596.97 sec
2016-07-13 09:17:31,248 Stage-1 map = 86%,  reduce = 7%, Cumulative CPU 63653.0 sec
2016-07-13 09:17:38,563 Stage-1 map = 87%,  reduce = 7%, Cumulative CPU 64143.74 sec
2016-07-13 09:17:41,682 Stage-1 map = 87%,  reduce = 8%, Cumulative CPU 64299.0 sec
2016-07-13 09:17:46,882 Stage-1 map = 88%,  reduce = 8%, Cumulative CPU 64714.76 sec
2016-07-13 09:17:58,323 Stage-1 map = 89%,  reduce = 8%, Cumulative CPU 65398.87 sec
2016-07-13 09:18:11,927 Stage-1 map = 90%,  reduce = 8%, Cumulative CPU 66519.12 sec
2016-07-13 09:18:17,138 Stage-1 map = 91%,  reduce = 8%, Cumulative CPU 66854.71 sec
2016-07-13 09:18:25,471 Stage-1 map = 92%,  reduce = 8%, Cumulative CPU 67504.99 sec
2016-07-13 09:18:32,823 Stage-1 map = 93%,  reduce = 8%, Cumulative CPU 68050.78 sec
2016-07-13 09:18:35,954 Stage-1 map = 93%,  reduce = 9%, Cumulative CPU 68204.15 sec
2016-07-13 09:18:40,180 Stage-1 map = 94%,  reduce = 9%, Cumulative CPU 68751.23 sec
2016-07-13 09:18:44,347 Stage-1 map = 94%,  reduce = 10%, Cumulative CPU 69048.97 sec
2016-07-13 09:18:46,472 Stage-1 map = 95%,  reduce = 10%, Cumulative CPU 69248.88 sec
2016-07-13 09:18:52,740 Stage-1 map = 96%,  reduce = 11%, Cumulative CPU 69662.81 sec
2016-07-13 09:18:55,923 Stage-1 map = 97%,  reduce = 11%, Cumulative CPU 69937.75 sec
2016-07-13 09:18:57,023 Stage-1 map = 97%,  reduce = 12%, Cumulative CPU 69987.32 sec
2016-07-13 09:19:01,192 Stage-1 map = 98%,  reduce = 12%, Cumulative CPU 70135.07 sec
2016-07-13 09:19:02,250 Stage-1 map = 98%,  reduce = 13%, Cumulative CPU 70162.43 sec
2016-07-13 09:19:07,490 Stage-1 map = 98%,  reduce = 14%, Cumulative CPU 70372.58 sec
2016-07-13 09:19:12,776 Stage-1 map = 99%,  reduce = 15%, Cumulative CPU 70731.6 sec
2016-07-13 09:19:18,031 Stage-1 map = 99%,  reduce = 16%, Cumulative CPU 70899.25 sec
2016-07-13 09:19:31,485 Stage-1 map = 99%,  reduce = 17%, Cumulative CPU 71470.97 sec
2016-07-13 09:19:46,107 Stage-1 map = 100%,  reduce = 17%, Cumulative CPU 71714.27 sec
2016-07-13 09:19:47,161 Stage-1 map = 100%,  reduce = 19%, Cumulative CPU 71731.02 sec
2016-07-13 09:19:48,219 Stage-1 map = 100%,  reduce = 24%, Cumulative CPU 71810.86 sec
2016-07-13 09:19:49,261 Stage-1 map = 100%,  reduce = 29%, Cumulative CPU 72135.09 sec
2016-07-13 09:19:50,287 Stage-1 map = 100%,  reduce = 33%, Cumulative CPU 72396.02 sec
2016-07-13 09:19:51,337 Stage-1 map = 100%,  reduce = 35%, Cumulative CPU 72698.52 sec
2016-07-13 09:19:52,393 Stage-1 map = 100%,  reduce = 38%, Cumulative CPU 72918.64 sec
2016-07-13 09:19:53,444 Stage-1 map = 100%,  reduce = 44%, Cumulative CPU 73111.93 sec
2016-07-13 09:19:54,470 Stage-1 map = 100%,  reduce = 46%, Cumulative CPU 73228.55 sec
2016-07-13 09:19:55,495 Stage-1 map = 100%,  reduce = 47%, Cumulative CPU 73314.57 sec
2016-07-13 09:19:56,538 Stage-1 map = 100%,  reduce = 48%, Cumulative CPU 73403.39 sec
2016-07-13 09:19:57,698 Stage-1 map = 100%,  reduce = 49%, Cumulative CPU 73452.14 sec
2016-07-13 09:20:00,788 Stage-1 map = 100%,  reduce = 51%, Cumulative CPU 73859.22 sec
2016-07-13 09:20:01,821 Stage-1 map = 100%,  reduce = 57%, Cumulative CPU 74159.29 sec
2016-07-13 09:20:03,490 Stage-1 map = 100%,  reduce = 66%, Cumulative CPU 74342.07 sec
2016-07-13 09:20:04,531 Stage-1 map = 100%,  reduce = 70%, Cumulative CPU 74864.62 sec
2016-07-13 09:20:05,558 Stage-1 map = 100%,  reduce = 75%, Cumulative CPU 75034.61 sec
2016-07-13 09:20:06,584 Stage-1 map = 100%,  reduce = 77%, Cumulative CPU 75119.84 sec
2016-07-13 09:20:07,613 Stage-1 map = 100%,  reduce = 79%, Cumulative CPU 75180.32 sec
2016-07-13 09:20:08,639 Stage-1 map = 100%,  reduce = 81%, Cumulative CPU 75280.79 sec
2016-07-13 09:20:09,665 Stage-1 map = 100%,  reduce = 83%, Cumulative CPU 75434.05 sec
2016-07-13 09:20:10,713 Stage-1 map = 100%,  reduce = 85%, Cumulative CPU 75521.87 sec
2016-07-13 09:20:11,775 Stage-1 map = 100%,  reduce = 87%, Cumulative CPU 75682.74 sec
2016-07-13 09:20:13,574 Stage-1 map = 100%,  reduce = 91%, Cumulative CPU 75881.12 sec
2016-07-13 09:20:15,641 Stage-1 map = 100%,  reduce = 92%, Cumulative CPU 76062.67 sec
2016-07-13 09:20:17,742 Stage-1 map = 100%,  reduce = 93%, Cumulative CPU 76159.41 sec
2016-07-13 09:20:19,865 Stage-1 map = 100%,  reduce = 94%, Cumulative CPU 76248.27 sec
2016-07-13 09:20:20,964 Stage-1 map = 100%,  reduce = 95%, Cumulative CPU 76323.18 sec
2016-07-13 09:20:24,140 Stage-1 map = 100%,  reduce = 96%, Cumulative CPU 76662.94 sec
2016-07-13 09:20:27,289 Stage-1 map = 100%,  reduce = 97%, Cumulative CPU 76664.86 sec
2016-07-13 09:20:31,500 Stage-1 map = 100%,  reduce = 98%, Cumulative CPU 77118.12 sec
2016-07-13 09:20:32,531 Stage-1 map = 100%,  reduce = 99%, Cumulative CPU 77257.73 sec
2016-07-13 09:20:38,761 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 77452.2 sec
MapReduce Total cumulative CPU time: 0 days 21 hours 30 minutes 52 seconds 200 msec
Ended Job = job_1464150086810_11516
MapReduce Jobs Launched:
Job 0: Map: 661  Reduce: 231   Cumulative CPU: 77452.2 sec   HDFS Read: 230040091537 HDFS Write: 2692705050 SUCCESS
Total MapReduce CPU Time Spent: 0 days 21 hours 30 minutes 52 seconds 200 msec
OK
Time taken: 774.963 seconds, Fetched: 66214908 row(s)
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ag': No such file or directory
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ah': No such file or directory
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ai': No such file or directory
16/07/13 09:33:08 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
16/07/13 09:33:11 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
16/07/13 09:33:13 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SSA/DPI/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current

Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties
OK
Time taken: 5.626 seconds
Dropped the partition receive_day=20160706
OK
Time taken: 5.321 seconds
[[email protected] bj_ggsn]$
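# The per-step logs referenced in start.log are written under /home/hzadmin/urlAPP/logs/<run day>;
# to follow the current day's match step, for example:
tail -100f /home/hzadmin/urlAPP/logs/$(date +%Y%m%d)/match_$(date +%Y%m%d)_*.log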
# Check whether the output files were generated
[[email protected] bj_data]$ ls -ltr 20160712*
-rw-rw-r-- 1 hzadmin hzadmin 1254354742 Jul 13 09:32 20160712001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254303891 Jul 13 09:32 20160712002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254247185 Jul 13 09:32 20160712003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254298641 Jul 13 09:33 20160712004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254315336 Jul 13 09:33 20160712005.txt
-rw-rw-r-- 1 hzadmin hzadmin  649592869 Jul 13 09:33 20160712006.txt
[[email protected] bj_data]$ pwd
/dfs/ftp/hzadmin/bj_data
[[email protected] bj_data]$
# Check running hadoop jobs
[[email protected] bj_ggsn]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Total jobs:0
                  JobId	     State	     StartTime	    UserName	       Queue	  Priority	 UsedContainers	 RsvdContainers	 UsedMem	 RsvdMem	 NeededMem	   AM info
[[email protected] bj_ggsn]$ jobs -l
[[email protected] bj_ggsn]$ 
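# As the deprecation notice says, the same check works through the mapred client; the YARN client can also be used:
mapred job -list
yarn application -list -appStates RUNNING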

[[email protected] bj_ggsn]$ ll
total 136
-rwxr-xr-x 1 hzadmin hzadmin   433 Feb 10 20:39 delete.sh
-rw-r--r-- 1 hzadmin hzadmin    71 Apr 30  2015 hours.txt
drwxr-xr-x 2 root    root     4096 Aug  5  2015 jar
drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 13 03:18 log
-rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out
-rwxr-xr-x 1 hzadmin hzadmin  1845 Sep 23  2015 select1.sh
-rwxr-xr-x 1 hzadmin hzadmin   454 Oct 12  2015 select2bak.sh
-rwxr-xr-x 1 hzadmin hzadmin  1367 Oct 12  2015 select2.sh
-rwxr-xr-x 1 hzadmin hzadmin  1344 Jun 18  2015 select.sh
-rwxr-xr-x 1 hzadmin hzadmin  1337 May  4  2015 select.shbak
-rwxr-xr-x 1 hzadmin hzadmin   628 Oct 28  2015 start1.sh
-rwxr-xr-x 1 hzadmin hzadmin   692 Jun 26 19:31 start2.sh
-rwxr-xr-x 1 hzadmin hzadmin   636 May 10 14:22 start3.sh
-rwxr-xr-x 1 hzadmin hzadmin   631 Mar  5 13:27 startbak1.sh
-rw-r--r-- 1 hzadmin hzadmin 16926 Jul 13 09:33 start.log
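# The crontab entry that drives start3.sh is not captured here; judging from the "Begin at: ... 04:39" line in
# start.log, a daily entry along these lines is assumed (illustrative only - confirm with crontab -l):
# 35 4 * * * sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log 2>&1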
[[email protected] bj_ggsn]$ cd /home/hzadmin/urlAPP/
[[email protected] urlAPP]$ ll
total 55164
-rwxr-xr-x  1 hzadmin hzadmin     1331 Sep 22  2015 BoncRun1.sh
-rwxr-xr-x  1 hzadmin hzadmin     1306 Oct 28  2015 BoncRun.sh
-rwxr-xr-x  1 hzadmin hzadmin     1636 May 31 11:33 hive.sh
drwxrwxr-x 17 hzadmin hzadmin     4096 Jul 13 04:39 logs
drwxrwxr-x  3 hzadmin hzadmin     4096 Jul 13 09:07 ResultMatch
-rwxr-xr-x  1 hzadmin hzadmin 56426871 Jun 16  2015 ResultMatch.zip
drwxr-xr-x 17 hzadmin hzadmin     4096 Jul 15  2015 UnMatchTop1000
drwxr-xr-x  3 hzadmin hzadmin    20480 Jul 13 04:39 URLAppProgram
drwxr-xr-x  5 hzadmin hzadmin     4096 Jul 13 04:39 URLAppProgram_sf
drwxr-xr-x 17 hzadmin hzadmin     4096 Jul 15  2015 URLAppReport
[[email protected] urlAPP]$ cd URLAppProgram_sf/
[[email protected] URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 bin
-rwxr-xr-x 1 hzadmin hzadmin     3017 Sep 28  2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin      200 Nov  7  2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin       80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin       33 Jul 12 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin     4096 Jul 13 04:39 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin     4096 Jun 10  2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin      741 Jul 14  2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin   721993 Jul 12 21:01 R_APP_TYPE_20160712_00.txt
-rwxr-xr-x 1 hzadmin hzadmin      728 Nov  7  2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin     4705 May  6  2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin     4500 Jul 12 21:01 R_NOISE_TYPE_20160712_00.txt
-rw-rw-r-- 1 hzadmin hzadmin  1426612 Jul 12 21:01 R_SITE_TYPE_20160712_00.txt
-rwxr-xr-x 1 hzadmin hzadmin     6966 Jun 15  2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin     6301 Sep 28  2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin     6291 May  7  2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin  1060990 Jul 12 21:01 R_URL_TYPE_20160712_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290563 Jul 12 21:01 UACDS_20160712_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233766 Jul 12 21:00 UACDS_20160712_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339712 Jul 12 21:01 UACDS_20160712_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282922 Jul 12 21:01 UACDS_20160712_00_02.jar
[[email protected] URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[[email protected] URLAppProgram_sf]$ 

[[email protected] URLAppProgram_sf]$ cat ProgramByDay/match.sh
#!/bin/bash
# match.sh
######################################################################################################
# function : run the matching job and produce the statistical report data
# date: 2014/02/10
# author: SPP
# param: day (data date)
######################################################################################################

cd `dirname $0`
cd ..
PWDNOW=`pwd`
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg)
eval $(grep URL_MATCH ${PWDNOW}/common.cfg)
eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg)
eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg)
eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg)
eval $(grep HIVE_USER ${PWDNOW}/common.cfg)
eval $(grep LOC_DIR ${PWDNOW}/common.cfg)
eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg)
eval $(grep MR_VERSION ${PWDNOW}/common.cfg)
eval $(grep PARA_JAR ${PWDNOW}/common.cfg)
eval $(grep PARA_HIVE ${PWDNOW}/common.cfg)
eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg)
# check that the required parameter was supplied
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;

day=$1

hadoop fs -ls ${URL_INPUT_PATH}${day} > exist_test ;

x=`wc -l exist_test | cut -d ' ' -f 1`;

if  [ ${x} = 0 ] ; then
echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day} file is not exist !"
rm -f exist_test
exit 1;
fi;
rm -f exist_test

hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt
hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt
hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt
hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt

hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE}
hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE}
hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE}
hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE}

echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar";
hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day} ${URL_MATCH}${day} $PWDNOW/${DPI_CONF_PATH}

#hadoop fs -rm ${url_match}${day}/part-m-*.gz

hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
set dpi.encode.license=${HIVE_LICENSE};
${PARA_HIVE}

set mapred.job.name=CMSS-COUNT;
alter table  dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_match add partition (receive_day='${day}') location '${day}/match';
alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}') location '${day}/noise';
alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}') location '${day}/unmatch';

"[[email protected] URLAppProgram_sf]$ 

[[email protected] ProgramByDay]$ cat report_summary.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`

eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

# check that the required parameter was supplied
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};

set mapred.job.name=CMSS-COUNT;
alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_match_summary add partition (receive_day=${day}) location  '${day}';
alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day=${day}) location  '${day}';

insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day})
select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host,
count(id) as count,sum(download_bytes) ,sum(upload_bytes)
from dpi_http_dtl_mark_match
where receive_day=${day}
group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host;

insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day})
select host,url,count(id),sum(download_bytes),sum(upload_bytes)
from dpi_http_dtl_mark_unmatch
where receive_day=${day}
group by host,url;

"
[[email protected] ProgramByDay]$ 
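# Sanity check after report_summary.sh: row count of the day's summary partition
# (the date is only an example; the queue setting mirrors PARA_HIVE in common.cfg)
hive -e "use dpi; set mapreduce.job.queuename=thirdpart1; select count(*) from dpi_http_dtl_mark_match_summary where receive_day='20160712';"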

[[email protected] ProgramByDay]$ cat report.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`

eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

# check that the required parameter was supplied
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};

alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}');

alter table dpi_http_mark_summary add partition (receive_day=${day}) location  '${day}';
alter table dpi_http_mark_rule add partition (receive_day=${day}) location  '${day}';
alter table dpi_http_mark_type add partition (receive_day=${day}) location  '${day}';
alter table dpi_http_mark_top1000 add partition (receive_day=${day}) location  '${day}';
alter table dpi_site_mark_rule add partition (receive_day=${day}) location  '${day}';
alter table dpi_site_mark_type add partition (receive_day=${day}) location  '${day}';
alter table dpi_site_mark_top1000 add partition (receive_day=${day}) location  '${day}';

insert overwrite table dpi_http_mark_rule partition(receive_day=${day})
select visit_type,rule_id,typecode,matchs,'${day}' from
(
select visit_type,rule_id,type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='url' group by visit_type,rule_id,type_code
union all
select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ;

insert overwrite table dpi_http_mark_type partition(receive_day=${day})
select visit_type,typecode, rulematchs, urlmatchs,'${day}' from (
select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where  receive_day =${day} and visit_type='url' group by visit_type,typecode
union all
select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where  receive_day =${day} and visit_type='app' group by visit_type,typecode )sub;

insert overwrite table dpi_site_mark_rule partition(receive_day=${day})
select site_id,site_code,matchs,'${day}' from
(
select site_id,site_code,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and site_id <> '' group by site_id,site_code
) sub ;

insert overwrite table dpi_site_mark_type partition(receive_day=${day})
select site_code,  matchs, hostmatchs,'${day}' from (
select site_code, count(distinct site_id) as matchs,sum(matchs)as hostmatchs from dpi_site_mark_rule where receive_day =${day} and site_id <> '' group by site_code
) sub;

insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day})
select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_unmatch_summary
where receive_day =${day} group by host,url order by count desc limit 5000;

insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day})
select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_match_summary
where receive_day =${day} and site_id = '' group by host
order by count desc limit 1000;

insert overwrite table temp_dpi_match partition(receive_day=${day})
SELECT
count(id) as matchcount,
cast(0 as bigint)as noisecount ,
cast(0 as bigint) as unmatchcount,
count(CASE
                    WHEN visit_type='url'
                    THEN id
                    ELSE null
                END  )AS urlcount,
count(CASE
                    WHEN visit_type='app'
                    THEN id
                    ELSE null
                END  )AS appcount,
cast(0 as bigint) as sitecount

FROM
    dpi_http_dtl_mark_match
WHERE
    receive_day=${day}
AND
   reserved_19='1';

insert overwrite table temp_dpi_noise partition(receive_day=${day})
select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} ;

insert overwrite table temp_dpi_unmatch partition(receive_day=${day})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} ;

insert overwrite table temp_dpi_site partition(receive_day=${day})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day};

insert overwrite table dpi_http_mark_summary partition(receive_day='${day}')
select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from
(
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_match where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_noise where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_unmatch where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_site where receive_day=${day}
) sub ;

"[[email protected] ProgramByDay]$ 

[[email protected] ProgramByDay]$
# Check hadoop jobs (nothing found)
[[email protected] ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Total jobs:0
                  JobId	     State	     StartTime	    UserName	       Queue	  Priority	 UsedContainers	 RsvdContainers	 UsedMem	 RsvdMem	 NeededMem	   AM info
# Start a hadoop job (here: an ad-hoc Hive query)
hive (dpi)>
          >
          >
          >
          > select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'20160713'
          > from dpi_http_dtl_mark_unmatch_summary
          > where receive_day ='20160713' group by host,url order by count desc limit 5;
Query ID = hzadmin_20160713143333_11c325a3-d541-4fd2-b5e8-f8ee0d1f25d3
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11524, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
Kill Command = /app/hadoop/bin/hadoop job  -kill job_1464150086810_11524
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-07-13 14:33:16,955 Stage-1 map = 0%,  reduce = 0%
2016-07-13 14:33:28,419 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 18.02 sec
2016-07-13 14:33:38,849 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 20.2 sec
MapReduce Total cumulative CPU time: 20 seconds 200 msec
Ended Job = job_1464150086810_11524
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11525, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11525/
Kill Command = /app/hadoop/bin/hadoop job  -kill job_1464150086810_11525  (kills the hadoop job)
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2016-07-13 14:33:48,364 Stage-2 map = 0%,  reduce = 0%
2016-07-13 14:33:59,031 Stage-2 map = 100%,  reduce = 0%, Cumulative CPU 10.59 sec
2016-07-13 14:34:07,565 Stage-2 map = 100%,  reduce = 100%, Cumulative CPU 12.53 sec
MapReduce Total cumulative CPU time: 12 seconds 530 msec
Ended Job = job_1464150086810_11525
MapReduce Jobs Launched:
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 20.2 sec   HDFS Read: 256 HDFS Write: 96 SUCCESS
Job 1: Map: 1  Reduce: 1   Cumulative CPU: 12.53 sec   HDFS Read: 453 HDFS Write: 4 SUCCESS
Total MapReduce CPU Time Spent: 32 seconds 730 msec
OK
Time taken: 61.137 seconds
hive (dpi)>
# Check hadoop jobs (job found)
[[email protected] ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Total jobs:1
                  JobId	     State	     StartTime	    UserName	       Queue	  Priority	 UsedContainers	 RsvdContainers	 UsedMem	 RsvdMem	 NeededMem	   AM info
job_1464150086810_11524	      PREP	 1468391589369	     hzadmin	     default	    NORMAL	              1	              0	   4096M	      0M	     4096M	http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
[[email protected] ProgramByDay]$ hadoop job -lis
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Usage: CLI <command> <args>
	[-submit <job-file>]
	[-status <job-id>]
	[-counter <job-id> <group-name> <counter-name>]
	[-kill <job-id>]
	[-set-priority <job-id> <priority>]. Valid values for priorities are: VERY_HIGH HIGH NORMAL LOW VERY_LOW
	[-events <job-id> <from-event-#> <#-of-events>]
	[-history <jobHistoryFile>]
	[-list [all]]
	[-list-active-trackers]
	[-list-blacklisted-trackers]
	[-list-attempt-ids <job-id> <task-type> <task-state>]. Valid values for <task-type> are REDUCE MAP. Valid values for <task-state> are running, completed
	[-kill-task <task-attempt-id>]
	[-fail-task <task-attempt-id>]
	[-logs <job-id> <task-attempt-id>]

Generic options supported are
-conf <configuration file>     specify an application configuration file
-D <property=value>            use value for given property
-fs <local|namenode:port>      specify a namenode
-jt <local|jobtracker:port>    specify a job tracker
-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines.

The general command line syntax is
bin/hadoop command [genericOptions] [commandOptions]

[[email protected] ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Total jobs:1
                  JobId	     State	     StartTime	    UserName	       Queue	  Priority	 UsedContainers	 RsvdContainers	 UsedMem	 RsvdMem	 NeededMem	   AM info
job_1464150086810_11524	   RUNNING	 1468391589369	     hzadmin	     default	    NORMAL	              2	              0	   8192M	      0M	     8192M	http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
[[email protected] ProgramByDay]$ 

hive (dpi)>
          > select site_id,site_code,sum(count) as matchs
          > from dpi_http_dtl_mark_match_summary where receive_day='20160713' and site_id <> '' group by site_id,site_code
          > ;
Query ID = hzadmin_20160713145555_149dda0c-d7c0-4841-91aa-57f3ce1f454d
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11526, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/
Kill Command = /app/hadoop/bin/hadoop job  -kill job_1464150086810_11526
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-07-13 14:55:29,837 Stage-1 map = 0%,  reduce = 0%
2016-07-13 14:56:03,280 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 25.72 sec
2016-07-13 14:56:11,859 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 39.58 sec
MapReduce Total cumulative CPU time: 39 seconds 580 msec
Ended Job = job_1464150086810_11526
MapReduce Jobs Launched:
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 39.58 sec   HDFS Read: 256 HDFS Write: 4 SUCCESS
Total MapReduce CPU Time Spent: 39 seconds 580 msec
OK
Time taken: 50.996 seconds
hive (dpi)> 

[[email protected] bj_ggsn]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.

Total jobs:1
                  JobId	     State	     StartTime	    UserName	 Queue	  Priority	 UsedContainers	 RsvdContainers	 UsedMem	 RsvdMem	 NeededMem	   AM info
job_1464150086810_11526	   RUNNING	 1468392923303	    hzadmin	     default  NORMAL	              2	              0	   8192M	      0M	     8192M	   http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/
[[email protected] bj_ggsn]$ 

[[email protected] ProgramByDay]$
[[email protected] ProgramByDay]$ cat label.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`

eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

#判断参数是否输入
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};

INSERT
    OVERWRITE TABLE CDPI_USER_BEH_PREFER_D_tmp
SELECT
    CASE
        WHEN T.STARTTIME LIKE '%%-%%'
        AND T.STARTTIME LIKE '%%.%%'
        THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
        WHEN T.STARTTIME LIKE '%-%'
        AND T.STARTTIME LIKE '%%:%%'
        THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
        WHEN T.STARTTIME LIKE '%%.%%'
        THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd')
        WHEN LENGTH(T.STARTTIME) = 13
        THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd')
        ELSE SUBSTR(T.STARTTIME,1,8)
    END AS VISIT_DAY,
    T.PHONE_ID,
    T1.LABEL_CODE,
    COUNT(1)                               AS TIMES,
    SUM(T.DOWNLOAD_BYTES + T.UPLOAD_BYTES) AS FLOWS,
    1                                      AS DAYS
FROM
    (
        SELECT
            STARTTIME,
            PHONE_ID,
            TYPE_CODE,
            SUM(DOWNLOAD_BYTES) AS DOWNLOAD_BYTES,
            SUM(UPLOAD_BYTES)   AS UPLOAD_BYTES
        FROM
            dpi_http_dtl_mark_match_summary
        WHERE
            RECEIVE_DAY = '${day}'
        AND LENGTH(PHONE_ID)=11
        AND SUBSTR(PHONE_ID,1,1)='1'
        AND PHONE_ID NOT LIKE '%.%'
        AND TYPE_CODE IS NOT NULL
        AND TYPE_CODE <> ''
        GROUP BY
            STARTTIME,
            PHONE_ID,
            TYPE_CODE ) T
LEFT OUTER JOIN
    R_LABEL_BEHAVIOR T1
ON
    T.TYPE_CODE = T1.TYPE_CODE
WHERE T1.LABEL_CODE IS NOT NULL
GROUP BY
    CASE
        WHEN T.STARTTIME LIKE '%%-%%'
        AND T.STARTTIME LIKE '%%.%%'
        THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
        WHEN T.STARTTIME LIKE '%-%'
        AND T.STARTTIME LIKE '%%:%%'
        THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
        WHEN T.STARTTIME LIKE '%%.%%'
        THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd')
        WHEN LENGTH(T.STARTTIME) = 13
        THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd')
        ELSE SUBSTR(T.STARTTIME,1,8)
    END,
    T.PHONE_ID,
    T1.LABEL_CODE;

ALTER TABLE
    CDPI_USER_BEH_PREFER_D DROP IF EXISTS PARTITION(DATA_DAY='${day}');
ALTER TABLE
    CDPI_USER_BEH_PREFER_D ADD PARTITION (DATA_DAY='${day}') LOCATION '${day}';
INSERT
    OVERWRITE TABLE CDPI_USER_BEH_PREFER_D PARTITION
    (
        DATA_DAY='${day}'
    )
SELECT
    *
FROM
    (
        SELECT
            VISIT_DAY,
            PHONE_ID,
            CONCAT(SUBSTR(LABEL_CODE,1,10),'000') AS LABEL_CODE,
            SUM(times),
            SUM(flows),
            count(distinct days)
        FROM
            CDPI_USER_BEH_PREFER_D_tmp
        where
        SUBSTR(LABEL_CODE,8,3) <> '000'
        GROUP BY
            VISIT_DAY,
            PHONE_ID,
            CONCAT(SUBSTR(LABEL_CODE,1,10),'000')
        UNION ALL
        SELECT
            VISIT_DAY,
            PHONE_ID,
            CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') AS LABEL_CODE,
            SUM(times),
            SUM(flows),
            count(distinct days)
        FROM
            CDPI_USER_BEH_PREFER_D_tmp
        where SUBSTR(LABEL_CODE,5,3) <> '000'
        GROUP BY
            VISIT_DAY,
            PHONE_ID,
            CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') )A;
 "[[email protected] ProgramByDay]$ 

 [[email protected] ProgramByDay]$ cat upload.sh
#!/bin/bash
# upload.sh

######################################################################################################
# function : 抽取文件、压缩、上传至省份前置机
# date: 2014/02/10
# author: YyDou
# param:day(数据日期)
######################################################################################################

##

cd `dirname $0`

eval $(grep AREA_NO ../common.cfg)
eval $(grep REUPLOAD_COUNT ../common.cfg)
eval $(grep UNMATCHTOP1000 ../common.cfg)
eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

#判断参数是否输入
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;

day=$1
#day=`date -d "yesterday" +%Y%m%d`
#day=$1
curr_date=`date +%Y%m%d`
curr_time=`date +%H`

if [ ${curr_time} -gt '06' ]; then
curr_date=`date -d +1days +%Y%m%d`
fi

if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then
        mkdir -p "$UNMATCHTOP1000/${curr_date}"
fi;

if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then
        mkdir -p "$URLAPPREPORT/${curr_date}"
fi;

cd $UNMATCHTOP1000/${curr_date}
#rm -f *.*

hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',url,count,download_bytes,upload_bytes,day_date,host  from dpi_http_mark_top1000 where receive_day=${day}" >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',host,count,download_bytes,upload_bytes,day_date from dpi_site_mark_top1000 where receive_day=${day}" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

##压缩
tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT

cd $URLAPPREPORT/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',totalcount,noisecount,unmatchcount,matchcount,urlcount,appcount,day_date,sitecount from dpi_http_mark_summary where receive_day=${day}" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,ruleid,typecode,matchs,day_date  from dpi_http_mark_rule where receive_day=${day}" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,type_code,matchs,urlmatchs,day_date  from dpi_http_mark_type where receive_day=${day}" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

# (run the hive SQL below and redirect its output into the .DAT upload file)
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_id,site_code,matchs,day_date  from dpi_site_mark_rule where receive_day=${day}" >> RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_code,matchs,hostmatchs,day_date  from dpi_site_mark_type where receive_day=${day}" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

##压缩(之后有删除)
tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT

tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT[[email protected] ProgramByDay]$
[[email protected] ProgramByDay]$
[[email protected] ProgramByDay]$
[[email protected] ProgramByDay]$ 
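Note: upload.sh also takes the data day as its only argument, and because of the 06:00 cutoff in the script a run started after 06:00 writes into the next day's date folder. One run leaves the two Top1000 bundles plus five report bundles, all packed as .DAT.tar.gz (paths below follow the variables in the script; the day is a placeholder):

    sh upload.sh 20160712
    # $UNMATCHTOP1000/<run-date>/ : UnMatchTop1000*.DAT.tar.gz, UnMatchSiteTop1000*.DAT.tar.gz
    # $URLAPPREPORT/<run-date>/   : URLStatInfo, RuleDetails, TypeDetails, RuleSiteDetails, TypeSiteDetails (*.DAT.tar.gz)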

[[email protected] ProgramByDay]$ cat logupload.sh
#!/bin/bash
#logupload.sh
#*=================================================
#*
#* FileName : logupload.sh
#* CreateDate: 2014-06-25
#* Abstract :
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================

cd `dirname $0`
PWDNOW=`pwd`

eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep AREA_NO ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)

if [ $# -ne 1 ] ; then
echo "Input parameter error, there should be 1 parameters ";
exit 1;
fi;

LOGDAY=$1
curr_time=`date +%H`
DAY=`date +%Y%m%d`
if [ ${curr_time} -gt '06' ]; then
DAY=`date -d +1days +%Y%m%d`
fi

if [ ! -d "${URLAPPREPORT}/${DAY}" ] ; then
	mkdir -p "${URLAPPREPORT}/${DAY}"
fi;

cd ${LOC_DIR}/logs/${LOGDAY}/
tar -zcf LogInfo.${LOGDAY}.${AREA_NO}.tar.gz *.log
mv -f LogInfo.${LOGDAY}.${AREA_NO}.tar.gz ${URLAPPREPORT}/${DAY}/

#前置机程序机分开省份使用
#cd ${PWDNOW}
#sh ./ftp_putalldata.sh ${URLAPPREPORT}/${DAY}/LogInfo.${LOGDAY}.${AREA_NO}.tar.gz

cd ${PWDNOW}

[[email protected] ProgramByDay]$ 
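Note: logupload.sh only packages the day's local logs: it tars ${LOC_DIR}/logs/<day>/*.log into LogInfo.<day>.${AREA_NO}.tar.gz under the dated URLAppReport folder; the push to the provincial front-end host is commented out. Typical call (day is a placeholder):

    sh logupload.sh 20160712
    # -> ${URLAPPREPORT}/<run-date>/LogInfo.20160712.${AREA_NO}.tar.gz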

[[email protected] ProgramByDay]$ cat ../ProgramByDay/ftp_getfilelist.sh
cat: ../ProgramByDay/ftp_getfilelist.sh: No such file or directory
[[email protected] ProgramByDay]$ cat ftp_getfilelist.sh
cat: ftp_getfilelist.sh: No such file or directory
[[email protected] ProgramByDay]$ cat ../ProgramByHour/match_H.sh
#!/bin/bash
# match_H.sh

######################################################################################################
# function : 匹配程序运行、统计报表出数据
# date: 2014/02/10
# author: YyDou
# param:day(数据日期)
######################################################################################################

##
cd `dirname $0`
PWDRE=`pwd`
cd ..
PWDNOW=`pwd`
cd $PWDRE

eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg)
eval $(grep URL_MATCH ${PWDNOW}/common.cfg)
eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg)
eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg)
eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg)

eval $(grep HIVE_USER ${PWDNOW}/common.cfg)
eval $(grep LOC_DIR ${PWDNOW}/common.cfg)
eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg)
eval $(grep MR_VERSION ${PWDNOW}/common.cfg)
eval $(grep PARA_JAR ${PWDNOW}/common.cfg)
eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg)

#判断参数是否输入
if [ $# -ne 2 ] ; then
        echo "Input parameter error : there should be 2 parameters";
        exit 1;
fi;

day=$1
hour=$2

hadoop fs -ls ${URL_INPUT_PATH}${day}/${hour} > exist_test ;

x=`wc -l exist_test | cut -d ' ' -f 1`;

if  [ ${x} = 0 ] ; then
echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day}/${hour} file is not exist !"
rm -f exist_test
exit 1;
fi;
rm -f exist_test

hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt
hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt
hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt
hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt

hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE}
hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE}
hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE}
hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE}

echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar";
hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day}/${hour} ${URL_MATCH}${day}/${hour} $PWDNOW/${DPI_CONF_PATH}

#hadoop fs -rm ${url_match}${day}/part-m-*.gz

hive -e "

add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
set dpi.encode.license=${HIVE_LICENSE};

alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');

alter table dpi_http_dtl_mark_match add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/match';
alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/noise';
alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/unmatch';

"

cd $PWDNOW
[[email protected] ProgramByDay]$ 
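Note: match_H.sh is the hourly matcher: it refuses to run unless the HDFS input directory for that hour exists, refreshes the R_*_TYPE rule files on HDFS, runs the UserurlAllMain MR job and registers the match/noise/unmatch partitions. A typical call and a quick output check (the path follows the ${URL_MATCH}${day}/${hour} convention used in the script; day and hour are placeholders):

    sh ../ProgramByHour/match_H.sh 20160713 08
    hadoop fs -ls ${URL_MATCH}20160713/08    # expect match/ noise/ unmatch/ sub-directories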

[[email protected] ProgramByDay]$ cat report_summary_H.sh
cat: report_summary_H.sh: No such file or directory
[[email protected] ProgramByDay]$ cat ../ProgramByHour/report_summary_H.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`

eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

#判断参数是否输入
if [ $# -ne 2 ] ; then
        echo "Input parameter error : there should be 2 parameters";
        exit 1;
fi;
day=$1
hour=$2
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};

set mapred.job.name=CMSS-COUNT;
alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_match_summary add partition (receive_day='${day}',receive_hour='${hour}') location  '${day}/${hour}';
alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day='${day}',receive_hour='${hour}') location  '${day}/${hour}';

insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day},receive_hour=${hour})
select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host,
count(id) as count,sum(download_bytes) ,sum(upload_bytes)
from dpi_http_dtl_mark_match
where receive_day=${day} and receive_hour=${hour}
group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host;

insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day},receive_hour=${hour})
select host,url,count(id),sum(download_bytes),sum(upload_bytes)
from dpi_http_dtl_mark_unmatch
where receive_day=${day} and receive_hour=${hour}
group by host,url;

"
[[email protected] ProgramByDay]$ 
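Note: report_summary_H.sh rolls the hour's dpi_http_dtl_mark_match / _unmatch detail up into the *_summary tables; its SQL references both the day and the hour, so it is driven with the same day/hour pair as match_H.sh, for example:

    sh ../ProgramByHour/report_summary_H.sh 20160713 08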

[[email protected] ProgramByDay]$ cat ../ProgramByHour/report_H.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`

eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

#判断参数是否输入
if [ $# -ne 2 ] ; then
        echo "Input parameter error : there should be 2 parameters";
        exit 1;
fi;
day=$1
hour=$2
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};

alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');

alter table dpi_http_mark_summary add partition (receive_day=${day},receive_hour='${hour}') location  '${day}/${hour}';
alter table dpi_http_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location  '${day}/${hour}';
alter table dpi_http_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location  '${day}/${hour}';
alter table dpi_site_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location  '${day}/${hour}';
alter table dpi_site_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location  '${day}/${hour}';

insert overwrite table dpi_http_mark_rule partition(receive_day=${day},receive_hour=${hour})
select visit_type,rule_id,typecode,matchs,'${day}' from
(
select visit_type,rule_id,type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by visit_type,rule_id,type_code
union all
select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ;

insert overwrite table dpi_site_mark_rule partition(receive_day=${day},receive_hour=${hour})
select site_id,site_code,matchs,'${day}' from
(
select site_id,site_code,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and site_id <> '' group by site_id,site_code
) sub ;

insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day},receive_hour=${hour})
select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_unmatch_summary
where receive_day =${day} and receive_hour=${hour} group by host,url order by count desc limit 5000;

insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day},receive_hour=${hour})
select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_match_summary
where receive_day =${day} and site_id = '' group by host
order by count desc limit 1000;

insert overwrite table temp_dpi_match partition(receive_day=${day},receive_hour=${hour})
select count(t.id) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} group by id ) t ;

insert overwrite table temp_dpi_url partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,count(url.id) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by id) url ;

insert overwrite table temp_dpi_app partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,count(app.id) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by id) app ;

insert overwrite table temp_dpi_noise partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} and receive_hour=${hour} ;

insert overwrite table temp_dpi_unmatch partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} and receive_hour=${hour} ;

insert overwrite table temp_dpi_site partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day} and receive_hour=${hour} ;

insert overwrite table dpi_http_mark_summary partition(receive_day='${day}',receive_hour=${hour})
select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from
(
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_match where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_url where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_app where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_noise where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_unmatch where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from  temp_dpi_site where receive_day=${day} and receive_hour=${hour}
) sub ;

"[[email protected] ProgramByDay]$ 

[[email protected] ProgramByDay]$ cat ../ProgramByHour/upload_H.sh
#!/bin/bash
# upload_H.sh
#*=================================================
#*
#* FileName : upload_H.sh
#* CreateDate: 2014-04-03
#* Abstract : State statistics package
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================
##

cd `dirname $0`

eval $(grep AREA_NO ../common.cfg)
eval $(grep REUPLOAD_COUNT ../common.cfg)
eval $(grep UNMATCHTOP1000 ../common.cfg)
eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)

#判断参数是否输入
if [ $# -ne 1 ] ; then
        echo "Input parameter error : there should be 1 parameters";
        exit 1;
fi;

day=$1
#day=`date -d "yesterday" +%Y%m%d`
#day=$1
curr_date=`date +%Y%m%d`
curr_time=`date +%H`

if [ ${curr_time} -gt '06' ]; then
curr_date=`date -d +1days +%Y%m%d`
fi

if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then
        mkdir -p "$UNMATCHTOP1000/${curr_date}"
fi;

if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then
        mkdir -p "$URLAPPREPORT/${curr_date}"
fi;

cd $UNMATCHTOP1000/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',url,sum(count) count,sum(download_bytes),sum(upload_bytes),'${day}',host
from dpi_http_mark_top1000
where receive_day=${day} group by url,host,day_date order by count desc limit 5000; " >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',host,sum(count),sum(download_bytes),sum(upload_bytes),'${day}' from dpi_site_mark_top1000 where receive_day=${day} group by host,day_date order by count desc limit 1000;" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT 

##压缩
tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT

cd $URLAPPREPORT/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',sum(totalcount),sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount)
from dpi_http_mark_summary
where receive_day=${day};" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,ruleid,typecode,sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,ruleid,typecode;" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,typecode,count(distinct ruleid) as matchs,sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,typecode;" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00');
alter table dpi_http_mark_type add partition (receive_day=${day},receive_hour='00') location  '${day}/00';
insert overwrite table dpi_http_mark_type partition(receive_day=${day},receive_hour=00)
select visit_type,typecode,count(distinct ruleid),sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,typecode;"

hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_id,site_code,sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_id,site_code" >> RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_code,count(distinct site_id),sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_code" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00');
alter table dpi_site_mark_type add partition (receive_day=${day},receive_hour='00') location  '${day}/00';
insert overwrite table dpi_site_mark_type partition(receive_day=${day},receive_hour=00)
select site_code,count(distinct site_id),sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_code;"

##压缩
tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT

tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT

[[email protected] ProgramByDay]$ 
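Note: upload_H.sh re-aggregates the per-hour tables into the day-level upload files (same .DAT.tar.gz names as upload.sh) and also rebuilds the dpi_http_mark_type / dpi_site_mark_type day partitions under hour '00'. It takes the data day only:

    sh ../ProgramByHour/upload_H.sh 20160712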

[[email protected] ProgramByDay]$ cd ..
[[email protected] URLAppProgram_sf]$ cat remove_files.sh
#!/bin/bash

#*=================================================
#*
#* FileName  : remove_files.sh
#* CreateDate: 2014-02-25
#* Abstract  : Delete 'UACDS_YYYYMMDD_**.tar.gz' files
#*             on a regular , most retain ten files.
#* Author    : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep DATA_HOME public.cfg)
eval $(grep ZIP_LIMIT public.cfg)
eval $(grep REPORT_LIMIT public.cfg)
eval $(grep UNMATCHTOP1000 public.cfg)
eval $(grep URLAPPREPORT public.cfg)
eval $(grep SUMMARY_DAY public.cfg)
eval $(grep DELETE_DAY public.cfg)
eval $(grep URL_MATCH common.cfg)
eval $(grep TEMP_DPI_MATCH common.cfg)
eval $(grep TEMP_DPI_NOISE common.cfg)
eval $(grep TEMP_DPI_UNMATCH common.cfg)
eval $(grep TEMP_DPI_URL common.cfg)
eval $(grep TEMP_DPI_APP common.cfg)
eval $(grep TEMP_DPI_SITE common.cfg)
eval $(grep MATCH_SUMMARY common.cfg)
eval $(grep UNMATCH_SUMMARY common.cfg)

upl=$((ZIP_LIMIT+1))
cd $DATA_HOME

a=` ls  UA* | wc -l`

ls  UA* | sort -r > list

if [ $a -gt $ZIP_LIMIT ]; then

sed -n ${upl},${a}p list > dellist

c=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=c;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 rm $greps
 echo 'delete file:' $greps

done
rm dellist
else
echo ' Deleting data did not reach the upper limit!'
fi
rm list

#*=================================================
#*
#*
#* CreateDate: 2014-02-25
#* Abstract  : Delete UnMatchTop1000 Folder on a regular.
#* Author    : LiangWei
#*
#* BONC All rights reserved.
#*==================================================

upl=$((REPORT_LIMIT+1))

ls -lt $UNMATCHTOP1000 |awk '/^d/ {print $9}'| sort -r > list
q=`wc -l list | cut -d ' ' -f 1`
if [ $q -gt $REPORT_LIMIT ]; then

sed -n ${upl},${q}p list > dellist

x=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=x;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 rm -rf ${UNMATCHTOP1000}/${greps}
 echo 'delete file:' $greps

done
rm dellist
else
echo ' UnMatchTop1000 数据没有达到删除上限!'
fi
rm list

#*=================================================
#*
#*
#* CreateDate: 2014-02-25
#* Abstract  : Delete URLAppReport Folder on a regular.
#* Author    : LiangWei
#*
#* BONC All rights reserved.
#*==================================================

upl=$((REPORT_LIMIT+1))

ls -lt $URLAPPREPORT |awk '/^d/ {print $9}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $REPORT_LIMIT ]; then

sed -n ${upl},${w}p list > dellist

v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 rm -rf ${URLAPPREPORT}/${greps}
 echo 'delete file:' $greps

done
rm dellist
else
echo ' URLAppReport 数据没有达到删除上限!'
fi
rm list

#删除hdfs中match文件
upl=$((DELETE_DAY+1))

hadoop fs -ls $URL_MATCH |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $DELETE_DAY ]; then

sed -n ${upl},${w}p list > dellist

v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 hadoop fs -rmr ${URL_MATCH}/${greps}
 echo 'delete file:' ${URL_MATCH}/${greps}
done
rm dellist
else
echo ' URL_MATCH 数据没有达到删除上限!'
fi
rm list

#删除hdfs中间表数据
DAY=`date -d -"$DELETE_DAY"day +%Y%m%d`
hadoop fs -rmr ${TEMP_DPI_MATCH}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_NOISE}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_UNMATCH}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_URL}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_APP}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_SITE}/receive_day=${DAY}

#hadoop fs -rm -r ${url_match}/sitekey/${DAY}

#删除match汇总表数据
upl=$((SUMMARY_DAY+1))

hadoop fs -ls $MATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $SUMMARY_DAY ]; then

sed -n ${upl},${w}p list > dellist

v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 hadoop fs -rmr ${MATCH_SUMMARY}/${greps}
 echo 'delete file:' ${MATCH_SUMMARY}/${greps}
done
rm dellist
else
echo ' MATCH_SUMMARY 数据没有达到删除上限!'
fi
rm list

#删除unmatch汇总表数据

upl=$((SUMMARY_DAY+1))

hadoop fs -ls $UNMATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $SUMMARY_DAY ]; then

sed -n ${upl},${w}p list > dellist

v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 hadoop fs -rmr ${UNMATCH_SUMMARY}/${greps}
 echo 'delete file:' ${UNMATCH_SUMMARY}/${greps}
done
rm dellist
else
echo ' UNMATCH_SUMMARY 数据没有达到删除上限!'
fi
rm list
[[email protected] URLAppProgram_sf]$ 
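Note: all of the retention limits used by remove_files.sh come from public.cfg / common.cfg. The key names below are the ones grep'ed in the script; the values are illustrative only, the production files are authoritative:

    # public.cfg (illustrative values)
    ZIP_LIMIT=10        # max UACDS_*.tar.gz bundles kept under DATA_HOME
    REPORT_LIMIT=10     # max dated UnMatchTop1000 / URLAppReport folders kept
    DELETE_DAY=7        # days of match output kept on HDFS
    SUMMARY_DAY=7       # days of *_summary partitions kept on HDFS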

[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/match_detail.sh
#!/bin/bash
#match_details.sh
#*=================================================
#*
#* FileName : match_details.sh
#* CreateDate: 2015-04-22
#* Abstract : CMSS Interface
#* Author : SPP
#* 主程序
#* BONC All rights reserved.
#*==================================================

cd `dirname $0`
PWDNOW=`pwd`

eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_INPUT ${PWDNOW}/details_conf.cfg)
eval $(grep FTP_DIR ${PWDNOW}/details_conf.cfg)
eval $(grep DELAY_DAY ${PWDNOW}/details_conf.cfg)
eval $(grep DETAILS ${PWDNOW}/details_conf.cfg)

unset details day steps 

args=`getopt r:d:s: $*`
if test $? != 0
     then
	echo " Usage is ./match_details.sh  [-r details -d day -s steps ] "
	echo " Use database administrator account for user name "
        exit 1
fi
set -- $args

for i
do
  case "$i" in
  			-r) shift;details=$1;shift;;
        -d) shift;day=$1;shift;;
        -s) shift;steps=$1;shift;;
  esac
done

#判断传入参数日期是否为空,如果为空则获取系统时间

if [ ! -n "$day" ] ; then
  echo "not input days so day=today"
	day=`date -d -${DELAY_DAY}days +%Y%m%d`
	create_day=`date +%Y%m%d`

else
	create_day=$day
fi

if [ ! -n "$details" ] ; then
	LIST=$DETAILS
 	DETAILS_LIST=`echo $LIST | sed 's/,/ /g'`

else
	DETAILS_LIST=$details
fi

echo "create_day:" $create_day
echo "day:" $day
#判断目录是否存在,如果不存在则重新创建
for DETAILS in $DETAILS_LIST
do
if [ ! -d "$MATCH_DETAILS/${create_day}/$DETAILS" ] ; then
	mkdir -p "$MATCH_DETAILS/${create_day}/$DETAILS"
fi

if [ ! -d "$MATCH_DETAILS/${create_day}/logs" ] ; then
	mkdir -p "$MATCH_DETAILS/${create_day}/logs"
fi

rm -f $MATCH_DETAILS/${create_day}/logs/run_"$DETAILS"_"$create_day".log

#判断步骤是否为空
if [ ! -n "$steps" ] ; then
		./merge_file.sh	$day $DETAILS> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
		./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1

else
	if [ $steps -eq 2 ] ; then
		./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
	else
		echo " please make your second var is 2.........">> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
 fi
fi

done[[email protected] URLAppProgram_sf]$ 
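Note: match_detail.sh is the CMSS detail-extract driver (start2.sh calls it with just -d). Per its own getopt usage, -r picks specific detail types (otherwise the DETAILS list from details_conf.cfg is used), -d sets the data day, and -s 2 re-runs only the get_file.sh step:

    sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh -d 20160712
    sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh -d 20160712 -s 2   # skip merge_file.sh, only re-pull the files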

[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/remove_details.sh
#!/bin/bash
#remove_details.sh
#*=================================================
#*
#* FileName : remove_details.sh
#* CreateDate: 2014-10-22
#* Abstract : delete MATCH_DETAILS files
#* Author : WangNing
#*
#* BONC All rights reserved.
#*==================================================

cd `dirname $0`
PWDNOW=`pwd`

eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg)
eval $(grep REPORT_LIMIT ${PWDNOW}/details_conf.cfg)
eval $(grep HDFS_LIMIT ${PWDNOW}/details_conf.cfg)

upl=$((REPORT_LIMIT+1))

ls -lt $MATCH_DETAILS |awk '/^d/ {print $9}'| sort -r > list
q=`wc -l list | cut -d ' ' -f 1`

if [ $q -gt $REPORT_LIMIT ]; then

sed -n ${upl},${q}p list > dellist

x=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=x;m++))
do
 grepstr='sed -n '$m'p dellist'
 greps=`$grepstr`
 rm -rf ${MATCH_DETAILS}/${greps}
 echo 'delete file:' $greps

done
rm -f dellist
else
echo ' MATCH_DETAILS 数据没有达到删除上限!'
fi
rm -f list

#删除hdfs上的输出文件

up_l=$((HDFS_LIMIT+1))
hadoop fs -ls ${MATCH_PRINT} |awk '/^d/ {print $8}'| sort -r>files.txt
s=`cat files.txt |wc -l`

if [ $s -gt $HDFS_LIMIT ]; then
	sed -n ${up_l},${s}p files.txt | while read line
	do
		echo 'delete file:' $line
		hadoop fs -rm -r $line
	done
else
	echo "hdfs 数据没有达到删除上限!"
fi

rm -f files.txt
[[email protected] URLAppProgram_sf]$ 

[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/hive.sh
#!/bin/bash
. $HOME/.bash_profile
#dateday=`date -d -1day +%Y%m%d`
dateday=$1
hive -e "
set mapreduce.job.queuename=thirdpart1;use dpi;
add jar /home/hzadmin/bj_ggsn/jar/Decode.jar;
create temporary function decode as 'Decode';
select PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,sum(DOWNLOAD_BYTES),sum(UPLOAD_BYTES),keyword,count(id),Decode(gen_flag,'','3G','4G','4G',gen_flag) from dpi_http_dtl_mark_match where receive_day='${dateday}' and phone_id is not null group by PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,keyword,gen_flag" > /dfs/ftp/hzadmin/test/${dateday}.txt

split -l 12000000 /dfs/ftp/hzadmin/test/${dateday}.txt /dfs/ftp/hzadmin/test/${dateday}
mv /dfs/ftp/hzadmin/test/${dateday}aa /dfs/ftp/hzadmin/bj_data/${dateday}001.txt
mv /dfs/ftp/hzadmin/test/${dateday}ab /dfs/ftp/hzadmin/bj_data/${dateday}002.txt
mv /dfs/ftp/hzadmin/test/${dateday}ac /dfs/ftp/hzadmin/bj_data/${dateday}003.txt
mv /dfs/ftp/hzadmin/test/${dateday}ad /dfs/ftp/hzadmin/bj_data/${dateday}004.txt
mv /dfs/ftp/hzadmin/test/${dateday}ae /dfs/ftp/hzadmin/bj_data/${dateday}005.txt
mv /dfs/ftp/hzadmin/test/${dateday}af /dfs/ftp/hzadmin/bj_data/${dateday}006.txt
mv /dfs/ftp/hzadmin/test/${dateday}ag /dfs/ftp/hzadmin/bj_data/${dateday}007.txt
mv /dfs/ftp/hzadmin/test/${dateday}ah /dfs/ftp/hzadmin/bj_data/${dateday}008.txt
mv /dfs/ftp/hzadmin/test/${dateday}ai /dfs/ftp/hzadmin/bj_data/${dateday}009.txt
[[email protected] URLAppProgram_sf]$ 
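Note: hive.sh splits the day's extract into 12,000,000-line chunks, but the mv list above only renames the first nine parts (aa..ai); if a day ever produces more, the extra parts are left behind in /dfs/ftp/hzadmin/test. A small sketch that renames however many parts exist, same paths, purely illustrative:

    dateday=$1
    n=0
    for f in /dfs/ftp/hzadmin/test/${dateday}??; do     # split suffixes: aa, ab, ac, ...
        n=$((n+1))
        mv "$f" /dfs/ftp/hzadmin/bj_data/${dateday}$(printf '%03d' "$n").txt
    done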

[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start1.sh
#!/bin/sh
source ~/.bash_profile
datetime=$(date --date "1 days ago" +%Y%m%d)
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime  >> log/${datetime}_1.log 2>&1
sh /home/hzadmin/bj_ggsn/select2.sh $datetime  >> log/${datetime}_2.log 2>&1
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun.sh
sh /home/hzadmin/urlAPP/hive.sh $datetime
sh /home/hzadmin/bj_ggsn/delete.sh
[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/delete.sh
#!/bin/bash
source ~/.bash_profile
dataday=$(date --date "7 days ago" +%Y%m%d)
hadoop fs -rm -r /share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=$dataday
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/$dataday
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$dataday
hive -e "use dpi;alter table dpi_http_dtl_mark_match_summary drop partition(receive_day='$dataday')"
[[email protected] URLAppProgram_sf]$ 

[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start2.sh
#!/bin/sh
source ~/.bash_profile
#datetime=$(date --date "1 days ago" +%Y%m%d)
datetime=$1
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime
sh /home/hzadmin/bj_ggsn/select2.sh $datetime
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
#sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun1.sh $datetime
sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh -d $datetime
sh /home/hzadmin/urlAPP/hive.sh $datetime
[[email protected]I2 URLAppProgram_sf]$ crontab -l
00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log
00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt
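Note: the 03:00 entry drives the whole daily chain through start1.sh (listed earlier); start2.sh is the manual variant that takes an explicit date, so one way to backfill a missed day by hand is:

    sh /home/hzadmin/bj_ggsn/start2.sh 20160710 > /home/hzadmin/bj_ggsn/start.log 2>&1 &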
[[email protected] URLAppProgram_sf]$
[[email protected] URLAppProgram_sf]$
[[email protected] URLAppProgram_sf]$
[[email protected] URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/select2.sh
hours=/home/hzadmin/bj_ggsn/hours.txt
datetime=$1
while read LINE
do
   hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}
   if [ $? -eq 0 ]; then
        hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
   else
        echo 'not exist'
   fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn_4g t
 on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
 insert into table bj_ggsn_mobile
  partition (receive_day = '${datetime}')
    select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
         t.LAC,
         t.CI,
         t.IMEI,
         t.BUSITYPE,
         t.CAPTURETIME,
         t.ENDTIME,
         t.DURATION,
         t.FLOWUP,
         t.FLOWDOWN,
         t.FLOWALL,
         t.RATTYPE,
         t.TERMIANL_IP,
         t.DESTIP,
         t.STATUSCODE,
         t.USERAGENT,
         t.APN,
         t.IMSI,
         t.SGSNIP,
         t.GGSNIP,
         t.CONTENTTYPE,
         t.SOURCEPORT,
         t.DESTPORT,
         t.LOGOCODE,
         t.URL,
         t.RESULT,
         t.HOST,
         '4G',
         t.YULIU2,
         t.YULIU3;
"
[[email protected] URLAppProgram_sf]$
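Note: select2.sh registers one bj_ggsn_4g partition for every hour directory that actually exists under /share/external_table/ssa/DPI_MBL_4G/ALL/<day>/ and then joins t_user against it into bj_ggsn_mobile. hours.txt is assumed to hold one two-digit hour per line (00 through 23); if it is ever lost it can be regenerated with:

    seq -w 0 23 > /home/hzadmin/bj_ggsn/hours.txt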
