3、Hive安装配置
3.1安装MySQL
在datanode5上安装MySQL
# yum -y installmysql-server mysql
# mysql
mysql> grant all privileges on *.* [email protected]‘10.40.214.%‘ identified by "hive";
mysql> flush privileges;
3.2安装hive
# tar -zxf apache-hive-0.13.1-bin.tar.gz-C /var/data/; mv /var/data/apache-hive-0.13.1 /var/data/hive
# cd /var/data/hive
# vimbin/hive-config.sh ##在脚本开头添加下面的内容
exportJAVA_HOME=/usr/java/jdk1.7.0_71
exportHIVE_HOME=/var/data/hive
exportHADOOP_HOME=/var/data/Hadoop
# vim hive-site.xml ##配置hive
<configuration>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive-${user.name}</value>
<description>Scratchspace for Hive jobs</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/tmp/${user.name}</value>
<description>Localscratch space for Hive jobs</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://10.40.214.9:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBCconnect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>Driverclass name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.PersistenceManagerFactoryClass</name>
<value>org.datanucleus.api.jdo.JDOPersistenceManagerFactory</value>
<description>classimplementing the jdo persistence</description>
</property>
<property>
<name>javax.jdo.option.DetachAllOnCommit</name>
<value>true</value>
<description>detachesall objects from session so that they can be used after transaction iscommitted</description>
</property>
<property>
<name>javax.jdo.option.NonTransactionalRead</name>
<value>true</value>
<description>readsoutside of transactions</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>usernameto use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>passwordto use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>locationof default database for the warehouse</description>
</property>
</configuration>
# cpmysql-connector-java-5.1.18-bin.jar /var/data/hive/lib/ ##加入MySQL java驱动jar包
# vim /etc/profile ##配置hive环境变量
#hivepath
exportHIVE_HOME=/var/data/hive
exportPATH=$PATH:$HIVE_HOME/bin
# source /etc/profile
# hive ##启动hive
3.3hive的使用
##创建库
# hive -e "createdatabase maillog;"
##创建表
# hive -e"create table maillog.izhenxin(mail_time string, message_id string,mail_to string, mail_domain string, mail_relay string, mail_delay string,mail_delays string,mail_dsn string, mail_status string) ROW FORMAT DELIMITEDFIELDS TERMINATED BY ‘|‘ STORED AS TEXTFILE;"
##删除表
# hive -e "droptable maillog.izhenxin;"
##导入数据到hive表中
# hive -e "LOADDATA LOCAL INPATH ‘/opt/zhangdh/to_result.txt‘ OVERWRITE INTO TABLEmaillog.izhenxin;"
##简单的数据查询
# hive -e "usemaillog;select * from izhenxin_total limit 10;"
##用hive统计数据,会执行mapreduce过程
# hive -e"select mail_domain,sum(case when mail_status=‘sent‘ then 1 else 0 end )sent,sum(case when mail_status=‘bounced‘ then 1 else 0 end ) bounced,sum(casewhen mail_status=‘deferred‘ then 1 else 0 end ) deferred from maillog.izhenxingroup by mail_domain order by sent desc;"
##将hive表数据的查询结果,添加到一个新表中
# hive -e"create table maillog.izhenxin_total(mail_domain string, sent_number int,bounced_number int, deferred int) ROW FORMAT DELIMITED FIELDS TERMINATED BY‘\t‘ STORED AS TEXTFILE;"
# hive -e "usemaillog; insert into table izhenxin_total select mail_domain,sum(case whenmail_status=‘sent‘ then 1 else 0 end ) sent,sum(case when mail_status=‘bounced‘then 1 else 0 end ) bounced,sum(case when mail_status=‘deferred‘ then 1 else 0end ) deferred from maillog.izhenxin group by mail_domain order by sentdesc;"