用Mapreduce 方式生成HFile,导入HBase

详细代码信息

package com.tansun.di.hbase.put;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author 作者 E-mail:
 * @version 创建时间:2016年6月1日 下午5:16:14
 * 类说明
 */
public class CreateHfileByMapReduce {

    public static class MyBulkMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>{
        @Override
        protected void setup( Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>.Context context )
            throws IOException, InterruptedException {

            super.setup( context );
        }
        @Override
        protected void map( LongWritable key, Text value,
                            Context context )
            throws IOException, InterruptedException {
            String[] split = value.toString().split("\t"); // 根据实际情况修改
            if (split.length == 4){
                byte[] rowkey = split[0].getBytes();
                ImmutableBytesWritable imrowkey = new ImmutableBytesWritable( rowkey );
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(split[1])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(split[2])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes(split[3])));
            }
        }
    }

    @SuppressWarnings( "deprecation" )
    public static void main( String[] args ) {
       /* if (args.length != 4){
            System.err.println("Usage: CreateHfileByMapReduce <table_name><data_input_path><hfile_output_path> ");
            System.exit(2);
        }*/

     /*   String tableName = args[0];
        String inputPath  = args[1];
        String outputPath = args[2];
        */
        String tableName = "student";
        String inputPath  = "hdfs://ts.node1.com:8022/datas/test1";
        String outputPath = "hdfs://ts.node1.com:8022/datas/output";
        HTable hTable = null;
        Configuration conf = HBaseConfiguration.create();
        try {
           hTable  = new HTable(conf, tableName);
           Job job = Job.getInstance( conf, "CreateHfileByMapReduce");
           job.setJarByClass( CreateHfileByMapReduce.class );
           job.setMapperClass(MyBulkMapper.class);
           job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class);
           //
           HFileOutputFormat.configureIncrementalLoad(job, hTable);
           FileInputFormat.addInputPath( job, new Path(inputPath) );
           FileOutputFormat.setOutputPath( job, new Path(outputPath) );

           System.exit( job.waitForCompletion(true)? 0: 1 );

        }
        catch ( Exception e ) {

            e.printStackTrace();
        }
    }
}

  异常信息:

Application application_1463973945893_0006 failed 2 times due to AM Container for appattempt_1463973945893_0006_000002 exited with exitCode: 1
For more detailed output, check application tracking page:http://ts.node1.com:8088/proxy/application_1463973945893_0006/Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1463973945893_0006_02_000001
Exit code: 1
Exception message: /yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 11: %PWD%;$HADOOP_CONF_DIR;/usr/hdp/current/hadoop-client/*;/usr/hdp/current/hadoop-client/lib/*;/usr/hdp/current/hadoop-hdfs-client/*;/usr/hdp/current/hadoop-hdfs-client/lib/*;/usr/hdp/current/hadoop-yarn-client/*;/usr/hdp/current/hadoop-yarn-client/lib/*;$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:$PWD/mr-framework/hadoop/share/hadoop/tools/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure;job.jar/job.jar;job.jar/classes/;job.jar/lib/*;%PWD%/*: bad substitution
/yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 125: %JAVA_HOME%/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhdp.version=${hdp.version} -Xmx204m -Dhdp.version=${hdp.version} org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stdout 2>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stderr : bad substitution
Stack trace: ExitCodeException exitCode=1: /yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 11: %PWD%;$HADOOP_CONF_DIR;/usr/hdp/current/hadoop-client/*;/usr/hdp/current/hadoop-client/lib/*;/usr/hdp/current/hadoop-hdfs-client/*;/usr/hdp/current/hadoop-hdfs-client/lib/*;/usr/hdp/current/hadoop-yarn-client/*;/usr/hdp/current/hadoop-yarn-client/lib/*;$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:$PWD/mr-framework/hadoop/share/hadoop/tools/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure;job.jar/job.jar;job.jar/classes/;job.jar/lib/*;%PWD%/*: bad substitution
/yarn/nm/usercache/hdfs/appcache/application_1463973945893_0006/container_1463973945893_0006_02_000001/launch_container.sh: line 125: %JAVA_HOME%/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhdp.version=${hdp.version} -Xmx204m -Dhdp.version=${hdp.version} org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stdout 2>/var/log/hadoop-yarn/container/application_1463973945893_0006/container_1463973945893_0006_02_000001/stderr : bad substitution
at org.apache.hadoop.util.Shell.runCommand(Shell.java:543)
at org.apache.hadoop.util.Shell.run(Shell.java:460)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:720)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:211)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
时间: 2024-12-26 11:32:33

用Mapreduce 方式生成HFile,导入HBase的相关文章

MapReduce生成HFile文件,再使用BulkLoad导入HBase中(完全分布式运行)

声明: 若要转载, 请标明出处. 前提: 在对于大量的数据导入到HBase中, 如果一条一条进行插入, 则太耗时了, 所以可以先采用MapReduce生成HFile文件, 然后使用BulkLoad导入HBase中. 引用: 一.这种方式有很多的优点: 1. 如果我们一次性入库hbase巨量数据,处理速度慢不说,还特别占用Region资源, 一个比较高效便捷的方法就是使用 "Bulk Loading"方法,即HBase提供的HFileOutputFormat类. 2. 它是利用hbase

非mapreduce生成Hfile,然后导入hbase当中

最近一个群友的boss让研究hbase,让hbase的入库速度达到5w+/s,这可愁死了,4台个人电脑组成的集群,多线程入库调了好久,速度也才1w左右,都没有达到理想的那种速度,然后就想到了这种方式,但是网上多是用mapreduce来实现入库,而现在的需求是实时入库,不生成文件了,所以就只能自己用代码实现了,但是网上查了很多资料都没有查到,最后在一个网友的指引下,看了源码,最后找到了生成Hfile的方式,实现了之后,发现单线程入库速度才达到1w4左右,和之前的多线程的全速差不多了,百思不得其解之

数据导入HBase最常用的三种方式及实践分析

数据导入HBase最常用的三种方式及实践分析         摘要:要使用Hadoop,需要将现有的各种类型的数据库或数据文件中的数据导入HBase.一般而言,有三种常见方式:使用HBase的API中的Put方法,使用HBase 的bulk load工具和使用定制的MapReduce Job方式.本文均有详细描述. [编者按]要使用Hadoop,数据合并至关重要,HBase应用甚广.一般而言,需要 针对不同情景模式将现有的各种类型的数据库或数据文件中的数据转入至HBase 中.常见方式为:使用H

使用MapReduce将HDFS数据导入到HBase(二)

package com.bank.service; import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.hbase.client.Put;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.hbase.mapred

使用MapReduce将HDFS数据导入到HBase(一)

package com.bank.service; import java.io.IOException; import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hba

利用BulkLoad导入Hbase表

1.插入HBase表传统方法具有什么问题? 我们先看下 HBase 的写流程: 通常 MapReduce 在写HBase时使用的是TableOutputFormat方式,在map/reduce中直接生成put对象写入HBase,该方式在大量数据写入时效率低下,因为HBase会block写入,频繁进行flush.split.compact等大量IO操作,这样对HBase节点的稳定性也会造成一定的影响,例如GC时间过长,响应变慢,导致节点超时退出,并引起一系列连锁反应,而HBase支持BulkLoa

Hive 数据导入HBase的2种方法详解

最近经常被问到这个问题,所以简单写一下总结. Hive数据导入到HBase基本有2个方案: 1.HBase中建表,然后Hive中建一个外部表,这样当Hive中写入数据后,HBase中也会同时更新 2.MapReduce读取Hive数据,然后写入(API或者Bulkload)到HBase 1.Hive 外部表 创建hbase表 (1) 建立一个表格classes具有1个列族user create 'classes','user' (2) 查看表的构造 hbase(main):005:0> desc

数据批量导入HBase

测试数据: datas 1001 lilei 17 13800001111 1002 lily 16 13800001112 1003 lucy 16 13800001113 1004 meimei 16 13800001114 数据批量导入使用mr,先生成HFile文件然后在用completebulkload工具导入. 1.需要先在hbase 创建表名: hbase> create 'student', {NAME => 'info'} maven pom.xml配置文件如下: <de

【甘道夫】通过bulk load将HDFS上的数据导入HBase

引言 通过bulkload将HDFS上的数据装载进HBase是常用的入门级HBase技能,下面简单记录下关键步骤. bulkload的详细情况请参见官网文档. 过程 第一步:每台机器执行 ln -s $HBASE_HOME/conf/hbase-site.xml $HADOOP_HOME/etc/hadoop/hbase-site.xml 第二步:编辑$HADOOP_HOME/etc/hadoop/hadoop-env.sh,拷贝到所有节点 末尾添加: export HADOOP_CLASSPA