1. Requirements
Apache access logs on a server tend to be many small files, whereas Hadoop works far more efficiently on a small number of large files. The scattered log files therefore need to be merged first. This section develops a PutMerge program that concatenates the files in a local directory and writes the result into HDFS as a single file.
2. Java code

package org.apache.hadoop.studyhdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class PutMergeFile {
    public static void main(String[] args) throws Exception {
        // hard-coded arguments: args[0] is the local source directory,
        // args[1] is the target file in HDFS
        args = new String[]{"/opt/dataFile", "hdfs://Hadoop-senior02.beifeng.com:8020/input/putMerge.xml"};

        // 1. get the configuration (fs.defaultFS must point at the cluster,
        //    e.g. via core-site.xml on the classpath, for the hdfs:// path to resolve)
        Configuration conf = new Configuration();

        // 2. get the two file systems: the target HDFS and the local file system
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        // 3. build the paths
        Path localPath = new Path(args[0]);
        Path hdfsPath = new Path(args[1]);

        // 4. list the input files in the local source directory
        FileStatus[] inputFiles = local.listStatus(localPath);

        // open a single output stream on HDFS; each input file is appended to it in turn
        FSDataOutputStream outputStream = hdfs.create(hdfsPath);
        FSDataInputStream inputStream;
        for (FileStatus inputFile : inputFiles) {
            System.out.println(inputFile.getPath().getName());
            inputStream = local.open(inputFile.getPath());
            // copy in 4 KB chunks; 'false' keeps the output stream open for the next file
            IOUtils.copyBytes(inputStream, outputStream, 4096, false);
            IOUtils.closeStream(inputStream);
        }
        IOUtils.closeStream(outputStream);
    }
}
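As a side note, Hadoop 2.x also ships a utility that does essentially the same merge, org.apache.hadoop.fs.FileUtil.copyMerge (it was removed in Hadoop 3.x, so the hand-written loop above stays the more portable option). Below is a minimal sketch assuming the same local directory and HDFS target as above; the class name PutMergeWithFileUtil is just an illustrative choice, not part of the original program.

package org.apache.hadoop.studyhdfs;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class PutMergeWithFileUtil {
    public static void main(String[] args) throws Exception {
        String localDir = "/opt/dataFile";
        String hdfsFile = "hdfs://Hadoop-senior02.beifeng.com:8020/input/putMerge.xml";

        Configuration conf = new Configuration();
        FileSystem local = FileSystem.getLocal(conf);
        // bind the target FileSystem to the HDFS URI explicitly,
        // so no core-site.xml is required on the classpath
        FileSystem hdfs = FileSystem.get(URI.create(hdfsFile), conf);

        // copyMerge concatenates every file under localDir into one HDFS file;
        // 'false' keeps the local source files, and the final argument is a
        // separator string appended after each source file (empty here)
        FileUtil.copyMerge(local, new Path(localDir), hdfs, new Path(hdfsFile),
                false, conf, "");
    }
}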
3. Viewing the result
Command: $ bin/hdfs dfs -text /input/putMerge.xml
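To confirm that the merged file exists and check its size, $ bin/hdfs dfs -ls /input can also be used.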