// A collection of frequently used code snippets. The code below is a very fast text de-duplication utility written in Java.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
/**
 * Command-line utility that removes duplicate lines from a UTF-8 text file.
 *
 * <p>Usage: {@code java -Xmx1000m SpeedClear <input file> <output file>}.
 * Every distinct line of the input is held in memory, so the heap must be
 * large enough for the unique content of the file.
 */
public class SpeedClear {

    /** Number of lines read between two progress markers printed to stdout. */
    private static final int PROGRESS_INTERVAL = 30000;

    /** Line terminator written after every output line. */
    private static final String LINE_SEPARATOR = "\r\n";

    /**
     * Hint shown when the JVM runs out of heap. The text is Chinese for
     * "The file is too large, try about 100MB first.." and is spelled with
     * Unicode escapes so the source compiles under any source encoding.
     */
    private static final String TOO_LARGE_MESSAGE =
            "\u6587\u4ef6\u592a\u5927\u4e86,\u5efa\u8bae\u5148100MB\u5927\u5c0f..";

    /**
     * Program entry point.
     *
     * <p>With no arguments the usage banner is printed; with any argument
     * count other than two a format error is printed. Both cases exit with
     * status 1. Otherwise the file named by {@code args[0]} is de-duplicated
     * into the file named by {@code args[1]}.
     *
     * @param args input path followed by output path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        String pathname = args[0];
        String newPath = args[1];
        clear(pathname, newPath);
    }

    /**
     * Copies the lines of {@code pathname} to {@code newPath}, keeping only
     * the first occurrence of each distinct line.
     *
     * <p>Both files are treated as UTF-8. Output lines keep the order in which
     * they first appear in the input and are each terminated with CR LF. The
     * input is read completely and closed before the output is opened, so the
     * two paths may name the same file. Progress and a summary are printed to
     * standard output. I/O failures and heap exhaustion are reported on the
     * console rather than thrown; when reading fails the output file is not
     * touched.
     *
     * @param pathname path of the file to read
     * @param newPath path of the file to create or overwrite
     * @throws NullPointerException if either path is {@code null}
     */
    public static void clear(String pathname, String newPath) {
        System.out.println("Start... ");
        try {
            Set<String> unique = readUniqueLines(pathname);
            writeLines(unique, newPath);
            System.out.println("");
            System.out.println("size = " + unique.size());
            System.out.println("End...");
        } catch (IOException e) {
            // Report the real cause instead of blaming the file size.
            System.err.println("Failed to de-duplicate " + pathname + ": " + e);
        } catch (OutOfMemoryError e) {
            // The set of unique lines did not fit in the heap; the partially
            // built set is unreachable by now, so printing the hint is safe.
            System.out.println(TOO_LARGE_MESSAGE);
        }
    }

    /** Reads the file as UTF-8 and returns its distinct lines in first-seen order. */
    private static Set<String> readUniqueLines(String pathname) throws IOException {
        Set<String> unique = new LinkedHashSet<String>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(pathname), StandardCharsets.UTF_8))) {
            long lineCount = 0; // long: an int counter would overflow on huge inputs
            String line;
            while ((line = reader.readLine()) != null) {
                unique.add(line);
                if (lineCount % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                lineCount++;
            }
        }
        return unique;
    }

    /** Writes each line followed by CR LF to the file as UTF-8, then flushes and closes it. */
    private static void writeLines(Set<String> lines, String newPath) throws IOException {
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(newPath), StandardCharsets.UTF_8))) {
            for (String line : lines) {
                out.write(line);
                out.write(LINE_SEPARATOR);
            }
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println(" format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxser QQ:616100108");
    }
}
// Original source: https://blog.51cto.com/14311234/2424235
// Time: 2024-11-10 13:56:23