2018-06-09总结:
ForkJoin确实可以很快速地解析文件并统计关键词的数量,但是如果文件过大就会出现内存溢出。是否可以通过虚拟内存的方式解决内存溢出的问题呢?
package com.oxygen.forkjoin.model; import java.util.List; /** * 文档 * @author renguanyu * */ public class Document { private List<String> lines; public Document(List<String> lines) { super(); this.lines = lines; } public List<String> getLines() { return lines; } public void setLines(List<String> lines) { this.lines = lines; } }
package com.oxygen.forkjoin.model;

import java.util.List;

/**
 * A folder: a node of an in-memory tree that holds nested folders and documents.
 *
 * @author renguanyu
 */
public class Folder {

    /** Folders nested directly inside this one. */
    private List<Folder> subFolders;

    /** Documents located directly inside this folder. */
    private List<Document> documents;

    /**
     * Creates a folder from its direct children. Both lists are stored by reference.
     *
     * @param subFolders folders nested directly inside this one
     * @param documents  documents located directly inside this folder
     */
    public Folder(List<Folder> subFolders, List<Document> documents) {
        this.documents = documents;
        this.subFolders = subFolders;
    }

    /**
     * Returns the nested folders.
     *
     * @return the list of direct sub-folders
     */
    public List<Folder> getSubFolders() {
        return this.subFolders;
    }

    /**
     * Replaces the nested folders.
     *
     * @param subFolders the new list of direct sub-folders
     */
    public void setSubFolders(List<Folder> subFolders) {
        this.subFolders = subFolders;
    }

    /**
     * Returns the documents of this folder.
     *
     * @return the list of documents located directly in this folder
     */
    public List<Document> getDocuments() {
        return this.documents;
    }

    /**
     * Replaces the documents of this folder.
     *
     * @param documents the new list of documents
     */
    public void setDocuments(List<Document> documents) {
        this.documents = documents;
    }
}
package com.oxygen.forkjoin.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import com.oxygen.forkjoin.model.Document;

/**
 * Document service: loads files into {@link Document} objects.
 *
 * @author renguanyu
 */
public class DocumentService {

    /**
     * Reads all lines of a file into memory.
     *
     * <p>The file is decoded with {@link FileReader}'s default character encoding.
     * Failures are handled on a best-effort basis: if the file cannot be opened or a
     * read fails, the stack trace is printed and the lines read so far (possibly none)
     * are still returned.
     *
     * @param file the file to read
     * @return a document holding the lines that were read
     */
    public static Document fromFile(File file) {
        List<String> content = new ArrayList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            // readLine() returns null once the end of the file is reached.
            for (String current = in.readLine(); current != null; current = in.readLine()) {
                content.add(current);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new Document(content);
    }
}
package com.oxygen.forkjoin.service; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ForkJoinPool; import com.oxygen.forkjoin.model.Document; import com.oxygen.forkjoin.model.Folder; import com.oxygen.forkjoin.task.FolderSearchTask; /** * 文件夹服务 * @author renguanyu * */ public class FolderService{ /** * 递归查询文件夹中所有的数据 * 1.在内存中建立文件夹的结构 * 2.把数据都加载到这个结构中,方便下一步计算 * @param dir 文件夹 * @return 文件夹 */ public static Folder fromDirectory(File dir) { List<Document> documents = new ArrayList<>(); List<Folder> subFolders = new ArrayList<>(); for (File entry : dir.listFiles()) { if (entry.isDirectory()) { subFolders.add(FolderService.fromDirectory(entry)); } else { documents.add(DocumentService.fromFile(entry)); } } return new Folder(subFolders, documents); } /** * 获取关键词总数 * @param targetFolder 目标文件夹 * @param keyword 关键词 * @throws IOException */ public static long getKeywordTotal(String targetFolder, String keyword) { ForkJoinPool forkJoinPool = new ForkJoinPool(); //把文件夹中的数据加载到内存中,我这个文件夹中就一个日志文件 File dir = new File(targetFolder); Folder folder = FolderService.fromDirectory(dir); //创建一个搜索任务 FolderSearchTask task = new FolderSearchTask(folder, keyword); //开始执行fork/join任务 long counts = forkJoinPool.invoke(task); return counts; } }
package com.oxygen.forkjoin.task;

import java.util.List;
import java.util.concurrent.RecursiveTask;
import java.util.regex.Pattern;

import com.oxygen.forkjoin.model.Document;

/**
 * Document search task: counts how many words of one document equal the searched word.
 *
 * @author renguanyu
 */
public class DocumentSearchTask extends RecursiveTask<Long> {

    private static final long serialVersionUID = 1L;

    /**
     * Word separator: one or more whitespace or punctuation characters.
     * Compiled once here instead of once per line inside {@link #compute()}.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("(\\s|\\p{Punct})+");

    private final Document document;
    private final String searchedWord;

    /**
     * Creates a search task for one document.
     *
     * @param document     the document to scan
     * @param searchedWord the word to count; compared with {@link String#equals(Object)}
     */
    public DocumentSearchTask(Document document, String searchedWord) {
        super();
        this.document = document;
        this.searchedWord = searchedWord;
    }

    /**
     * Splits every line into words and counts the words equal to the searched word.
     *
     * @return the number of matching words in the document
     */
    @Override
    protected Long compute() {
        long count = 0;
        List<String> lines = document.getLines();
        for (String line : lines) {
            // Same tokens as line.trim().split("(\\s|\\p{Punct})+"), without recompiling the regex.
            String[] words = WORD_SEPARATOR.split(line.trim());
            for (String word : words) {
                if (searchedWord.equals(word)) {
                    count++;
                }
            }
        }
        return count;
    }
}
package com.oxygen.forkjoin.task; import java.util.ArrayList; import java.util.List; import java.util.concurrent.RecursiveTask; import com.oxygen.forkjoin.model.Document; import com.oxygen.forkjoin.model.Folder; /** * 文件夹搜索任务 * @author renguanyu * */ public class FolderSearchTask extends RecursiveTask<Long> { private static final long serialVersionUID = 1L; private Folder folder; private String searchedWord; public FolderSearchTask(Folder folder, String searchedWord) { super(); this.folder = folder; this.searchedWord = searchedWord; } //计算方法 @Override protected Long compute() { long count = 0L; List<RecursiveTask<Long>> forks = new ArrayList<>(); //获取文件夹下的子文件夹 for (Folder subFolder : folder.getSubFolders()) { //递归文件夹搜索任务 FolderSearchTask task = new FolderSearchTask(subFolder, searchedWord); //把任务添加到分叉列表,用于合并任务 forks.add(task); //放到工作队列中 task.fork(); } //获取文件夹下的文档 for (Document document : folder.getDocuments()) { DocumentSearchTask task = new DocumentSearchTask(document, searchedWord); //把任务添加到分叉列表,用于合并任务 forks.add(task); //放到工作队列中 task.fork(); } //合并工作队列中各个线程计算结果的值 for (RecursiveTask<Long> task : forks) { count = count + task.join(); } return count; } }
package com.oxygen.forkjoin.test;

import java.io.IOException;

import com.oxygen.forkjoin.service.FolderService;

/**
 * Test program: runs one keyword search and prints the count and the elapsed time.
 *
 * @author renguanyu
 */
public class MainTest {

    /**
     * Counts the word {@code "null"} in the files below {@code C:\test\logs\} and prints
     * the result together with the wall-clock duration in milliseconds.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared by the signature; nothing in the body throws it
     */
    public static void main(String[] args) throws IOException {
        final long begin = System.currentTimeMillis();
        final long total = FolderService.getKeywordTotal("C:\\test\\logs\\", "null");
        final long elapsed = System.currentTimeMillis() - begin;
        System.out.println(total + " , fork / join search took " + elapsed + "ms");
    }
}
原文地址:https://www.cnblogs.com/q651231292/p/9158915.html
时间: 2024-11-05 18:41:05