1. 准备依赖:将 lucene-3.5.0.jar 加入项目的 classpath。
2. 新建目录C:\testsource,新建目录C:\testindex。
3. 在C:\testsource下新建test1.txt、test2.txt(请以UTF-8编码保存,因为后面的索引程序按UTF-8读取文件),内容分别为:“商务休闲品牌男装西裤衬衫”、“潮流休闲品牌女装裙子大衣”。
4.创建索引
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * 给text文件建立索引 * @author [email protected] */ public class TextFileIndexer { public static void main(String[] args) throws Exception{ //text文件路径 File sourceDir = new File("C:\\testsource"); File[] sourceFiles = sourceDir.listFiles(); //索引文件路径 File indexDir = new File("C:\\testindex"); Directory indexFilesDir = FSDirectory.open(indexDir); //构建analyzer Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); //配置IndexWriter IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer); iwConfig.setOpenMode(OpenMode.CREATE); //构建IndexWriter IndexWriter indexWriter = new IndexWriter(indexFilesDir, iwConfig); long startTime = new Date().getTime(); for(int i=0; i<sourceFiles.length; i++){ if(sourceFiles[i].isFile() && sourceFiles[i].getName().endsWith(".txt")){ System.out.println("\nFile " + sourceFiles[i].getCanonicalPath() + "正在被索引......"); String temp = fileReaderAll(sourceFiles[i].getCanonicalPath(), "UTF-8"); System.out.println(temp); Field FieldPath = new Field("path", sourceFiles[i].getPath(), Field.Store.YES, Field.Index.NO); Field FieldBody = new Field("body", temp, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); Document document = new Document(); document.add(FieldPath); document.add(FieldBody); indexWriter.addDocument(document); } } //关闭IndexWriter indexWriter.close(); long 
endTime = new Date().getTime(); System.out.println("\n花费了" + (endTime-startTime) + " 毫秒把文档增加到索引里面去!索引文件地址:" + sourceDir.getPath()); } //读取文件所有内容 private static String fileReaderAll(String filePath, String charset) throws IOException { String line = new String(); String temp = new String(); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), charset)); while((line=reader.readLine())!=null){ temp += line; } reader.close(); return temp; } }
输出结果:
File C:\testsource\test1.txt正在被索引...... 商务休闲品牌男装西裤衬衫 File C:\testsource\test2.txt正在被索引...... 潮流休闲品牌女装裙子大衣 花费了569 毫秒把文档增加到索引里面去!索引文件地址:C:\testsource
5.关键字检索
import java.io.File; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * 关键字检索 * @author [email protected] */ public class TextQuery { public static void main(String[] args) throws Exception{ String queryString = "休闲 装"; //索引文件路径 String indexDir = "C:\\testindex"; IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); Query query = null; Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); QueryParser queryParser = new QueryParser(Version.LUCENE_35, "body", analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); query = queryParser.parse(queryString); ScoreDoc[] hits = null; if(indexSearcher!=null){ //返回最多为10条记录 TopDocs results = indexSearcher.search(query, 10); hits = results.scoreDocs; if(hits.length>0){ System.out.println("找到:" + hits.length + " 个结果!"); }else{ System.out.println("没有找到"); } indexSearcher.close(); } } }
输出结果:
找到:2 个结果!
时间: 2024-10-11 16:36:44