package com.my.lucene.index;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.0 example: {@link #index()} builds an index from the files
 * in a local folder and {@link #search()} queries that index for the term
 * "java" in the "content" field.
 */
public class TestIndex {

    /** Folder whose files are read and indexed. */
    private static final String DOC_DIR = "d:\\lucene\\doc";

    /** Folder holding the index; written by index() and read by search(). */
    private static final String INDEX_DIR = "d:\\lucene\\index";

    /**
     * Builds the index from every regular file directly inside the document
     * folder. Each file becomes one document with three fields: "content"
     * (tokenized from the file's text, not stored), "name" and "path" (both
     * stored, not analyzed).
     *
     * <p>I/O failures are reported on standard error and end the run; no
     * exception is propagated to the caller.
     */
    public void index() {
        File path = new File(DOC_DIR);

        // listFiles() returns null when the folder is missing or unreadable.
        // Check before opening the writer: it is opened with create=true, which
        // would replace an existing index even though nothing can be indexed.
        File[] candidates = path.listFiles();
        if (candidates == null) {
            System.err.println("Cannot list document directory: " + path.getAbsolutePath());
            return;
        }

        Directory indexpath = null;
        IndexWriter writer = null;
        try {
            // 1. Open the directory in which the index is stored.
            indexpath = FSDirectory.open(new File(INDEX_DIR));
            // 2. Create the writer: index directory, analyzer, create/overwrite
            //    flag, and the per-field term limit.
            writer = new IndexWriter(indexpath, new StandardAnalyzer(Version.LUCENE_30),
                    true, MaxFieldLength.LIMITED);

            // 3. Turn each local file into a Lucene document.
            for (File file : candidates) {
                if (!file.isFile()) {
                    // A sub-directory cannot be opened with FileReader; skip it
                    // instead of aborting the whole run.
                    continue;
                }
                // NOTE(review): FileReader decodes with the platform default
                // charset - confirm this matches the encoding of the documents.
                FileReader content = new FileReader(file);
                try {
                    Document doc = new Document();
                    // Fields are key/value pairs; the key is what queries refer to.
                    doc.add(new Field("content", content));
                    doc.add(new Field("name", file.getName(), Store.YES, Index.NOT_ANALYZED));
                    doc.add(new Field("path", file.getAbsolutePath(), Store.YES,
                            Index.NOT_ANALYZED));
                    // 4. Add the document to the index.
                    writer.addDocument(doc);
                } finally {
                    // Release the file handle even when addDocument fails
                    // before the reader has been consumed.
                    content.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // writer/indexpath stay null when opening them failed, so guard
            // each close and keep one failure from skipping the other.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (indexpath != null) {
                try {
                    indexpath.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the "content" field of the index for the term "java" and prints
     * the stored "name" and "path" of up to ten matching documents to standard
     * output.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *                   string cannot be parsed
     */
    public void search() throws Exception {
        // 1. Open the directory the index was written to.
        Directory indexpath = FSDirectory.open(new File(INDEX_DIR));
        try {
            // 2. Open a reader on the index.
            IndexReader reader = IndexReader.open(indexpath);
            try {
                // 3. Create the searcher on top of the reader.
                IndexSearcher search = new IndexSearcher(reader);
                try {
                    // 4. Build the query: version, default field (the key used
                    //    when the field was indexed) and analyzer.
                    QueryParser parser = new QueryParser(Version.LUCENE_30, "content",
                            new StandardAnalyzer(Version.LUCENE_30));
                    Query query = parser.parse("java");

                    // 5. Run the query, keeping at most ten hits.
                    TopDocs docs = search.search(query, 10);

                    // 6. Each ScoreDoc carries the internal id of one hit.
                    for (ScoreDoc sd : docs.scoreDocs) {
                        // 7. Load the stored document by its id.
                        Document d = search.doc(sd.doc);
                        // 8. Read stored values by field key.
                        System.out.println(d.get("name"));
                        System.out.println(d.get("path"));
                    }
                } finally {
                    search.close();
                }
            } finally {
                // Closed explicitly: the searcher was handed this reader and is
                // not relied on to close it.
                reader.close();
            }
        } finally {
            indexpath.close();
        }
    }
}
测试代码:
package com.my.lucene.test; import org.junit.Test; import com.my.lucene.index.TestIndex; public class MainTest { @Test public void TestIndexJunit() throws Exception{ TestIndex indexs = new TestIndex(); //indexs.index(); indexs.search(); } }
时间: 2024-10-31 15:32:29