package junitTest; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; public class LuenceDemo { public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); // Store theindex in memory: // 索引存到内存中的目录 Directory directory = new RAMDirectory(); // To store anindex on disk, use this instead: // Directorydirectory = FSDirectory.open("/tmp/testindex"); // 配置索引 IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); IndexWriter iwriter = new IndexWriter(directory, config); // 这里,将5篇文档filedname信息和content信息存入索引 Document doc[] = new Document[6]; for (int i = 0; i < 6; i++) { doc[i] = new Document(); } String[] text = { "中华人民共和国中央人民政府", "中国是个伟大的国家", "我出生在美丽的中国,我爱中国,中国", "中华美丽的中国爱你", "美国跟中国式的国家", "卧槽,你是中国的" }; 
doc[0].add(new Field("fieldname", text[0], TextField.TYPE_STORED)); //doc[0].add(new Field("content", text[5], TextField.TYPE_STORED)); doc[1].add(new Field("fieldname", text[1], TextField.TYPE_STORED)); doc[2].add(new Field("fieldname", text[2], TextField.TYPE_STORED)); doc[3].add(new Field("fieldname", text[3], TextField.TYPE_STORED)); doc[4].add(new Field("fieldname", text[4], TextField.TYPE_STORED)); doc[5].add(new Field("fieldname", text[5], TextField.TYPE_STORED)); iwriter.addDocument(doc[0]); iwriter.addDocument(doc[1]); iwriter.addDocument(doc[2]); iwriter.addDocument(doc[3]); iwriter.addDocument(doc[4]); iwriter.addDocument(doc[5]); iwriter.close(); // Now searchthe index: // 索引构建完毕,准备搜索。 // 设定搜索目录 DirectoryReader ireader = DirectoryReader.open(directory); IndexSearcher isearcher = new IndexSearcher(ireader); // Parse asimple query that searches for "text": // QueryParserparser = new QueryParser(Version.LUCENE_CURRENT, // "fieldname",analyzer); // 使用同样的方式对多field进行搜索 String[] multiFields = { "fieldname", "content" }; MultiFieldQueryParser parser = new MultiFieldQueryParser( Version.LUCENE_4_9, multiFields, analyzer); // 设定具体的搜索词 Query query = parser.parse("卧槽,中国"); TopDocs docs =isearcher.search(query,null, 10);//查找 ScoreDoc[] hits = docs.scoreDocs; SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style=‘color:green‘>", "</span>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); //高亮htmlFormatter对象 //设置高亮附近的字数 highlighter.setTextFragmenter(new SimpleFragmenter(100)); // assertEquals(1, hits.length); System.out.println("Searched " + hits.length + " documents."); // Iteratethrough the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = isearcher.doc(hits[i].doc); String[] scoreExplain = null; // scoreExplain可以显示文档的得分详情,这里用split截取总分 scoreExplain = isearcher.explain(query, hits[i].doc).toString() .split(" ", 2); String scores = scoreExplain[0]; // assertEquals("Thisis the text to be 
indexed.", // hitDoc.get("fieldname")); System.out.println("score:"+scores); String value =hitDoc.get("fieldname"); ; TokenStream tokenStream = analyzer.tokenStream(value, new StringReader(value)); String str1 = highlighter.getBestFragment(tokenStream, value); System.out.println(str1); } ireader.close(); directory.close(); } }
项目中需要引入 Lucene 4.9 版本的 jar 包。
Lucene建立索引然后搜索的小Demo
时间: 2024-11-04 09:22:54