检索结果高亮对于用户体验和界面友好度非常重要,可以快速标记出用户检索的关键词。本例中的索引仍使用上一篇博客(Lucene查询索引)中创建的索引,高亮代码参考了《Lucene4.x高亮 fast高亮 前端高亮》一文。
实现效果:
核心代码
package ucas.ir.lucene;
import java.io.File;
import java.io.IOException;
import javax.print.Doc;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class IndexSearch {
public static void main(String[] args) {
Directory directory = null;
try {
File indexpath = new File("/Users/yp/Documents/workspace/UCASIR/WebContent/index");
if (indexpath.exists() != true) {
indexpath.mkdirs();
}
// 设置要查询的索引目录
directory = FSDirectory.open(indexpath);
// 创建indexSearcher
DirectoryReader dReader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(dReader);
// 设置分词方式
Analyzer analyze2 = new StandardAnalyzer(Version.LUCENE_43);// 标准分词
Analyzer analyzer = new IKAnalyzer();
// 设置查询域
String field="news_title";
QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer);
// 查询字符串
Query query = parser.parse("阿法狗");
QueryScorer scorer=new QueryScorer(query,field);
SimpleHTMLFormatter fors=new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
Highlighter highlighter=new Highlighter(fors, scorer);
System.out.println("query:" + query.toString());
// 返回前10条
TopDocs topDocs = searcher.search(query, 10);
if (topDocs != null) {
System.out.println("符合条件第文档总数:" + topDocs.totalHits);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
TokenStream tokenStream=TokenSources.getAnyTokenStream(searcher.getIndexReader(), topDocs.scoreDocs[i].doc, field, analyzer);
Fragmenter fragment=new SimpleSpanFragmenter(scorer);
highlighter.setTextFragmenter(fragment);
//高亮news_title域
String str=highlighter.getBestFragment(tokenStream, doc.get("news_title"));//获取高亮的片段,可以对其数量进行限制
System.out.println("高亮title:"+str);
tokenStream=TokenSources.getAnyTokenStream(searcher.getIndexReader(), topDocs.scoreDocs[i].doc, "news_summary", analyzer);
str=highlighter.getBestFragment(tokenStream, doc.get("news_summary"));//获取高亮的片段,可以对其数量进行限制
System.out.println("高亮summary:"+str);
}
}
directory.close();
dReader.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
结果:
时间: 2024-10-02 09:28:41