一、首先来综述一下Lucene自定义评分的步骤:
1、创建一个评分域
// Build a score query over the "score" field, read as INT values.
FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);
2、根据评分域和原有的query创建自定义的query对象
// Combine the original query q with the score-field query fd in the custom query.
MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);
@SuppressWarnings("serial") private class MyCustomScoreQuery extends CustomScoreQuery { public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) { super(subQuery, valSrcQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终评分的 //为了根据不同的需求进行评分,需要自己进行评分的设定 /** * 自定评分的步骤 * 创建一个类继承于CustomScoreProvider * 覆盖customScore方法 */ //return super.getCustomScoreProvider(reader); return new MyCustomScoreProvider(reader); } }
3、创建一个类继承于CustomScoreProvider,覆盖customScore方法
private class MyCustomScoreProvider extends CustomScoreProvider { public MyCustomScoreProvider(IndexReader reader) { super(reader); } /** * subQueryScore表示默认文档的打分 * valSrcScore表示评分域的打分 */ @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //return super.customScore(doc, subQueryScore, valSrcScore); return subQueryScore/valSrcScore; } }
二、根据域进行自定义评分设定
1、根据文件后缀名进行自定义评分
private class FilenameScoreQuery extends CustomScoreQuery { public FilenameScoreQuery(Query subQuery) { super(subQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //return super.getCustomScoreProvider(reader); return new FilenameScoreProvider(reader); } } private class FilenameScoreProvider extends CustomScoreProvider { String [] filenames = null; public FilenameScoreProvider(IndexReader reader) { super(reader); try { filenames = FieldCache.DEFAULT.getStrings(reader, "filename"); } catch (IOException e) { e.printStackTrace(); } } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //如何根据doc获取相应的field的值 /** * 在reader没有关闭之前,所有的数据会存储在一个缓存域中,可以通过缓存获取很多有用的信息 * filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取所有的filename域的信息 */ String filename = filenames[doc]; if(filename.endsWith(".txt")||filename.endsWith(".ini")) { return subQueryScore*1.5f; } //return super.customScore(doc, subQueryScore, valSrcScore); return subQueryScore/1.5f; } }
2、根据日期进行自定义评分
private class DateScoreProvider extends CustomScoreProvider { long[] dates = null; public DateScoreProvider(IndexReader reader) { super(reader); try { dates = FieldCache.DEFAULT.getLongs(reader, "date"); } catch (IOException e) { e.printStackTrace(); } } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { long date = dates[doc]; long today = new Date().getTime(); long year = 1000*60*60*365; if(today - date <= year) { //为其加分 } return super.customScore(doc, subQueryScore, valSrcScore); } }
Lucene实现自定义评分的关键思想:
向IndexSearcher.search传入一个CustomScoreQuery的子类,该子类要覆盖getCustomScoreProvider方法,并返回一个CustomScoreProvider对象;
再用内部类(也可以用匿名内部类)的方式写一个CustomScoreProvider,覆盖customScore方法。这个方法有3个参数:第一个参数代表文档id,第二个参数代表原来的评分,最后一个代表我们设置的评分域的评分。这样我们就可以定义自己的一套评分算法,为我们的搜索制定评分了。
完整代码如下:
1、工具类:
package com.dhb.util; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Random; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; public class FileIndexUtils { private static Directory directory = null; static { try { directory = FSDirectory.open(new File("D:/luceneData/files/")); } catch (IOException e) { e.printStackTrace(); } } public static Directory getDirectory() { return directory; } public static void index(boolean hasNew) { IndexWriter writer = null; try { IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)); writer = new IndexWriter(directory, iwc); //是否新建索引 if(hasNew) { writer.deleteAll(); } Document doc = null; File f = new File("D:/luceneData/example"); Random rand = new Random(); int index = 0; for (File file : f.listFiles()) { int score = rand.nextInt(600); //测试自定义评分用的 doc = new Document(); //测试自定义Filter用的 doc.add(new Field("id", String.valueOf(index++), Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("content", new FileReader(file))); doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified())); doc.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length()))); doc.add(new NumericField("score", Field.Store.YES, 
true).setIntValue(score)); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if(writer!=null) try { writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } }
注意:运行下面的搜索代码之前,需要先调用FileIndexUtils.index(true)生成索引;本文的调用代码放在别处,这里没有贴出。
2、自定义类
package com.dhb.util; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.function.CustomScoreProvider; import org.apache.lucene.search.function.CustomScoreQuery; import org.apache.lucene.search.function.FieldScoreQuery; import org.apache.lucene.search.function.FieldScoreQuery.Type; import org.apache.lucene.search.function.ValueSourceQuery; public class MyScoreQuery { public void searchByScoreQuery() { try { IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory())); Query q = new TermQuery(new Term("content", "java")); //1、创建一个评分域 FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT); //2、根据评分域和原有的query创建自定义的query对象 MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd); TopDocs tds = null; tds = searcher.search(query, 100); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); for (ScoreDoc sd : tds.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(sd.doc + ":(" + sd.score + ")[" + d.get("filename") + "【" + d.get("path") + "】---" + d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score")); } searcher.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void searchByFileScoreQuery() { try { IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory())); Query q = new TermQuery(new Term("content", "java")); //1、创建一个评分域 //FieldScoreQuery fd = new FieldScoreQuery("score", 
Type.INT); FilenameScoreQuery query = new FilenameScoreQuery(q); //2、根据评分域和原有的query创建自定义的query对象 //MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd); TopDocs tds = null; tds = searcher.search(query, 100); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); for (ScoreDoc sd : tds.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(sd.doc + ":(" + sd.score + ")[" + d.get("filename") + "【" + d.get("path") + "】---" + d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score")); } searcher.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @SuppressWarnings("serial") private class MyCustomScoreQuery extends CustomScoreQuery { public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) { super(subQuery, valSrcQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终评分的 //为了根据不同的需求进行评分,需要自己进行评分的设定 /** * 自定评分的步骤 * 创建一个类继承于CustomScoreProvider * 覆盖customScore方法 */ //return super.getCustomScoreProvider(reader); return new MyCustomScoreProvider(reader); } } private class MyCustomScoreProvider extends CustomScoreProvider { public MyCustomScoreProvider(IndexReader reader) { super(reader); } /** * subQueryScore表示默认文档的打分 * valSrcScore表示评分域的打分 */ @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //return super.customScore(doc, subQueryScore, valSrcScore); return subQueryScore/valSrcScore; } } @SuppressWarnings("serial") private class FilenameScoreQuery extends CustomScoreQuery { public FilenameScoreQuery(Query subQuery) { super(subQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //return super.getCustomScoreProvider(reader); return new FilenameScoreProvider(reader); } } private class FilenameScoreProvider extends 
CustomScoreProvider { String [] filenames = null; public FilenameScoreProvider(IndexReader reader) { super(reader); try { filenames = FieldCache.DEFAULT.getStrings(reader, "filename"); } catch (IOException e) { e.printStackTrace(); } } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { //如何根据doc获取相应的field的值 /** * 在reader没有关闭之前,所有的数据会存储在一个缓存域中,可以通过缓存获取很多有用的信息 * filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取所有的filename域的信息 */ String filename = filenames[doc]; if(filename.endsWith(".txt")||filename.endsWith(".ini")) { return subQueryScore*1.5f; } //return super.customScore(doc, subQueryScore, valSrcScore); return subQueryScore/1.5f; } } @SuppressWarnings("unused") private class DateScoreProvider extends CustomScoreProvider { long[] dates = null; public DateScoreProvider(IndexReader reader) { super(reader); try { dates = FieldCache.DEFAULT.getLongs(reader, "date"); } catch (IOException e) { e.printStackTrace(); } } @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { long date = dates[doc]; long today = new Date().getTime(); long year = 1000*60*60*365; if(today - date <= year) { //为其加分 } return super.customScore(doc, subQueryScore, valSrcScore); } } }
3、测试类
package com.dhb.test;

import org.junit.Test;

import com.dhb.util.MyScoreQuery;

/**
 * JUnit entry points that run the custom-scoring searches of {@link MyScoreQuery}.
 */
public class TestCustomScore {

    /** Runs the search that is scored through the score field. */
    @Test
    public void test01() {
        new MyScoreQuery().searchByScoreQuery();
    }

    /** Runs the search that is scored by file name. */
    @Test
    public void test02() {
        new MyScoreQuery().searchByFileScoreQuery();
    }
}
时间: 2024-10-09 14:49:06