package unitTwelve;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**
 * Analyzer tuned for Twitter text using the Double Metaphone function.
 *
 * <p>Double Metaphone produces the same key for words that sound alike, so
 * differently spelled but similar-sounding words end up as the same token.
 *
 * @author YangXin
 */
public class TwitterAnalyzer extends Analyzer {

  /** Phonetic encoder applied to every stemmed token. */
  private final DoubleMetaphone filter = new DoubleMetaphone();

  /**
   * Builds a token stream of phonetic keys for the given text.
   *
   * <p>The input is tokenized with {@link StandardTokenizer}, stop words from
   * {@link StandardAnalyzer#STOP_WORDS_SET} are removed, the remaining tokens are
   * Porter-stemmed, and each stem is replaced by its Double Metaphone code. The
   * whole input is consumed eagerly inside this method; the codes are joined with
   * single spaces and re-tokenized on whitespace for the caller.
   *
   * <p>If reading the input fails, the stack trace is printed and the codes
   * collected up to that point are returned.
   *
   * @param fieldName name of the field being analyzed (not used by this analyzer)
   * @param reader source of the text to analyze
   * @return a whitespace-tokenized stream of Double Metaphone codes
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream result = new PorterStemFilter(
        new StopFilter(
            true,
            new StandardTokenizer(Version.LUCENE_CURRENT, reader),
            StandardAnalyzer.STOP_WORDS_SET));
    TermAttribute termAtt = result.addAttribute(TermAttribute.class);

    StringBuilder buf = new StringBuilder();
    try {
      while (result.incrementToken()) {
        // term() already yields the current token text as a String.
        String word = termAtt.term();
        // Encode once, then append the separator to the builder (not to the code).
        buf.append(filter.encode(word)).append(" ");
      }
    } catch (IOException e) {
      // Best effort: keep whatever was encoded before the failure.
      e.printStackTrace();
    } finally {
      // The caller only ever sees the stream returned below, so the upstream
      // chain (and the reader it wraps) has to be released here.
      try {
        result.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    return new WhitespaceTokenizer(new StringReader(buf.toString()));
  }
}
// Time: 2024-11-05 06:21:03