搜索可分为如下几步:
- 创建Directory
- 创建IndexReader
- 根据IndexReader创建IndexSearcher
- 创建搜索的Query
- 根据searcher搜索并且返回TopDocs
- 根据TopDocs获取ScoreDoc对象
- 根据searcher和ScoreDoc对象获取具体的Document对象
- 根据Document对象获取需要的值
下面是例子代码:
3.5版本:
3.5版本比较简单,只需要Lucene核心包lucene-core即可,pom文件如下所示:
<!-- Maven build for the Lucene 3.5 hello-world example: only lucene-core (plus JUnit for tests) is required. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene35.helloworld</groupId>
    <artifactId>lucene35_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene35_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single place to change the Lucene version used by the dependency below. -->
        <lucene.version>3.5.0</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:
package com.darren.lucene35; import java.io.File; import java.io.FileReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class HelloLucene { /** * 搜索 */ public void search() { IndexReader indexReader = null; try { // 1、创建Directory Directory directory = FSDirectory.open(new File("F:/test/lucene/index")); // 2、创建IndexReader indexReader = IndexReader.open(directory); // 3、根据IndexReader创建IndexSearch IndexSearcher indexSearcher = new IndexSearcher(indexReader); // 4、创建搜索的Query // 使用默认的标准分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); // 在content中搜索Darren // 创建parser来确定要搜索文件的内容,第二个参数为搜索的域 QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content", analyzer); // 创建Query表示搜索域为content包含Darren的文档 Query query = queryParser.parse("Darren"); // 5、根据searcher搜索并且返回TopDocs TopDocs topDocs = indexSearcher.search(query, 10); // 6、根据TopDocs获取ScoreDoc对象 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { // 7、根据searcher和ScoreDoc对象获取具体的Document对象 Document document = indexSearcher.doc(scoreDoc.doc); // 8、根据Document对象获取需要的值 System.out.println(document.get("filename") + " " + document.get("filepath")); } } catch (Exception e) { e.printStackTrace(); } finally { try { if (indexReader != null) { indexReader.clone(); } } catch (Exception e) { e.printStackTrace(); } } } }
4.5版本:
4.5版本需要Lucene核心包lucene-core、分词包lucene-analyzers-common和查询包lucene-queryparser,因为从4.0版本之后分词包和查询包都从核心包中分离出来了,pom文件如下所示:
<!-- Maven build for the Lucene 4.5 hello-world example: core, analyzers-common and queryparser are separate artifacts. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene45.helloworld</groupId>
    <artifactId>lucene45_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene45_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single place to change the Lucene version shared by all Lucene artifacts below. -->
        <lucene.version>4.5.1</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:
package com.darren.lucene45; import java.io.File; import java.io.FileReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class HelloLucene { /** * 搜索 */ public void search() { DirectoryReader directoryReader = null; try { // 1、创建Directory Directory directory = FSDirectory.open(new File("F:/test/lucene/index")); // 2、创建IndexReader /** * 注意Reader与3.5版本不同: * * 所以使用DirectoryReader * * @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return * DirectoryReader.open(directory); } */ // 如下方法过时 // IndexReader indexReader = IndexReader.open(directory); directoryReader = DirectoryReader.open(directory); // 3、根据IndexReader创建IndexSearch IndexSearcher indexSearcher = new IndexSearcher(directoryReader); // 4、创建搜索的Query Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45); /** * 注意与3.5版本不同: * * 需要引入lucene-queryparser包,因为从4.0版本后lucene-queryparser包从核心包分离 */ // 创建parser来确定要搜索文件的内容,第二个参数为搜索的域 QueryParser queryParser = new QueryParser(Version.LUCENE_45, "content", analyzer); // 创建Query表示搜索域为content包含Darren的文档 Query query = queryParser.parse("Darren"); // 5、根据searcher搜索并且返回TopDocs TopDocs topDocs = indexSearcher.search(query, 10); // 6、根据TopDocs获取ScoreDoc对象 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { // 
7、根据searcher和ScoreDoc对象获取具体的Document对象 Document document = directoryReader.document(scoreDoc.doc); // 8、根据Document对象获取需要的值 System.out.println(document.get("filename") + " " + document.get("filepath")); } } catch (Exception e) { e.printStackTrace(); } finally { try { if (directoryReader != null) { directoryReader.close(); } } catch (Exception e) { e.printStackTrace(); } } } }
5.0版本:
5.0版本所需的依赖包和4.5版本一样,pom文件如下所示:
<!-- Maven build for the Lucene 5.0 hello-world example: same artifacts as the 4.5 build, newer version. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.darren.lucene50.helloworld</groupId>
    <artifactId>lucene50_helloworld</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene50_helloworld</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single place to change the Lucene version shared by all Lucene artifacts below. -->
        <lucene.version>5.0.0</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>
例子代码如下:
package com.darren.lucene50; import java.io.File; import java.io.FileReader; import java.nio.file.FileSystems; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class HelloLucene { /** * 搜索 */ public void search() { DirectoryReader directoryReader = null; try { // 1、创建Directory Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("F:/test/lucene/index")); // 2、创建IndexReader /** * 注意Reader与3.5版本不同: * * 所以使用DirectoryReader * * @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return * DirectoryReader.open(directory); } * * 但是和4.5版本相同 */ // 如下方法过时 // IndexReader indexReader = IndexReader.open(directory); directoryReader = DirectoryReader.open(directory); // 3、根据IndexReader创建IndexSearch IndexSearcher indexSearcher = new IndexSearcher(directoryReader); // 4、创建搜索的Query /** * 注意StandardAnalyzer与3.5版本4.5版本不同: * * 不需要版本号 */ Analyzer analyzer = new StandardAnalyzer(); // 创建parser来确定要搜索文件的内容,第一个参数为搜索的域 /** * 注意QueryParser与3.5版本4.5版本不同: * * 不需要版本号 */ QueryParser queryParser = new QueryParser("content", analyzer); // 创建Query表示搜索域为content包含Darren的文档 Query query = queryParser.parse("Darren"); // 5、根据searcher搜索并且返回TopDocs TopDocs topDocs = indexSearcher.search(query, 10); // 6、根据TopDocs获取ScoreDoc对象 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { // 
7、根据searcher和ScoreDoc对象获取具体的Document对象 Document document = indexSearcher.doc(scoreDoc.doc); // 8、根据Document对象获取需要的值 System.out.println(document.get("filename") + " " + document.get("filepath")); } } catch (Exception e) { e.printStackTrace(); } finally { try { if (directoryReader != null) { directoryReader.close(); } } catch (Exception e) { e.printStackTrace(); } } } }
测试代码:
package com.darren.lucene50;

import org.junit.Test;

/**
 * JUnit driver that runs the {@link HelloLucene} search example.
 */
public class HelloLuceneTest {

    /**
     * Invokes {@link HelloLucene#search()} on a fresh instance; the test makes
     * no assertions and only exercises the call.
     */
    @Test
    public void testSearch() {
        new HelloLucene().search();
    }
}
时间: 2024-11-03 21:09:01