白编辑了....cnblogs怎么也没给我保存个 草稿....化繁为简,你照着我的做就ok了
依赖的jar包记得拷过去:到E:\solrbase\dist和E:\solrbase\lib\ext下面找,然后拷贝到你的tomcat的lib下
总共编辑了3个文件,这三个都在你Solr示例的\solr\collection1\conf下:
solrconfig.xml
schema.xml
xml-data-config.xml
下面贴文件内容了
schema.xml,定义你导入的业务数据的结构,类似数据库的表结构
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Index schema for the imported records: declares the document fields, the
  unique key, the catch-all copyField and the field types the fields refer to.
-->
<schema name="example" version="1.5">
  <fields>
    <!-- Unique key of each document (see uniqueKey below). -->
    <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
    <field name="title" type="text_general" indexed="true" stored="true" />
    <!-- image and value are stored for retrieval only; they are not indexed. -->
    <field name="image" type="string" indexed="false" stored="true" />
    <field name="value" type="double" indexed="false" stored="true" />
    <field name="price" type="double" indexed="true" stored="true" />
    <field name="rebate" type="double" indexed="true" stored="true" />
    <field name="bought" type="long" indexed="true" stored="true" />
    <field name="city" type="string" indexed="true" stored="true" />
    <field name="sort" type="string" indexed="true" stored="true" />
    <field name="loc" type="string" indexed="true" stored="true" />
    <field name="startTime" type="date" indexed="true" stored="true" />
    <field name="endTime" type="date" indexed="true" stored="true" />
    <!-- catchall field, containing all other searchable text fields
         (implemented via copyField further on in this schema) -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />
    <field name="_version_" type="long" indexed="true" stored="true" />
  </fields>

  <uniqueKey>id</uniqueKey>

  <!-- Feeds the catch-all "text" field from "title". -->
  <copyField source="title" dest="text" />

  <types>
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />

    <!-- Trie numeric types with precisionStep="0". -->
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />

    <!-- Trie numeric types with precisionStep="8". -->
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" />
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" />
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />

    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" />

    <!-- Element name spelled fieldType here, matching every sibling declaration. -->
    <fieldType name="binary" class="solr.BinaryField" />

    <fieldType name="pint" class="solr.IntField" />
    <fieldType name="plong" class="solr.LongField" />
    <fieldType name="pfloat" class="solr.FloatField" />
    <fieldType name="pdouble" class="solr.DoubleField" />
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" />

    <fieldType name="random" class="solr.RandomSortField" indexed="true" />

    <!-- Text split on whitespace only. -->
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
      </analyzer>
    </fieldType>

    <!-- General text: standard tokenizer plus lower-casing, with the same
         chain declared for index time and query time. -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>

    <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory" />
        <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
        <filter class="solr.CJKWidthFilterFactory" />
        <!-- for any non-CJK -->
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.CJKBigramFilterFactory" />
      </analyzer>
    </fieldType>
  </types>
</schema>
solrconfig.xml,添加一个 name="/dataimport" 的 requestHandler:
<!-- Registers the DataImportHandler at /dataimport; by default it reads its
     import definition from xml-data-config.xml. -->
<requestHandler name="/dataimport"
                class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">xml-data-config.xml</str>
  </lst>
</requestHandler>
xml-data-config.xml,在Solr和xml数据之间搭建桥梁的配置文件
<!--
  DataImportHandler configuration: reads D:/meituan_hao123.xml, emits one
  document per /urlset/url element and maps its children onto index fields.
-->
<dataConfig>
  <!--
    Row transformer referenced by the entity below as script:ReplaceLocAddId.
    String literals use plain ASCII apostrophes; typographic quotes
    (U+2018/U+2019) are not valid JavaScript string delimiters.
  -->
  <script><![CDATA[
    function ReplaceLocAddId(row) {
      // Derive the document id from loc: the text between '/deal/' and '.html'.
      var loc_1 = row.get('loc').split('/deal/');
      var loc_2 = loc_1[1].split('.html');
      var id = loc_2[0];
      row.put('id', id);

      // NOTE(review): com.demo.tuan.DateUtils is a project class not shown here;
      // presumably it normalises the value to the 'yyyy-MM-dd HH:mm:ss' pattern
      // that DateFormatTransformer parses afterwards. Confirm against its source.
      row.put('startTime', com.demo.tuan.DateUtils.format(row.get('startTime')));
      row.put('endTime', com.demo.tuan.DateUtils.format(row.get('endTime')));

      // Strip the discount suffix character so only the number remains.
      row.put('rebate', row.get('rebate').replace('折', ''));
      return row;
    }
  ]]></script>

  <dataSource type="FileDataSource" encoding="UTF-8" />

  <document>
    <!-- Transformers run in the listed order: the script first, then
         DateFormatTransformer for the fields carrying dateTimeFormat. -->
    <entity name="collection1"
            pk="loc"
            url="D:/meituan_hao123.xml"
            processor="XPathEntityProcessor"
            forEach="/urlset/url"
            transformer="script:ReplaceLocAddId,DateFormatTransformer">
      <field column="loc" xpath="/urlset/url/loc" commonField="true" />
      <field column="city" xpath="/urlset/url/data/display/city" commonField="true" />
      <field column="sort" xpath="/urlset/url/data/display/sort" commonField="true" />
      <field column="title" xpath="/urlset/url/data/display/title" commonField="true" />
      <field column="image" xpath="/urlset/url/data/display/image" commonField="true" />
      <field column="value" xpath="/urlset/url/data/display/value" commonField="true" />
      <field column="price" xpath="/urlset/url/data/display/price" commonField="true" />
      <field column="rebate" xpath="/urlset/url/data/display/rebate" commonField="true" />
      <field column="bought" xpath="/urlset/url/data/display/bought" commonField="true" />
      <field column="startTime" xpath="/urlset/url/data/display/startTime"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />
      <field column="endTime" xpath="/urlset/url/data/display/endTime"
             dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />
    </entity>
  </document>
</dataConfig>
-----OVER------
时间: 2024-10-06 13:32:33