@tsihedeyo
2014-09-11T18:08:04.000000Z
字数 3502
阅读 2668
egeio
solr
schema.xml 字段配置:基本上所有字段都有可能用于索引,因此 indexed 一般设置为 true;stored 只有在需要向请求者返回原始内容时才设置为 true。此处的任何修改都需要重建索引。
<fields>
  <!-- Removing _version_ also requires disabling the update log in
       solrconfig.xml, otherwise Solr will not start. Both _version_ and the
       update log are required for SolrCloud. -->
  <field name="_version_" type="long" indexed="true" stored="true"/>

  <!-- Points at the root document of a block of nested documents. Needed for
       nested-document support; may be removed otherwise. -->
  <field name="_root_" type="string" indexed="true" stored="false"/>

  <!-- Keep "id" unless there is a very good reason not to. It is not strictly
       required but highly recommended: almost every Solr installation declares
       a <uniqueKey>, and the <uniqueKey> declaration of this schema is set to "id". -->
  <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

  <field name="item_type" type="string" indexed="true" stored="true"/>
  <field name="name" type="text_ik" indexed="true" stored="true"/>

  <!-- content and description are copied into "text", which is itself indexed
       and stored, so neither is indexed or stored on its own. -->
  <field name="content" type="text_ik" indexed="false" stored="false"/>
  <field name="description" type="text_ik" indexed="false" stored="false"/>

  <field name="user_name" type="text_ik" indexed="true" stored="true"/>
  <field name="size" type="int" indexed="true" stored="true"/>

  <!-- Used in fq to filter out files whose user_id is not the current user's id. -->
  <field name="user_id" type="int" indexed="true" stored="false"/>
  <field name="updater_id" type="int" indexed="true" stored="false"/>

  <!-- Used in fq to keep only the files that are available to the user. -->
  <field name="folder_id" type="int" indexed="true" stored="true" multiValued="true"/>

  <field name="extension" type="string" indexed="true" stored="true"/>
  <field name="deleted" type="date" indexed="true" stored="true"/>
  <field name="updated" type="date" indexed="true" stored="true"/>
  <field name="user_email" type="text_ik" indexed="true" stored="true"/>

  <!-- Holds the contents of description and content; full-text queries are
       essentially run against this field. -->
  <field name="text" type="text_ik" indexed="true" stored="true" multiValued="true"/>

  <!-- Catch-all: any field name not declared above is mapped to the "ignored" type. -->
  <dynamicField name="*" type="ignored" multiValued="true"/>
</fields>
<!-- The field Solr uses as the unique identifier of a document. -->
<uniqueKey>id</uniqueKey>

<!-- content and description are processed in essentially the same way, so
     both are funnelled into "text" and handled there together. -->
<!-- content -> text -->
<copyField source="content" dest="text"/>
<!-- description -> text -->
<copyField source="description" dest="text"/>
除了 Solr 的基本类型之外,这里还定义了 text_ik 字段类型,用于支持全文搜索。Solr 的基本类型只有在完全匹配时才会命中,其中数值类型还支持范围搜索。自定义类型 text_ik 的说明如下:
<fieldType name="text_ik" class="solr.TextField">
  <!-- Analysis chain applied when documents of this type are indexed. -->
  <analyzer type="index">
    <!-- IK tokenizer; its library files live under /solr-webapp/webapp/lib. -->
    <tokenizer class="org.wltea.analyzer.lucene.IKAnalyzerTokenizerFactory" useSmart="false"/>
    <!-- Stop-word removal. enablePositionIncrements is deliberately not set:
         it already defaults to true from luceneMatchVersion 4.4 on, and
         Lucene/Solr 5.0+ rejects the attribute as an invalid option. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en_ch.txt"/>
    <!-- Snowball (Porter) stemming: "dogs" becomes "dog", "helped" becomes "help". -->
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <!-- Edge n-grams: "enterprise" is indexed as e, en, ent, ... (up to 25 characters).
         minGramSize stays at 1 because, per the original author, IK always cuts
         Chinese names into single characters; with a minimum above 1 those names
         could never be found. -->
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="25"/>
    <!-- Removes duplicate tokens. Original author's note: no visible effect observed in practice. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- Analysis chain applied to query input for this type. It declares no
       stop-word filter and no n-gram filter. -->
  <analyzer type="query">
    <tokenizer class="org.wltea.analyzer.lucene.IKAnalyzerTokenizerFactory" useSmart="false"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!-- Synonyms are looked up on the already-stemmed tokens. -->
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
  </analyzer>
</fieldType>