Home | 简体中文 | 繁体中文 | 杂文 | 打赏(Donations) | ITEYE 博客 | OSChina 博客 | Facebook | Linkedin | 知乎专栏 | Search | Email

22.7. 中文分词插件管理

22.7.1. 通过 elasticsearch-plugin 命令安装分词插件

root@netkiller ~ % /usr/share/elasticsearch/bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.1/elasticsearch-analysis-ik-5.5.1.zip
-> Downloading https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.1/elasticsearch-analysis-ik-5.5.1.zip
[=================================================] 100%   
-> Installed analysis-ik
			

创建 mapping

root@netkiller ~ % curl -XPUT http://localhost:9200/information
			
root@netkiller ~ % curl -XPOST http://localhost:9200/information/article/_mapping -d'
{
        "properties": {
            "content": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word"
            },
            "title": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word"
            }
        }
}'

root@netkiller ~ % curl "http://localhost:9200/information/article/_mapping?pretty"
{
  "information" : {
    "mappings" : {
      "article" : {
        "properties" : {
          "content" : {
            "type" : "text",
            "analyzer" : "ik_max_word"
          },
          "title" : {
            "type" : "text",
            "analyzer" : "ik_max_word"
          }
        }
      }
    }
  }
}

			

22.7.2. 手工安装插件

curl -s https://raw.githubusercontent.com/oscm/shell/master/search/elasticsearch/elasticsearch-analysis-ik-5.5.0.sh | bash
			

22.7.3. 创建索引

curl -XPUT http://localhost:9200/information
			

22.7.4. 删除索引

如果索引已经存在请删除后重新创建索引

curl -XDELETE http://localhost:9200/information/news/_mapping?pretty
curl -XDELETE http://localhost:9200/information/?pretty			
			

22.7.5. 配置索引分词插件

			

curl -XPOST http://localhost:9200/information/news/_mapping?pretty -d'
{
    "news": {
            "_all": {
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_max_word",
            "term_vector": "no",
            "store": "false"
        },
        "properties": {
            "content": {
                "type": "text",
                "store": "no",
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "include_in_all": "true",
                "boost": 8
            }
        }
    }
}'
			
			

22.7.5.1. 测试分词效果

				
curl -XPOST http://localhost:9200/information/news/ -d'
{"title": "越南胡志明游记·教堂·管风琴的天籁之音","content":"这是我平生第一次去教堂,也是第一次完整的参加宗教仪式。当我驻足教堂外的时候,耳边传来天籁之音,是管风琴,确切的说是电子风琴。真正的管风琴造价昂贵,管风琴通常需要根据教堂尺寸定制,无法量产。我记得中国只有4座管风琴,深圳音乐厅有一座。"}
'
curl -XPOST http://localhost:9200/information/news/ -d'
{"title": "越南胡志明游记·信仰·法事","content":"佛经的形成过程是与佛教的发展相始终的,按照佛教发展的时间顺序,最早形成的是小乘佛教三藏,之后形成的是大乘佛教三藏,最后形成的是密宗三藏。"}
'

curl -XPOST http://localhost:9200/information/news/_search  -d'
{
    "query" : { "term" : { "content" : "佛经" }},
    "highlight" : {
        "pre_tags" : ["<strong>", "<strong>"],
        "post_tags" : ["</strong>", "</strong>"],
        "fields" : {
            "content" : {}
        }
    }
}'		

curl -XPOST http://localhost:9200/information/news/_search  -d'
{
    "query" : { "term" : { "content" : "中国" }},
    "highlight" : {
        "pre_tags" : ["<b>", "<i>"],
        "post_tags" : ["</b>", "</i>"],
        "fields" : {
            "content" : {}
        }
    }
}'