DRYな備忘録

Don't Repeat Yourself.

Elasticsearchのkuromojiでワロタ

"吾輩は猫である"

curl "http://localhost:9200/my_index/_analyze?pretty=true&field=description&analyzer=kuromoji" -d "吾輩は猫である。"
{
  "tokens" : [ {
    "token" : "吾輩",
    "start_offset" : 0,
    "end_offset" : 2,
    "type" : "word",
    "position" : 1
  }, {
    "token" : "猫",
    "start_offset" : 3,
    "end_offset" : 4,
    "type" : "word",
    "position" : 3
  } ]
}

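正しくは(analyzer=kuromoji だと「は」「で」「ある」が除去されて「吾輩」「猫」しか残らないので、分かち書きの結果そのものを確認したい場合は tokenizer=kuromoji_tokenizer を指定する)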

curl "http://localhost:9200/my_index/_analyze?pretty=true&field=description&tokenizer=kuromoji_tokenizer" -d "吾輩は猫である。"
{
  "tokens" : [ {
    "token" : "吾輩",
    "start_offset" : 0,
    "end_offset" : 2,
    "type" : "word",
    "position" : 1
  }, {
    "token" : "は",
    "start_offset" : 2,
    "end_offset" : 3,
    "type" : "word",
    "position" : 2
  }, {
    "token" : "猫",
    "start_offset" : 3,
    "end_offset" : 4,
    "type" : "word",
    "position" : 3
  }, {
    "token" : "で",
    "start_offset" : 4,
    "end_offset" : 5,
    "type" : "word",
    "position" : 4
  }, {
    "token" : "ある",
    "start_offset" : 5,
    "end_offset" : 7,
    "type" : "word",
    "position" : 5
  } ]
}