Kouhei Sutou
null+****@clear*****
Thu Sep 20 17:15:55 JST 2018
Kouhei Sutou 2018-09-20 17:15:55 +0900 (Thu, 20 Sep 2018) Revision: 95f8fd3600bc16c640808bb30c9dd49cae23ded1 https://github.com/groonga/groonga/commit/95f8fd3600bc16c640808bb30c9dd49cae23ded1 Message: TokenNgram: add unify_digit option TokenNgram("unify_digit", false) == TokenBigramSplitDigit (not exist ;p) Added files: test/command/suite/tokenizers/ngram/unify_digit.expected test/command/suite/tokenizers/ngram/unify_digit.test Modified files: lib/tokenizers.c Modified: lib/tokenizers.c (+6 -0) =================================================================== --- lib/tokenizers.c 2018-09-20 17:13:51 +0900 (473e05d7c) +++ lib/tokenizers.c 2018-09-20 17:15:55 +0900 (e988cff7f) @@ -736,6 +736,12 @@ ngram_open_options(grn_ctx *ctx, raw_options, i, options->unify_alphabet); + } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_digit")) { + options->unify_digit = + grn_vector_get_element_bool(ctx, + raw_options, + i, + options->unify_digit); } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_symbol")) { options->unify_symbol = grn_vector_get_element_bool(ctx, Added: test/command/suite/tokenizers/ngram/unify_digit.expected (+40 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/ngram/unify_digit.expected 2018-09-20 17:15:55 +0900 (02889dc3f) @@ -0,0 +1,40 @@ +tokenize 'TokenNgram("unify_digit", false)' "012345" NormalizerAuto +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "01", + "position": 0, + "force_prefix": false + }, + { + "value": "12", + "position": 1, + "force_prefix": false + }, + { + "value": "23", + "position": 2, + "force_prefix": false + }, + { + "value": "34", + "position": 3, + "force_prefix": false + }, + { + "value": "45", + "position": 4, + "force_prefix": false + }, + { + "value": "5", + "position": 5, + "force_prefix": false + } + ] +] Added: test/command/suite/tokenizers/ngram/unify_digit.test (+4 -0) 100644 
=================================================================== --- /dev/null +++ test/command/suite/tokenizers/ngram/unify_digit.test 2018-09-20 17:15:55 +0900 (777b6f8d4) @@ -0,0 +1,4 @@ +tokenize \ + 'TokenNgram("unify_digit", false)' \ + "012345" \ + NormalizerAuto -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180920/f25b3b07/attachment-0001.htm