Kouhei Sutou
null+****@clear*****
Fri Apr 6 15:55:03 JST 2018
Kouhei Sutou 2018-04-06 15:55:03 +0900 (Fri, 06 Apr 2018) New Revision: 294e0a4f5321cefc31be68d99f5c4a8f70304f7e https://github.com/groonga/groonga/commit/294e0a4f5321cefc31be68d99f5c4a8f70304f7e Message: TokenNgram: add "remove_blank" option Added files: test/command/suite/tokenizers/ngram/remove_blank.expected test/command/suite/tokenizers/ngram/remove_blank.test Modified files: lib/tokenizers.c Modified: lib/tokenizers.c (+8 -1) =================================================================== --- lib/tokenizers.c 2018-04-06 15:30:48 +0900 (998b3ffca) +++ lib/tokenizers.c 2018-04-06 15:55:03 +0900 (405ffba5a) @@ -246,6 +246,7 @@ typedef struct { grn_bool uni_digit; grn_bool uni_symbol; grn_bool ignore_blank; + grn_bool remove_blank; grn_bool loose_symbol; } grn_ngram_options; @@ -278,6 +279,7 @@ ngram_options_init(grn_ngram_options *options, uint8_t unit) options->uni_digit = GRN_TRUE; options->uni_symbol = GRN_TRUE; options->ignore_blank = GRN_FALSE; + options->remove_blank = grn_ngram_tokenizer_remove_blank_enable; options->loose_symbol = GRN_FALSE; } @@ -359,7 +361,7 @@ ngram_init_raw(grn_ctx *ctx, unsigned int normalized_length_in_bytes; grn_ngram_tokenizer *tokenizer; - if (!grn_ngram_tokenizer_remove_blank_enable) { + if (!options->remove_blank) { normalize_flags &= ~GRN_STRING_REMOVE_BLANK; } query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags); @@ -528,6 +530,11 @@ ngram_open_options(grn_ctx *ctx, raw_options, i, options->unit); + } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "remove_blank")) { + options->remove_blank = grn_vector_get_element_bool(ctx, + raw_options, + i, + options->remove_blank); } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "loose_symbol")) { options->loose_symbol = grn_vector_get_element_bool(ctx, raw_options, Added: test/command/suite/tokenizers/ngram/remove_blank.expected (+45 -0) 100644 =================================================================== --- /dev/null +++ 
test/command/suite/tokenizers/ngram/remove_blank.expected 2018-04-06 15:55:03 +0900 (237816cf9) @@ -0,0 +1,45 @@ +tokenize 'TokenNgram("remove_blank", false)' " a b c " NormalizerAuto +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": " ", + "position": 0, + "force_prefix": false + }, + { + "value": "a", + "position": 1, + "force_prefix": false + }, + { + "value": " ", + "position": 2, + "force_prefix": false + }, + { + "value": "b", + "position": 3, + "force_prefix": false + }, + { + "value": " ", + "position": 4, + "force_prefix": false + }, + { + "value": "c", + "position": 5, + "force_prefix": false + }, + { + "value": " ", + "position": 6, + "force_prefix": true + } + ] +] Added: test/command/suite/tokenizers/ngram/remove_blank.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/ngram/remove_blank.test 2018-04-06 15:55:03 +0900 (5a6ee5a87) @@ -0,0 +1,4 @@ +tokenize \ + 'TokenNgram("remove_blank", false)' \ + " a b c " \ + NormalizerAuto -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180406/14addf0d/attachment-0001.htm