Kouhei Sutou
null+****@clear*****
Thu Jun 5 23:34:56 JST 2014
Kouhei Sutou	2014-06-05 23:34:56 +0900 (Thu, 05 Jun 2014)

  New Revision: 7d11c2f6960f244ed0ed5c0408dd80beed668fa2
  https://github.com/groonga/groonga/commit/7d11c2f6960f244ed0ed5c0408dd80beed668fa2

  Message:
    tokenize: add valid tokenizer check

  Added files:
    test/command/suite/tokenize/invalid/tokenizer/invalid.expected
    test/command/suite/tokenize/invalid/tokenizer/invalid.test
    test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected
    test/command/suite/tokenize/invalid/tokenizer/nonexistent.test
  Removed files:
    test/command/suite/tokenize/invalid/tokenizer/unknown.expected
    test/command/suite/tokenize/invalid/tokenizer/unknown.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+28 -1)
===================================================================
--- lib/proc.c    2014-06-05 23:30:25 +0900 (c4fb1ea)
+++ lib/proc.c    2014-06-05 23:34:56 +0900 (54adab5)
@@ -2958,6 +2958,20 @@ is_normalizer(grn_ctx *ctx, grn_obj *object)
   return GRN_TRUE;
 }
 
+static grn_bool
+is_tokenizer(grn_ctx *ctx, grn_obj *object)
+{
+  if (object->header.type != GRN_PROC) {
+    return GRN_FALSE;
+  }
+
+  if (grn_proc_get_type(ctx, object) != GRN_PROC_TOKENIZER) {
+    return GRN_FALSE;
+  }
+
+  return GRN_TRUE;
+}
+
 static const char *
 char_type_name(grn_char_type type)
 {
@@ -3190,12 +3204,25 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
                              GRN_TEXT_LEN(tokenizer_name));
   if (!tokenizer) {
     ERR(GRN_INVALID_ARGUMENT,
-        "[tokenize] unknown tokenizer: <%.*s>",
+        "[tokenize] nonexistent tokenizer: <%.*s>",
         (int)GRN_TEXT_LEN(tokenizer_name),
         GRN_TEXT_VALUE(tokenizer_name));
     return NULL;
   }
 
+  if (!is_tokenizer(ctx, tokenizer)) {
+    grn_obj inspected;
+    GRN_TEXT_INIT(&inspected, 0);
+    grn_inspect(ctx, &inspected, tokenizer);
+    ERR(GRN_INVALID_ARGUMENT,
+        "[tokenize] not tokenizer: %.*s",
+        (int)GRN_TEXT_LEN(&inspected),
+        GRN_TEXT_VALUE(&inspected));
+    GRN_OBJ_FIN(ctx, &inspected);
+    grn_obj_unlink(ctx, tokenizer);
+    return NULL;
+  }
+
   if (GRN_TEXT_LEN(normalizer_name) > 0) {
     normalizer = grn_ctx_get(ctx,
                              GRN_TEXT_VALUE(normalizer_name),

  Added: test/command/suite/tokenize/invalid/tokenizer/invalid.expected (+15 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/invalid.expected    2014-06-05 23:34:56 +0900 (23fe362)
@@ -0,0 +1,15 @@
+tokenize NormalizerAuto "aBcDe 123"
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>"
+  ],
+  [
+
+  ]
+]
+#|e| [tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>

  Added: test/command/suite/tokenize/invalid/tokenizer/invalid.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/invalid.test    2014-06-05 23:34:56 +0900 (cbba396)
@@ -0,0 +1 @@
+tokenize NormalizerAuto "aBcDe 123"

  Added: test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/nonexistent.expected    2014-06-05 23:34:56 +0900 (ee16c98)
@@ -0,0 +1,3 @@
+tokenize TokenNonexistent "aBcDe 123"
+[[[-22,0.0,0.0],"[tokenize] nonexistent tokenizer: <TokenNonexistent>"],[]]
+#|e| [tokenize] nonexistent tokenizer: <TokenNonexistent>

  Added: test/command/suite/tokenize/invalid/tokenizer/nonexistent.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/tokenizer/nonexistent.test    2014-06-05 23:34:56 +0900 (19a692f)
@@ -0,0 +1 @@
+tokenize TokenNonexistent "aBcDe 123"

  Deleted: test/command/suite/tokenize/invalid/tokenizer/unknown.expected (+0 -3) 100644
===================================================================
--- test/command/suite/tokenize/invalid/tokenizer/unknown.expected    2014-06-05 23:30:25 +0900 (edd2634)
+++ /dev/null
@@ -1,3 +0,0 @@
-tokenize TokenUnknown "aBcDe 123"
-[[[-22,0.0,0.0],"[tokenize] unknown tokenizer: <TokenUnknown>"],[]]
-#|e| [tokenize] unknown tokenizer: <TokenUnknown>

  Deleted: test/command/suite/tokenize/invalid/tokenizer/unknown.test (+0 -1) 100644
===================================================================
--- test/command/suite/tokenize/invalid/tokenizer/unknown.test    2014-06-05 23:30:25 +0900 (40dd10c)
+++ /dev/null
@@ -1 +0,0 @@
-tokenize TokenUnknown "aBcDe 123"
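
For reference, here is how the new check surfaces at the command level. This is a sketch, assuming a stock Groonga build where the built-in TokenBigram tokenizer and NormalizerAuto normalizer are registered; the token output of the valid case varies by version and is elided here. The two error messages are taken verbatim from the test files above.

  tokenize TokenBigram "aBcDe 123"
    -> succeeds: TokenBigram resolves to a GRN_PROC_TOKENIZER proc, so it
       passes the new is_tokenizer() check

  tokenize NormalizerAuto "aBcDe 123"
    -> rejected by the new check, because the name resolves to a proc that
       is not a tokenizer:
       [tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>

  tokenize TokenNonexistent "aBcDe 123"
    -> rejected even earlier, with the renamed message, because the name
       resolves to nothing:
       [tokenize] nonexistent tokenizer: <TokenNonexistent>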