naoa
null+****@clear*****
Fri May 20 16:06:03 JST 2016
naoa 2016-04-22 22:52:37 +0900 (Fri, 22 Apr 2016) New Revision: e29b4a409650cd0e1002143b105ef65df341f56e https://github.com/groonga/groonga/commit/e29b4a409650cd0e1002143b105ef65df341f56e Merged 429e423: Merge pull request #533 from naoa/overlap-token-skip Message: ngram tokenizer: add remove blank disable mode It is useful when using overlap token skip mode. Modified files: lib/tokenizers.c Modified: lib/tokenizers.c (+16 -0) =================================================================== --- lib/tokenizers.c 2016-04-22 22:50:11 +0900 (c5f112f) +++ lib/tokenizers.c 2016-04-22 22:52:37 +0900 (87938e6) @@ -237,6 +237,8 @@ delimit_null_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_d /* ngram tokenizer */ +static grn_bool grn_ngram_tokenizer_remove_blank_disable = GRN_FALSE; + typedef struct { grn_tokenizer_token token; grn_tokenizer_query *query; @@ -268,6 +270,9 @@ ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data, ui unsigned int normalized_length_in_bytes; grn_ngram_tokenizer *tokenizer; + if (grn_ngram_tokenizer_remove_blank_disable) { + normalize_flags &= ~GRN_STRING_REMOVE_BLANK; + } query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags); if (!query) { return NULL; @@ -808,6 +813,17 @@ grn_db_init_builtin_tokenizers(grn_ctx *ctx) GRN_TEXT_INIT(&vars[1].value, 0); GRN_UINT32_INIT(&vars[2].value, 0); + { + char grn_ngram_tokenizer_remove_blank_disable_env[GRN_ENV_BUFFER_SIZE]; + + grn_getenv("GRN_NGRAM_TOKENIZER_REMOVE_BLANK_DISABLE", + grn_ngram_tokenizer_remove_blank_disable_env, + GRN_ENV_BUFFER_SIZE); + if (grn_ngram_tokenizer_remove_blank_disable_env[0]) { + grn_ngram_tokenizer_remove_blank_disable = GRN_TRUE; + } + } + obj = DEF_TOKENIZER("TokenDelimit", delimit_init, delimited_next, delimited_fin, vars); if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_DELIMIT) { return GRN_FILE_CORRUPT; } -------------- next part -------------- HTMLの添付ファイルを保管しました...ダウンロード