[Groonga-commit] groonga/groonga at e29b4a4 [master] ngram tokenizer: add remove blank disable mode

アーカイブの一覧に戻る

naoa null+****@clear*****
Fri May 20 16:06:03 JST 2016


naoa	2016-04-22 22:52:37 +0900 (Fri, 22 Apr 2016)

  New Revision: e29b4a409650cd0e1002143b105ef65df341f56e
  https://github.com/groonga/groonga/commit/e29b4a409650cd0e1002143b105ef65df341f56e

  Merged 429e423: Merge pull request #533 from naoa/overlap-token-skip

  Message:
    ngram tokenizer: add remove blank disable mode
    
    It is useful when using overlap token skip mode.

  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+16 -0)
===================================================================
--- lib/tokenizers.c    2016-04-22 22:50:11 +0900 (c5f112f)
+++ lib/tokenizers.c    2016-04-22 22:52:37 +0900 (87938e6)
@@ -237,6 +237,8 @@ delimit_null_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_d
 
 /* ngram tokenizer */
 
+static grn_bool grn_ngram_tokenizer_remove_blank_disable = GRN_FALSE;
+
 typedef struct {
   grn_tokenizer_token token;
   grn_tokenizer_query *query;
@@ -268,6 +270,9 @@ ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data, ui
   unsigned int normalized_length_in_bytes;
   grn_ngram_tokenizer *tokenizer;
 
+  if (grn_ngram_tokenizer_remove_blank_disable) {
+    normalize_flags &= ~GRN_STRING_REMOVE_BLANK;
+  }
   query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
   if (!query) {
     return NULL;
@@ -808,6 +813,17 @@ grn_db_init_builtin_tokenizers(grn_ctx *ctx)
   GRN_TEXT_INIT(&vars[1].value, 0);
   GRN_UINT32_INIT(&vars[2].value, 0);
 
+  {
+    char grn_ngram_tokenizer_remove_blank_disable_env[GRN_ENV_BUFFER_SIZE];
+
+    grn_getenv("GRN_NGRAM_TOKENIZER_REMOVE_BLANK_DISABLE",
+               grn_ngram_tokenizer_remove_blank_disable_env,
+               GRN_ENV_BUFFER_SIZE);
+    if (grn_ngram_tokenizer_remove_blank_disable_env[0]) {
+      grn_ngram_tokenizer_remove_blank_disable = GRN_TRUE;
+    }
+  }
+
   obj = DEF_TOKENIZER("TokenDelimit",
                       delimit_init, delimited_next, delimited_fin, vars);
   if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_DELIMIT) { return GRN_FILE_CORRUPT; }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
ダウンロード 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る