[Groonga-commit] groonga/groonga at 294e0a4 [master] TokenNgram: add "remove_blank" option

アーカイブの一覧に戻る

Kouhei Sutou null+****@clear*****
Fri Apr 6 15:55:03 JST 2018


Kouhei Sutou	2018-04-06 15:55:03 +0900 (Fri, 06 Apr 2018)

  New Revision: 294e0a4f5321cefc31be68d99f5c4a8f70304f7e
  https://github.com/groonga/groonga/commit/294e0a4f5321cefc31be68d99f5c4a8f70304f7e

  Message:
    TokenNgram: add "remove_blank" option

  Added files:
    test/command/suite/tokenizers/ngram/remove_blank.expected
    test/command/suite/tokenizers/ngram/remove_blank.test
  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+8 -1)
===================================================================
--- lib/tokenizers.c    2018-04-06 15:30:48 +0900 (998b3ffca)
+++ lib/tokenizers.c    2018-04-06 15:55:03 +0900 (405ffba5a)
@@ -246,6 +246,7 @@ typedef struct {
   grn_bool uni_digit;
   grn_bool uni_symbol;
   grn_bool ignore_blank;
+  grn_bool remove_blank;
   grn_bool loose_symbol;
 } grn_ngram_options;
 
@@ -278,6 +279,7 @@ ngram_options_init(grn_ngram_options *options, uint8_t unit)
   options->uni_digit = GRN_TRUE;
   options->uni_symbol = GRN_TRUE;
   options->ignore_blank = GRN_FALSE;
+  options->remove_blank = grn_ngram_tokenizer_remove_blank_enable;
   options->loose_symbol = GRN_FALSE;
 }
 
@@ -359,7 +361,7 @@ ngram_init_raw(grn_ctx *ctx,
   unsigned int normalized_length_in_bytes;
   grn_ngram_tokenizer *tokenizer;
 
-  if (!grn_ngram_tokenizer_remove_blank_enable) {
+  if (!options->remove_blank) {
     normalize_flags &= ~GRN_STRING_REMOVE_BLANK;
   }
   query = grn_tokenizer_query_open(ctx, nargs, args, normalize_flags);
@@ -528,6 +530,11 @@ ngram_open_options(grn_ctx *ctx,
                                                    raw_options,
                                                    i,
                                                    options->unit);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "remove_blank")) {
+      options->remove_blank = grn_vector_get_element_bool(ctx,
+                                                          raw_options,
+                                                          i,
+                                                          options->remove_blank);
     } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "loose_symbol")) {
       options->loose_symbol = grn_vector_get_element_bool(ctx,
                                                           raw_options,

  Added: test/command/suite/tokenizers/ngram/remove_blank.expected (+45 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/remove_blank.expected    2018-04-06 15:55:03 +0900 (237816cf9)
@@ -0,0 +1,45 @@
+tokenize   'TokenNgram("remove_blank", false)'   " a b c "   NormalizerAuto
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": " ",
+      "position": 0,
+      "force_prefix": false
+    },
+    {
+      "value": "a",
+      "position": 1,
+      "force_prefix": false
+    },
+    {
+      "value": " ",
+      "position": 2,
+      "force_prefix": false
+    },
+    {
+      "value": "b",
+      "position": 3,
+      "force_prefix": false
+    },
+    {
+      "value": " ",
+      "position": 4,
+      "force_prefix": false
+    },
+    {
+      "value": "c",
+      "position": 5,
+      "force_prefix": false
+    },
+    {
+      "value": " ",
+      "position": 6,
+      "force_prefix": true
+    }
+  ]
+]

  Added: test/command/suite/tokenizers/ngram/remove_blank.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/remove_blank.test    2018-04-06 15:55:03 +0900 (5a6ee5a87)
@@ -0,0 +1,4 @@
+tokenize \
+  'TokenNgram("remove_blank", false)' \
+  " a b c " \
+  NormalizerAuto
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180406/14addf0d/attachment-0001.htm 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る