[Groonga-commit] groonga/groonga at bb0daaa [master] test: add test for GRN_II_OVERLAP_TOKEN_SKIP_ENABLE

アーカイブの一覧に戻る

Naoya Murakami null+****@clear*****
Fri May 20 16:05:55 JST 2016


Naoya Murakami	2016-04-23 14:12:12 +0900 (Sat, 23 Apr 2016)

  New Revision: bb0daaa4a1ff9596db0a2fcdb5134f2f683ff6b8
  https://github.com/groonga/groonga/commit/bb0daaa4a1ff9596db0a2fcdb5134f2f683ff6b8

  Merged 429e423: Merge pull request #533 from naoa/overlap-token-skip

  Message:
    test: add test for GRN_II_OVERLAP_TOKEN_SKIP_ENABLE

  Added files:
    test/command/suite/select/env/overlap_token_skip/long.expected
    test/command/suite/select/env/overlap_token_skip/long.test
    test/command/suite/select/env/overlap_token_skip/non_overlap.expected
    test/command/suite/select/env/overlap_token_skip/non_overlap.test
    test/command/suite/select/env/overlap_token_skip/one.expected
    test/command/suite/select/env/overlap_token_skip/one.test
    test/command/suite/select/env/overlap_token_skip/short.expected
    test/command/suite/select/env/overlap_token_skip/short.test
    test/command/suite/select/env/overlap_token_skip/skip.expected
    test/command/suite/select/env/overlap_token_skip/skip.test

  Added: test/command/suite/select/env/overlap_token_skip/long.expected (+228 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/long.expected    2016-04-23 14:12:12 +0900 (70bef1c)
@@ -0,0 +1,228 @@
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigramIgnoreBlankSplitSymbolAlpha   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "This is very very long sentence."}
+]
+[[0,0.0,0.0],1]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+table_tokenize Terms "This is very very long sentence." --index_column index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "th",
+      "position": 0,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "hi",
+      "position": 1,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "is",
+      "position": 2,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "si",
+      "position": 3,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "is",
+      "position": 4,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "sv",
+      "position": 5,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "ve",
+      "position": 6,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "er",
+      "position": 7,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "ry",
+      "position": 8,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "yv",
+      "position": 9,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "ve",
+      "position": 10,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "er",
+      "position": 11,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "ry",
+      "position": 12,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "yl",
+      "position": 13,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "lo",
+      "position": 14,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "on",
+      "position": 15,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "ng",
+      "position": 16,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "gs",
+      "position": 17,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "se",
+      "position": 18,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "en",
+      "position": 19,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "nt",
+      "position": 20,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "te",
+      "position": 21,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "en",
+      "position": 22,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "nc",
+      "position": 23,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "ce",
+      "position": 24,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "e.",
+      "position": 25,
+      "force_prefix": false,
+      "estimated_size": 1
+    }
+  ]
+]
+log_level --level debug
+[[0,0.0,0.0],true]
+select Entries --filter 'body @ "This is very very long sentence."'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        1
+      ],
+      [
+        [
+          "_id",
+          "UInt32"
+        ],
+        [
+          "body",
+          "ShortText"
+        ]
+      ],
+      [
+        1,
+        "This is very very long sentence."
+      ]
+    ]
+  ]
+]
+#|d| [ii][overlap_token_skip] tid=19 pos=0 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=7 pos=1 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=16 pos=3 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=17 pos=5 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=5 pos=7 estimated_size=3
+#|d| [ii][overlap_token_skip] tid=22 pos=9 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=5 pos=11 estimated_size=3
+#|d| [ii][overlap_token_skip] tid=21 pos=13 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=9 pos=14 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=13 pos=15 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=11 pos=16 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=15 pos=18 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=12 pos=20 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=18 pos=21 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=10 pos=23 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=3 pos=25 estimated_size=1
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/env/overlap_token_skip/long.test (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/long.test    2016-04-23 14:12:12 +0900 (17d3845)
@@ -0,0 +1,23 @@
+#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable."
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigramIgnoreBlankSplitSymbolAlpha \
+  --normalizer NormalizerAuto
+
+load --table Entries
+[
+{"body": "This is very very long sentence."}
+]
+
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+
+table_tokenize Terms "This is very very long sentence." --index_column index
+
+log_level --level debug
+#@add-important-log-levels debug
+select Entries --filter 'body @ "This is very very long sentence."'
+#@remove-important-log-levels debug
+log_level --level notice

  Added: test/command/suite/select/env/overlap_token_skip/non_overlap.expected (+43 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/non_overlap.expected    2016-04-23 14:12:12 +0900 (1871a8d)
@@ -0,0 +1,43 @@
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "Hong Kong"}
+]
+[[0,0.0,0.0],1]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+table_tokenize Terms "Hong Kong" --index_column index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "hong",
+      "position": 0,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "kong",
+      "position": 1,
+      "force_prefix": false,
+      "estimated_size": 1
+    }
+  ]
+]
+log_level --level debug
+[[0,0.0,0.0],true]
+select Entries --filter 'body @ "Hong Kong"'
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"Hong Kong"]]]]
+#|d| [ii][overlap_token_skip] tid=1 pos=0 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=2 pos=1 estimated_size=1
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/env/overlap_token_skip/non_overlap.test (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/non_overlap.test    2016-04-23 14:12:12 +0900 (7ff4a3d)
@@ -0,0 +1,23 @@
+#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable."
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+
+load --table Entries
+[
+{"body": "Hong Kong"}
+]
+
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+
+table_tokenize Terms "Hong Kong" --index_column index
+
+log_level --level debug
+#@add-important-log-levels debug
+select Entries --filter 'body @ "Hong Kong"'
+#@remove-important-log-levels debug
+log_level --level notice

  Added: test/command/suite/select/env/overlap_token_skip/one.expected (+36 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/one.expected    2016-04-23 14:12:12 +0900 (57d0b9b)
@@ -0,0 +1,36 @@
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "HongKong"}
+]
+[[0,0.0,0.0],1]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+table_tokenize Terms "HongKong" --index_column index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "hongkong",
+      "position": 0,
+      "force_prefix": false,
+      "estimated_size": 1
+    }
+  ]
+]
+log_level --level debug
+[[0,0.0,0.0],true]
+select Entries --filter 'body @ "HongKong"'
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"HongKong"]]]]
+#|d| [ii][overlap_token_skip] tid=1 pos=0 estimated_size=1
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/env/overlap_token_skip/one.test (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/one.test    2016-04-23 14:12:12 +0900 (7ed2e3b)
@@ -0,0 +1,23 @@
+#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable."
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+
+load --table Entries
+[
+{"body": "HongKong"}
+]
+
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+
+table_tokenize Terms "HongKong" --index_column index
+
+log_level --level debug
+#@add-important-log-levels debug
+select Entries --filter 'body @ "HongKong"'
+#@remove-important-log-levels debug
+log_level --level notice

  Added: test/command/suite/select/env/overlap_token_skip/short.expected (+76 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/short.expected    2016-04-23 14:12:12 +0900 (043a5ff)
@@ -0,0 +1,76 @@
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigramSplitSymbolAlpha   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "HongKong"}
+]
+[[0,0.0,0.0],1]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+table_tokenize Terms "HongKong" --index_column index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "ho",
+      "position": 0,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "on",
+      "position": 1,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "ng",
+      "position": 2,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "gk",
+      "position": 3,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "ko",
+      "position": 4,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "on",
+      "position": 5,
+      "force_prefix": false,
+      "estimated_size": 3
+    },
+    {
+      "value": "ng",
+      "position": 6,
+      "force_prefix": false,
+      "estimated_size": 3
+    }
+  ]
+]
+log_level --level debug
+[[0,0.0,0.0],true]
+select Entries --filter 'body @ "HongKong"'
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"HongKong"]]]]
+#|d| [ii][overlap_token_skip] tid=3 pos=0 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=6 pos=1 estimated_size=3
+#|d| [ii][overlap_token_skip] tid=2 pos=3 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=4 pos=4 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=5 pos=6 estimated_size=3
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/env/overlap_token_skip/short.test (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/short.test    2016-04-23 14:12:12 +0900 (08842f2)
@@ -0,0 +1,23 @@
+#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable."
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigramSplitSymbolAlpha \
+  --normalizer NormalizerAuto
+
+load --table Entries
+[
+{"body": "HongKong"}
+]
+
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+
+table_tokenize Terms "HongKong" --index_column index
+
+log_level --level debug
+#@add-important-log-levels debug
+select Entries --filter 'body @ "HongKong"'
+#@remove-important-log-levels debug
+log_level --level notice

  Added: test/command/suite/select/env/overlap_token_skip/skip.expected (+80 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/skip.expected    2016-04-23 14:12:12 +0900 (4ea9d8b)
@@ -0,0 +1,80 @@
+plugin_register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters TokenFilterStopWord
+[[0,0.0,0.0],true]
+load --table Entries
+[
+{"body": "This is a pen"}
+]
+[[0,0.0,0.0],1]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+column_create Terms is_stop_word COLUMN_SCALAR Bool
+[[0,0.0,0.0],true]
+load --table Terms
+[
+{"_key": "is", "is_stop_word": true},
+{"_key": "a", "is_stop_word": true}
+]
+[[0,0.0,0.0],2]
+table_tokenize Terms "This is a pen" --index_column index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "this",
+      "position": 0,
+      "force_prefix": false,
+      "estimated_size": 1
+    },
+    {
+      "value": "pen",
+      "position": 3,
+      "force_prefix": false,
+      "estimated_size": 1
+    }
+  ]
+]
+log_level --level debug
+[[0,0.0,0.0],true]
+select Entries --filter 'body @ "This is a pen"'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        1
+      ],
+      [
+        [
+          "_id",
+          "UInt32"
+        ],
+        [
+          "body",
+          "ShortText"
+        ]
+      ],
+      [
+        1,
+        "This is a pen"
+      ]
+    ]
+  ]
+]
+#|d| [ii][overlap_token_skip] tid=4 pos=0 estimated_size=1
+#|d| [ii][overlap_token_skip] tid=3 pos=3 estimated_size=1
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/env/overlap_token_skip/skip.test (+32 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/env/overlap_token_skip/skip.test    2016-04-23 14:12:12 +0900 (67e31f7)
@@ -0,0 +1,32 @@
+#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable."
+plugin_register token_filters/stop_word
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters TokenFilterStopWord
+
+load --table Entries
+[
+{"body": "This is a pen"}
+]
+
+column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body
+column_create Terms is_stop_word COLUMN_SCALAR Bool
+
+load --table Terms
+[
+{"_key": "is", "is_stop_word": true},
+{"_key": "a", "is_stop_word": true}
+]
+
+table_tokenize Terms "This is a pen" --index_column index
+
+log_level --level debug
+#@add-important-log-levels debug
+select Entries --filter 'body @ "This is a pen"'
+#@remove-important-log-levels debug
+log_level --level notice
-------------- next part --------------
HTMLの添付ファイルを保管しました...
ダウンロード 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る