Naoya Murakami
null+****@clear*****
Fri May 20 16:05:55 JST 2016
Naoya Murakami 2016-04-23 14:12:12 +0900 (Sat, 23 Apr 2016) New Revision: bb0daaa4a1ff9596db0a2fcdb5134f2f683ff6b8 https://github.com/groonga/groonga/commit/bb0daaa4a1ff9596db0a2fcdb5134f2f683ff6b8 Merged 429e423: Merge pull request #533 from naoa/overlap-token-skip Message: test: add test for GRN_II_OVERLAP_TOKEN_SKIP_ENABLE Added files: test/command/suite/select/env/overlap_token_skip/long.expected test/command/suite/select/env/overlap_token_skip/long.test test/command/suite/select/env/overlap_token_skip/non_overlap.expected test/command/suite/select/env/overlap_token_skip/non_overlap.test test/command/suite/select/env/overlap_token_skip/one.expected test/command/suite/select/env/overlap_token_skip/one.test test/command/suite/select/env/overlap_token_skip/short.expected test/command/suite/select/env/overlap_token_skip/short.test test/command/suite/select/env/overlap_token_skip/skip.expected test/command/suite/select/env/overlap_token_skip/skip.test Added: test/command/suite/select/env/overlap_token_skip/long.expected (+228 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/long.expected 2016-04-23 14:12:12 +0900 (70bef1c) @@ -0,0 +1,228 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigramIgnoreBlankSplitSymbolAlpha --normalizer NormalizerAuto +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "This is very very long sentence."} +] +[[0,0.0,0.0],1] +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +table_tokenize Terms "This is very very long sentence." 
--index_column index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "th", + "position": 0, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "hi", + "position": 1, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "is", + "position": 2, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "si", + "position": 3, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "is", + "position": 4, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "sv", + "position": 5, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "ve", + "position": 6, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "er", + "position": 7, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "ry", + "position": 8, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "yv", + "position": 9, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "ve", + "position": 10, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "er", + "position": 11, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "ry", + "position": 12, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "yl", + "position": 13, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "lo", + "position": 14, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "on", + "position": 15, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "ng", + "position": 16, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "gs", + "position": 17, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "se", + "position": 18, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "en", + "position": 19, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "nt", + "position": 20, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "te", + "position": 21, + 
"force_prefix": false, + "estimated_size": 1 + }, + { + "value": "en", + "position": 22, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "nc", + "position": 23, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "ce", + "position": 24, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "e.", + "position": 25, + "force_prefix": false, + "estimated_size": 1 + } + ] +] +log_level --level debug +[[0,0.0,0.0],true] +select Entries --filter 'body @ "This is very very long sentence."' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "body", + "ShortText" + ] + ], + [ + 1, + "This is very very long sentence." + ] + ] + ] +] +#|d| [ii][overlap_token_skip] tid=19 pos=0 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=7 pos=1 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=16 pos=3 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=17 pos=5 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=5 pos=7 estimated_size=3 +#|d| [ii][overlap_token_skip] tid=22 pos=9 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=5 pos=11 estimated_size=3 +#|d| [ii][overlap_token_skip] tid=21 pos=13 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=9 pos=14 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=13 pos=15 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=11 pos=16 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=15 pos=18 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=12 pos=20 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=18 pos=21 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=10 pos=23 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=3 pos=25 estimated_size=1 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/env/overlap_token_skip/long.test (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/long.test 2016-04-23 14:12:12 +0900 
(17d3845) @@ -0,0 +1,23 @@ +#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable." + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigramIgnoreBlankSplitSymbolAlpha \ + --normalizer NormalizerAuto + +load --table Entries +[ +{"body": "This is very very long sentence."} +] + +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body + +table_tokenize Terms "This is very very long sentence." --index_column index + +log_level --level debug +#@add-important-log-levels debug +select Entries --filter 'body @ "This is very very long sentence."' +#@remove-important-log-levels debug +log_level --level notice Added: test/command/suite/select/env/overlap_token_skip/non_overlap.expected (+43 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/non_overlap.expected 2016-04-23 14:12:12 +0900 (1871a8d) @@ -0,0 +1,43 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Hong Kong"} +] +[[0,0.0,0.0],1] +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +table_tokenize Terms "Hong Kong" --index_column index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "hong", + "position": 0, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "kong", + "position": 1, + "force_prefix": false, + "estimated_size": 1 + } + ] +] +log_level --level debug +[[0,0.0,0.0],true] +select Entries --filter 'body @ "Hong Kong"' +[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"Hong Kong"]]]] +#|d| [ii][overlap_token_skip] tid=1 pos=0 estimated_size=1 +#|d| 
[ii][overlap_token_skip] tid=2 pos=1 estimated_size=1 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/env/overlap_token_skip/non_overlap.test (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/non_overlap.test 2016-04-23 14:12:12 +0900 (7ff4a3d) @@ -0,0 +1,23 @@ +#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable." + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto + +load --table Entries +[ +{"body": "Hong Kong"} +] + +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body + +table_tokenize Terms "Hong Kong" --index_column index + +log_level --level debug +#@add-important-log-levels debug +select Entries --filter 'body @ "Hong Kong"' +#@remove-important-log-levels debug +log_level --level notice Added: test/command/suite/select/env/overlap_token_skip/one.expected (+36 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/one.expected 2016-04-23 14:12:12 +0900 (57d0b9b) @@ -0,0 +1,36 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "HongKong"} +] +[[0,0.0,0.0],1] +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +table_tokenize Terms "HongKong" --index_column index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "hongkong", + "position": 0, + "force_prefix": false, + "estimated_size": 1 + } + ] +] +log_level --level debug +[[0,0.0,0.0],true] +select Entries --filter 'body @ "HongKong"' 
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"HongKong"]]]] +#|d| [ii][overlap_token_skip] tid=1 pos=0 estimated_size=1 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/env/overlap_token_skip/one.test (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/one.test 2016-04-23 14:12:12 +0900 (7ed2e3b) @@ -0,0 +1,23 @@ +#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable." + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto + +load --table Entries +[ +{"body": "HongKong"} +] + +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body + +table_tokenize Terms "HongKong" --index_column index + +log_level --level debug +#@add-important-log-levels debug +select Entries --filter 'body @ "HongKong"' +#@remove-important-log-levels debug +log_level --level notice Added: test/command/suite/select/env/overlap_token_skip/short.expected (+76 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/short.expected 2016-04-23 14:12:12 +0900 (043a5ff) @@ -0,0 +1,76 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigramSplitSymbolAlpha --normalizer NormalizerAuto +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "HongKong"} +] +[[0,0.0,0.0],1] +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +table_tokenize Terms "HongKong" --index_column index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "ho", + "position": 0, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "on", 
+ "position": 1, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "ng", + "position": 2, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "gk", + "position": 3, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "ko", + "position": 4, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "on", + "position": 5, + "force_prefix": false, + "estimated_size": 3 + }, + { + "value": "ng", + "position": 6, + "force_prefix": false, + "estimated_size": 3 + } + ] +] +log_level --level debug +[[0,0.0,0.0],true] +select Entries --filter 'body @ "HongKong"' +[[0,0.0,0.0],[[[1],[["_id","UInt32"],["body","ShortText"]],[1,"HongKong"]]]] +#|d| [ii][overlap_token_skip] tid=3 pos=0 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=6 pos=1 estimated_size=3 +#|d| [ii][overlap_token_skip] tid=2 pos=3 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=4 pos=4 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=5 pos=6 estimated_size=3 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/env/overlap_token_skip/short.test (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/short.test 2016-04-23 14:12:12 +0900 (08842f2) @@ -0,0 +1,23 @@ +#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable." 
+ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigramSplitSymbolAlpha \ + --normalizer NormalizerAuto + +load --table Entries +[ +{"body": "HongKong"} +] + +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body + +table_tokenize Terms "HongKong" --index_column index + +log_level --level debug +#@add-important-log-levels debug +select Entries --filter 'body @ "HongKong"' +#@remove-important-log-levels debug +log_level --level notice Added: test/command/suite/select/env/overlap_token_skip/skip.expected (+80 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/skip.expected 2016-04-23 14:12:12 +0900 (4ea9d8b) @@ -0,0 +1,80 @@ +plugin_register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "This is a pen"} +] +[[0,0.0,0.0],1] +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +column_create Terms is_stop_word COLUMN_SCALAR Bool +[[0,0.0,0.0],true] +load --table Terms +[ +{"_key": "is", "is_stop_word": true}, +{"_key": "a", "is_stop_word": true} +] +[[0,0.0,0.0],2] +table_tokenize Terms "This is a pen" --index_column index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "this", + "position": 0, + "force_prefix": false, + "estimated_size": 1 + }, + { + "value": "pen", + "position": 3, + "force_prefix": false, + "estimated_size": 1 + } + ] +] +log_level --level debug +[[0,0.0,0.0],true] +select Entries --filter 'body @ "This is a pen"' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "_id", + 
"UInt32" + ], + [ + "body", + "ShortText" + ] + ], + [ + 1, + "This is a pen" + ] + ] + ] +] +#|d| [ii][overlap_token_skip] tid=4 pos=0 estimated_size=1 +#|d| [ii][overlap_token_skip] tid=3 pos=3 estimated_size=1 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/env/overlap_token_skip/skip.test (+32 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/env/overlap_token_skip/skip.test 2016-04-23 14:12:12 +0900 (67e31f7) @@ -0,0 +1,32 @@ +#@omit "need enable `GRN_II_OVERLAP_TOKEN_SKIP_ENABLE` environment variable." +plugin_register token_filters/stop_word + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters TokenFilterStopWord + +load --table Entries +[ +{"body": "This is a pen"} +] + +column_create Terms index COLUMN_INDEX|WITH_POSITION Entries body +column_create Terms is_stop_word COLUMN_SCALAR Bool + +load --table Terms +[ +{"_key": "is", "is_stop_word": true}, +{"_key": "a", "is_stop_word": true} +] + +table_tokenize Terms "This is a pen" --index_column index + +log_level --level debug +#@add-important-log-levels debug +select Entries --filter 'body @ "This is a pen"' +#@remove-important-log-levels debug +log_level --level notice -------------- next part -------------- HTMLの添付ファイルを保管しました...ダウンロード