Kouhei Sutou
null+****@clear*****
Thu Nov 6 15:59:53 JST 2014
Kouhei Sutou 2014-11-06 15:59:53 +0900 (Thu, 06 Nov 2014) New Revision: a5bc385ca21454a474534f45d3e8c8af9e779131 https://github.com/ranguba/rroonga/commit/a5bc385ca21454a474534f45d3e8c8af9e779131 Message: Support token filters in Groonga::Schema Modified files: lib/groonga/schema.rb test/test-schema.rb Modified: lib/groonga/schema.rb (+36 -0) =================================================================== --- lib/groonga/schema.rb 2014-11-06 15:45:47 +0900 (b441002) +++ lib/groonga/schema.rb 2014-11-06 15:59:53 +0900 (c721436) @@ -252,6 +252,25 @@ module Groonga # {Groonga::IndexColumn} を定義する場合は `"TokenBigram"` # などを指定する必要がある。 # + # @option options [::Array<String, Groonga::Procedure>, nil] + # :token_filters (nil) The token filters to be used + # in the table. + # + # Here is an example to set two token filters. + # + # ```ruby + # Groonga::Schema.define do |schema| + # schema.create_table("Terms", + # # ... + # :token_filters => [ + # "TokenFilterStem", + # "TokenFilterStopWord", + # ]) do |table| + # # ... + # end + # end + # ``` + # # @option options :key_normalize (false) Keys are normalized # if this value is `true`. 
# @@ -1172,6 +1191,7 @@ module Groonga :type, :path, :persistent, :key_type, :value_type, :sub_records, :default_tokenizer, + :token_filters, :key_normalize, :key_with_sis, :named_path, :normalizer] @@ -1213,10 +1233,17 @@ module Groonga :context => context, :sub_records => @options[:sub_records], } + token_filters = @options[:token_filters] + if token_filters + token_filters = token_filters.collect do |token_filter| + normalize_type(token_filter) + end + end key_support_table_common = { :key_type => normalize_key_type(@options[:key_type] || "ShortText"), :key_normalize => @options[:key_normalize], :default_tokenizer => normalize_type(@options[:default_tokenizer]), + :token_filters => token_filters, :normalizer => normalize_type(@options[:normalizer]), } @@ -1292,13 +1319,22 @@ module Groonga when Groonga::Hash, Groonga::PatriciaTrie, Groonga::DoubleArrayTrie key_type = normalize_key_type(options[:key_type]) return false unless table.domain == resolve_name(key_type) + default_tokenizer = normalize_type(options[:default_tokenizer]) default_tokenizer = resolve_name(default_tokenizer) return false unless table.default_tokenizer == default_tokenizer + + token_filters = options[:token_filters] || [] + token_filters = token_filters.collect do |token_filter| + resolve_name(normalize_type(token_filter)) + end + return false unless table.token_filters == token_filters + normalizer = normalize_type(options[:normalizer]) normalizer ||= default_normalizer_name if options[:key_normalize] normalizer = resolve_name(normalizer) return false unless table.normalizer == normalizer + if table.is_a?(Groonga::PatriciaTrie) key_with_sis = options[:key_with_sis] key_with_sis = false if key_with_sis.nil? 
Modified: test/test-schema.rb (+10 -3) =================================================================== --- test/test-schema.rb 2014-11-06 15:45:47 +0900 (c2b6ecf) +++ test/test-schema.rb 2014-11-06 15:59:53 +0900 (de9d40c) @@ -91,14 +91,17 @@ class SchemaTest < Test::Unit::TestCase end def test_full_option + context.register_plugin("token_filters/stop_word") path = @tmp_dir + "hash.groonga" tokenizer = context["TokenTrigram"] + token_filter = context["TokenFilterStopWord"] Groonga::Schema.create_table("Posts", :type => :hash, :key_type => "integer", :path => path.to_s, :value_type => "UInt32", :default_tokenizer => tokenizer, + :token_filters => [token_filter], :named_path => true) do |table| end table = context["Posts"] @@ -112,7 +115,7 @@ class SchemaTest < Test::Unit::TestCase "size: <0>, " + "encoding: <#{Groonga::Encoding.default.inspect}>, " + "default_tokenizer: <#{tokenizer.name}>, " + - "token_filters: [], " + + "token_filters: [<#{token_filter.name}>], " + "normalizer: (nil)>", table.inspect) end @@ -150,6 +153,7 @@ class SchemaTest < Test::Unit::TestCase end def test_full_option + context.register_plugin("token_filters/stop_word") path = @tmp_dir + "patricia-trie.groonga" Groonga::Schema.create_table("Posts", :type => :patricia_trie, @@ -157,6 +161,7 @@ class SchemaTest < Test::Unit::TestCase :path => path.to_s, :value_type => "Float", :default_tokenizer => "TokenBigram", + :token_filters => ["TokenFilterStopWord"], :key_with_sis => true, :named_path => true, :normalizer => "NormalizerAuto") do |table| @@ -172,7 +177,7 @@ class SchemaTest < Test::Unit::TestCase "size: <0>, " + "encoding: <#{Groonga::Encoding.default.inspect}>, " + "default_tokenizer: <TokenBigram>, " + - "token_filters: [], " + + "token_filters: [<TokenFilterStopWord>], " + "normalizer: <NormalizerAuto>>", table.inspect) end @@ -211,6 +216,7 @@ class SchemaTest < Test::Unit::TestCase end def test_full_option + context.register_plugin("token_filters/stop_word") path = @tmp_dir + 
"double-array-trie.groonga" Groonga::Schema.create_table("Posts", :type => :double_array_trie, @@ -218,6 +224,7 @@ class SchemaTest < Test::Unit::TestCase :path => path.to_s, :value_type => "Float", :default_tokenizer => "TokenBigram", + :token_filters => ["TokenFilterStopWord"], :named_path => true, :normalizer => "NormalizerAuto") do |table| end @@ -232,7 +239,7 @@ class SchemaTest < Test::Unit::TestCase "size: <0>, " + "encoding: <#{Groonga::Encoding.default.inspect}>, " + "default_tokenizer: <TokenBigram>, " + - "token_filters: [], " + + "token_filters: [<TokenFilterStopWord>], " + "normalizer: <NormalizerAuto>>", table.inspect) end -------------- next part -------------- HTMLの添付ファイルを保管しました... ダウンロード