[Groonga-commit] ranguba/rroonga at a5bc385 [master] Support token filters in Groonga::Schema


Kouhei Sutou null+****@clear*****
Thu Nov 6 15:59:53 JST 2014


Kouhei Sutou	2014-11-06 15:59:53 +0900 (Thu, 06 Nov 2014)

  New Revision: a5bc385ca21454a474534f45d3e8c8af9e779131
  https://github.com/ranguba/rroonga/commit/a5bc385ca21454a474534f45d3e8c8af9e779131

  Message:
    Support token filters in Groonga::Schema
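
For reference, a minimal usage sketch of the new `:token_filters` option, based on the documentation and tests added below. The database path is illustrative, and the token filter plugin has to be registered before the schema is defined, as the updated tests do:

```ruby
require "groonga"

# Hypothetical database path for this sketch.
Groonga::Database.create(:path => "/tmp/token-filters.db")

# TokenFilterStopWord is provided by a plugin; register it first.
Groonga::Context.default.register_plugin("token_filters/stop_word")

Groonga::Schema.define do |schema|
  schema.create_table("Terms",
                      :type => :patricia_trie,
                      :key_type => "ShortText",
                      :default_tokenizer => "TokenBigram",
                      :normalizer => "NormalizerAuto",
                      # New option added by this commit.
                      :token_filters => ["TokenFilterStopWord"]) do |table|
  end
end
```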

  Modified files:
    lib/groonga/schema.rb
    test/test-schema.rb

  Modified: lib/groonga/schema.rb (+36 -0)
===================================================================
--- lib/groonga/schema.rb    2014-11-06 15:45:47 +0900 (b441002)
+++ lib/groonga/schema.rb    2014-11-06 15:59:53 +0900 (c721436)
@@ -252,6 +252,25 @@ module Groonga
      #     When defining a {Groonga::IndexColumn}, a tokenizer such as
      #     `"TokenBigram"` must be specified.
       #
+      #   @option options [::Array<String, Groonga::Procedure>, nil]
+      #     :token_filters (nil) The token filters to be used
+      #     in the table.
+      #
+      #     Here is an example to set two token filters.
+      #
+      #     ```ruby
+      #     Groonga::Schema.define do |schema|
+      #       schema.create_table("Terms",
+      #                           # ...
+      #                           :token_filters => [
+      #                             "TokenFilterStem",
+      #                             "TokenFilterStopWord",
+      #                           ]) do |table|
+      #         # ...
+      #       end
+      #     end
+      #     ```
+      #
       #   @option options :key_normalize (false) Keys are normalized
       #     if this value is `true`.
       #
@@ -1172,6 +1191,7 @@ module Groonga
                                :type, :path, :persistent,
                                :key_type, :value_type, :sub_records,
                                :default_tokenizer,
+                               :token_filters,
                                :key_normalize, :key_with_sis,
                                :named_path,
                                :normalizer]
@@ -1213,10 +1233,17 @@ module Groonga
           :context => context,
           :sub_records => @options[:sub_records],
         }
+        token_filters = @options[:token_filters]
+        if token_filters
+          token_filters = token_filters.collect do |token_filter|
+            normalize_type(token_filter)
+          end
+        end
         key_support_table_common = {
           :key_type => normalize_key_type(@options[:key_type] || "ShortText"),
           :key_normalize => @options[:key_normalize],
           :default_tokenizer => normalize_type(@options[:default_tokenizer]),
+          :token_filters => token_filters,
           :normalizer => normalize_type(@options[:normalizer]),
         }
 
@@ -1292,13 +1319,22 @@ module Groonga
         when Groonga::Hash, Groonga::PatriciaTrie, Groonga::DoubleArrayTrie
           key_type = normalize_key_type(options[:key_type])
           return false unless table.domain == resolve_name(key_type)
+
           default_tokenizer = normalize_type(options[:default_tokenizer])
           default_tokenizer = resolve_name(default_tokenizer)
           return false unless table.default_tokenizer == default_tokenizer
+
+          token_filters = options[:token_filters] || []
+          token_filters = token_filters.collect do |token_filter|
+            resolve_name(normalize_type(token_filter))
+          end
+          return false unless table.token_filters == token_filters
+
           normalizer = normalize_type(options[:normalizer])
           normalizer ||= default_normalizer_name if options[:key_normalize]
           normalizer = resolve_name(normalizer)
           return false unless table.normalizer == normalizer
+
           if table.is_a?(Groonga::PatriciaTrie)
             key_with_sis = options[:key_with_sis]
             key_with_sis = false if key_with_sis.nil?
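
The second hunk above extends the "same table?" check: the requested :token_filters are resolved to procedure objects and compared against the existing table's token filters, so redefining a schema with different filters is detected. A small sketch of reading the filters back, assuming the "Terms" table from the sketch near the top has been created:

```ruby
# Assumes the "Terms" table defined in the earlier sketch exists in
# the current database.
terms = Groonga::Context.default["Terms"]

# Table#token_filters returns the token filter procedures; the updated
# check above compares this against the resolved :token_filters option.
terms.token_filters.collect(&:name)
# => ["TokenFilterStopWord"]  (expected for that sketch)
```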

  Modified: test/test-schema.rb (+10 -3)
===================================================================
--- test/test-schema.rb    2014-11-06 15:45:47 +0900 (c2b6ecf)
+++ test/test-schema.rb    2014-11-06 15:59:53 +0900 (de9d40c)
@@ -91,14 +91,17 @@ class SchemaTest < Test::Unit::TestCase
     end
 
     def test_full_option
+      context.register_plugin("token_filters/stop_word")
       path = @tmp_dir + "hash.groonga"
       tokenizer = context["TokenTrigram"]
+      token_filter = context["TokenFilterStopWord"]
       Groonga::Schema.create_table("Posts",
                                    :type => :hash,
                                    :key_type => "integer",
                                    :path => path.to_s,
                                    :value_type => "UInt32",
                                    :default_tokenizer => tokenizer,
+                                   :token_filters => [token_filter],
                                    :named_path => true) do |table|
       end
       table = context["Posts"]
@@ -112,7 +115,7 @@ class SchemaTest < Test::Unit::TestCase
                    "size: <0>, " +
                    "encoding: <#{Groonga::Encoding.default.inspect}>, " +
                    "default_tokenizer: <#{tokenizer.name}>, " +
-                   "token_filters: [], " +
+                   "token_filters: [<#{token_filter.name}>], " +
                    "normalizer: (nil)>",
                    table.inspect)
     end
@@ -150,6 +153,7 @@ class SchemaTest < Test::Unit::TestCase
     end
 
     def test_full_option
+      context.register_plugin("token_filters/stop_word")
       path = @tmp_dir + "patricia-trie.groonga"
       Groonga::Schema.create_table("Posts",
                                    :type => :patricia_trie,
@@ -157,6 +161,7 @@ class SchemaTest < Test::Unit::TestCase
                                    :path => path.to_s,
                                    :value_type => "Float",
                                    :default_tokenizer => "TokenBigram",
+                                   :token_filters => ["TokenFilterStopWord"],
                                    :key_with_sis => true,
                                    :named_path => true,
                                    :normalizer => "NormalizerAuto") do |table|
@@ -172,7 +177,7 @@ class SchemaTest < Test::Unit::TestCase
                    "size: <0>, " +
                    "encoding: <#{Groonga::Encoding.default.inspect}>, " +
                    "default_tokenizer: <TokenBigram>, " +
-                   "token_filters: [], " +
+                   "token_filters: [<TokenFilterStopWord>], " +
                    "normalizer: <NormalizerAuto>>",
                    table.inspect)
     end
@@ -211,6 +216,7 @@ class SchemaTest < Test::Unit::TestCase
     end
 
     def test_full_option
+      context.register_plugin("token_filters/stop_word")
       path = @tmp_dir + "double-array-trie.groonga"
       Groonga::Schema.create_table("Posts",
                                    :type => :double_array_trie,
@@ -218,6 +224,7 @@ class SchemaTest < Test::Unit::TestCase
                                    :path => path.to_s,
                                    :value_type => "Float",
                                    :default_tokenizer => "TokenBigram",
+                                   :token_filters => ["TokenFilterStopWord"],
                                    :named_path => true,
                                    :normalizer => "NormalizerAuto") do |table|
       end
@@ -232,7 +239,7 @@ class SchemaTest < Test::Unit::TestCase
                    "size: <0>, " +
                    "encoding: <#{Groonga::Encoding.default.inspect}>, " +
                    "default_tokenizer: <TokenBigram>, " +
-                   "token_filters: [], " +
+                   "token_filters: [<TokenFilterStopWord>], " +
                    "normalizer: <NormalizerAuto>>",
                    table.inspect)
     end
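
As a follow-up, a hedged sketch of how the registered stop word filter is typically exercised. Groonga's TokenFilterStopWord convention (not part of this commit) is a boolean "is_stop_word" column on the lexicon; terms flagged there are dropped at search time:

```ruby
# Assumes the "Terms" lexicon from the earlier sketch. The column name
# "is_stop_word" follows Groonga's TokenFilterStopWord convention.
Groonga::Schema.define do |schema|
  schema.change_table("Terms") do |table|
    table.boolean("is_stop_word")
  end
end

terms = Groonga::Context.default["Terms"]
# Flag "and" as a stop word; searches through an index built on this
# lexicon should then ignore it at query time.
terms.add("and", :is_stop_word => true)
```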


