[Groonga-commit] groonga/groonga [master] suggest: don't learn duplicated terms for suggest

アーカイブの一覧に戻る

null+****@clear***** null+****@clear*****
2012年 3月 12日 (月) 15:36:07 JST


Kouhei Sutou	2012-03-12 15:36:07 +0900 (Mon, 12 Mar 2012)

  New Revision: 8a333f32a5767edc18d5d3c86682071941bbdfbb

  Log:
    suggest: don't learn duplicated terms for suggest

  Added files:
    test/function/suite/suggest/suggest/learn-duplicated.expected
    test/function/suite/suggest/suggest/learn-duplicated.test
  Modified files:
    plugins/suggest/suggest.c

  Modified: plugins/suggest/suggest.c (+15 -1)
===================================================================
--- plugins/suggest/suggest.c    2012-03-12 15:24:02 +0900 (8f2fb05)
+++ plugins/suggest/suggest.c    2012-03-12 15:36:07 +0900 (7147ed5)
@@ -787,6 +787,7 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
     grn_id tid;
     grn_obj *pre_item = &(learner->pre_item);
     grn_obj *post_item = learner->post_item;
+    grn_hash *token_ids = NULL;
     while ((tid = grn_token_next(ctx, token)) && tid != learner->post_item_id) {
       uint64_t key;
       int added;
@@ -801,7 +802,20 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
 	grn_obj_set_value(ctx, learner->pairs_post, pair_id,
                           post_item, GRN_OBJ_SET);
       }
-      learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+      if (!token_ids) {
+        token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+                                    GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
+      }
+      if (token_ids) {
+        int token_added;
+        grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added);
+        if (token_added) {
+          learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+        }
+      }
+    }
+    if (token_ids) {
+      grn_hash_close(ctx, token_ids);
     }
     grn_token_close(ctx, token);
   }

  Added: test/function/suite/suggest/suggest/learn-duplicated.expected (+37 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.expected    2012-03-12 15:36:07 +0900 (d50a54f)
@@ -0,0 +1,37 @@
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+[[0,0.0,0.0],2]
+suggest   --table item_query   --column kana   --types suggest   --query engine   --frequency_threshold 0   --conditional_probability_threshold 3
+[[0,0.0,0.0],{"suggest":[[0],[["_key","ShortText"],["_score","Int32"]]]}]
+suggest   --table item_query   --column kana   --types suggest   --query engine   --frequency_threshold 0   --conditional_probability_threshold 1
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "suggest": [
+      [
+        1
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "_score",
+          "Int32"
+        ]
+      ],
+      [
+        "engine engine engine",
+        2
+      ]
+    ]
+  }
+]

  Added: test/function/suite/suggest/suggest/learn-duplicated.test (+25 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.test    2012-03-12 15:36:07 +0900 (3caad5f)
@@ -0,0 +1,25 @@
+# disable-logging
+# suggest-create-dataset query
+# enable-logging
+
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 3
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 1




Groonga-commit メーリングリストの案内
アーカイブの一覧に戻る