null+****@clear*****
null+****@clear*****
2012年 3月 12日 (月) 15:36:07 JST
Kouhei Sutou 2012-03-12 15:36:07 +0900 (Mon, 12 Mar 2012)

  New Revision: 8a333f32a5767edc18d5d3c86682071941bbdfbb

  Log:
    suggest: don't learn duplicated terms for suggest

  Added files:
    test/function/suite/suggest/suggest/learn-duplicated.expected
    test/function/suite/suggest/suggest/learn-duplicated.test
  Modified files:
    plugins/suggest/suggest.c

  Modified: plugins/suggest/suggest.c (+15 -1)
===================================================================
--- plugins/suggest/suggest.c    2012-03-12 15:24:02 +0900 (8f2fb05)
+++ plugins/suggest/suggest.c    2012-03-12 15:36:07 +0900 (7147ed5)
@@ -787,6 +787,7 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
   grn_id tid;
   grn_obj *pre_item = &(learner->pre_item);
   grn_obj *post_item = learner->post_item;
+  grn_hash *token_ids = NULL;
   while ((tid = grn_token_next(ctx, token)) && tid != learner->post_item_id) {
     uint64_t key;
     int added;
@@ -801,7 +802,20 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
       grn_obj_set_value(ctx, learner->pairs_post, pair_id,
                         post_item, GRN_OBJ_SET);
     }
-    learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+    if (!token_ids) {
+      token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+                                  GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
+    }
+    if (token_ids) {
+      int token_added;
+      grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added);
+      if (token_added) {
+        learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+      }
+    }
+  }
+  if (token_ids) {
+    grn_hash_close(ctx, token_ids);
   }
   grn_token_close(ctx, token);
 }

  Added: test/function/suite/suggest/suggest/learn-duplicated.expected (+37 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.expected    2012-03-12 15:36:07 +0900 (d50a54f)
@@ -0,0 +1,37 @@
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+[[0,0.0,0.0],2]
+suggest --table item_query --column kana --types suggest --query engine --frequency_threshold 0 --conditional_probability_threshold 3
+[[0,0.0,0.0],{"suggest":[[0],[["_key","ShortText"],["_score","Int32"]]]}]
+suggest --table item_query --column kana --types suggest --query engine --frequency_threshold 0 --conditional_probability_threshold 1
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "suggest": [
+      [
+        1
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "_score",
+          "Int32"
+        ]
+      ],
+      [
+        "engine engine engine",
+        2
+      ]
+    ]
+  }
+]

  Added: test/function/suite/suggest/suggest/learn-duplicated.test (+25 -0) 100644
===================================================================
--- /dev/null
+++ test/function/suite/suggest/suggest/learn-duplicated.test    2012-03-12 15:36:07 +0900 (3caad5f)
@@ -0,0 +1,25 @@
+# disable-logging
+# suggest-create-dataset query
+# enable-logging
+
+load --table event_query --each 'suggest_preparer(_id, type, item, sequence, time, pair_query)'
+[
+{"sequence": "1", "time": 1312950803.86057, "item": "engine engine engine", "type": "submit"},
+{"sequence": "2", "time": 1312950803.96857, "item": "engine engine engine", "type": "submit"}
+]
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 3
+
+suggest \
+  --table item_query \
+  --column kana \
+  --types suggest \
+  --query engine \
+  --frequency_threshold 0 \
+  --conditional_probability_threshold 1