Kouhei Sutou
null+****@clear*****
Tue Mar 4 23:17:18 JST 2014
Kouhei Sutou 2014-03-04 23:17:18 +0900 (Tue, 04 Mar 2014) New Revision: 382ff0751a06ebfaeb2a828a48648d339548b385 https://github.com/groonga/groonga/commit/382ff0751a06ebfaeb2a828a48648d339548b385 Message: Change the default internal weight to 1 from 0 If the default internal weight is 0, "weight 10" produces "score 11". It is strange. The following chunks in ii.c may be wrong: @@ -5705,7 +5705,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, while (grn_ii_cursor_next(ctx, c)) { pos = c->post; if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) { - res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (pos->tf + pos->weight), op); + res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->tf * pos->weight, op); } } } @@ -5926,6 +5926,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ if (max - min <= max_interval) { if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); } noccur++; + tscore += ti->p->weight; /* FIXME: Is it right? */ if (ti->pos == max + 1) { break; } @@ -5950,14 +5951,14 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ ... 
} } } - if (noccur && !rep) { res_add(ctx, s, &pi, (noccur + tscore) * weight, op); } + if (noccur && !rep) { res_add(ctx, s, &pi, (noccur * tscore) * weight, op); } Modified files: lib/db.c lib/expr.c lib/ii.c test/command/suite/select/adjuster/multiple.expected test/command/suite/select/adjuster/no_factor.expected test/command/suite/select/adjuster/not_all_match.expected test/command/suite/select/adjuster/one.expected test/command/suite/select/match_columns/weight/forward_index.expected test/command/suite/select/match_columns/weight/nested_forward_index.expected test/command/suite/select/query/forward_index.expected Modified: lib/db.c (+5 -4) =================================================================== --- lib/db.c 2014-03-03 17:51:19 +0900 (b25bb68) +++ lib/db.c 2014-03-04 23:17:18 +0900 (5f0d428) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2013 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -2807,7 +2807,7 @@ grn_obj_search_accessor(grn_ctx *ctx, grn_obj *obj, grn_obj *query, posting.rid = *record_id; posting.sid = 1; posting.pos = 0; - posting.weight = recinfo->score - 1; + posting.weight = recinfo->score; grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op); }); grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op); @@ -3976,7 +3976,7 @@ grn_vector_decode(grn_ctx *ctx, grn_obj *v, const char *data, uint32_t data_size GRN_B_DEC(l, p); vp->length = l; vp->offset = o; - vp->weight = 0; + vp->weight = 1; vp->domain = 0; o += l; } @@ -9706,7 +9706,8 @@ set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector) if (!cast_failed) { grn_vector_add_element(ctx, &buf, GRN_TEXT_VALUE(element), - GRN_TEXT_LEN(element), 0, + GRN_TEXT_LEN(element), + 1, element->header.domain); } if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); } Modified: lib/expr.c (+4 -4) 
=================================================================== --- lib/expr.c 2014-03-03 17:51:19 +0900 (46baf69) +++ lib/expr.c 2014-03-04 23:17:18 +0900 (a717451) @@ -4322,7 +4322,7 @@ scan_info_build_find_index_column_inverted_index(grn_ctx *ctx, uint32_t offset = 0; grn_obj *index; int sid = 0; - int32_t weight = 0; + int32_t weight = 1; index = ec->value; if (n_rest_codes > 2 && @@ -4968,7 +4968,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, grn_ii_posting posting; posting.sid = 1; posting.pos = 0; - posting.weight = 0; + posting.weight = 1; switch (a->action) { case GRN_ACCESSOR_GET_ID : GRN_UINT32_INIT(&dest, 0); @@ -5043,7 +5043,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, grn_ii_posting posting; posting.sid = 1; posting.pos = 0; - posting.weight = 0; + posting.weight = 1; switch (a->action) { case GRN_ACCESSOR_GET_ID : /* todo */ @@ -6935,7 +6935,7 @@ grn_column_filter(grn_ctx *ctx, grn_obj *column, uint32_t value_ = grn_atoui(GRN_TEXT_VALUE(value), GRN_BULK_CURR(value), NULL); posting.sid = 1; posting.pos = 0; - posting.weight = 0; + posting.weight = 1; GRN_COLUMN_EACH(ctx, column, id, vp, { if (*vp < value_) { posting.rid = id; Modified: lib/ii.c (+20 -18) =================================================================== --- lib/ii.c 2014-03-03 17:51:19 +0900 (05415cc) +++ lib/ii.c 2014-03-04 23:17:18 +0900 (46236dc) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -3754,7 +3754,7 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_h u2.sid = 1; } u2.tf = 1; - u2.weight = 0; + u2.weight = 1; if (u2.rid != u->rid || u2.sid != u->sid) { uint8_t *bs2 = encode_rec(ctx, ii, &u2, &size2, 0); if (!bs2) { @@ -3785,7 +3785,7 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, 
grn_ii_updspec *u, grn_h break; } if (!br) { - if (u->tf == 1 && u->weight == 0) { + if (u->tf == 1 && u->weight == 1) { if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { if (u->rid < 0x100000 && u->sid < 0x800) { a[0] = (u->rid << 12) + (u->sid << 1) + 1; @@ -4024,7 +4024,7 @@ grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid, c->pb.sid = 1; } c->pb.tf = 1; - c->pb.weight = 0; + c->pb.weight = 1; c->pb.pos = a[1]; } else { uint32_t chunk; @@ -4111,7 +4111,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->pc.weight = *c->cwp++; } else { - c->pc.weight = 0; + c->pc.weight = 1; } c->pc.pos = 0; /* @@ -4217,7 +4217,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_DEC(c->pb.weight, c->bp); } else { - c->pb.weight = 0; + c->pb.weight = 1; } c->pb.rest = c->pb.tf; c->pb.pos = 0; @@ -4918,7 +4918,7 @@ grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, return GRN_NO_MEMORY_AVAILABLE; } } - if (grn_ii_updspec_add(ctx, *u, j, 0)) { + if (grn_ii_updspec_add(ctx, *u, j, 1)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); return GRN_NO_MEMORY_AVAILABLE; } @@ -4958,7 +4958,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, GRN_OBJ_INIT(&newv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT); newv.u.v.body = new; new = &newv; - grn_vector_delimit(ctx, new, 0, GRN_ID_NIL); + grn_vector_delimit(ctx, new, 1, GRN_ID_NIL); if (new_ != newvalue) { grn_obj_close(ctx, new_); } } /* fallthru */ @@ -5040,7 +5040,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, GRN_OBJ_INIT(&oldv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT); oldv.u.v.body = old; old = &oldv; - grn_vector_delimit(ctx, old, 0, GRN_ID_NIL); + grn_vector_delimit(ctx, old, 1, GRN_ID_NIL); if (old_ != oldvalue) { grn_obj_close(ctx, old_); } } /* fallthru */ @@ 
-5455,7 +5455,7 @@ res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, uint32_t score, grn_rc grn_ii_posting_add(grn_ctx *ctx, grn_ii_posting *pos, grn_hash *s, grn_operator op) { - res_add(ctx, s, (grn_rset_posinfo *)(pos), (1 + pos->weight), op); + res_add(ctx, s, (grn_rset_posinfo *)(pos), pos->weight, op); return ctx->rc; } @@ -5697,7 +5697,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, pos = c->post; if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) { while (grn_ii_cursor_next_pos(ctx, c)) { - res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (1 + pos->weight), op); + res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->weight, op); } } } @@ -5705,7 +5705,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, while (grn_ii_cursor_next(ctx, c)) { pos = c->post; if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) { - res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (pos->tf + pos->weight), op); + res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->tf * pos->weight, op); } } } @@ -5926,6 +5926,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ if (max - min <= max_interval) { if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); } noccur++; + tscore += ti->p->weight; /* FIXME: Is it right? 
*/ if (ti->pos == max + 1) { break; } @@ -5950,14 +5951,15 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ score = ti->p->weight; count = 1; pos = ti->pos; } if (count == n) { - if (rep) { pi.pos = pos; res_add(ctx, s, &pi, (score + 1) * weight, op); } - tscore += score; + if (rep) { pi.pos = pos; res_add(ctx, s, &pi, score * weight, op); } + tscore += score / count; score = 0; count = 0; pos++; noccur++; } } + tscore /= noccur; } - if (noccur && !rep) { res_add(ctx, s, &pi, (noccur + tscore) * weight, op); } + if (noccur && !rep) { res_add(ctx, s, &pi, (noccur * tscore) * weight, op); } #undef SKIP_OR_BREAK } } @@ -6049,7 +6051,7 @@ grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op) if ((c = grn_ii_cursor_open(ctx, ii, id, GRN_ID_NIL, GRN_ID_MAX, rep ? ii->n_elements : ii->n_elements - 1, 0))) { while ((pos = grn_ii_cursor_next(ctx, c))) { - res_add(ctx, s, (grn_rset_posinfo *) pos, (1 + pos->weight), op); + res_add(ctx, s, (grn_rset_posinfo *) pos, pos->weight, op); } grn_ii_cursor_close(ctx, c); } @@ -6100,7 +6102,7 @@ grn_ii_cursor_next_all(grn_ctx *ctx, grn_ii_cursor *c) if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->pc.weight = *c->cwp++; } else { - c->pc.weight = 0; + c->pc.weight = 1; } c->pc.pos = 0; /* @@ -6198,7 +6200,7 @@ grn_ii_cursor_next_all(grn_ctx *ctx, grn_ii_cursor *c) if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_DEC(c->pb.weight, c->bp); } else { - c->pb.weight = 0; + c->pb.weight = 1; } c->pb.rest = c->pb.tf; c->pb.pos = 0; Modified: test/command/suite/select/adjuster/multiple.expected (+3 -3) =================================================================== --- test/command/suite/select/adjuster/multiple.expected 2014-03-03 17:51:19 +0900 (0f8ce4c) +++ test/command/suite/select/adjuster/multiple.expected 2014-03-04 23:17:18 +0900 (e2268e9) @@ -53,15 +53,15 @@ select Memos --filter true --adjuster 'tags @ "groonga" * 4 + tags @ "mroong ], [ "Groonga is fast", - 
405 + 401 ], [ "Mroonga is also fast", - 348 + 341 ], [ "Ruby is an object oriented script language", - 203 + 201 ] ] ] Modified: test/command/suite/select/adjuster/no_factor.expected (+2 -2) =================================================================== --- test/command/suite/select/adjuster/no_factor.expected 2014-03-03 17:51:19 +0900 (afe57c5) +++ test/command/suite/select/adjuster/no_factor.expected 2014-03-04 23:17:18 +0900 (33b46a7) @@ -53,11 +53,11 @@ select Memos --filter true --adjuster 'tags @ "groonga" + tags @ "mroonga"' ], [ "Groonga is fast", - 102 + 101 ], [ "Mroonga is also fast", - 113 + 111 ], [ "Ruby is an object oriented script language", Modified: test/command/suite/select/adjuster/not_all_match.expected (+2 -2) =================================================================== --- test/command/suite/select/adjuster/not_all_match.expected 2014-03-03 17:51:19 +0900 (d2862a4) +++ test/command/suite/select/adjuster/not_all_match.expected 2014-03-04 23:17:18 +0900 (a3f4e64) @@ -56,11 +56,11 @@ select Memos --filter '_id != 1' --adjuster 'tags @ "groonga" * 1' --outpu ], [ "Groonga is fast", - 102 + 101 ], [ "Mroonga is also fast", - 12 + 11 ], [ "Ruby is an object oriented script language", Modified: test/command/suite/select/adjuster/one.expected (+2 -2) =================================================================== --- test/command/suite/select/adjuster/one.expected 2014-03-03 17:51:19 +0900 (fcb0f17) +++ test/command/suite/select/adjuster/one.expected 2014-03-04 23:17:18 +0900 (8350c90) @@ -53,11 +53,11 @@ select Memos --filter true --adjuster 'tags @ "groonga" * 2' --output_colu ], [ "Groonga is fast", - 203 + 201 ], [ "Mroonga is also fast", - 23 + 21 ], [ "Ruby is an object oriented script language", Modified: test/command/suite/select/match_columns/weight/forward_index.expected (+2 -2) =================================================================== --- test/command/suite/select/match_columns/weight/forward_index.expected 
2014-03-03 17:51:19 +0900 (062b92a) +++ test/command/suite/select/match_columns/weight/forward_index.expected 2014-03-04 23:17:18 +0900 (06454db) @@ -53,11 +53,11 @@ select Memos --match_columns 'tags * 10' --query groonga --output_columns ], [ "Groonga is fast", - 1010 + 1000 ], [ "Mroonga is also fast", - 110 + 100 ] ] ] Modified: test/command/suite/select/match_columns/weight/nested_forward_index.expected (+3 -3) =================================================================== --- test/command/suite/select/match_columns/weight/nested_forward_index.expected 2014-03-03 17:51:19 +0900 (9d88186) +++ test/command/suite/select/match_columns/weight/nested_forward_index.expected 2014-03-04 23:17:18 +0900 (0e8f281) @@ -97,15 +97,15 @@ select Programmers --match_columns 'products.tags * 10' --query groonga -- ], [ "daijiro", - 1520 + 1500 ], [ "kou", - 1630 + 1600 ], [ "maruyama", - 510 + 500 ] ] ] Modified: test/command/suite/select/query/forward_index.expected (+2 -2) =================================================================== --- test/command/suite/select/query/forward_index.expected 2014-03-03 17:51:19 +0900 (5e79fc4) +++ test/command/suite/select/query/forward_index.expected 2014-03-04 23:17:18 +0900 (9c10f29) @@ -53,11 +53,11 @@ select Memos --match_columns tags --query groonga --output_columns _key,_s ], [ "Groonga is fast", - 101 + 100 ], [ "Mroonga is also fast", - 11 + 10 ] ] ] -------------- next part -------------- HTMLの添付ファイルを保管しました...ダウンロード