[Groonga-commit] groonga/groonga at 382ff07 [default-weight-1] Change the default internal weight to 1 from 0

アーカイブの一覧に戻る

Kouhei Sutou null+****@clear*****
Tue Mar 4 23:17:18 JST 2014


Kouhei Sutou	2014-03-04 23:17:18 +0900 (Tue, 04 Mar 2014)

  New Revision: 382ff0751a06ebfaeb2a828a48648d339548b385
  https://github.com/groonga/groonga/commit/382ff0751a06ebfaeb2a828a48648d339548b385

  Message:
    Change the default internal weight to 1 from 0
    
    If the default internal weight is 0, "weight 10" produces "score
    11". It is strange.
    
    The following chunks in ii.c may be wrong:
    
        @@ -5705,7 +5705,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
                 while (grn_ii_cursor_next(ctx, c)) {
                   pos = c->post;
                   if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) {
        -            res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (pos->tf + pos->weight), op);
        +            res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->tf * pos->weight, op);
                   }
                 }
               }
    
        @@ -5926,6 +5926,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_
                       if (max - min <= max_interval) {
                         if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); }
                         noccur++;
        +                tscore += ti->p->weight; /* FIXME: Is it right? */
                         if (ti->pos == max + 1) {
                           break;
                         }
    
        @@ -5950,14 +5951,14 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_
        ...
                     }
                   }
                 }
        -        if (noccur && !rep) { res_add(ctx, s, &pi, (noccur + tscore) * weight, op); }
        +        if (noccur && !rep) { res_add(ctx, s, &pi, (noccur * tscore) * weight, op); }

  Modified files:
    lib/db.c
    lib/expr.c
    lib/ii.c
    test/command/suite/select/adjuster/multiple.expected
    test/command/suite/select/adjuster/no_factor.expected
    test/command/suite/select/adjuster/not_all_match.expected
    test/command/suite/select/adjuster/one.expected
    test/command/suite/select/match_columns/weight/forward_index.expected
    test/command/suite/select/match_columns/weight/nested_forward_index.expected
    test/command/suite/select/query/forward_index.expected

  Modified: lib/db.c (+5 -4)
===================================================================
--- lib/db.c    2014-03-03 17:51:19 +0900 (b25bb68)
+++ lib/db.c    2014-03-04 23:17:18 +0900 (5f0d428)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2009-2013 Brazil
+/* Copyright(C) 2009-2014 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -2807,7 +2807,7 @@ grn_obj_search_accessor(grn_ctx *ctx, grn_obj *obj, grn_obj *query,
           posting.rid = *record_id;
           posting.sid = 1;
           posting.pos = 0;
-          posting.weight = recinfo->score - 1;
+          posting.weight = recinfo->score;
           grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
         });
         grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op);
@@ -3976,7 +3976,7 @@ grn_vector_decode(grn_ctx *ctx, grn_obj *v, const char *data, uint32_t data_size
       GRN_B_DEC(l, p);
       vp->length = l;
       vp->offset = o;
-      vp->weight = 0;
+      vp->weight = 1;
       vp->domain = 0;
       o += l;
     }
@@ -9706,7 +9706,8 @@ set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector)
           if (!cast_failed) {
             grn_vector_add_element(ctx, &buf,
                                    GRN_TEXT_VALUE(element),
-                                   GRN_TEXT_LEN(element), 0,
+                                   GRN_TEXT_LEN(element),
+                                   1,
                                    element->header.domain);
           }
           if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); }

  Modified: lib/expr.c (+4 -4)
===================================================================
--- lib/expr.c    2014-03-03 17:51:19 +0900 (46baf69)
+++ lib/expr.c    2014-03-04 23:17:18 +0900 (a717451)
@@ -4322,7 +4322,7 @@ scan_info_build_find_index_column_inverted_index(grn_ctx *ctx,
   uint32_t offset = 0;
   grn_obj *index;
   int sid = 0;
-  int32_t weight = 0;
+  int32_t weight = 1;
 
   index = ec->value;
   if (n_rest_codes > 2 &&
@@ -4968,7 +4968,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
           grn_ii_posting posting;
           posting.sid = 1;
           posting.pos = 0;
-          posting.weight = 0;
+          posting.weight = 1;
           switch (a->action) {
           case GRN_ACCESSOR_GET_ID :
             GRN_UINT32_INIT(&dest, 0);
@@ -5043,7 +5043,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
           grn_ii_posting posting;
           posting.sid = 1;
           posting.pos = 0;
-          posting.weight = 0;
+          posting.weight = 1;
           switch (a->action) {
           case GRN_ACCESSOR_GET_ID :
             /* todo */
@@ -6935,7 +6935,7 @@ grn_column_filter(grn_ctx *ctx, grn_obj *column,
   uint32_t value_ = grn_atoui(GRN_TEXT_VALUE(value), GRN_BULK_CURR(value), NULL);
   posting.sid = 1;
   posting.pos = 0;
-  posting.weight = 0;
+  posting.weight = 1;
   GRN_COLUMN_EACH(ctx, column, id, vp, {
     if (*vp < value_) {
       posting.rid = id;

  Modified: lib/ii.c (+20 -18)
===================================================================
--- lib/ii.c    2014-03-03 17:51:19 +0900 (05415cc)
+++ lib/ii.c    2014-03-04 23:17:18 +0900 (46236dc)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2009-2012 Brazil
+/* Copyright(C) 2009-2014 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -3754,7 +3754,7 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_h
           u2.sid = 1;
         }
         u2.tf = 1;
-        u2.weight = 0;
+        u2.weight = 1;
         if (u2.rid != u->rid || u2.sid != u->sid) {
           uint8_t *bs2 = encode_rec(ctx, ii, &u2, &size2, 0);
           if (!bs2) {
@@ -3785,7 +3785,7 @@ grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_h
     break;
   }
   if (!br) {
-    if (u->tf == 1 && u->weight == 0) {
+    if (u->tf == 1 && u->weight == 1) {
       if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
         if (u->rid < 0x100000 && u->sid < 0x800) {
           a[0] = (u->rid << 12) + (u->sid << 1) + 1;
@@ -4024,7 +4024,7 @@ grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid,
         c->pb.sid = 1;
       }
       c->pb.tf = 1;
-      c->pb.weight = 0;
+      c->pb.weight = 1;
       c->pb.pos = a[1];
     } else {
       uint32_t chunk;
@@ -4111,7 +4111,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
             if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
               c->pc.weight = *c->cwp++;
             } else {
-              c->pc.weight = 0;
+              c->pc.weight = 1;
             }
             c->pc.pos = 0;
             /*
@@ -4217,7 +4217,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
           if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
             GRN_B_DEC(c->pb.weight, c->bp);
           } else {
-            c->pb.weight = 0;
+            c->pb.weight = 1;
           }
           c->pb.rest = c->pb.tf;
           c->pb.pos = 0;
@@ -4918,7 +4918,7 @@ grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         return GRN_NO_MEMORY_AVAILABLE;
       }
     }
-    if (grn_ii_updspec_add(ctx, *u, j, 0)) {
+    if (grn_ii_updspec_add(ctx, *u, j, 1)) {
       GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!");
       return GRN_NO_MEMORY_AVAILABLE;
     }
@@ -4958,7 +4958,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_OBJ_INIT(&newv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
         newv.u.v.body = new;
         new = &newv;
-        grn_vector_delimit(ctx, new, 0, GRN_ID_NIL);
+        grn_vector_delimit(ctx, new, 1, GRN_ID_NIL);
         if (new_ != newvalue) { grn_obj_close(ctx, new_); }
       }
       /* fallthru */
@@ -5040,7 +5040,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_OBJ_INIT(&oldv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
         oldv.u.v.body = old;
         old = &oldv;
-        grn_vector_delimit(ctx, old, 0, GRN_ID_NIL);
+        grn_vector_delimit(ctx, old, 1, GRN_ID_NIL);
         if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
       }
       /* fallthru */
@@ -5455,7 +5455,7 @@ res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, uint32_t score,
 grn_rc
 grn_ii_posting_add(grn_ctx *ctx, grn_ii_posting *pos, grn_hash *s, grn_operator op)
 {
-  res_add(ctx, s, (grn_rset_posinfo *)(pos), (1 + pos->weight), op);
+  res_add(ctx, s, (grn_rset_posinfo *)(pos), pos->weight, op);
   return ctx->rc;
 }
 
@@ -5697,7 +5697,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
           pos = c->post;
           if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) {
             while (grn_ii_cursor_next_pos(ctx, c)) {
-              res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (1 + pos->weight), op);
+              res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->weight, op);
             }
           }
         }
@@ -5705,7 +5705,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
         while (grn_ii_cursor_next(ctx, c)) {
           pos = c->post;
           if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) {
-            res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (pos->tf + pos->weight), op);
+            res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * pos->tf * pos->weight, op);
           }
         }
       }
@@ -5926,6 +5926,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_
               if (max - min <= max_interval) {
                 if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); }
                 noccur++;
+                tscore += ti->p->weight; /* FIXME: Is it right? */
                 if (ti->pos == max + 1) {
                   break;
                 }
@@ -5950,14 +5951,15 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_
               score = ti->p->weight; count = 1; pos = ti->pos;
             }
             if (count == n) {
-              if (rep) { pi.pos = pos; res_add(ctx, s, &pi, (score + 1) * weight, op); }
-              tscore += score;
+              if (rep) { pi.pos = pos; res_add(ctx, s, &pi, score * weight, op); }
+              tscore += score / count;
               score = 0; count = 0; pos++;
               noccur++;
             }
           }
+          tscore /= noccur;
         }
-        if (noccur && !rep) { res_add(ctx, s, &pi, (noccur + tscore) * weight, op); }
+        if (noccur && !rep) { res_add(ctx, s, &pi, (noccur * tscore) * weight, op); }
 #undef SKIP_OR_BREAK
       }
     }
@@ -6049,7 +6051,7 @@ grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op)
   if ((c = grn_ii_cursor_open(ctx, ii, id, GRN_ID_NIL, GRN_ID_MAX,
                               rep ? ii->n_elements : ii->n_elements - 1, 0))) {
     while ((pos = grn_ii_cursor_next(ctx, c))) {
-      res_add(ctx, s, (grn_rset_posinfo *) pos, (1 + pos->weight), op);
+      res_add(ctx, s, (grn_rset_posinfo *) pos, pos->weight, op);
     }
     grn_ii_cursor_close(ctx, c);
   }
@@ -6100,7 +6102,7 @@ grn_ii_cursor_next_all(grn_ctx *ctx, grn_ii_cursor *c)
             if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
               c->pc.weight = *c->cwp++;
             } else {
-              c->pc.weight = 0;
+              c->pc.weight = 1;
             }
             c->pc.pos = 0;
             /*
@@ -6198,7 +6200,7 @@ grn_ii_cursor_next_all(grn_ctx *ctx, grn_ii_cursor *c)
           if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
             GRN_B_DEC(c->pb.weight, c->bp);
           } else {
-            c->pb.weight = 0;
+            c->pb.weight = 1;
           }
           c->pb.rest = c->pb.tf;
           c->pb.pos = 0;

  Modified: test/command/suite/select/adjuster/multiple.expected (+3 -3)
===================================================================
--- test/command/suite/select/adjuster/multiple.expected    2014-03-03 17:51:19 +0900 (0f8ce4c)
+++ test/command/suite/select/adjuster/multiple.expected    2014-03-04 23:17:18 +0900 (e2268e9)
@@ -53,15 +53,15 @@ select Memos   --filter true   --adjuster 'tags @ "groonga" * 4 + tags @ "mroong
       ],
       [
         "Groonga is fast",
-        405
+        401
       ],
       [
         "Mroonga is also fast",
-        348
+        341
       ],
       [
         "Ruby is an object oriented script language",
-        203
+        201
       ]
     ]
   ]

  Modified: test/command/suite/select/adjuster/no_factor.expected (+2 -2)
===================================================================
--- test/command/suite/select/adjuster/no_factor.expected    2014-03-03 17:51:19 +0900 (afe57c5)
+++ test/command/suite/select/adjuster/no_factor.expected    2014-03-04 23:17:18 +0900 (33b46a7)
@@ -53,11 +53,11 @@ select Memos   --filter true   --adjuster 'tags @ "groonga" + tags @ "mroonga"'
       ],
       [
         "Groonga is fast",
-        102
+        101
       ],
       [
         "Mroonga is also fast",
-        113
+        111
       ],
       [
         "Ruby is an object oriented script language",

  Modified: test/command/suite/select/adjuster/not_all_match.expected (+2 -2)
===================================================================
--- test/command/suite/select/adjuster/not_all_match.expected    2014-03-03 17:51:19 +0900 (d2862a4)
+++ test/command/suite/select/adjuster/not_all_match.expected    2014-03-04 23:17:18 +0900 (a3f4e64)
@@ -56,11 +56,11 @@ select Memos   --filter '_id != 1'   --adjuster 'tags @ "groonga" * 1'   --outpu
       ],
       [
         "Groonga is fast",
-        102
+        101
       ],
       [
         "Mroonga is also fast",
-        12
+        11
       ],
       [
         "Ruby is an object oriented script language",

  Modified: test/command/suite/select/adjuster/one.expected (+2 -2)
===================================================================
--- test/command/suite/select/adjuster/one.expected    2014-03-03 17:51:19 +0900 (fcb0f17)
+++ test/command/suite/select/adjuster/one.expected    2014-03-04 23:17:18 +0900 (8350c90)
@@ -53,11 +53,11 @@ select Memos   --filter true   --adjuster 'tags @ "groonga" * 2'   --output_colu
       ],
       [
         "Groonga is fast",
-        203
+        201
       ],
       [
         "Mroonga is also fast",
-        23
+        21
       ],
       [
         "Ruby is an object oriented script language",

  Modified: test/command/suite/select/match_columns/weight/forward_index.expected (+2 -2)
===================================================================
--- test/command/suite/select/match_columns/weight/forward_index.expected    2014-03-03 17:51:19 +0900 (062b92a)
+++ test/command/suite/select/match_columns/weight/forward_index.expected    2014-03-04 23:17:18 +0900 (06454db)
@@ -53,11 +53,11 @@ select Memos   --match_columns 'tags * 10'   --query groonga   --output_columns
       ],
       [
         "Groonga is fast",
-        1010
+        1000
       ],
       [
         "Mroonga is also fast",
-        110
+        100
       ]
     ]
   ]

  Modified: test/command/suite/select/match_columns/weight/nested_forward_index.expected (+3 -3)
===================================================================
--- test/command/suite/select/match_columns/weight/nested_forward_index.expected    2014-03-03 17:51:19 +0900 (9d88186)
+++ test/command/suite/select/match_columns/weight/nested_forward_index.expected    2014-03-04 23:17:18 +0900 (0e8f281)
@@ -97,15 +97,15 @@ select Programmers   --match_columns 'products.tags * 10'   --query groonga   --
       ],
       [
         "daijiro",
-        1520
+        1500
       ],
       [
         "kou",
-        1630
+        1600
       ],
       [
         "maruyama",
-        510
+        500
       ]
     ]
   ]

  Modified: test/command/suite/select/query/forward_index.expected (+2 -2)
===================================================================
--- test/command/suite/select/query/forward_index.expected    2014-03-03 17:51:19 +0900 (5e79fc4)
+++ test/command/suite/select/query/forward_index.expected    2014-03-04 23:17:18 +0900 (9c10f29)
@@ -53,11 +53,11 @@ select Memos   --match_columns tags   --query groonga   --output_columns _key,_s
       ],
       [
         "Groonga is fast",
-        101
+        100
       ],
       [
         "Mroonga is also fast",
-        11
+        10
       ]
     ]
   ]
-------------- next part --------------
HTMLの添付ファイルを保管しました...
ダウンロード 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る