[Groonga-commit] groonga/groonga at 4769e34 [master] Fix a bug that estimating size by regexp query with anchor doesn't work

アーカイブの一覧に戻る

Kouhei Sutou null+****@clear*****
Thu Jul 9 23:28:21 JST 2015


Kouhei Sutou	2015-07-09 23:28:21 +0900 (Thu, 09 Jul 2015)

  New Revision: 4769e34d8564464620e1daf396e5a8f055e5e7cd
  https://github.com/groonga/groonga/commit/4769e34d8564464620e1daf396e5a8f055e5e7cd

  Message:
    Fix a bug that estimating size by regexp query with anchor doesn't work

  Modified files:
    lib/ii.c

  Modified: lib/ii.c (+75 -13)
===================================================================
--- lib/ii.c    2015-07-09 23:04:09 +0900 (388e056)
+++ lib/ii.c    2015-07-09 23:28:21 +0900 (391d3c3)
@@ -6060,18 +6060,16 @@ grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
 }
 
 static grn_rc
-grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
-                     const char *string, unsigned int string_len,
-                     grn_hash *s, grn_operator op, grn_select_optarg *optarg)
+grn_ii_parse_regexp_query(grn_ctx *ctx,
+                          const char *log_tag,
+                          const char *string, unsigned int string_len,
+                          grn_obj *parsed_string)
 {
-  grn_rc rc;
-  grn_obj parsed_string;
   grn_bool escaping = GRN_FALSE;
   int nth_char = 0;
   const char *current = string;
   const char *string_end = string + string_len;
 
-  GRN_TEXT_INIT(&parsed_string, 0);
   while (current < string_end) {
     const char *target;
     int char_len;
@@ -6079,7 +6077,8 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
     char_len = grn_charlen(ctx, current, string_end);
     if (char_len == 0) {
       ERR(GRN_INVALID_ARGUMENT,
-          "[ii][select][regexp] invalid encoding character: <%.*s|%#x|>",
+          "%s invalid encoding character: <%.*s|%#x|>",
+          log_tag,
           (int)(current - string), string,
           *current);
       return ctx->rc;
@@ -6114,10 +6113,29 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
       }
     }
 
-    GRN_TEXT_PUT(ctx, &parsed_string, target, char_len);
+    GRN_TEXT_PUT(ctx, parsed_string, target, char_len);
     nth_char++;
   }
 
+  return GRN_SUCCESS;
+}
+
+static grn_rc
+grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
+                     const char *string, unsigned int string_len,
+                     grn_hash *s, grn_operator op, grn_select_optarg *optarg)
+{
+  grn_rc rc;
+  grn_obj parsed_string;
+
+  GRN_TEXT_INIT(&parsed_string, 0);
+  rc = grn_ii_parse_regexp_query(ctx, "[ii][select][regexp]",
+                                 string, string_len, &parsed_string);
+  if (rc != GRN_SUCCESS) {
+    GRN_OBJ_FIN(ctx, &parsed_string);
+    return rc;
+  }
+
   if (optarg) {
     optarg->mode = GRN_OP_MATCH;
   }
@@ -6126,6 +6144,11 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
                      GRN_TEXT_VALUE(&parsed_string),
                      GRN_TEXT_LEN(&parsed_string),
                      s, op, optarg);
+  GRN_OBJ_FIN(ctx, &parsed_string);
+
+  if (optarg) {
+    optarg->mode = GRN_OP_REGEXP;
+  }
 
   return rc;
 }
@@ -6582,6 +6605,40 @@ exit :
   return rc;
 }
 
+static uint32_t
+grn_ii_estimate_size_for_query_regexp(grn_ctx *ctx, grn_ii *ii,
+                                      const char *query, unsigned int query_len,
+                                      grn_search_optarg *optarg)
+{
+  grn_rc rc;
+  grn_obj parsed_query;
+  uint32_t size;
+
+  GRN_TEXT_INIT(&parsed_query, 0);
+  rc = grn_ii_parse_regexp_query(ctx, "[ii][estimate-size][query][regexp]",
+                                 query, query_len, &parsed_query);
+  if (rc != GRN_SUCCESS) {
+    GRN_OBJ_FIN(ctx, &parsed_query);
+    return 0;
+  }
+
+  if (optarg) {
+    optarg->mode = GRN_OP_MATCH;
+  }
+
+  size = grn_ii_estimate_size_for_query(ctx, ii,
+                                        GRN_TEXT_VALUE(&parsed_query),
+                                        GRN_TEXT_LEN(&parsed_query),
+                                        optarg);
+  GRN_OBJ_FIN(ctx, &parsed_query);
+
+  if (optarg) {
+    optarg->mode = GRN_OP_REGEXP;
+  }
+
+  return size;
+}
+
 uint32_t
 grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
                                const char *query, unsigned int query_len,
@@ -6600,11 +6657,6 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
     return 0;
   }
 
-  tis = GRN_MALLOC(sizeof(token_info *) * query_len * 2);
-  if (!tis) {
-    return 0;
-  }
-
   if (optarg) {
     switch (optarg->mode) {
     case GRN_OP_NEAR :
@@ -6622,6 +6674,16 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
     }
   }
 
+  if (mode == GRN_OP_REGEXP) {
+    return grn_ii_estimate_size_for_query_regexp(ctx, ii, query, query_len,
+                                                 optarg);
+  }
+
+  tis = GRN_MALLOC(sizeof(token_info *) * query_len * 2);
+  if (!tis) {
+    return 0;
+  }
+
   rc = token_info_build(ctx, lexicon, ii, query, query_len,
                         tis, &n_tis, &only_skip_token, mode);
   if (rc != GRN_SUCCESS) {
-------------- next part --------------
HTMLの添付ファイルを保管しました...
ダウンロード 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る