[Groonga-commit] groonga/groonga-normalizer-mysql [master] Support GRN_STRING_REMOVE_BLANK flag

アーカイブの一覧に戻る

Kouhei Sutou null+****@clear*****
Fri Feb 1 14:40:07 JST 2013


Kouhei Sutou	2013-02-01 14:40:07 +0900 (Fri, 01 Feb 2013)

  New Revision: 337f7c79a4c4fc31c321c70b11fe3651a2f11967
  https://github.com/groonga/groonga-normalizer-mysql/commit/337f7c79a4c4fc31c321c70b11fe3651a2f11967

  Log:
    Support GRN_STRING_REMOVE_BLANK flag

  Added files:
    test/suite/remove_blank.expected
    test/suite/remove_blank.test
  Modified files:
    normalizers/mysql.c

  Modified: normalizers/mysql.c (+21 -11)
===================================================================
--- normalizers/mysql.c    2013-01-31 17:04:12 +0900 (dabc60c)
+++ normalizers/mysql.c    2013-02-01 14:40:07 +0900 (482b634)
@@ -1584,8 +1584,12 @@ normalize(grn_ctx *ctx, grn_obj *string)
   unsigned int normalized_length_in_bytes = 0;
   unsigned int normalized_n_characters = 0;
   grn_encoding encoding;
+  int flags;
+  grn_bool remove_blank_p;
 
   encoding = grn_string_get_encoding(ctx, string);
+  flags = grn_string_get_flags(ctx, string);
+  remove_blank_p = flags & GRN_STRING_REMOVE_BLANK;
   grn_string_get_original(ctx, string, &original, &original_length_in_bytes);
   normalized = GRN_PLUGIN_MALLOC(ctx, original_length_in_bytes + 1);
   rest = original;
@@ -1633,20 +1637,26 @@ normalize(grn_ctx *ctx, grn_obj *string)
       break;
     }
 
-    if (plane >= 0x00 && mysql_unicode_normalize_table[plane]) {
-      uint32_t normalized_code = mysql_unicode_normalize_table[plane][low_code];
-      unsigned int n_bytes;
-      n_bytes = unichar_to_utf8(normalized_code,
-                                normalized + normalized_length_in_bytes);
-      normalized_length_in_bytes += n_bytes;
+    if (remove_blank_p && character_length == 1 && rest[0] == ' ') {
+      /* TODO: set GRN_CHAR_BLANK */
     } else {
-      int i;
-      for (i = 0; i < character_length; i++) {
-        normalized[normalized_length_in_bytes + i] = rest[i];
+      if (plane >= 0x00 && mysql_unicode_normalize_table[plane]) {
+        uint32_t normalized_code;
+        unsigned int n_bytes;
+        normalized_code = mysql_unicode_normalize_table[plane][low_code];
+        n_bytes = unichar_to_utf8(normalized_code,
+                                  normalized + normalized_length_in_bytes);
+        normalized_length_in_bytes += n_bytes;
+      } else {
+        int i;
+        for (i = 0; i < character_length; i++) {
+          normalized[normalized_length_in_bytes + i] = rest[i];
+        }
+        normalized_length_in_bytes += character_length;
       }
-      normalized_length_in_bytes += character_length;
+      normalized_n_characters++;
     }
-    normalized_n_characters++;
+
     rest += character_length;
     rest_length -= character_length;
   }

  Added: test/suite/remove_blank.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/remove_blank.expected    2013-02-01 14:40:07 +0900 (024f710)
@@ -0,0 +1,4 @@
+register normalizers/mysql
+[[0,0.0,0.0],true]
+normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK
+[[0,0.0,0.0],"ABC"]

  Added: test/suite/remove_blank.test (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/remove_blank.test    2013-02-01 14:40:07 +0900 (a070785)
@@ -0,0 +1,3 @@
+register normalizers/mysql
+
+normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK
-------------- next part --------------
HTMLの添付ファイルを保管しました...
ダウンロード 



More information about the Groonga-commit mailing list
アーカイブの一覧に戻る