Kouhei Sutou
null+****@clear*****
Fri Feb 1 14:40:07 JST 2013
Kouhei Sutou 2013-02-01 14:40:07 +0900 (Fri, 01 Feb 2013) New Revision: 337f7c79a4c4fc31c321c70b11fe3651a2f11967 https://github.com/groonga/groonga-normalizer-mysql/commit/337f7c79a4c4fc31c321c70b11fe3651a2f11967 Log: Support GRN_STRING_REMOVE_BLANK flag Added files: test/suite/remove_blank.expected test/suite/remove_blank.test Modified files: normalizers/mysql.c Modified: normalizers/mysql.c (+21 -11) =================================================================== --- normalizers/mysql.c 2013-01-31 17:04:12 +0900 (dabc60c) +++ normalizers/mysql.c 2013-02-01 14:40:07 +0900 (482b634) @@ -1584,8 +1584,12 @@ normalize(grn_ctx *ctx, grn_obj *string) unsigned int normalized_length_in_bytes = 0; unsigned int normalized_n_characters = 0; grn_encoding encoding; + int flags; + grn_bool remove_blank_p; encoding = grn_string_get_encoding(ctx, string); + flags = grn_string_get_flags(ctx, string); + remove_blank_p = flags & GRN_STRING_REMOVE_BLANK; grn_string_get_original(ctx, string, &original, &original_length_in_bytes); normalized = GRN_PLUGIN_MALLOC(ctx, original_length_in_bytes + 1); rest = original; @@ -1633,20 +1637,26 @@ normalize(grn_ctx *ctx, grn_obj *string) break; } - if (plane >= 0x00 && mysql_unicode_normalize_table[plane]) { - uint32_t normalized_code = mysql_unicode_normalize_table[plane][low_code]; - unsigned int n_bytes; - n_bytes = unichar_to_utf8(normalized_code, - normalized + normalized_length_in_bytes); - normalized_length_in_bytes += n_bytes; + if (remove_blank_p && character_length == 1 && rest[0] == ' ') { + /* TODO: set GRN_CHAR_BLANK */ } else { - int i; - for (i = 0; i < character_length; i++) { - normalized[normalized_length_in_bytes + i] = rest[i]; + if (plane >= 0x00 && mysql_unicode_normalize_table[plane]) { + uint32_t normalized_code; + unsigned int n_bytes; + normalized_code = mysql_unicode_normalize_table[plane][low_code]; + n_bytes = unichar_to_utf8(normalized_code, + normalized + normalized_length_in_bytes); + normalized_length_in_bytes 
+= n_bytes; + } else { + int i; + for (i = 0; i < character_length; i++) { + normalized[normalized_length_in_bytes + i] = rest[i]; + } + normalized_length_in_bytes += character_length; } - normalized_length_in_bytes += character_length; + normalized_n_characters++; } - normalized_n_characters++; + rest += character_length; rest_length -= character_length; } Added: test/suite/remove_blank.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/remove_blank.expected 2013-02-01 14:40:07 +0900 (024f710) @@ -0,0 +1,4 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK +[[0,0.0,0.0],"ABC"] Added: test/suite/remove_blank.test (+3 -0) 100644 =================================================================== --- /dev/null +++ test/suite/remove_blank.test 2013-02-01 14:40:07 +0900 (a070785) @@ -0,0 +1,3 @@ +register normalizers/mysql + +normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK -------------- next part -------------- HTMLの添付ファイルを保管しました... ダウンロード