Kouhei Sutou
null+****@clear*****
Wed May 13 22:01:30 JST 2015
Kouhei Sutou 2015-05-13 22:01:30 +0900 (Wed, 13 May 2015) New Revision: c82646b64b538854e5757e887e5d456c88825b41 https://github.com/groonga/groonga-normalizer-mysql/commit/c82646b64b538854e5757e887e5d456c88825b41 Message: Fix a bug that full-width space isn't treated as blank character [groonga-dev,03215] Reported by Shota Mitsui. Thanks!!! Added files: test/suite/unicode_ci/remove_blank_full_width.expected test/suite/unicode_ci/remove_blank_full_width.test Modified files: normalizers/mysql.c Modified: normalizers/mysql.c (+31 -22) =================================================================== --- normalizers/mysql.c 2015-05-06 19:35:56 +0900 (c7eb102) +++ normalizers/mysql.c 2015-05-13 22:01:30 +0900 (e7961ee) @@ -364,41 +364,50 @@ normalize(grn_ctx *ctx, grn_obj *string, rest_length = original_length_in_bytes; while (rest_length > 0) { int character_length; + grn_bool custom_normalized = GRN_FALSE; + unsigned int normalized_character_length; + unsigned int previous_normalized_length_in_bytes = + normalized_length_in_bytes; + unsigned int previous_normalized_n_characters = + normalized_n_characters; character_length = grn_plugin_charlen(ctx, rest, rest_length, encoding); if (character_length == 0) { break; } - if (remove_blank_p && character_length == 1 && rest[0] == ' ') { + if (custom_normalizer) { + custom_normalized = custom_normalizer(ctx, + rest, + &character_length, + rest_length - character_length, + normalize_table, + normalized, + &normalized_character_length, + &normalized_length_in_bytes, + &normalized_n_characters); + } + if (!custom_normalized) { + normalize_character(rest, character_length, + normalize_table, normalize_table_size, + normalized, + &normalized_character_length, + &normalized_length_in_bytes, + &normalized_n_characters); + } + + if (remove_blank_p && + normalized_character_length == 1 && + normalized[previous_normalized_length_in_bytes] == ' ') { if (current_type > types) { current_type[-1] |= GRN_CHAR_BLANK; } if (current_check) 
{ current_check[0]++; } + normalized_length_in_bytes = previous_normalized_length_in_bytes; + normalized_n_characters = previous_normalized_n_characters; } else { - grn_bool custom_normalized = GRN_FALSE; - unsigned int normalized_character_length; - if (custom_normalizer) { - custom_normalized = custom_normalizer(ctx, - rest, - &character_length, - rest_length - character_length, - normalize_table, - normalized, - &normalized_character_length, - &normalized_length_in_bytes, - &normalized_n_characters); - } - if (!custom_normalized) { - normalize_character(rest, character_length, - normalize_table, normalize_table_size, - normalized, - &normalized_character_length, - &normalized_length_in_bytes, - &normalized_n_characters); - } if (current_type && normalized_character_length > 0) { char *current_normalized; current_normalized = Added: test/suite/unicode_ci/remove_blank_full_width.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci/remove_blank_full_width.expected 2015-05-13 22:01:30 +0900 (d710539) @@ -0,0 +1,4 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLUnicodeCI " a b c" REMOVE_BLANK|WITH_CHECKS +[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[2,3,4]}] Added: test/suite/unicode_ci/remove_blank_full_width.test (+3 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci/remove_blank_full_width.test 2015-05-13 22:01:30 +0900 (68b7eb8) @@ -0,0 +1,3 @@ +register normalizers/mysql + +normalize NormalizerMySQLUnicodeCI " a b c" REMOVE_BLANK|WITH_CHECKS -------------- next part -------------- HTMLの添付ファイルを保管しました... ダウンロード