Kouhei Sutou
null+****@clear*****
Thu Jun 21 16:49:06 JST 2018
Kouhei Sutou	2018-06-21 16:49:06 +0900 (Thu, 21 Jun 2018)

  New Revision: 97071c6172aee5f09f5d890ff0ae7170e0b15566
  https://github.com/groonga/groonga/commit/97071c6172aee5f09f5d890ff0ae7170e0b15566

  Message:
    Add encoding converter

    We use the following rules:

      * grn_ctx::errbuf: grn_encoding
      * grn_logger_put: grn_encoding
      * mruby: UTF-8
      * path: locale

  Note: the patch text below has been re-formatted, documented and
  corrected in review; it is not byte-identical to the commit above.

  Added files:
    lib/encoding.c
    lib/grn_encoding.h
  Modified files:
    lib/c_sources.am

  Modified: lib/c_sources.am (+2 -0)
===================================================================
--- lib/c_sources.am    2018-06-21 16:22:58 +0900 (62c888600)
+++ lib/c_sources.am    2018-06-21 16:49:06 +0900 (ee601829f)
@@ -20,6 +20,8 @@ libgroonga_c_sources =			\
 	db.c				\
 	grn_db.h			\
 	dump.c				\
+	encoding.c			\
+	grn_encoding.h			\
 	ts.c				\
 	grn_ts.h			\
 	type.c				\

  Added: lib/encoding.c (+367 -0) 100644
===================================================================
--- /dev/null
+++ lib/encoding.c    2018-06-21 16:49:06 +0900 (e3d040928)
@@ -0,0 +1,367 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <stdio.h>
+
+#include "grn.h"
+#include "grn_ctx.h"
+#include "grn_encoding.h"
+#include "grn_windows.h"
+
+#include <string.h>
+
+#ifdef WIN32
+/*
+ * Converts from_string from from_code_page to to_code_page by way of
+ * UTF-16.
+ *
+ * A negative from_string_size means that from_string is NUL-terminated.
+ * context is put at the head of error messages.
+ *
+ * Returns a newly allocated NUL-terminated string. If both code pages
+ * are the same, from_string is empty or the conversion fails, the
+ * returned string is a copy of from_string. NULL is returned only when
+ * the buffer for that copy can't be allocated. If converted_string_size
+ * isn't NULL, the size of the returned string in bytes (the trailing
+ * NUL isn't counted) is stored to it.
+ */
+static const char *
+grn_encoding_convert(grn_ctx *ctx,
+                     const char *context,
+                     UINT from_code_page,
+                     UINT to_code_page,
+                     const char *from_string,
+                     ssize_t from_string_size,
+                     size_t *converted_string_size)
+{
+  char *converted_string = NULL;
+
+  if (from_string_size < 0) {
+    from_string_size = strlen(from_string);
+  }
+
+  if (from_code_page == to_code_page) {
+    goto exit;
+  }
+
+  /* MultiByteToWideChar() fails for 0 byte input. An empty string needs
+     no conversion, so it isn't treated as an error. */
+  if (from_string_size == 0) {
+    goto exit;
+  }
+
+  {
+    WCHAR *utf16_string;
+    DWORD n_utf16_chars;
+    size_t converted_string_size_;
+
+    n_utf16_chars = MultiByteToWideChar(from_code_page,
+                                        0,
+                                        from_string,
+                                        from_string_size,
+                                        NULL,
+                                        0);
+    if (n_utf16_chars == 0) {
+      SERR("%s failed to estimate the number of UTF-16 characters",
+           context);
+      goto exit;
+    }
+    utf16_string = GRN_MALLOCN(WCHAR, n_utf16_chars);
+    if (!utf16_string) {
+      goto exit;
+    }
+    n_utf16_chars = MultiByteToWideChar(from_code_page,
+                                        0,
+                                        from_string,
+                                        from_string_size,
+                                        utf16_string,
+                                        n_utf16_chars);
+    if (n_utf16_chars == 0) {
+      SERR("%s failed to convert to UTF-16 characters",
+           context);
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+
+    converted_string_size_ = WideCharToMultiByte(to_code_page,
+                                                 0,
+                                                 utf16_string,
+                                                 n_utf16_chars,
+                                                 NULL,
+                                                 0,
+                                                 NULL,
+                                                 NULL);
+    if (converted_string_size_ == 0) {
+      SERR("%s failed to estimate required buffer size for converted string",
+           context);
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+
+    /* "+ 1" is for the trailing NUL. */
+    converted_string = GRN_MALLOCN(char, converted_string_size_ + 1);
+    if (!converted_string) {
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+    converted_string_size_ = WideCharToMultiByte(to_code_page,
+                                                 0,
+                                                 utf16_string,
+                                                 n_utf16_chars,
+                                                 converted_string,
+                                                 converted_string_size_,
+                                                 NULL,
+                                                 NULL);
+    GRN_FREE(utf16_string);
+    if (converted_string_size_ == 0) {
+      SERR("%s failed to convert from UTF-16 characters",
+           context);
+      GRN_FREE(converted_string);
+      converted_string = NULL;
+      goto exit;
+    }
+    converted_string[converted_string_size_] = '\0';
+    if (converted_string_size) {
+      *converted_string_size = converted_string_size_;
+    }
+  }
+
+exit :
+  /* Nothing was converted: return a copy of the input instead. */
+  if (!converted_string) {
+    converted_string = GRN_MALLOCN(char, from_string_size + 1);
+    if (converted_string) {
+      grn_memcpy(converted_string, from_string, from_string_size);
+      converted_string[from_string_size] = '\0';
+      if (converted_string_size) {
+        *converted_string_size = from_string_size;
+      }
+    } else {
+      ERR(ctx->rc,
+          "%s failed to allocate a buffer for converted string",
+          context);
+      if (converted_string_size) {
+        *converted_string_size = 0;
+      }
+    }
+  }
+
+  return converted_string;
+}
+
+/* Converts a string in ctx->encoding to the locale encoding (CP_ACP). */
+const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][grn->locale]",
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              CP_ACP,
+                              grn_encoding_string,
+                              grn_encoding_string_size,
+                              converted_string_size);
+}
+
+/* Converts a string in ctx->encoding to UTF-8 (CP_UTF8). */
+const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][grn->utf8]",
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              CP_UTF8,
+                              grn_encoding_string,
+                              grn_encoding_string_size,
+                              converted_string_size);
+}
+
+/* Converts a string in the locale encoding (CP_ACP) to ctx->encoding. */
+const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][locale->grn]",
+                              CP_ACP,
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              locale_string,
+                              locale_string_size,
+                              converted_string_size);
+}
+
+/* Converts a UTF-8 (CP_UTF8) string to ctx->encoding. */
+const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][utf8->grn]",
+                              CP_UTF8,
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              utf8_string,
+                              utf8_string_size,
+                              converted_string_size);
+}
+
+/* Converts a string in the locale encoding (CP_ACP) to UTF-8 (CP_UTF8). */
+const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][locale->utf8]",
+                              CP_ACP,
+                              CP_UTF8,
+                              locale_string,
+                              locale_string_size,
+                              converted_string_size);
+}
+
+/* Converts a UTF-8 (CP_UTF8) string to the locale encoding (CP_ACP). */
+const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size)
+{
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][utf8->locale]",
+                              CP_UTF8,
+                              CP_ACP,
+                              utf8_string,
+                              utf8_string_size,
+                              converted_string_size);
+}
+
+/* Frees a string returned by the grn_encoding_convert_*() functions. */
+void
+grn_encoding_converted_free(grn_ctx *ctx, const char *converted_string)
+{
+  GRN_FREE((char *)converted_string);
+}
+#else /* WIN32 */
+/*
+ * No conversion is done without WIN32: returns string itself and stores
+ * its size in bytes to *converted_string_size when it isn't NULL. A
+ * negative string_size means that string is NUL-terminated.
+ */
+static const char *
+grn_encoding_convert_none(const char *string,
+                          ssize_t string_size,
+                          size_t *converted_string_size)
+{
+  if (converted_string_size) {
+    if (string_size < 0) {
+      *converted_string_size = strlen(string);
+    } else {
+      *converted_string_size = string_size;
+    }
+  }
+  return string;
+}
+
+/* Returns grn_encoding_string as is. */
+const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(grn_encoding_string,
+                                   grn_encoding_string_size,
+                                   converted_string_size);
+}
+
+/* Returns grn_encoding_string as is. */
+const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(grn_encoding_string,
+                                   grn_encoding_string_size,
+                                   converted_string_size);
+}
+
+/* Returns locale_string as is. */
+const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(locale_string,
+                                   locale_string_size,
+                                   converted_string_size);
+}
+
+/* Returns utf8_string as is. */
+const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(utf8_string,
+                                   utf8_string_size,
+                                   converted_string_size);
+}
+
+/* Returns locale_string as is. */
+const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(locale_string,
+                                   locale_string_size,
+                                   converted_string_size);
+}
+
+/* Returns utf8_string as is. */
+const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size)
+{
+  return grn_encoding_convert_none(utf8_string,
+                                   utf8_string_size,
+                                   converted_string_size);
+}
+
+/* Does nothing: without WIN32 converted strings are never allocated. */
+void
+grn_encoding_converted_free(grn_ctx *ctx, const char *converted_string)
+{
+}
+#endif /* WIN32 */

  Added: lib/grn_encoding.h (+78 -0) 100644
===================================================================
--- /dev/null
+++ lib/grn_encoding.h    2018-06-21 16:49:06 +0900 (0788d52de)
@@ -0,0 +1,78 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#include "grn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * grn_ctx::errbuf: grn_encoding
+ * grn_logger_put: grn_encoding
+ * mruby: UTF-8
+ * path: locale
+ */
+
+/*
+ * Every grn_encoding_convert_*() function accepts a negative size for
+ * a NUL-terminated string and stores the size of the returned string
+ * in bytes to *converted_string_size when it isn't NULL. The returned
+ * string must be passed to grn_encoding_converted_free() after use. It
+ * may be the passed string itself.
+ */
+
+GRN_API const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size);
+GRN_API const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size);
+GRN_API const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size);
+GRN_API const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size);
+GRN_API const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size);
+GRN_API const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size);
+GRN_API void
+grn_encoding_converted_free(grn_ctx *ctx,
+                            const char *converted_string);
+
+#ifdef __cplusplus
+}
+#endif
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180621/51439970/attachment-0001.htm