null+****@clear*****
null+****@clear*****
2011年 12月 9日 (金) 10:44:29 JST
Susumu Yata 2011-12-09 01:44:29 +0000 (Fri, 09 Dec 2011) New Revision: a39da9071ba08616d0b31a5c62f904f9c7c336a9 Log: fixed some bugs and added comments. Modified files: plugins/tokenizers/mecab.c Modified: plugins/tokenizers/mecab.c (+31 -10) =================================================================== --- plugins/tokenizers/mecab.c 2011-12-08 02:11:40 +0000 (4d0e9a7) +++ plugins/tokenizers/mecab.c 2011-12-09 01:44:29 +0000 (ddc8816) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2010 Brazil +/* Copyright(C) 2009-2011 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -32,6 +32,10 @@ static mecab_t *sole_mecab; static grn_critical_section sole_mecab_lock; +/* + This macro is called only once. + Why don't you put this directly? + */ #define SOLE_MECAB_CONFIRM do {\ if (!sole_mecab) {\ static char *argv[] = {"", "-Owakati"};\ @@ -49,6 +53,7 @@ static grn_critical_section sole_mecab_lock; typedef struct { grn_str *nstr; mecab_t *mecab; + /* Why these pointers are unsigned? */ unsigned char *buf; unsigned char *next; unsigned char *end; @@ -68,6 +73,12 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) grn_obj_flags table_flags; grn_mecab_tokenizer *token; unsigned int bufsize, maxtrial = 10, len; + /* + user_data->ptr should be initialized with NULL? + How an error is detected? user_data->ptr == NULL? + If mecab_next() and mecab_fin() are always called after mecab_init(), + it may cause a critical error. + */ if (!(str = grn_ctx_pop(ctx))) { ERR(GRN_INVALID_ARGUMENT, "missing argument"); return NULL; @@ -80,9 +91,7 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) return NULL; } if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return NULL; } - user_data->ptr = token; token->mecab = sole_mecab; - // if (!(token->mecab = mecab_new3())) { grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL); nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE); if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str), @@ -93,8 +102,9 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) } len = token->nstr->norm_blen; for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) { - if(!(buf = GRN_MALLOC(bufsize + 1))) { + if (!(buf = GRN_MALLOC(bufsize + 1))) { GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !"); + grn_str_close(ctx, token->nstr); GRN_FREE(token); return NULL; } @@ -110,15 +120,17 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) } if (!s) { ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s", - len, bufsize, mecab_err); + len, bufsize, mecab_err); + grn_str_close(ctx, token->nstr); GRN_FREE(token); return NULL; } - // certain version of mecab returns trailing lf or spaces. + /* A certain version of mecab returns trailing lf or spaces. */ for (p = buf + strlen(buf) - 1; buf <= p && isspace(*(unsigned char *)p); p--) { *p = '\0'; } - //grn_log("sparsed='%s'", s); + /* grn_log("sparsed='%s'", s); */ + user_data->ptr = token; token->buf = (unsigned char *)buf; token->next = (unsigned char *)buf; token->end = (unsigned char *)buf + strlen(buf); @@ -131,7 +143,7 @@ static grn_obj * mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { size_t cl; - // grn_obj *table = args[0]; + /* grn_obj *table = args[0]; */ grn_mecab_tokenizer *token = user_data->ptr; const unsigned char *p = token->next, *r; const unsigned char *e = token->end; @@ -157,9 +169,9 @@ mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) static grn_obj * mecab_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { - // grn_obj *table = args[0]; + /* grn_obj *table = args[0]; */ grn_mecab_tokenizer *token = user_data->ptr; - // if (token->mecab) { mecab_destroy(token->mecab); } + /* if (token->mecab) { mecab_destroy(token->mecab); } */ grn_str_close(ctx, token->nstr); GRN_FREE(token->buf); GRN_FREE(token); @@ -223,6 +235,10 @@ GRN_PLUGIN_INIT(grn_ctx *ctx) check_mecab_dictionary_encoding(ctx); + /* + This function returns GRN_SUCCESS even if an encoding error is detected. + */ + return GRN_SUCCESS; } @@ -243,6 +259,11 @@ GRN_PLUGIN_REGISTER(grn_ctx *ctx) mecab_init, mecab_next, mecab_fin, 3, vars); if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_MECAB) { return GRN_FILE_CORRUPT; } + /* + obj will never be used? + grn_proc_create() is called here but grn_proc_destroy() does not appear. + */ + return GRN_SUCCESS; }