Moriyoshi Koizumi
moriy****@users*****
2004年 2月 4日 (水) 11:36:25 JST
moriyoshi 04/02/04 11:36:25 Modified: filters html_entities.c html_entities.h mbfilter_htmlent.c mbfl mbfilter.h Log: - Various fixes / cleanups for the html entity filter. This patch addresses possible buffer overrun and algorithmic failure. Revision Changes Path 1.4 +4 -4 libmbfl/filters/html_entities.c Index: html_entities.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/html_entities.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- html_entities.c 26 Jul 2003 11:21:12 -0000 1.3 +++ html_entities.c 4 Feb 2004 02:36:25 -0000 1.4 @@ -31,13 +31,13 @@ #include "config.h" #endif -#include "mbfilter.h" +#include "html_entities.h" -const mbfl_html_entity mbfl_html_entity_list[] = { /* - {"quot", 34}, DO NOT CONVERT THESE AUTOMATICALLY +const mbfl_html_entity_entry mbfl_html_entity_list[] = { + {"quot", 34}, {"amp", 38}, {"lt", 60}, - {"gt", 62}, */ + {"gt", 62}, {"nbsp", 160}, {"iexcl", 161}, {"cent", 162}, 1.2 +5 -3 libmbfl/filters/html_entities.h Index: html_entities.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/html_entities.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- html_entities.h 7 Dec 2002 22:31:19 -0000 1.1 +++ html_entities.h 4 Feb 2004 02:36:25 -0000 1.2 @@ -30,9 +30,11 @@ #ifndef MBFL_HTML_ENTITIES_H #define MBFL_HTML_ENTITIES_H -#include "mbfilter.h" +typedef struct _mbfl_html_entity_entry { + char * name; + int code; +} mbfl_html_entity_entry; -extern mbfl_html_entity *mbfl_html_entity_list; +extern mbfl_html_entity_entry *mbfl_html_entity_list; #endif /* MBFL_HTML_ENTITIES_H */ - 1.10 +37 -32 libmbfl/filters/mbfilter_htmlent.c Index: mbfilter_htmlent.c =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/filters/mbfilter_htmlent.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- mbfilter_htmlent.c 4 Feb 2004 02:05:17 -0000 1.9 +++ mbfilter_htmlent.c 4 Feb 2004 02:36:25 -0000 1.10 @@ -43,7 +43,15 @@ #include "mbfilter_htmlent.h" #include "html_entities.h" -static const unsigned char mblen_table_html[] = { /* 0x00, 0x80 - 0xFF, only valid for numeric entities */ +static const int htmlentitifieds[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -51,15 +59,7 @@ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL}; @@ -69,7 +69,7 @@ "HTML-ENTITIES", "HTML-ENTITIES", (const char *(*)[])&mbfl_encoding_html_ent_aliases, - NULL, /* mblen_table_html, Do not use table instead calulate length based on entities actually used */ + NULL, MBFL_ENCTYPE_HTML_ENT, NULL }; @@ -99,40 +99,45 @@ */ int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter) { - int tmp[10]; - int i = 0, p = 0, e; + int tmp[64]; + int i; unsigned int uc; + mbfl_html_entity_entry *e; - if (c<256 && mblen_table_html[c]==1) { + if (c < sizeof(htmlentitifieds) / sizeof(htmlentitifieds[0]) && + htmlentitifieds[c] != 1) { CK((*filter->output_function)(c, filter->data)); } else { - /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbfl_filt_conv_html_enc(0x%08X = %d)", c, c);*/ CK((*filter->output_function)('&', filter->data)); - while (1) { - e = mbfl_html_entity_list[i].code; - if (c < e || e == -1) { - break; - } - if (c == e) { - while(mbfl_html_entity_list[i].name[p]) { - CK((*filter->output_function)((int)mbfl_html_entity_list[i].name[p++], filter->data)); + for (i = 0; (e = &mbfl_html_entity_list[i])->name != NULL; i++) { + if (c == e->code) { + char *p; + + for (p = e->name; *p != '\0'; p++) { + CK((*filter->output_function)((int)*p, filter->data)); } - break; + goto last; } - i++; } - i=0; - if (!p) { + + { + int *p = tmp + sizeof(tmp); + CK((*filter->output_function)('#', filter->data)); + uc = (unsigned int)c; + + *(--p) = '\0'; do { - tmp[i++] = '0'+uc%10; + *(--p) = "0123456789"[uc % 10]; uc /= 10; } while (uc); - do { - CK((*filter->output_function)(tmp[--i], filter->data)); - } while (i); + + for (; *p != '\0'; p++) { + CK((*filter->output_function)(*p, filter->data)); + } } + last: CK((*filter->output_function)(';', filter->data)); } return c; @@ -170,7 +175,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter) { int pos, ent = 0; - const mbfl_html_entity *entity; + const mbfl_html_entity_entry *entity; char *buffer = (char*)filter->cache; if (!filter->status) { 1.17 +0 -10 libmbfl/mbfl/mbfilter.h Index: mbfilter.h =================================================================== RCS file: /cvsroot/php-i18n/libmbfl/mbfl/mbfilter.h,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- mbfilter.h 30 Dec 2002 20:44:21 -0000 1.16 +++ mbfilter.h 4 Feb 2004 02:36:25 -0000 1.17 @@ -224,24 +224,14 @@ mbfl_string *result, mbfl_encoding_id outcode); - /* * convert HTML numeric entity */ MBFLAPI mbfl_string *mbfl_html_numeric_entity(mbfl_string *string, mbfl_string *result, int *convmap, int mapsize, int type); - /* * convert of harfwidth and fullwidth for japanese */ MBFLAPI mbfl_string *mbfl_ja_jp_hantozen(mbfl_string *string, mbfl_string *result, int mode); - -/* - * HTML Entity table - */ -typedef struct _mbfl_html_entity { - char * name; - int code; -} mbfl_html_entity; #endif /* MBFL_MBFILTER_H */