OSDN > 開発者 > lafekasloan101 > 作業部屋 > system-coremmmm > コミット

system-corennnn
Fork

R/O
HTTP
SSH
HTTPS

コミット

コミットメタ情報

リビジョン	ecf5fd58a8f50362ce9e8d4245a33d56f29f142b (tree)
日時	2016-08-02 11:13:40
作者	Sergio Giro <sgiro@goog...>
コミッター	gitbuildkicker

ログメッセージ

libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8

Inconsistent behaviour between utf16_to_utf8 and utf16_to_utf8_length
is causing a heap overflow.

Correcting the length computation and adding bound checks to the
conversion functions.

Test: ran libutils_tests
Bug: 29250543
Change-Id: I6115e3357141ed245c63c6eb25fc0fd0a9a7a2bb
(cherry picked from commit c4966a363e46d2e1074d1a365e232af0dcedd6a1)

変更サマリ

modified: include/utils/Unicode.h (diff)
modified: libutils/String8.cpp (diff)
modified: libutils/Unicode.cpp (diff)
modified: libutils/tests/String8_test.cpp (diff)

差分

--- a/include/utils/Unicode.h

+++ b/include/utils/Unicode.h

		@@ -88,7 +88,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len);
88	88	* "dst" becomes \xE3\x81\x82\xE3\x81\x84
89	89	* (note that "dst" is NOT null-terminated, like strncpy)
90	90	*/
91		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst);
	91	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len);
92	92
93	93	/**
94	94	* Returns the unicode value at "index".

		@@ -110,7 +110,7 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);
110	110	* enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added
111	111	* NULL terminator.
112	112	*/
113		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst);
	113	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len);
114	114
115	115	/**
116	116	* Returns the length of "src" when "src" is valid UTF-8 string.

--- a/libutils/String8.cpp

+++ b/libutils/String8.cpp

		@@ -104,20 +104,21 @@ static char* allocFromUTF16(const char16_t* in, size_t len)
104	104	{
105	105	if (len == 0) return getEmptyString();
106	106
107		- const ssize_t bytes = utf16_to_utf8_length(in, len);
108		- if (bytes < 0) {
	107	+ // Allow for closing '\0'
	108	+ const ssize_t resultStrLen = utf16_to_utf8_length(in, len) + 1;
	109	+ if (resultStrLen < 1) {
109	110	return getEmptyString();
110	111	}
111	112
112		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	113	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
113	114	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
114	115	if (!buf) {
115	116	return getEmptyString();
116	117	}
117	118
118		- char* str = (char*)buf->data();
119		- utf16_to_utf8(in, len, str);
120		- return str;
	119	+ char* resultStr = (char*)buf->data();
	120	+ utf16_to_utf8(in, len, resultStr, resultStrLen);
	121	+ return resultStr;
121	122	}
122	123
123	124	static char* allocFromUTF32(const char32_t* in, size_t len)

		@@ -126,21 +127,21 @@ static char* allocFromUTF32(const char32_t* in, size_t len)
126	127	return getEmptyString();
127	128	}
128	129
129		- const ssize_t bytes = utf32_to_utf8_length(in, len);
130		- if (bytes < 0) {
	130	+ const ssize_t resultStrLen = utf32_to_utf8_length(in, len) + 1;
	131	+ if (resultStrLen < 1) {
131	132	return getEmptyString();
132	133	}
133	134
134		- SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
	135	+ SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
135	136	ALOG_ASSERT(buf, "Unable to allocate shared buffer");
136	137	if (!buf) {
137	138	return getEmptyString();
138	139	}
139	140
140		- char* str = (char*) buf->data();
141		- utf32_to_utf8(in, len, str);
	141	+ char* resultStr = (char*) buf->data();
	142	+ utf32_to_utf8(in, len, resultStr, resultStrLen);
142	143
143		- return str;
	144	+ return resultStr;
144	145	}
145	146
146	147	// ---------------------------------------------------------------------------

--- a/libutils/Unicode.cpp

+++ b/libutils/Unicode.cpp

		@@ -14,6 +14,7 @@
14	14	* limitations under the License.
15	15	*/
16	16
	17	+#include <log/log.h>
17	18	#include <utils/Unicode.h>
18	19
19	20	#include <stddef.h>

		@@ -182,7 +183,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len)
182	183	return ret;
183	184	}
184	185
185		-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
	186	+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
186	187	{
187	188	if (src == NULL || src_len == 0 || dst == NULL) {
188	189	return;

		@@ -193,9 +194,12 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
193	194	char *cur = dst;
194	195	while (cur_utf32 < end_utf32) {
195	196	size_t len = utf32_codepoint_utf8_length(*cur_utf32);
	197	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
196	198	utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
197	199	cur += len;
	200	+ dst_len -= len;
198	201	}
	202	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
199	203	*cur = '\0';
200	204	}
201	205

		@@ -348,7 +352,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
348	352	: 0);
349	353	}
350	354
351		-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
	355	+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
352	356	{
353	357	if (src == NULL || src_len == 0 || dst == NULL) {
354	358	return;

		@@ -369,9 +373,12 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
369	373	utf32 = (char32_t) *cur_utf16++;
370	374	}
371	375	const size_t len = utf32_codepoint_utf8_length(utf32);
	376	+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
372	377	utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
373	378	cur += len;
	379	+ dst_len -= len;
374	380	}
	381	+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
375	382	*cur = '\0';
376	383	}
377	384

		@@ -432,10 +439,10 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
432	439	const char16_t* const end = src + src_len;
433	440	while (src < end) {
434	441	if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
435		- && (*++src & 0xFC00) == 0xDC00) {
	442	+ && (*(src + 1) & 0xFC00) == 0xDC00) {
436	443	// surrogate pairs are always 4 bytes.
437	444	ret += 4;
438		- src++;
	445	+ src += 2;
439	446	} else {
440	447	ret += utf32_codepoint_utf8_length((char32_t) *src++);
441	448	}

--- a/libutils/tests/String8_test.cpp

+++ b/libutils/tests/String8_test.cpp

		@@ -17,6 +17,7 @@
17	17	#define LOG_TAG "String8_test"
18	18	#include <utils/Log.h>
19	19	#include <utils/String8.h>
	20	+#include <utils/String16.h>
20	21
21	22	#include <gtest/gtest.h>
22	23

		@@ -77,4 +78,22 @@ TEST_F(String8Test, SetToSizeMaxReturnsNoMemory) {
77	78	EXPECT_EQ(NO_MEMORY, String8("").setTo(in, SIZE_MAX));
78	79	}
79	80
	81	+// http://b/29250543
	82	+TEST_F(String8Test, CorrectInvalidSurrogate) {
	83	+ // d841d8 is an invalid start for a surrogate pair. Make sure this is handled by ignoring the
	84	+ // first character in the pair and handling the rest correctly.
	85	+ String16 string16(u"\xd841\xd841\xdc41\x0000");
	86	+ String8 string8(string16);
	87	+
	88	+ EXPECT_EQ(4U, string8.length());
	89	+}
	90	+
	91	+TEST_F(String8Test, CheckUtf32Conversion) {
	92	+ // Since bound checks were added, check the conversion can be done without fatal errors.
	93	+ // The utf8 lengths of these are chars are 1 + 2 + 3 + 4 = 10.
	94	+ const char32_t string32[] = U"\x0000007f\x000007ff\x0000911\x0010fffe";
	95	+ String8 string8(string32);
	96	+ EXPECT_EQ(10U, string8.length());
	97	+}
	98	+
80	99	}

system-corennnn Fork

コミット

タグ

よく使われているワード(クリックで追加)

コミットメタ情報

ログメッセージ

変更サマリ

差分

system-corennnn
Fork