From 35f65ae255f915a3e062d8359d655bef9ed341d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9a=20Saviot?=
Date: Thu, 31 Jan 2019 11:55:03 +0100
Subject: [PATCH] [unicode] Handle not properly written UTF8 in nextCodePoint

---
 ion/src/shared/unicode/utf8_decoder.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/ion/src/shared/unicode/utf8_decoder.cpp b/ion/src/shared/unicode/utf8_decoder.cpp
index 45e92ad49..af0d59d46 100644
--- a/ion/src/shared/unicode/utf8_decoder.cpp
+++ b/ion/src/shared/unicode/utf8_decoder.cpp
@@ -23,7 +23,13 @@ CodePoint UTF8Decoder::nextCodePoint() {
   uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
   for (int i = 0; i < leadingOnes - 1; i++) {
     result <<= 6;
-    result += (*m_stringPosition++ & 0x3F);
+    char nextChunk = *m_stringPosition++;
+    if ((nextChunk & 0xC0) != 0x80) {
+      /* Not a continuation byte (10xxxxxx): the code point is not properly
+       * written, e.g. it was truncated when written in a too small buffer. */
+      return UCodePointNull;
+    }
+    result += (nextChunk & 0x3F);
   }
   return CodePoint(result);
 }