[unicode] Handle malformed UTF-8 sequences in nextCodePoint

2026-03-19 05:40:38 +01:00 · 2019-01-31 11:55:03 +01:00
parent cbb1055218
commit 35f65ae255
1 changed files with 7 additions and 1 deletions
--- a/ion/src/shared/unicode/utf8_decoder.cpp
+++ b/ion/src/shared/unicode/utf8_decoder.cpp
@@ -23,7 +23,13 @@ CodePoint UTF8Decoder::nextCodePoint() {
  uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
  for (int i = 0; i < leadingOnes - 1; i++) {
    result <<= 6;
-    result += (*m_stringPosition++ & 0x3F);
+    char nextChunk = *m_stringPosition++;
+    if (!nextChunk && 0x80) {
+      /* The code point is not properly written. This might be due to a code
+       * point being translated into chars in a too small buffer. */
+      return UCodePointNull;
+    }
+    result += (nextChunk & 0x3F);
  }
  return CodePoint(result);
 }