From a2451aa104dc63b3f342ef9c71e3820ecbbe138e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9a=20Saviot?=
Date: Fri, 1 Feb 2019 12:07:51 +0100
Subject: [PATCH] [poincare/parser] Syntax error on combining code points

---
 poincare/src/parsing/tokenizer.cpp | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/poincare/src/parsing/tokenizer.cpp b/poincare/src/parsing/tokenizer.cpp
index 10ec5ef36..6c5b8c762 100644
--- a/poincare/src/parsing/tokenizer.cpp
+++ b/poincare/src/parsing/tokenizer.cpp
@@ -15,24 +15,16 @@ static inline bool isDigit(const CodePoint c) {
 
 const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
   UTF8Decoder decoder(m_text);
-  CodePoint firstCodePoint = decoder.nextCodePoint();
+  CodePoint c = decoder.nextCodePoint();
   const char * nextTextPosition = decoder.stringPosition();
-  if (firstCodePoint != UCodePointNull) {
-    CodePoint codePoint = decoder.nextCodePoint();
-    while (codePoint.isCombining()) {
-      nextTextPosition = decoder.stringPosition();
-      codePoint = decoder.nextCodePoint();
-    }
-  }
-  // TODO handle combined code points? For now the combining codepoints get dropped.
-  bool shouldPop = popTest(firstCodePoint, context);
+  bool shouldPop = popTest(c, context);
   if (testResult != nullptr) {
     *testResult = shouldPop;
   }
   if (shouldPop) {
     m_text = nextTextPosition;
   }
-  return firstCodePoint;
+  return c;
 }
 
 const CodePoint Tokenizer::popCodePoint() {
@@ -60,6 +52,8 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) {
 }
 
 size_t Tokenizer::popIdentifier() {
+  /* TODO handle combined code points? For now combining code points will
+   * trigger a syntax error. */
   return popWhile([](CodePoint c, CodePoint context) { return isLetter(c) || isDigit(c) || c == context; }, '_');
 }
 