From 5cb26b19a5f11acdbf069c6799b9df413b903877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89milie=20Feral?= Date: Tue, 4 Feb 2020 10:56:36 +0100 Subject: [PATCH] [poincare] Parsing: Unit token does not accept '_' char except at first position. This makes it possible to parse "_km_s" as Multiplication(Unit(km), Unit(s)) --- poincare/src/parsing/tokenizer.cpp | 10 +++++----- poincare/src/parsing/tokenizer.h | 2 +- poincare/test/parsing.cpp | 11 +++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/poincare/src/parsing/tokenizer.cpp b/poincare/src/parsing/tokenizer.cpp index e51521fbf..4cf8dc99c 100644 --- a/poincare/src/parsing/tokenizer.cpp +++ b/poincare/src/parsing/tokenizer.cpp @@ -38,7 +38,7 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) { return length; } -size_t Tokenizer::popIdentifier() { +size_t Tokenizer::popIdentifier(CodePoint additionalAcceptedCodePoint) { /* TODO handle combined code points? For now combining code points will * trigger a syntax error. * This method is used to parse any identifier, reserved or custom, or even @@ -47,8 +47,8 @@ size_t Tokenizer::popIdentifier() { * instance input '2πx' without any danger. */ return popWhile([](CodePoint c, CodePoint context) { - return c.isDecimalDigit() || c.isLatinLetter() || c == '_' || c.isGreekCapitalLetter() || (c.isGreekSmallLetter() && c != UCodePointGreekSmallLetterPi); - }); + return c.isDecimalDigit() || c.isLatinLetter() || (c != UCodePointNull && c == context) || c.isGreekCapitalLetter() || (c.isGreekSmallLetter() && c != UCodePointGreekSmallLetterPi); + }, additionalAcceptedCodePoint); } size_t Tokenizer::popDigits() { @@ -162,7 +162,7 @@ Token Tokenizer::popToken() { * reserved or custom identifier, popIdentifier is called in both cases. 
*/ Token result(Token::Unit); - result.setString(start + 1, popIdentifier()); // + 1 for the underscore + result.setString(start + 1, popIdentifier(UCodePointNull)); // + 1 for the underscore return result; } if (c.isLatinLetter() || @@ -170,7 +170,7 @@ Token Tokenizer::popToken() { c.isGreekSmallLetter()) // Greek small letter pi is matched earlier { Token result(Token::Identifier); - result.setString(start, UTF8Decoder::CharSizeOfCodePoint(c) + popIdentifier()); // We already popped 1 code point + result.setString(start, UTF8Decoder::CharSizeOfCodePoint(c) + popIdentifier('_')); // We already popped 1 code point return result; } if ('(' <= c && c <= '/') { diff --git a/poincare/src/parsing/tokenizer.h b/poincare/src/parsing/tokenizer.h index 8ca26cebe..4d62561ec 100644 --- a/poincare/src/parsing/tokenizer.h +++ b/poincare/src/parsing/tokenizer.h @@ -23,7 +23,7 @@ private: size_t popDigits(); size_t popBinaryDigits(); size_t popHexadecimalDigits(); - size_t popIdentifier(); + size_t popIdentifier(CodePoint additionalAcceptedCodePoint); Token popNumber(); const char * m_text; diff --git a/poincare/test/parsing.cpp b/poincare/test/parsing.cpp index 5c8b1b08e..b5e6678ca 100644 --- a/poincare/test/parsing.cpp +++ b/poincare/test/parsing.cpp @@ -304,10 +304,21 @@ QUIZ_CASE(poincare_parsing_units) { } } + // Non-existing units are not parsable assert_text_not_parsable("_n"); assert_text_not_parsable("_a"); + + // Any identifier starting with '_' is tokenized as a unit assert_tokenizes_as_unit("_m"); assert_tokenizes_as_unit("_A"); + + // Can parse implicit multiplication with units + Expression kilometer = Expression::Parse("_km", nullptr); + Expression second = Expression::Parse("_s", nullptr); + assert_parsed_expression_is("_kmπ", Multiplication::Builder(kilometer, Constant::Builder(UCodePointGreekSmallLetterPi))); + assert_parsed_expression_is("π_km", Multiplication::Builder(Constant::Builder(UCodePointGreekSmallLetterPi), kilometer)); + 
assert_parsed_expression_is("_s_km", Multiplication::Builder(second, kilometer)); + assert_parsed_expression_is("3_s", Multiplication::Builder(BasedInteger::Builder(3), second)); } QUIZ_CASE(poincare_parsing_identifiers) {