From de0efe69f2dc76666de90d479752d620b8de99d2 Mon Sep 17 00:00:00 2001 From: Ruben Dashyan Date: Tue, 2 Oct 2018 10:35:14 +0200 Subject: [PATCH] [poincare] Use a recursive-descent-alike parser --- poincare/src/parsing/parser.cpp | 236 +++++++++++++++++------------ poincare/src/parsing/parser.h | 30 +++- poincare/src/parsing/token.h | 22 +-- poincare/src/parsing/tokenizer.cpp | 42 +++-- poincare/src/parsing/tokenizer.h | 3 +- 5 files changed, 190 insertions(+), 143 deletions(-) diff --git a/poincare/src/parsing/parser.cpp b/poincare/src/parsing/parser.cpp index 76b063054..d017daf3b 100644 --- a/poincare/src/parsing/parser.cpp +++ b/poincare/src/parsing/parser.cpp @@ -2,115 +2,155 @@ namespace Poincare { +Expression Parser::parseNumber(Expression leftHandSide) { + assert(leftHandSide.isUninitialized()); + return m_currentToken.expression(); +} + +Expression Parser::parsePlus(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Addition(leftHandSide, parseUntil(Token::Type::Plus)); // Addition is left-associative. +} + +Expression Parser::parseTimes(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Multiplication(leftHandSide, parseUntil(Token::Type::Times)); // Multiplication is left-associative. +} + +Expression Parser::parseSlash(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Division(leftHandSide, parseUntil(Token::Type::Power)); // Division is left-associative. +} + +Expression Parser::parseMinus(Expression leftHandSide) { + if (leftHandSide.isUninitialized()) { + return Opposite(parseUntil(Token::Type::Times)); + } else { + return Subtraction(leftHandSide, parseUntil(Token::Type::Minus)); // Subtraction is left-associative. + } +} + +Expression Parser::parsePower(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Power(leftHandSide, parseUntil(Token::Type::Power)); // Power is right-associative +} + +Expression Parser::parseLeftParenthesis(Expression leftHandSide) { + assert(leftHandSide.isUninitialized()); + Expression rightHandSide = parseUntil(Token::Type::RightParenthesis); + assert(m_nextToken.type() == Token::Type::RightParenthesis); + m_currentToken = m_nextToken; + m_nextToken = popToken(); + return Parenthesis(rightHandSide); +} + +Expression Parser::parseSquareRoot(Expression leftHandSide) { + assert(leftHandSide.isUninitialized()); + return SquareRoot(parseUntil(Token::Type::Bang)); // FIXME what is the precedence of SquareRoot? +} + +Expression Parser::parseBang(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Factorial(leftHandSide); +} + +/*Expression Parser::parseIdentifier() { + * Identifier, Symbol, Comma + * If an Identifier token is not followed by a LeftParenthesis + * Symbol(const char name); + * It must have length 1 + * Otherwise + * Function()? * + return leftHandSide; // FIXME +}*/ + +Expression Parser::parseEqual(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + return Equal(leftHandSide, parseUntil(Token::Type::Equal)); +} + +/*Expression Parser::parseStore(Expression leftHandSide) { + assert(!leftHandSide.isUninitialized()); + Expression symbol = parseIdentifier(leftHandSide); // FIXME Symbol + // TODO assert(m_nextToken == EndOfStream); + return Store(leftHandSide, static_cast(symbol)); +}*/ + +Expression Parser::noParse(Expression leftHandSide) { // FIXME nullptr? + return leftHandSide; +} + +typedef Expression (Parser::*TokenParser)(Expression leftHandSide); + +TokenParser tokenParsers[] = { + &Parser::noParse, //EndOfStream + &Parser::parseEqual, + &Parser::noParse, //Store, FIXME + &Parser::noParse, //RightBracket, + &Parser::noParse, //RightBrace, + &Parser::noParse, //RightParenthesis, + &Parser::parsePlus, + &Parser::parseMinus, + &Parser::parseTimes, + &Parser::parseSlash, + &Parser::parsePower, + &Parser::parseSquareRoot, + &Parser::parseBang, + &Parser::noParse, //LeftBracket, FIXME + &Parser::noParse, //LeftBrace, FIXME + &Parser::parseLeftParenthesis, + &Parser::parseNumber, + &Parser::noParse, //Identifier, FIXME + &Parser::noParse, //Comma, FIXME + &Parser::noParse //Undefined +}; + Expression Parser::parse() { - return shift(Expression(), popToken(), Token::Type::EndOfStream); + return parseUntil(Token::Type::EndOfStream); +} + +Expression Parser::parseUntil(Token::Type stoppingType) { + Expression leftHandSide; + while (canPopToken(stoppingType)) { + leftHandSide = (this->*(tokenParsers[static_cast(m_currentToken.type())]))(leftHandSide); + } + assert(!leftHandSide.isUninitialized()); + return leftHandSide; } static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) { return (t1 == Token::Type::RightParenthesis || t1 == Token::Type::Number || t1 == Token::Type::Identifier) && - (t2 == Token::Type::LeftParenthesis || t2 == Token::Type::Number || t2 == Token::Type::Identifier) - ; + (t2 == Token::Type::LeftParenthesis || t2 == Token::Type::Number || t2 == Token::Type::Identifier || t2 == Token::Type::SquareRoot); + //TODO if (t1 == Token::Type::Identifier && t2 == Token::Type::LeftParenthesis) t1 should be parsed as a function } -Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) { - - if (currentToken.type() == Token::Type::EndOfStream) { - return leftHandSide; - } - - Token nextToken = popToken(); - - if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) { - // TODO implicit multiplication - } - - // If currentToken ... FIXME trouver une bonne description: constitutes an operand? - if (currentToken.type() == Token::Type::Number) { - //assert(leftHandSide.isUndefined()); - return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType); - } - /*if (currentToken.isLeftGroupingToken()) { - assert(leftHandSide.isUndefined()); - return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens - }*/ - - // If currentToken does not require rightHandSide - if (currentToken.type() == Token::Type::Bang) { - return shift(Factorial(leftHandSide), nextToken); - } - /*if (currentToken.type() == Token::Type::Identifier) { - if (nextToken.type() == Token::Type::LeftParenthesis) { - // FIXME return shift(Function(rightHandSide), nextToken); - } else { - return shift(Identifier(currentToken.text(), currentToken.length()), nextToken); - } - }*/ - - // If currentToken requires a rightHandSide expression - - // First, build rightHandSide - Expression rightHandSide = shift(Expression(), nextToken, currentToken.type()); - if (comparePrecedence(currentToken, stoppingType)) { - return leftHandSide; - } - - // Then construct the whole expression and continue - if (currentToken.type() == Token::Type::Plus) { - return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType); - } - if (currentToken.type() == Token::Type::Minus) { - if (leftHandSide.isUndefined()) { - return shift(Opposite(rightHandSide), nextToken, stoppingType); - } else { - return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType); - } - } - if (currentToken.type() == Token::Type::Times) { - return shift(Multiplication(leftHandSide, rightHandSide), nextToken); - } - if (currentToken.type() == Token::Type::Slash) { - return shift(Division(leftHandSide, rightHandSide), nextToken); - } - if (currentToken.type() == Token::Type::Power) { - return shift(Power(leftHandSide, rightHandSide), nextToken); - } - if (currentToken.type() == Token::Type::SquareRoot) { - //assert(leftHandSide.isUndefined()); - return shift(SquareRoot(rightHandSide), nextToken); - } - - // TODO remaining tokens: comma, equal, store +static inline bool comparePrecedence(Token::Type nextTokenType, Token::Type stoppingType) { + // if (stoppingType == EndOfStream) return nextTokenType > EndOfStream + // if (stoppingType == RightParenthesis) return nextTokenType > RightParenthesis + // if (stoppingType == Plus) return nextTokenType > Plus + // if (stoppingType == Times) return nextTokenType > Times + // if (stoppingType == Power) return nextTokenType >= Power // >= makes the operator right-associative + // EndOfStream < RightParenthesis < Plus < Times < Power + return ((nextTokenType > stoppingType) || + (nextTokenType == stoppingType && + (stoppingType == Token::Type::Power) + ) && + (nextTokenType != Token::Type::EndOfStream)); } -bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const { - /* Returns true if nextToken is of higher precedence than currentToken TODO */ - return - // First, parse what is after the left grouping token - // until the corresponding right grouping token appears - /*!nextToken.isLeftGroupingToken() - && - // If nextToken is an unary minus - !(nextToken.is(Token::Type::Minus) - && - TokenHasTag(nextToken, TokenTag::UnaryMinus)) - && - ( - ( - currentToken.is(Token::Type::Minus) && - TokenHasTag(currentToken, TokenTag::UnaryMinus) && - (nextToken.type() < Token::Type::Power) - ) ||*/ - // A token with higher precedence should be reduced first - (currentToken.type() > stoppingType) /*|| - // Handle left-associative operators - ( - (currentToken.type() == nextToken.type()) && - TokenIsLeftAssociative(currentToken) - ) - )*/ - ; +bool Parser::canPopToken(Token::Type stoppingType) { + if (tokenTypesCanBeImplicitlyMultiplied(m_currentToken.type(), m_nextToken.type())) { + m_currentToken = Token(Token::Type::Times); + return true; + } + if (comparePrecedence(m_nextToken.type(), stoppingType)) { + m_currentToken = m_nextToken; + m_nextToken = popToken(); + return true; + } + return false; } } diff --git a/poincare/src/parsing/parser.h b/poincare/src/parsing/parser.h index f87be4d2e..a2564b0a3 100644 --- a/poincare/src/parsing/parser.h +++ b/poincare/src/parsing/parser.h @@ -2,29 +2,49 @@ #define POINCARE_PARSING_PARSER_H #include "tokenizer.h" -#include #include #include #include #include +#include #include #include -#include #include #include #include #include +#include +// matrix ? with brackets +// braces ? namespace Poincare { class Parser : public Tokenizer { public: - Parser(const char * input) : Tokenizer(input) {} + Parser(const char * input) : + Tokenizer(input), + m_currentToken(Token(Token::Type::Undefined)), + m_nextToken(popToken()) {} Expression parse(); + + Expression parseNumber(Expression leftHandSide); + Expression parsePlus(Expression leftHandSide); + Expression parseTimes(Expression leftHandSide); + Expression parseSlash(Expression leftHandSide); + Expression parseMinus(Expression leftHandSide); + Expression parsePower(Expression leftHandSide); + Expression parseLeftParenthesis(Expression leftHandSide); + Expression parseSquareRoot(Expression leftHandSide); + Expression parseBang(Expression leftHandSide); + Expression parseEqual(Expression leftHandSide); + Expression noParse(Expression leftHandSide); private: - Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream); - bool comparePrecedence(Token currentToken, Token::Type stoppingType) const; + Expression parseUntil(Token::Type stoppingType); + bool canPopToken(Token::Type stoppingType); + + Token m_currentToken; + Token m_nextToken; }; } diff --git a/poincare/src/parsing/token.h b/poincare/src/parsing/token.h index f8182f3ba..b00b211a7 100644 --- a/poincare/src/parsing/token.h +++ b/poincare/src/parsing/token.h @@ -13,11 +13,8 @@ public: Equal, Store, RightBracket, - LeftBracket, RightBrace, - LeftBrace, RightParenthesis, - LeftParenthesis, Plus, Minus, Times, @@ -25,6 +22,9 @@ public: Power, SquareRoot, Bang, + LeftBracket, + LeftBrace, + LeftParenthesis, Number, Identifier, Comma, @@ -36,21 +36,11 @@ public: Type type() const { return m_type; } bool is(Type t) const { return m_type == t; } bool isEndOfStream() const { return is(Type::EndOfStream); } - bool isLeftGroupingToken() const { - return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace); - } - bool isRightGroupingToken() const { - return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace); - } - const char * text() const { return m_text; } - void setText(const char * text) { m_text = text; } - size_t length() const { return m_length; } - void setLength(size_t length) { m_length = length; } - + Expression expression() const { return m_expression; } + void setExpression(Expression e) { m_expression = e; } private: Type m_type; - const char * m_text; - size_t m_length; + Expression m_expression; }; } diff --git a/poincare/src/parsing/tokenizer.cpp b/poincare/src/parsing/tokenizer.cpp index f36d663c3..4b12ae952 100644 --- a/poincare/src/parsing/tokenizer.cpp +++ b/poincare/src/parsing/tokenizer.cpp @@ -10,7 +10,7 @@ bool Tokenizer::canPopChar(char c) { return false; } -size_t Tokenizer::popInteger() { +size_t Tokenizer::popDigits() { size_t length = 0; char c = currentChar(); while (c >= '0' && c <= '9') { @@ -22,12 +22,13 @@ size_t Tokenizer::popInteger() { Token Tokenizer::popNumber() { const char * integerPartText = m_text; - size_t integerPartLength = popInteger(); + size_t integerPartLength = popDigits(); -/* const char * decimalPartText = m_text; + const char * decimalPartText = m_text; size_t decimalPartLength = 0; if (canPopChar('.')) { - decimalPartLength = popInteger(); + decimalPartText = m_text; + decimalPartLength = popDigits(); } if (integerPartLength == 0 && decimalPartLength == 0) { @@ -39,18 +40,14 @@ Token Tokenizer::popNumber() { bool exponentIsNegative = false; if (canPopChar('e')) { exponentIsNegative = canPopChar('-'); - exponentPartLength = popInteger(); + exponentPartLength = popDigits(); if (exponentPartLength == 0) { return Token(); } } Token result(Token::Type::Number); - //TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength)); - return result;*/ - Token result(Token::Type::Number); - result.setText(integerPartText); - result.setLength(integerPartLength); + result.setExpression(Number::ParseNumber(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength)); return result; } @@ -59,7 +56,6 @@ static inline bool isLetter(char c) { } Token Tokenizer::popIdentifier() { - const char * text = m_text; size_t length = 0; char c = currentChar(); while (isLetter(c)) { @@ -71,15 +67,18 @@ Token Tokenizer::popIdentifier() { return result; } -Token Tokenizer::popToken() { // associative array? +Token Tokenizer::popToken() { const char c = currentChar(); - if (canPopChar(0)) { - return Token(Token::Type::EndOfStream); + if ((c == '.') || (c >= '0' && c <= '9')) { + return popNumber(); + } + if (isLetter(c)) { + return popIdentifier(); } if (canPopChar('!')) { return Token(Token::Type::Bang); } - if (c >= '(' && (c <= '/' && c != '.')) { + if (c >= '(' && c <= '/' && c != '.') { Token::Type typeForChar[] = { Token::Type::LeftParenthesis, Token::Type::RightParenthesis, @@ -112,20 +111,17 @@ Token Tokenizer::popToken() { // associative array? if (canPopChar('}')) { return Token(Token::Type::RightBrace); } - if (canPopChar('\x89')) { + if (canPopChar('\x89')) { // Ion::Charset::SmallPi return Token(Token::Type::Number); } - if (canPopChar('\x90')) { + if (canPopChar('\x90')) { // Ion::Charset::Store return Token(Token::Type::Store); } - if (canPopChar('\x91')) { + if (canPopChar('\x91')) { // Ion::Charset::Root return Token(Token::Type::SquareRoot); } - if ((c == '.') || (c >= '0' && c <= '9')) { - return popNumber(); - } - if (isLetter(c)) { - return popIdentifier(); + if (canPopChar(0)) { + return Token(Token::Type::EndOfStream); } return Token(); // TODO error } diff --git a/poincare/src/parsing/tokenizer.h b/poincare/src/parsing/tokenizer.h index 770df757d..9f40a9d8f 100644 --- a/poincare/src/parsing/tokenizer.h +++ b/poincare/src/parsing/tokenizer.h @@ -2,6 +2,7 @@ #define POINCARE_PARSING_TOKENIZER_H #include "token.h" +#include namespace Poincare { @@ -13,7 +14,7 @@ private: const char popChar() { return *++m_text; } const char currentChar() const { return *m_text; } bool canPopChar(char c); - size_t popInteger(); + size_t popDigits(); Token popNumber(); Token popIdentifier();