[poincare] Use a recursive-descent-alike parser

2026-03-19 05:40:38 +01:00 · 2018-10-02 10:35:14 +02:00
parent d0f2a54d6d
commit de0efe69f2
5 changed files with 190 additions and 143 deletions
--- a/poincare/src/parsing/parser.cpp
+++ b/poincare/src/parsing/parser.cpp
@@ -2,115 +2,155 @@

 namespace Poincare {

+Expression Parser::parseNumber(Expression leftHandSide) { // Handler for Number tokens.
+  assert(leftHandSide.isUninitialized()); // A number starts an operand: nothing may have been parsed to its left yet.
+  return m_currentToken.expression(); // The token carries the expression built for it by the tokenizer.
+}
+
+Expression Parser::parsePlus(Expression leftHandSide) { // Handler for Plus tokens: builds leftHandSide + <right operand>.
+  assert(!leftHandSide.isUninitialized()); // Plus is binary only: a left operand must already be parsed.
+  return Addition(leftHandSide, parseUntil(Token::Type::Plus)); // Addition is left-associative: the right operand stops before a following Plus.
+}
+
+Expression Parser::parseTimes(Expression leftHandSide) { // Handler for Times tokens: builds leftHandSide * <right operand>.
+  assert(!leftHandSide.isUninitialized()); // Times is binary only: a left operand must already be parsed.
+  return Multiplication(leftHandSide, parseUntil(Token::Type::Times)); // Multiplication is left-associative: the right operand stops before a following explicit Times.
+}
+
+Expression Parser::parseSlash(Expression leftHandSide) { // Handler for Slash tokens: builds leftHandSide / <right operand>.
+  assert(!leftHandSide.isUninitialized()); // Slash is binary only: a left operand must already be parsed.
+  return Division(leftHandSide, parseUntil(Token::Type::Power)); // Division is left-associative. NOTE(review): stops at Power, not Slash, unlike the sibling handlers; comparePrecedence seems to accept the same tokens for both (assuming Slash directly precedes Power in Token::Type) - confirm and align.
+}
+
+Expression Parser::parseMinus(Expression leftHandSide) { // Handler for Minus tokens, covering both the unary and the binary minus.
+  if (leftHandSide.isUninitialized()) { // Nothing parsed to the left: this is a unary minus.
+    return Opposite(parseUntil(Token::Type::Times)); // Stops at Times precedence: the operand takes in e.g. a following Slash or Power, but not an explicit Times.
+  } else {
+    return Subtraction(leftHandSide, parseUntil(Token::Type::Minus)); // Subtraction is left-associative: the right operand stops before a following Minus.
+  }
+}
+
+Expression Parser::parsePower(Expression leftHandSide) { // Handler for Power tokens: builds leftHandSide ^ <right operand>.
+  assert(!leftHandSide.isUninitialized()); // Power is binary only: a base must already be parsed.
+  return Power(leftHandSide, parseUntil(Token::Type::Power)); // Power is right-associative: comparePrecedence lets a following Power into the right operand.
+}
+
+Expression Parser::parseLeftParenthesis(Expression leftHandSide) { // Handler for LeftParenthesis tokens: parses "( ... )" into a Parenthesis.
+  assert(leftHandSide.isUninitialized()); // An opening parenthesis starts a fresh operand.
+  Expression rightHandSide = parseUntil(Token::Type::RightParenthesis); // Content enclosed by the parentheses.
+  assert(m_nextToken.type() == Token::Type::RightParenthesis); // NOTE(review): a missing closing parenthesis is only caught by this assert.
+  m_currentToken = m_nextToken; // Consume the closing parenthesis...
+  m_nextToken = popToken(); // ...and fetch the token that follows it.
+  return Parenthesis(rightHandSide);
+}
+
+Expression Parser::parseSquareRoot(Expression leftHandSide) { // Handler for SquareRoot tokens (prefix operator).
+  assert(leftHandSide.isUninitialized()); // A root sign starts a fresh operand.
+  return SquareRoot(parseUntil(Token::Type::Bang)); // FIXME what is the precedence of SquareRoot? For now the operand is parsed with Bang as stopping type.
+}
+
+Expression Parser::parseBang(Expression leftHandSide) { // Handler for Bang tokens: factorial of what was parsed so far.
+  assert(!leftHandSide.isUninitialized()); // Postfix operator: its operand must already be parsed.
+  return Factorial(leftHandSide); // No right operand, hence no call to parseUntil.
+}
+
+/*Expression Parser::parseIdentifier() {
+     * Identifier, Symbol, Comma
+     *   If an Identifier token is not followed by a LeftParenthesis
+     *     Symbol(const char name);
+     *     It must have length 1
+     *   Otherwise
+     *     Function()? *
+  return leftHandSide; // FIXME
+}*/
+
+Expression Parser::parseEqual(Expression leftHandSide) { // Handler for Equal tokens: builds leftHandSide = <right operand>.
+  assert(!leftHandSide.isUninitialized()); // Equal is binary only: a left-hand side must already be parsed.
+  return Equal(leftHandSide, parseUntil(Token::Type::Equal)); // Parsed with Equal as stopping type, so a second Equal is not taken into the right operand.
+}
+
+/*Expression Parser::parseStore(Expression leftHandSide) {
+  assert(!leftHandSide.isUninitialized());
+  Expression symbol = parseIdentifier(leftHandSide); // FIXME Symbol
+  // TODO assert(m_nextToken == EndOfStream);
+  return Store(leftHandSide, static_cast<Symbol>(symbol));
+}*/
+
+Expression Parser::noParse(Expression leftHandSide) { // FIXME nullptr?
+  return leftHandSide; // Placeholder handler for token types that have no parser yet: hands the expression back unchanged.
+}
+
+typedef Expression (Parser::*TokenParser)(Expression leftHandSide); // Pointer to a Parser token handler.
+
+TokenParser tokenParsers[] = { // Indexed by static_cast<int>(Token::Type) in parseUntil: entries must follow the enum's declaration order.
+  &Parser::noParse, //EndOfStream
+  &Parser::parseEqual,
+  &Parser::noParse, //Store, FIXME
+  &Parser::noParse, //RightBracket,
+  &Parser::noParse, //RightBrace,
+  &Parser::noParse, //RightParenthesis,
+  &Parser::parsePlus,
+  &Parser::parseMinus,
+  &Parser::parseTimes,
+  &Parser::parseSlash,
+  &Parser::parsePower,
+  &Parser::parseSquareRoot,
+  &Parser::parseBang,
+  &Parser::noParse, //LeftBracket, FIXME
+  &Parser::noParse, //LeftBrace, FIXME
+  &Parser::parseLeftParenthesis,
+  &Parser::parseNumber,
+  &Parser::noParse, //Identifier, FIXME
+  &Parser::noParse, //Comma, FIXME
+  &Parser::noParse //Undefined
+};
+
 Expression Parser::parse() {
-  return shift(Expression(), popToken(), Token::Type::EndOfStream);
+  return parseUntil(Token::Type::EndOfStream); // Entry point: EndOfStream is the lowest stopping type, so parsing runs until no token ranks above it.
+}
+
+Expression Parser::parseUntil(Token::Type stoppingType) { // Parses one operand: consumes tokens for as long as canPopToken(stoppingType) accepts them.
+  Expression leftHandSide; // Stays uninitialized until a first handler produces an expression.
+  while (canPopToken(stoppingType)) {
+    leftHandSide = (this->*(tokenParsers[static_cast<int>(m_currentToken.type())]))(leftHandSide); // Dispatch on the token just made current; the handler may recurse into parseUntil.
+  }
+  assert(!leftHandSide.isUninitialized()); // At least one token must have produced an expression.
+  return leftHandSide;
 }

 static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
  return
    (t1 == Token::Type::RightParenthesis || t1 == Token::Type::Number || t1 == Token::Type::Identifier)
    &&
-    (t2 == Token::Type::LeftParenthesis || t2 == Token::Type::Number || t2 == Token::Type::Identifier)
-  ;
+    (t2 == Token::Type::LeftParenthesis  || t2 == Token::Type::Number || t2 == Token::Type::Identifier || t2 == Token::Type::SquareRoot); // SquareRoot may only open the right-hand factor of an implicit product.
+  // TODO: when t1 is an Identifier and t2 a LeftParenthesis, t1 should be parsed as a function call rather than as an implicit product.
 }

-Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {
-
-  if (currentToken.type() == Token::Type::EndOfStream) {
-    return leftHandSide;
-  }
-
-  Token nextToken = popToken();
-
-  if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
-    // TODO implicit multiplication
-  }
-
-  // If currentToken ... FIXME trouver une bonne description: constitutes an operand?
-  if (currentToken.type() == Token::Type::Number) {
-    //assert(leftHandSide.isUndefined());
-    return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
-  }
-  /*if (currentToken.isLeftGroupingToken()) {
-    assert(leftHandSide.isUndefined());
-    return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
-  }*/
-
-  // If currentToken does not require rightHandSide
-  if (currentToken.type() == Token::Type::Bang) {
-    return shift(Factorial(leftHandSide), nextToken);
-  }
-  /*if (currentToken.type() == Token::Type::Identifier) {
-    if (nextToken.type() == Token::Type::LeftParenthesis) {
-      // FIXME return shift(Function(rightHandSide), nextToken);
-    } else {
-      return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
-    }
-  }*/
-
-  // If currentToken requires a rightHandSide expression
-
-  // First, build rightHandSide
-  Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
-  if (comparePrecedence(currentToken, stoppingType)) {
-    return leftHandSide;
-  }
-
-  // Then construct the whole expression and continue
-  if (currentToken.type() == Token::Type::Plus) {
-    return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
-  }
-  if (currentToken.type() == Token::Type::Minus) {
-    if (leftHandSide.isUndefined()) {
-      return shift(Opposite(rightHandSide), nextToken, stoppingType);
-    } else {
-      return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
-    }
-  }
-  if (currentToken.type() == Token::Type::Times) {
-    return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
-  }
-  if (currentToken.type() == Token::Type::Slash) {
-    return shift(Division(leftHandSide, rightHandSide), nextToken);
-  }
-  if (currentToken.type() == Token::Type::Power) {
-    return shift(Power(leftHandSide, rightHandSide), nextToken);
-  }
-  if (currentToken.type() == Token::Type::SquareRoot) {
-    //assert(leftHandSide.isUndefined());
-    return shift(SquareRoot(rightHandSide), nextToken);
-  }
-
-  // TODO remaining tokens: comma, equal, store
+static inline bool comparePrecedence(Token::Type nextTokenType, Token::Type stoppingType) {
+  /* Tells whether parseUntil(stoppingType) may consume a token of type
+   * nextTokenType. Token::Type enumerators are declared by increasing
+   * precedence (EndOfStream < RightParenthesis < Plus < Times < Power), so a
+   * strictly greater type binds tighter and is always consumed. */
+  if (nextTokenType > stoppingType) {
+    return true;
+  }
+  /* On equal precedence only Power is consumed, which makes it
+   * right-associative while the other operators stay left-associative.
+   * Power is never EndOfStream, so no separate end-of-input test is needed. */
+  return nextTokenType == stoppingType && stoppingType == Token::Type::Power;
 }

-bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
-  /* Returns true if nextToken is of higher precedence than currentToken TODO */
-  return
-    // First, parse what is after the left grouping token
-    // until the corresponding right grouping token appears
-    /*!nextToken.isLeftGroupingToken()
-    &&
-    // If nextToken is an unary minus
-    !(nextToken.is(Token::Type::Minus)
-      &&
-      TokenHasTag(nextToken, TokenTag::UnaryMinus))
-    &&
-    (
-      (
-        currentToken.is(Token::Type::Minus) &&
-        TokenHasTag(currentToken, TokenTag::UnaryMinus) &&
-        (nextToken.type() < Token::Type::Power)
-      ) ||*/
-      // A token with higher precedence should be reduced first
-      (currentToken.type() > stoppingType) /*||
-      // Handle left-associative operators
-      (
-        (currentToken.type() == nextToken.type()) &&
-        TokenIsLeftAssociative(currentToken)
-      )
-    )*/
-  ;
+bool Parser::canPopToken(Token::Type stoppingType) { // Tries to make the next token current; returns false (changing nothing) when stoppingType forbids it.
+  const bool implicitTimes = tokenTypesCanBeImplicitlyMultiplied(m_currentToken.type(), m_nextToken.type()); // e.g. "2(3)": a Times is implied between the two tokens.
+  // An implied Times obeys the same precedence test as a written one; otherwise "2^3(4)" would parse as 2^(3*4).
+  if (!comparePrecedence(implicitTimes ? Token::Type::Times : m_nextToken.type(), stoppingType)) {
+    return false; // Both tokens are left untouched for the enclosing parseUntil to handle.
+  }
+  m_currentToken = implicitTimes ? Token(Token::Type::Times) : m_nextToken; // Synthesized Times, or the lookahead itself.
+  if (!implicitTimes) {
+    m_nextToken = popToken(); // Only a real token consumes input; a synthesized Times keeps the lookahead.
+  }
+  return true;
 }

 }
--- a/poincare/src/parsing/parser.h
+++ b/poincare/src/parsing/parser.h
@@ -2,29 +2,49 @@
 #define POINCARE_PARSING_PARSER_H

 #include "tokenizer.h"
-#include <poincare/expression.h>

 #include <poincare/addition.h>
 #include <poincare/division.h>
 #include <poincare/equal.h>
 #include <poincare/factorial.h>
+#include <poincare/multiplication.h>
 #include <poincare/opposite.h>
 #include <poincare/parenthesis.h>
-#include <poincare/number.h>
 #include <poincare/power.h>
 #include <poincare/square_root.h>
 #include <poincare/store.h>
 #include <poincare/subtraction.h>
+#include <poincare/symbol.h>
+// matrix ? with brackets
+// braces ?

 namespace Poincare {

 class Parser : public Tokenizer {
 public:
-  Parser(const char * input) : Tokenizer(input) {}
+  Parser(const char * input) :
+    Tokenizer(input),
+    m_currentToken(Token(Token::Type::Undefined)), // No token has been consumed yet.
+    m_nextToken(popToken()) {} // Prime the one-token lookahead from the input.
  Expression parse();
+
+  Expression parseNumber(Expression leftHandSide); // Token handlers, one per Token::Type that can be parsed. Public because the file-scope dispatch table in parser.cpp takes their addresses.
+  Expression parsePlus(Expression leftHandSide);
+  Expression parseTimes(Expression leftHandSide);
+  Expression parseSlash(Expression leftHandSide);
+  Expression parseMinus(Expression leftHandSide);
+  Expression parsePower(Expression leftHandSide);
+  Expression parseLeftParenthesis(Expression leftHandSide);
+  Expression parseSquareRoot(Expression leftHandSide);
+  Expression parseBang(Expression leftHandSide);
+  Expression parseEqual(Expression leftHandSide);
+  Expression noParse(Expression leftHandSide);
 private:
-  Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
-  bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
+  Expression parseUntil(Token::Type stoppingType); // Parses one operand, consuming tokens while canPopToken(stoppingType) allows it.
+  bool canPopToken(Token::Type stoppingType); // Advances to the next token (or synthesizes an implicit Times) and returns true if parsing may go on.
+
+  Token m_currentToken; // Token whose handler is being, or was last, run.
+  Token m_nextToken; // One-token lookahead, not consumed yet.
 };

 }
--- a/poincare/src/parsing/token.h
+++ b/poincare/src/parsing/token.h
@@ -13,11 +13,8 @@ public:
    Equal,
    Store,
    RightBracket,
-    LeftBracket,
    RightBrace,
-    LeftBrace,
    RightParenthesis,
-    LeftParenthesis,
    Plus,
    Minus,
    Times,
@@ -25,6 +22,9 @@ public:
    Power,
    SquareRoot,
    Bang,
+    LeftBracket,
+    LeftBrace,
+    LeftParenthesis,
    Number,
    Identifier,
    Comma,
@@ -36,21 +36,11 @@ public:
  Type type() const { return m_type; }
  bool is(Type t) const { return m_type == t; }
  bool isEndOfStream() const { return is(Type::EndOfStream); }
-  bool isLeftGroupingToken() const {
-    return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
-  }
-  bool isRightGroupingToken() const {
-    return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
-  }
-  const char * text() const { return m_text; }
-  void setText(const char * text) { m_text = text; }
-  size_t length() const { return m_length; }
-  void setLength(size_t length) { m_length = length; }
-
+  Expression expression() const { return m_expression; }
+  void setExpression(Expression e) { m_expression = e; }
 private:
  Type m_type;
-  const char * m_text;
-  size_t m_length;
+  Expression m_expression;
 };

 }
--- a/poincare/src/parsing/tokenizer.cpp
+++ b/poincare/src/parsing/tokenizer.cpp
@@ -10,7 +10,7 @@ bool Tokenizer::canPopChar(char c) {
  return false;
 }

-size_t Tokenizer::popInteger() {
+size_t Tokenizer::popDigits() {
  size_t length = 0;
  char c = currentChar();
  while (c >= '0' && c <= '9') {
@@ -22,12 +22,13 @@ size_t Tokenizer::popInteger() {

 Token Tokenizer::popNumber() {
  const char * integerPartText = m_text;
-  size_t integerPartLength = popInteger();
+  size_t integerPartLength = popDigits();

-/*  const char * decimalPartText = m_text;
+  const char * decimalPartText = m_text;
  size_t decimalPartLength = 0;
  if (canPopChar('.')) {
-    decimalPartLength = popInteger();
+    decimalPartText = m_text;
+    decimalPartLength = popDigits();
  }

  if (integerPartLength == 0 && decimalPartLength == 0) {
@@ -39,18 +40,14 @@ Token Tokenizer::popNumber() {
  bool exponentIsNegative = false;
  if (canPopChar('e')) {
    exponentIsNegative = canPopChar('-');
-    exponentPartLength = popInteger();
+    exponentPartLength = popDigits();
    if (exponentPartLength == 0) {
      return Token();
    }
  }

  Token result(Token::Type::Number);
-  //TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
-  return result;*/
-  Token result(Token::Type::Number);
-  result.setText(integerPartText);
-  result.setLength(integerPartLength);
+  result.setExpression(Number::ParseNumber(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
  return result;
 }

@@ -59,7 +56,6 @@ static inline bool isLetter(char c) {
 }

 Token Tokenizer::popIdentifier() {
-  const char * text = m_text;
  size_t length = 0;
  char c = currentChar();
  while (isLetter(c)) {
@@ -71,15 +67,18 @@ Token Tokenizer::popIdentifier() {
  return result;
 }

-Token Tokenizer::popToken() { // associative array?
+Token Tokenizer::popToken() {
  const char c = currentChar();
-  if (canPopChar(0)) {
-    return Token(Token::Type::EndOfStream);
+  if ((c == '.') || (c >= '0' && c <= '9')) {
+    return popNumber();
+  }
+  if (isLetter(c)) {
+    return popIdentifier();
  }
  if (canPopChar('!')) {
    return Token(Token::Type::Bang);
  }
-  if (c >= '(' && (c <= '/' && c != '.')) {
+  if (c >= '(' && c <= '/' && c != '.') {
    Token::Type typeForChar[] = {
      Token::Type::LeftParenthesis,
      Token::Type::RightParenthesis,
@@ -112,20 +111,17 @@ Token Tokenizer::popToken() { // associative array?
  if (canPopChar('}')) {
    return Token(Token::Type::RightBrace);
  }
-  if (canPopChar('\x89')) {
+  if (canPopChar('\x89')) { // Ion::Charset::SmallPi
    return Token(Token::Type::Number);
  }
-  if (canPopChar('\x90')) {
+  if (canPopChar('\x90')) { // Ion::Charset::Store
    return Token(Token::Type::Store);
  }
-  if (canPopChar('\x91')) {
+  if (canPopChar('\x91')) { // Ion::Charset::Root
    return Token(Token::Type::SquareRoot);
  }
-  if ((c == '.') || (c >= '0' && c <= '9')) {
-    return popNumber();
-  }
-  if (isLetter(c)) {
-    return popIdentifier();
+  if (canPopChar(0)) {
+    return Token(Token::Type::EndOfStream);
  }
  return Token(); // TODO error
 }
--- a/poincare/src/parsing/tokenizer.h
+++ b/poincare/src/parsing/tokenizer.h
@@ -2,6 +2,7 @@
 #define POINCARE_PARSING_TOKENIZER_H

 #include "token.h"
+#include <poincare/number.h>

 namespace Poincare {

@@ -13,7 +14,7 @@ private:
  const char popChar() { return *++m_text; }
  const char currentChar() const { return *m_text; }
  bool canPopChar(char c);
-  size_t popInteger();
+  size_t popDigits();
  Token popNumber();
  Token popIdentifier();