[poincare] Second version of homemade parser

Use the function call stack (recursion) instead of auxiliary token and expression stacks.
2026-03-18 21:30:38 +01:00 · 2018-09-26 17:30:24 +02:00
parent 2b87c735d3
commit d0f2a54d6d
8 changed files with 137 additions and 257 deletions
--- a/poincare/Makefile
+++ b/poincare/Makefile
@@ -132,7 +132,6 @@ objs += $(addprefix poincare/src/,\

 objs += $(addprefix poincare/src/parsing/,\
  parser.o\
-  parser_reductions.o\
  tokenizer.o\
 )

--- a/poincare/src/parsing/parser.cpp
+++ b/poincare/src/parsing/parser.cpp
@@ -2,7 +2,11 @@

 namespace Poincare {

-static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
+Expression Parser::parse() {
+  return shift(Expression(), popToken(), Token::Type::EndOfStream);
+}
+
+static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
  return
    (t1 == Token::Type::RightParenthesis || t1 == Token::Type::Number || t1 == Token::Type::Identifier)
    &&
@@ -10,51 +14,103 @@ static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2)
  ;
 }

-static bool minusTokenIsUnaryIfAfter(Token::Type t) {
-  return false; //TODO
-}
+Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {

-void Parser::reduce() {
-  Token popped = m_tokenStack.pop();
-  // if(popped.is(Token::Type::Identifier)) TODO
-  if(tokenTypesCanBeImplicitlyMultiplied(popped.type(), m_lookahead.type())){
-    m_tokenStack.push(Token(Token::Type::Times));
-    // this makes implicit multiplication have higher precedence than division, power, square root
+  if (currentToken.type() == Token::Type::EndOfStream) {
+    return leftHandSide;
  }
-  //if minusTokenIsUnaryIfAfter
-  sReductions[static_cast<int>(popped.type())](&m_expressionStack, popped);
-}

-void Parser::shift() {
-  m_tokenStack.push(m_lookahead);
-  m_lookahead = m_tokenizer.popToken();
-}
+  Token nextToken = popToken();

-bool Parser::needsReduction() {
-  /* This method compares the precedence of m_tokenStack.top() and m_lookahead.
-   * Its return value decides whether to reduce m_tokenStack.
-   * In addition to precedence comparison, the method may handle associativity of binary operators:
-   *   1+1+1 yields 1+(1+1)
-   *   2-5-7 yields (2-5)-7
-   *   2/3/4 yields (2/3)/4
-   * but this b  */
-  //TODO opposite and power are not symmetric
-  return (!m_lookahead.isLeftGroupingToken()) && (!m_tokenStack.isEmpty()) && (m_tokenStack.top().type() >= m_lookahead.type());
-}
+  if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
+    // TODO implicit multiplication
+  }

-Expression Parser::parse(){
-  while(!m_lookahead.isEndOfStream()) {
-    shift();
-    while (needsReduction()) {
-      reduce();
+  // If currentToken ... FIXME find a good description: constitutes an operand?
+  if (currentToken.type() == Token::Type::Number) {
+    //assert(leftHandSide.isUndefined());
+    return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
+  }
+  /*if (currentToken.isLeftGroupingToken()) {
+    assert(leftHandSide.isUndefined());
+    return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
+  }*/
+
+  // If currentToken does not require rightHandSide
+  if (currentToken.type() == Token::Type::Bang) {
+    return shift(Factorial(leftHandSide), nextToken);
+  }
+  /*if (currentToken.type() == Token::Type::Identifier) {
+    if (nextToken.type() == Token::Type::LeftParenthesis) {
+      // FIXME return shift(Function(rightHandSide), nextToken);
+    } else {
+      return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
+    }
+  }*/
+
+  // If currentToken requires a rightHandSide expression
+
+  // First, build rightHandSide
+  Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
+  if (comparePrecedence(currentToken, stoppingType)) {
+    return leftHandSide;
+  }
+
+  // Then construct the whole expression and continue
+  if (currentToken.type() == Token::Type::Plus) {
+    return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
+  }
+  if (currentToken.type() == Token::Type::Minus) {
+    if (leftHandSide.isUndefined()) {
+      return shift(Opposite(rightHandSide), nextToken, stoppingType);
+    } else {
+      return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
    }
  }
-  assert(m_expressionStack.size() == 1); // FIXME: Handle as parsing error
-  return m_expressionStack.pop();
+  if (currentToken.type() == Token::Type::Times) {
+    return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
+  }
+  if (currentToken.type() == Token::Type::Slash) {
+    return shift(Division(leftHandSide, rightHandSide), nextToken);
+  }
+  if (currentToken.type() == Token::Type::Power) {
+    return shift(Power(leftHandSide, rightHandSide), nextToken);
+  }
+  if (currentToken.type() == Token::Type::SquareRoot) {
+    //assert(leftHandSide.isUndefined());
+    return shift(SquareRoot(rightHandSide), nextToken);
+  }
+
+  // TODO remaining tokens: comma, equal, store
 }

-Expression Parse(const char * input){
-  return Parser(input).parse();
+bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
+  /* Returns true if currentToken is of higher precedence than stoppingType. TODO */
+  return
+    // First, parse what is after the left grouping token
+    // until the corresponding right grouping token appears
+    /*!nextToken.isLeftGroupingToken()
+    &&
+    // If nextToken is a unary minus
+    !(nextToken.is(Token::Type::Minus)
+      &&
+      TokenHasTag(nextToken, TokenTag::UnaryMinus))
+    &&
+    (
+      (
+        currentToken.is(Token::Type::Minus) &&
+        TokenHasTag(currentToken, TokenTag::UnaryMinus) &&
+        (nextToken.type() < Token::Type::Power)
+      ) ||*/
+      // A token with higher precedence should be reduced first
+      (currentToken.type() > stoppingType) /*||
+      // Handle left-associative operators
+      (
+        (currentToken.type() == nextToken.type()) &&
+        TokenIsLeftAssociative(currentToken)
+      )
+    )*/
+  ;
 }

 }
--- a/poincare/src/parsing/parser.h
+++ b/poincare/src/parsing/parser.h
@@ -2,51 +2,31 @@
 #define POINCARE_PARSING_PARSER_H

 #include "tokenizer.h"
-#include "stack.h"
+#include <poincare/expression.h>
+
+#include <poincare/addition.h>
+#include <poincare/division.h>
+#include <poincare/equal.h>
+#include <poincare/factorial.h>
+#include <poincare/opposite.h>
+#include <poincare/parenthesis.h>
+#include <poincare/number.h>
+#include <poincare/power.h>
+#include <poincare/square_root.h>
+#include <poincare/store.h>
+#include <poincare/subtraction.h>

 namespace Poincare {

-class Parser {
+class Parser : public Tokenizer {
 public:
-  using ExpressionStack = Stack<Expression,100>;
-  using TokenStack = Stack<Token,100>;
-
-  Parser(const char * input) :
-    m_tokenizer(input),
-    m_lookahead(m_tokenizer.popToken()),
-    m_tokenStack(),
-    m_expressionStack()
-  {}
+  Parser(const char * input) : Tokenizer(input) {}
  Expression parse();
-
-  enum class TokenTag {
-    None,
-    UnaryMinus,
-    IdentifierIsFunction
-  };
-  static void TokenSetTag(Token * t, TokenTag tag) {
-    t->setTag(static_cast<int>(tag));
-  }
-  static bool TokenHasTag(const Token & t, TokenTag tag) {
-    return t.tag() == static_cast<int>(tag);
-  }
-
 private:
-  typedef void (*Reduction)(ExpressionStack * stack, const Token & token);
-  static const Reduction sReductions[];
-
-  void shift();
-  bool needsReduction();
-  void reduce();
-
-  Tokenizer m_tokenizer;
-  Token m_lookahead;
-  TokenStack m_tokenStack;
-  ExpressionStack m_expressionStack;
+  Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
+  bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
 };

-Expression Parse(const char * input);
-
 }

 #endif
--- a/poincare/src/parsing/parser_reductions.cpp
+++ b/poincare/src/parsing/parser_reductions.cpp
@@ -1,113 +0,0 @@
-#include "parser.h"
-
-#include <poincare/equal.h>
-#include <poincare/store.h>
-#include <poincare/power.h>
-#include <poincare/addition.h>
-#include <poincare/opposite.h>
-#include <poincare/parenthesis.h>
-#include <poincare/subtraction.h>
-#include <poincare/division.h>
-#include <poincare/square_root.h>
-#include <poincare/factorial.h>
-
-namespace Poincare {
-
-static void nopReduction(Parser::ExpressionStack * stack, const Token & token) {
-}
-
-static void equalReduction(Parser::ExpressionStack * stack, const Token & token) {
-  Expression rightHandSide = stack->pop();
-  stack->push(Equal(stack->pop(), rightHandSide));
-}
-
-
-// TODO
-#if 0
-static void stoReduction(Parser::ExpressionStack * stack, const Token & token) {
-  Expression rightHandSide = stack->pop();
-  stack->push(Store(stack->pop(), rightHandSide));
-}
-#endif
-
-static void parenthesisReduction(Parser::ExpressionStack * stack, const Token & token) {
-  stack->push(Parenthesis(stack->pop()));
-}
-
-static void plusReduction(Parser::ExpressionStack * stack, const Token & token) {
-  assert(stack->size() >= 2);
-  Expression rightHandSide = stack->pop();
-  stack->push(Addition(stack->pop(), rightHandSide));
-}
-
-void minusReduction(Parser::ExpressionStack * stack, const Token & token) {
-  if (Parser::TokenHasTag(token, Parser::TokenTag::UnaryMinus)) {
-    stack->push(Opposite(stack->pop()));
-  } else {
-    Expression rightHandSide = stack->pop();
-    stack->push(Subtraction(stack->pop(), rightHandSide));
-  }
-}
-
-static void timesReduction(Parser::ExpressionStack * stack, const Token & token) {
-  Expression rightHandSide = stack->pop();
-  stack->push(Multiplication(stack->pop(), rightHandSide));
-}
-
-static void slashReduction(Parser::ExpressionStack * stack, const Token & token) {
-  Expression rightHandSide = stack->pop();
-  stack->push(Division(stack->pop(), rightHandSide));
-}
-
-static void powerReduction(Parser::ExpressionStack * stack, const Token & token) {
-  Expression rightHandSide = stack->pop();
-  stack->push(Power(stack->pop(), rightHandSide));
-}
-
-static void squareRootReduction(Parser::ExpressionStack * stack, const Token & token) {
-  stack->push(SquareRoot(stack->pop()));
-}
-
-static void numberReduction(Parser::ExpressionStack * stack, const Token & token) {
-  stack->push(token.expression());
-}
-
-static void identifierReduction(Parser::ExpressionStack * stack, const Token & token) {
-  if (Parser::TokenHasTag(token, Parser::TokenTag::IdentifierIsFunction)) {
-    //TODO
-  }
-  stack->push(token.expression());
-}
-
-static void commaReduction(Parser::ExpressionStack * stack, const Token & token) {
-  //TODO, goes with functions
-}
-
-static void bangReduction(Parser::ExpressionStack * stack, const Token & token) {
-  stack->push(Factorial(stack->pop()));
-}
-
-const Parser::Reduction Parser::sReductions[] = {
-  &nopReduction,        // EndOfStream
-  &equalReduction,      // Equal
-  &nopReduction, //&stoReduction,        // Sto
-  &nopReduction,        // RightBracket
-  &parenthesisReduction,    // LeftBracket
-  &nopReduction,        // RightBrace
-  &parenthesisReduction,    // LeftBrace
-  &nopReduction,        // RightParenthesis
-  &parenthesisReduction,    // LeftParenthesis
-  &plusReduction,       // Plus
-  &minusReduction,      // Minus
-  &timesReduction,      // Times
-  &slashReduction,      // Slash
-  &powerReduction,      // Power
-  &squareRootReduction, // SquareRoot
-  &bangReduction,       // Bang
-  &numberReduction,     // Number
-  &identifierReduction, // Identifier
-  &commaReduction,      // Comma
-  &nopReduction         // Undefined
-};
-
-}
--- a/poincare/src/parsing/stack.h
+++ b/poincare/src/parsing/stack.h
@@ -1,40 +0,0 @@
-#ifndef POINCARE_PARSING_STACK_H
-#define POINCARE_PARSING_STACK_H
-
-#include <assert.h>
-
-namespace Poincare {
-
-template <class T, int maxSize>
-class Stack {
-public:
-  Stack() : m_nextItem(m_stack) {};
-  int push(T item) {
-    assert(m_nextItem >= m_stack);
-    if (m_nextItem >= m_stack + maxSize) {
-      return -1;
-    }
-    *m_nextItem = item;
-    m_nextItem++;
-    return 0;
-  }
-  T pop() {
-    assert(m_nextItem > m_stack);
-    m_nextItem--;
-    return *m_nextItem;
-  }
-  T top() const {
-    assert(m_nextItem > m_stack);
-    assert(m_nextItem < m_stack + maxSize);
-    return *(m_nextItem-1);
-  }
-  bool isEmpty() const { return (m_stack == m_nextItem); };
-  int size() const { return m_nextItem - m_stack; }
-private:
-  T m_stack[maxSize];
-  T * m_nextItem;
-};
-
-}
-
-#endif
--- a/poincare/src/parsing/token.h
+++ b/poincare/src/parsing/token.h
@@ -1,16 +1,17 @@
 #ifndef POINCARE_PARSING_TOKEN_H
 #define POINCARE_PARSING_TOKEN_H

-#include <poincare/expression.h>
+#include <poincare/expression.h> // size_t

 namespace Poincare {

 class Token {
 public:
-  enum class Type : uint8_t { // Ordered from lower to higher precedence
+  enum class Type {
+    // Ordered from lower to higher precedence to make parser's job easier
    EndOfStream, // Must be the first
    Equal,
-    Sto,
+    Store,
    RightBracket,
    LeftBracket,
    RightBrace,
@@ -18,13 +19,11 @@ public:
    RightParenthesis,
    LeftParenthesis,
    Plus,
-    Minus, // Subtraction
+    Minus,
    Times,
    Slash,
-    // Opposite, unary
    Power,
    SquareRoot,
-    // Implicit times: see Parser::reduce()
    Bang,
    Number,
    Identifier,
@@ -32,7 +31,7 @@ public:
    Undefined
  };

-  Token(Type type = Type::Undefined) : m_type(type), m_tag(0) {};
+  Token(Type type = Type::Undefined) : m_type(type) {};

  Type type() const { return m_type; }
  bool is(Type t) const { return m_type == t; }
@@ -40,17 +39,18 @@ public:
  bool isLeftGroupingToken() const {
    return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
  }
-
-  uint8_t tag() const { return m_tag; }
-  void setTag(uint8_t t) { m_tag = t; }
-
-  Expression expression() const { return m_expression; }
-  void setExpression(Expression expression) { m_expression = expression; }
+  bool isRightGroupingToken() const {
+    return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
+  }
+  const char * text() const { return m_text; }
+  void setText(const char * text) { m_text = text; }
+  size_t length() const { return m_length; }
+  void setLength(size_t length) { m_length = length; }

 private:
  Type m_type;
-  uint8_t m_tag;
-  Expression m_expression;
+  const char * m_text;
+  size_t m_length;
 };

 }
--- a/poincare/src/parsing/tokenizer.cpp
+++ b/poincare/src/parsing/tokenizer.cpp
@@ -1,8 +1,5 @@
 #include "tokenizer.h"

-#include <poincare/rational.h> // FIXME: Change me for Number
-#include <poincare/symbol.h>
-
 namespace Poincare {

 bool Tokenizer::canPopChar(char c) {
@@ -27,7 +24,7 @@ Token Tokenizer::popNumber() {
  const char * integerPartText = m_text;
  size_t integerPartLength = popInteger();

-  const char * decimalPartText = m_text;
+/*  const char * decimalPartText = m_text;
  size_t decimalPartLength = 0;
  if (canPopChar('.')) {
    decimalPartLength = popInteger();
@@ -49,9 +46,11 @@ Token Tokenizer::popNumber() {
  }

  Token result(Token::Type::Number);
-  //result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
-  // FIXME!!!!
-  result.setExpression(Rational(1));
+  //TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
+  return result;*/
+  Token result(Token::Type::Number);
+  result.setText(integerPartText);
+  result.setLength(integerPartLength);
  return result;
 }

@@ -68,8 +67,7 @@ Token Tokenizer::popIdentifier() {
    c = popChar();
  }
  Token result(Token::Type::Identifier);
-  //result.setExpression(Identifier(text, length));
-  result.setExpression(Symbol('a')); // FIXME
+  //TODO result.setExpression(Identifier(text, length));
  return result;
 }

@@ -118,7 +116,7 @@ Token Tokenizer::popToken() { // associative array?
    return Token(Token::Type::Number);
  }
  if (canPopChar('\x90')) {
-    return Token(Token::Type::Sto);
+    return Token(Token::Type::Store);
  }
  if (canPopChar('\x91')) {
    return Token(Token::Type::SquareRoot);
@@ -129,7 +127,7 @@ Token Tokenizer::popToken() { // associative array?
  if (isLetter(c)) {
    return popIdentifier();
  }
-  return Token();
+  return Token(); // TODO error
 }

 }
--- a/poincare/src/parsing/tokenizer.h
+++ b/poincare/src/parsing/tokenizer.h
@@ -9,14 +9,14 @@ class Tokenizer {
 public:
  Tokenizer(const char * input) : m_text(input) {};
  Token popToken();
-  const char * text() const { return m_text; };
 private:
-  const char popChar() { return *++m_text; };
-  const char currentChar() const { return *m_text; };
+  const char popChar() { return *++m_text; }
+  const char currentChar() const { return *m_text; }
  bool canPopChar(char c);
  size_t popInteger();
  Token popNumber();
  Token popIdentifier();
+
  const char * m_text;
 };