[poincare] Second version of homemade parser

Using function stack instead of auxiliary stack.
This commit is contained in:
Ruben Dashyan
2018-09-26 17:30:24 +02:00
committed by Émilie Feral
parent 2b87c735d3
commit d0f2a54d6d
8 changed files with 137 additions and 257 deletions

View File

@@ -132,7 +132,6 @@ objs += $(addprefix poincare/src/,\
objs += $(addprefix poincare/src/parsing/,\
parser.o\
parser_reductions.o\
tokenizer.o\
)

View File

@@ -2,7 +2,11 @@
namespace Poincare {
static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
/* Entry point of the parser: starts the recursive shift() with a
 * default-constructed Expression as left-hand side, the first token popped
 * from the input as current token, and Token::Type::EndOfStream as the
 * stopping type. Returns whatever shift() returns. */
Expression Parser::parse() {
return shift(Expression(), popToken(), Token::Type::EndOfStream);
}
static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
return
(t1 == Token::Type::RightParenthesis || t1 == Token::Type::Number || t1 == Token::Type::Identifier)
&&
@@ -10,51 +14,103 @@ static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2)
;
}
/* Placeholder: currently ignores t and always answers false.
 * NOTE(review): judging by the name, it is meant to tell whether a minus
 * token following a token of type t is a unary minus - confirm when
 * implementing. */
static bool minusTokenIsUnaryIfAfter(Token::Type t) {
return false; // TODO: not implemented yet
}
Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {
void Parser::reduce() {
Token popped = m_tokenStack.pop();
// if(popped.is(Token::Type::Identifier)) TODO
if(tokenTypesCanBeImplicitlyMultiplied(popped.type(), m_lookahead.type())){
m_tokenStack.push(Token(Token::Type::Times));
// this makes implicit multiplication have higher precedence than division, power, square root
if (currentToken.type() == Token::Type::EndOfStream) {
return leftHandSide;
}
//if minusTokenIsUnaryIfAfter
sReductions[static_cast<int>(popped.type())](&m_expressionStack, popped);
}
/* Moves the lookahead token onto the token stack, then reads the next token
 * from the tokenizer into m_lookahead.
 * NOTE(review): Stack::push reports a full stack by returning -1; that
 * result is not checked here. */
void Parser::shift() {
m_tokenStack.push(m_lookahead);
m_lookahead = m_tokenizer.popToken();
}
Token nextToken = popToken();
bool Parser::needsReduction() {
  /* Decides whether the token on top of m_tokenStack must be reduced before
   * m_lookahead is shifted. Token::Type values are ordered from lower to
   * higher precedence, so the decision is a plain comparison of the types.
   *
   * The original comment listed the intended associativity handling:
   *   1+1+1 yields 1+(1+1)
   *   2-5-7 yields (2-5)-7
   *   2/3/4 yields (2/3)/4
   * NOTE(review): that comment was cut off mid-sentence ("but this b"), so
   * the caveat it was about to state is unknown.
   * TODO: opposite and power are not symmetric. */

  // A left grouping token is always shifted first, never a trigger to reduce.
  if (m_lookahead.isLeftGroupingToken()) {
    return false;
  }
  // Nothing on the stack means nothing to reduce.
  if (m_tokenStack.isEmpty()) {
    return false;
  }
  // Reduce when the stacked token binds at least as tightly as the lookahead.
  return m_tokenStack.top().type() >= m_lookahead.type();
}
if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
// TODO implicit multiplication
}
Expression Parser::parse(){
while(!m_lookahead.isEndOfStream()) {
shift();
while (needsReduction()) {
reduce();
// If currentToken ... FIXME find a good description: constitutes an operand?
if (currentToken.type() == Token::Type::Number) {
//assert(leftHandSide.isUndefined());
return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
}
/*if (currentToken.isLeftGroupingToken()) {
assert(leftHandSide.isUndefined());
return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
}*/
// If currentToken does not require rightHandSide
if (currentToken.type() == Token::Type::Bang) {
return shift(Factorial(leftHandSide), nextToken);
}
/*if (currentToken.type() == Token::Type::Identifier) {
if (nextToken.type() == Token::Type::LeftParenthesis) {
// FIXME return shift(Function(rightHandSide), nextToken);
} else {
return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
}
}*/
// If currentToken requires a rightHandSide expression
// First, build rightHandSide
Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
if (comparePrecedence(currentToken, stoppingType)) {
return leftHandSide;
}
// Then construct the whole expression and continue
if (currentToken.type() == Token::Type::Plus) {
return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
}
if (currentToken.type() == Token::Type::Minus) {
if (leftHandSide.isUndefined()) {
return shift(Opposite(rightHandSide), nextToken, stoppingType);
} else {
return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
}
}
assert(m_expressionStack.size() == 1); // FIXME: Handle as parsing error
return m_expressionStack.pop();
if (currentToken.type() == Token::Type::Times) {
return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::Slash) {
return shift(Division(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::Power) {
return shift(Power(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::SquareRoot) {
//assert(leftHandSide.isUndefined());
return shift(SquareRoot(rightHandSide), nextToken);
}
// TODO remaining tokens: comma, equal, store
}
Expression Parse(const char * input){
return Parser(input).parse();
bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
  /* Returns true when currentToken's type compares strictly greater than
   * stoppingType. Token::Type is ordered from lower to higher precedence, so
   * this means currentToken has higher precedence than the stopping type and
   * should be reduced first.
   *
   * TODO: an earlier draft of this predicate also intended to
   *  - keep parsing after a left grouping token until the matching right
   *    grouping token appears,
   *  - treat a unary minus specially (tokens tagged UnaryMinus, compared
   *    against Token::Type::Power),
   *  - handle left-associative operators when both tokens have the same type.
   * None of that is implemented; only the comparison below is active. */
  const bool currentHasHigherPrecedence = currentToken.type() > stoppingType;
  return currentHasHigherPrecedence;
}
}

View File

@@ -2,51 +2,31 @@
#define POINCARE_PARSING_PARSER_H
#include "tokenizer.h"
#include "stack.h"
#include <poincare/expression.h>
#include <poincare/addition.h>
#include <poincare/division.h>
#include <poincare/equal.h>
#include <poincare/factorial.h>
#include <poincare/opposite.h>
#include <poincare/parenthesis.h>
#include <poincare/number.h>
#include <poincare/power.h>
#include <poincare/square_root.h>
#include <poincare/store.h>
#include <poincare/subtraction.h>
namespace Poincare {
class Parser {
class Parser : public Tokenizer {
public:
using ExpressionStack = Stack<Expression,100>;
using TokenStack = Stack<Token,100>;
Parser(const char * input) :
m_tokenizer(input),
m_lookahead(m_tokenizer.popToken()),
m_tokenStack(),
m_expressionStack()
{}
Parser(const char * input) : Tokenizer(input) {}
Expression parse();
/* Extra marker the parser stores in a Token's tag field through TokenSetTag
 * and reads back through TokenHasTag. */
enum class TokenTag {
// Value 0, which is also the tag a freshly constructed Token carries.
None,
// Tested by minusReduction to build an Opposite instead of a Subtraction.
UnaryMinus,
// Tested by identifierReduction; the function case itself is still a TODO.
IdentifierIsFunction
};
/* Stores tag in token t.
 * Token::setTag takes a uint8_t, so the enumerator is converted straight to
 * that type instead of going through int and being narrowed implicitly. */
static void TokenSetTag(Token * t, TokenTag tag) {
  t->setTag(static_cast<uint8_t>(tag));
}
/* Tells whether token t currently carries the given tag. */
static bool TokenHasTag(const Token & t, TokenTag tag) {
  const int expectedTag = static_cast<int>(tag);
  return t.tag() == expectedTag;
}
private:
typedef void (*Reduction)(ExpressionStack * stack, const Token & token);
static const Reduction sReductions[];
void shift();
bool needsReduction();
void reduce();
Tokenizer m_tokenizer;
Token m_lookahead;
TokenStack m_tokenStack;
ExpressionStack m_expressionStack;
Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
};
Expression Parse(const char * input);
}
#endif

View File

@@ -1,113 +0,0 @@
#include "parser.h"
#include <poincare/equal.h>
#include <poincare/store.h>
#include <poincare/power.h>
#include <poincare/addition.h>
#include <poincare/opposite.h>
#include <poincare/parenthesis.h>
#include <poincare/subtraction.h>
#include <poincare/division.h>
#include <poincare/square_root.h>
#include <poincare/factorial.h>
namespace Poincare {
/* Reduction that does nothing: leaves the expression stack untouched.
 * Used in sReductions for token types that have no reduction of their own. */
static void nopReduction(Parser::ExpressionStack * stack, const Token & token) {
}
/* Replaces the two topmost expressions by an Equal node built from them. */
static void equalReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off the stack first.
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Equal(lhs, rhs));
}
// TODO
#if 0
/* Replaces the two topmost expressions by a Store node built from them.
 * Currently compiled out by the surrounding #if 0. */
static void stoReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off the stack first.
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Store(lhs, rhs));
}
#endif
/* Wraps the topmost expression in a Parenthesis node. */
static void parenthesisReduction(Parser::ExpressionStack * stack, const Token & token) {
  auto inner = stack->pop();
  stack->push(Parenthesis(inner));
}
/* Replaces the two topmost expressions by their Addition.
 * Requires at least two expressions on the stack (asserted). */
static void plusReduction(Parser::ExpressionStack * stack, const Token & token) {
  assert(stack->size() >= 2);
  // The right operand was pushed last, so it comes off the stack first.
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Addition(lhs, rhs));
}
/* Reduces a minus token.
 * - Token tagged UnaryMinus: the topmost expression is replaced by its
 *   Opposite.
 * - Otherwise: the two topmost expressions are replaced by their Subtraction
 *   (left operand below, right operand on top).
 * Declared static like every other reduction in this file: it is only
 * reached through the sReductions table and should not have external
 * linkage. */
static void minusReduction(Parser::ExpressionStack * stack, const Token & token) {
  if (Parser::TokenHasTag(token, Parser::TokenTag::UnaryMinus)) {
    stack->push(Opposite(stack->pop()));
    return;
  }
  // The right operand was pushed last, so it comes off the stack first.
  Expression rightHandSide = stack->pop();
  stack->push(Subtraction(stack->pop(), rightHandSide));
}
/* Replaces the two topmost expressions by their Multiplication. */
static void timesReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off the stack first.
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Multiplication(lhs, rhs));
}
/* Replaces the two topmost expressions by a Division: the lower one is the
 * first argument, the top one the second. */
static void slashReduction(Parser::ExpressionStack * stack, const Token & token) {
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Division(lhs, rhs));
}
/* Replaces the two topmost expressions by a Power: the lower one is the
 * first argument, the top one the second. */
static void powerReduction(Parser::ExpressionStack * stack, const Token & token) {
  Expression rhs = stack->pop();
  Expression lhs = stack->pop();
  stack->push(Power(lhs, rhs));
}
/* Replaces the topmost expression by its SquareRoot. */
static void squareRootReduction(Parser::ExpressionStack * stack, const Token & token) {
  auto radicand = stack->pop();
  stack->push(SquareRoot(radicand));
}
/* Pushes the expression carried by a Number token onto the stack. */
static void numberReduction(Parser::ExpressionStack * stack, const Token & token) {
  auto value = token.expression();
  stack->push(value);
}
/* Pushes the expression carried by an Identifier token onto the stack.
 * Identifiers tagged IdentifierIsFunction are detected but not yet treated
 * differently. */
static void identifierReduction(Parser::ExpressionStack * stack, const Token & token) {
  const bool namesFunction = Parser::TokenHasTag(token, Parser::TokenTag::IdentifierIsFunction);
  if (namesFunction) {
    // TODO: function identifiers are not handled yet
  }
  stack->push(token.expression());
}
/* Reduction for the Comma token. Currently empty: it leaves the expression
 * stack untouched. */
static void commaReduction(Parser::ExpressionStack * stack, const Token & token) {
// TODO: to be implemented together with function support
}
/* Replaces the topmost expression by its Factorial. */
static void bangReduction(Parser::ExpressionStack * stack, const Token & token) {
  auto operand = stack->pop();
  stack->push(Factorial(operand));
}
/* Dispatch table of reductions. Parser::reduce() indexes it with
 * static_cast<int>(token.type()), so the entries must stay in exactly the
 * same order as the enumerators of Token::Type; the trailing comment on each
 * line names the token type that entry serves. */
const Parser::Reduction Parser::sReductions[] = {
&nopReduction, // EndOfStream
&equalReduction, // Equal
&nopReduction, //&stoReduction, // Sto (stoReduction is compiled out for now)
&nopReduction, // RightBracket
&parenthesisReduction, // LeftBracket
&nopReduction, // RightBrace
&parenthesisReduction, // LeftBrace
&nopReduction, // RightParenthesis
&parenthesisReduction, // LeftParenthesis
&plusReduction, // Plus
&minusReduction, // Minus
&timesReduction, // Times
&slashReduction, // Slash
&powerReduction, // Power
&squareRootReduction, // SquareRoot
&bangReduction, // Bang
&numberReduction, // Number
&identifierReduction, // Identifier
&commaReduction, // Comma
&nopReduction // Undefined
};
}

View File

@@ -1,40 +0,0 @@
#ifndef POINCARE_PARSING_STACK_H
#define POINCARE_PARSING_STACK_H
#include <assert.h>
namespace Poincare {
/* Fixed-capacity LIFO container holding at most maxSize items of type T in
 * an in-object array (no dynamic allocation). T must be default-
 * constructible and copy-assignable.
 *
 * The fill level is kept as an index rather than as a pointer into the
 * array, so that the implicitly generated copy constructor and assignment
 * produce an independent stack instead of one whose cursor still points
 * into the source object's storage. */
template <class T, int maxSize>
class Stack {
public:
  Stack() : m_size(0) {}

  /* Puts item on top of the stack.
   * Returns 0 on success, or -1 (stack unchanged) when it is already full. */
  int push(T item) {
    assert(m_size >= 0);
    if (m_size >= maxSize) {
      return -1;
    }
    m_stack[m_size++] = item;
    return 0;
  }

  /* Removes and returns the top item. The stack must not be empty. */
  T pop() {
    assert(m_size > 0);
    return m_stack[--m_size];
  }

  /* Returns a copy of the top item without removing it. The stack must not
   * be empty; a completely full stack is a valid state. */
  T top() const {
    assert(m_size > 0);
    assert(m_size <= maxSize);
    return m_stack[m_size - 1];
  }

  bool isEmpty() const { return m_size == 0; }

  /* Number of items currently stored. */
  int size() const { return m_size; }

private:
  T m_stack[maxSize];
  int m_size; // Number of used slots; the top item is m_stack[m_size - 1]
};
}
#endif

View File

@@ -1,16 +1,17 @@
#ifndef POINCARE_PARSING_TOKEN_H
#define POINCARE_PARSING_TOKEN_H
#include <poincare/expression.h>
#include <poincare/expression.h> // size_t
namespace Poincare {
class Token {
public:
enum class Type : uint8_t { // Ordered from lower to higher precedence
enum class Type {
// Ordered from lower to higher precedence to make parser's job easier
EndOfStream, // Must be the first
Equal,
Sto,
Store,
RightBracket,
LeftBracket,
RightBrace,
@@ -18,13 +19,11 @@ public:
RightParenthesis,
LeftParenthesis,
Plus,
Minus, // Subtraction
Minus,
Times,
Slash,
// Opposite, unary
Power,
SquareRoot,
// Implicit times: see Parser::reduce()
Bang,
Number,
Identifier,
@@ -32,7 +31,7 @@ public:
Undefined
};
Token(Type type = Type::Undefined) : m_type(type), m_tag(0) {};
Token(Type type = Type::Undefined) : m_type(type) {};
Type type() const { return m_type; }
bool is(Type t) const { return m_type == t; }
@@ -40,17 +39,18 @@ public:
bool isLeftGroupingToken() const {
return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
}
uint8_t tag() const { return m_tag; }
void setTag(uint8_t t) { m_tag = t; }
Expression expression() const { return m_expression; }
void setExpression(Expression expression) { m_expression = expression; }
bool isRightGroupingToken() const {
return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
}
const char * text() const { return m_text; }
void setText(const char * text) { m_text = text; }
size_t length() const { return m_length; }
void setLength(size_t length) { m_length = length; }
private:
Type m_type;
uint8_t m_tag;
Expression m_expression;
const char * m_text;
size_t m_length;
};
}

View File

@@ -1,8 +1,5 @@
#include "tokenizer.h"
#include <poincare/rational.h> // FIXME: Change me for Number
#include <poincare/symbol.h>
namespace Poincare {
bool Tokenizer::canPopChar(char c) {
@@ -27,7 +24,7 @@ Token Tokenizer::popNumber() {
const char * integerPartText = m_text;
size_t integerPartLength = popInteger();
const char * decimalPartText = m_text;
/* const char * decimalPartText = m_text;
size_t decimalPartLength = 0;
if (canPopChar('.')) {
decimalPartLength = popInteger();
@@ -49,9 +46,11 @@ Token Tokenizer::popNumber() {
}
Token result(Token::Type::Number);
//result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
// FIXME!!!!
result.setExpression(Rational(1));
//TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
return result;*/
Token result(Token::Type::Number);
result.setText(integerPartText);
result.setLength(integerPartLength);
return result;
}
@@ -68,8 +67,7 @@ Token Tokenizer::popIdentifier() {
c = popChar();
}
Token result(Token::Type::Identifier);
//result.setExpression(Identifier(text, length));
result.setExpression(Symbol('a')); // FIXME
//TODO result.setExpression(Identifier(text, length));
return result;
}
@@ -118,7 +116,7 @@ Token Tokenizer::popToken() { // associative array?
return Token(Token::Type::Number);
}
if (canPopChar('\x90')) {
return Token(Token::Type::Sto);
return Token(Token::Type::Store);
}
if (canPopChar('\x91')) {
return Token(Token::Type::SquareRoot);
@@ -129,7 +127,7 @@ Token Tokenizer::popToken() { // associative array?
if (isLetter(c)) {
return popIdentifier();
}
return Token();
return Token(); // TODO error
}
}

View File

@@ -9,14 +9,14 @@ class Tokenizer {
public:
Tokenizer(const char * input) : m_text(input) {};
Token popToken();
const char * text() const { return m_text; };
private:
const char popChar() { return *++m_text; };
const char currentChar() const { return *m_text; };
const char popChar() { return *++m_text; }
const char currentChar() const { return *m_text; }
bool canPopChar(char c);
size_t popInteger();
Token popNumber();
Token popIdentifier();
const char * m_text;
};