mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-03-18 21:30:38 +01:00
[poincare] Second version of homemade parser
Using function stack instead of auxiliary stack.
This commit is contained in:
committed by
Émilie Feral
parent
2b87c735d3
commit
d0f2a54d6d
@@ -132,7 +132,6 @@ objs += $(addprefix poincare/src/,\
|
||||
|
||||
objs += $(addprefix poincare/src/parsing/,\
|
||||
parser.o\
|
||||
parser_reductions.o\
|
||||
tokenizer.o\
|
||||
)
|
||||
|
||||
|
||||
@@ -2,7 +2,11 @@
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
|
||||
Expression Parser::parse() {
|
||||
return shift(Expression(), popToken(), Token::Type::EndOfStream);
|
||||
}
|
||||
|
||||
static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
|
||||
return
|
||||
(t1 == Token::Type::RightParenthesis || t1 == Token::Type::Number || t1 == Token::Type::Identifier)
|
||||
&&
|
||||
@@ -10,51 +14,103 @@ static bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2)
|
||||
;
|
||||
}
|
||||
|
||||
static bool minusTokenIsUnaryIfAfter(Token::Type t) {
|
||||
return false; //TODO
|
||||
}
|
||||
Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {
|
||||
|
||||
void Parser::reduce() {
|
||||
Token popped = m_tokenStack.pop();
|
||||
// if(popped.is(Token::Type::Identifier)) TODO
|
||||
if(tokenTypesCanBeImplicitlyMultiplied(popped.type(), m_lookahead.type())){
|
||||
m_tokenStack.push(Token(Token::Type::Times));
|
||||
// this makes implicit multiplication have higher precedence than division, power, square root
|
||||
if (currentToken.type() == Token::Type::EndOfStream) {
|
||||
return leftHandSide;
|
||||
}
|
||||
//if minusTokenIsUnaryIfAfter
|
||||
sReductions[static_cast<int>(popped.type())](&m_expressionStack, popped);
|
||||
}
|
||||
|
||||
void Parser::shift() {
|
||||
m_tokenStack.push(m_lookahead);
|
||||
m_lookahead = m_tokenizer.popToken();
|
||||
}
|
||||
Token nextToken = popToken();
|
||||
|
||||
bool Parser::needsReduction() {
|
||||
/* This method compares the precedence of m_tokenStack.top() and m_lookahead.
|
||||
* Its return value decides whether to reduce m_tokenStack.
|
||||
* In addition to precedence comparison, the method may handle associativity of binary operators:
|
||||
* 1+1+1 yields 1+(1+1)
|
||||
* 2-5-7 yields (2-5)-7
|
||||
* 2/3/4 yields (2/3)/4
|
||||
* but this b */
|
||||
//TODO opposite and power are not symmetric
|
||||
return (!m_lookahead.isLeftGroupingToken()) && (!m_tokenStack.isEmpty()) && (m_tokenStack.top().type() >= m_lookahead.type());
|
||||
}
|
||||
if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
|
||||
// TODO implicit multiplication
|
||||
}
|
||||
|
||||
Expression Parser::parse(){
|
||||
while(!m_lookahead.isEndOfStream()) {
|
||||
shift();
|
||||
while (needsReduction()) {
|
||||
reduce();
|
||||
// If currentToken ... FIXME find a good description: constitutes an operand?
|
||||
if (currentToken.type() == Token::Type::Number) {
|
||||
//assert(leftHandSide.isUndefined());
|
||||
return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
|
||||
}
|
||||
/*if (currentToken.isLeftGroupingToken()) {
|
||||
assert(leftHandSide.isUndefined());
|
||||
return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
|
||||
}*/
|
||||
|
||||
// If currentToken does not require rightHandSide
|
||||
if (currentToken.type() == Token::Type::Bang) {
|
||||
return shift(Factorial(leftHandSide), nextToken);
|
||||
}
|
||||
/*if (currentToken.type() == Token::Type::Identifier) {
|
||||
if (nextToken.type() == Token::Type::LeftParenthesis) {
|
||||
// FIXME return shift(Function(rightHandSide), nextToken);
|
||||
} else {
|
||||
return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
|
||||
}
|
||||
}*/
|
||||
|
||||
// If currentToken requires a rightHandSide expression
|
||||
|
||||
// First, build rightHandSide
|
||||
Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
|
||||
if (comparePrecedence(currentToken, stoppingType)) {
|
||||
return leftHandSide;
|
||||
}
|
||||
|
||||
// Then construct the whole expression and continue
|
||||
if (currentToken.type() == Token::Type::Plus) {
|
||||
return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Minus) {
|
||||
if (leftHandSide.isUndefined()) {
|
||||
return shift(Opposite(rightHandSide), nextToken, stoppingType);
|
||||
} else {
|
||||
return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
|
||||
}
|
||||
}
|
||||
assert(m_expressionStack.size() == 1); // FIXME: Handle as parsing error
|
||||
return m_expressionStack.pop();
|
||||
if (currentToken.type() == Token::Type::Times) {
|
||||
return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Slash) {
|
||||
return shift(Division(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Power) {
|
||||
return shift(Power(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::SquareRoot) {
|
||||
//assert(leftHandSide.isUndefined());
|
||||
return shift(SquareRoot(rightHandSide), nextToken);
|
||||
}
|
||||
|
||||
// TODO remaining tokens: comma, equal, store
|
||||
}
|
||||
|
||||
Expression Parse(const char * input){
|
||||
return Parser(input).parse();
|
||||
bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
|
||||
/* Returns true if nextToken is of higher precedence than currentToken TODO */
|
||||
return
|
||||
// First, parse what is after the left grouping token
|
||||
// until the corresponding right grouping token appears
|
||||
/*!nextToken.isLeftGroupingToken()
|
||||
&&
|
||||
// If nextToken is an unary minus
|
||||
!(nextToken.is(Token::Type::Minus)
|
||||
&&
|
||||
TokenHasTag(nextToken, TokenTag::UnaryMinus))
|
||||
&&
|
||||
(
|
||||
(
|
||||
currentToken.is(Token::Type::Minus) &&
|
||||
TokenHasTag(currentToken, TokenTag::UnaryMinus) &&
|
||||
(nextToken.type() < Token::Type::Power)
|
||||
) ||*/
|
||||
// A token with higher precedence should be reduced first
|
||||
(currentToken.type() > stoppingType) /*||
|
||||
// Handle left-associative operators
|
||||
(
|
||||
(currentToken.type() == nextToken.type()) &&
|
||||
TokenIsLeftAssociative(currentToken)
|
||||
)
|
||||
)*/
|
||||
;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,51 +2,31 @@
|
||||
#define POINCARE_PARSING_PARSER_H
|
||||
|
||||
#include "tokenizer.h"
|
||||
#include "stack.h"
|
||||
#include <poincare/expression.h>
|
||||
|
||||
#include <poincare/addition.h>
|
||||
#include <poincare/division.h>
|
||||
#include <poincare/equal.h>
|
||||
#include <poincare/factorial.h>
|
||||
#include <poincare/opposite.h>
|
||||
#include <poincare/parenthesis.h>
|
||||
#include <poincare/number.h>
|
||||
#include <poincare/power.h>
|
||||
#include <poincare/square_root.h>
|
||||
#include <poincare/store.h>
|
||||
#include <poincare/subtraction.h>
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
class Parser {
|
||||
class Parser : public Tokenizer {
|
||||
public:
|
||||
using ExpressionStack = Stack<Expression,100>;
|
||||
using TokenStack = Stack<Token,100>;
|
||||
|
||||
Parser(const char * input) :
|
||||
m_tokenizer(input),
|
||||
m_lookahead(m_tokenizer.popToken()),
|
||||
m_tokenStack(),
|
||||
m_expressionStack()
|
||||
{}
|
||||
Parser(const char * input) : Tokenizer(input) {}
|
||||
Expression parse();
|
||||
|
||||
enum class TokenTag {
|
||||
None,
|
||||
UnaryMinus,
|
||||
IdentifierIsFunction
|
||||
};
|
||||
static void TokenSetTag(Token * t, TokenTag tag) {
|
||||
t->setTag(static_cast<int>(tag));
|
||||
}
|
||||
static bool TokenHasTag(const Token & t, TokenTag tag) {
|
||||
return t.tag() == static_cast<int>(tag);
|
||||
}
|
||||
|
||||
private:
|
||||
typedef void (*Reduction)(ExpressionStack * stack, const Token & token);
|
||||
static const Reduction sReductions[];
|
||||
|
||||
void shift();
|
||||
bool needsReduction();
|
||||
void reduce();
|
||||
|
||||
Tokenizer m_tokenizer;
|
||||
Token m_lookahead;
|
||||
TokenStack m_tokenStack;
|
||||
ExpressionStack m_expressionStack;
|
||||
Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
|
||||
bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
|
||||
};
|
||||
|
||||
Expression Parse(const char * input);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
#include "parser.h"
|
||||
|
||||
#include <poincare/equal.h>
|
||||
#include <poincare/store.h>
|
||||
#include <poincare/power.h>
|
||||
#include <poincare/addition.h>
|
||||
#include <poincare/opposite.h>
|
||||
#include <poincare/parenthesis.h>
|
||||
#include <poincare/subtraction.h>
|
||||
#include <poincare/division.h>
|
||||
#include <poincare/square_root.h>
|
||||
#include <poincare/factorial.h>
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
/* Reduction that does nothing: neither the expression stack nor the token is
 * touched. */
static void nopReduction(Parser::ExpressionStack * stack, const Token & token) {
  // Intentionally empty.
}
|
||||
|
||||
/* Reduces an equality: pops the right operand, then the left operand, and
 * pushes Equal(left, right) back onto the expression stack. The token itself
 * is not read. */
static void equalReduction(Parser::ExpressionStack * stack, const Token & token) {
  Parser::ExpressionStack & operands = *stack;
  // The right operand was pushed last, so it comes off first.
  Expression right = operands.pop();
  operands.push(Equal(operands.pop(), right));
}
|
||||
|
||||
|
||||
// TODO
|
||||
#if 0
|
||||
/* Reduces a store operation: pops the right operand, then the left operand,
 * and pushes Store(left, right). Currently compiled out by the surrounding
 * #if 0. */
static void stoReduction(Parser::ExpressionStack * stack, const Token & token) {
  Parser::ExpressionStack & operands = *stack;
  // The right operand was pushed last, so it comes off first.
  Expression right = operands.pop();
  operands.push(Store(operands.pop(), right));
}
|
||||
#endif
|
||||
|
||||
/* Wraps the topmost expression in a Parenthesis node: the expression is popped
 * and the wrapped result is pushed in its place. The token is not read. */
static void parenthesisReduction(Parser::ExpressionStack * stack, const Token & token) {
  Parser::ExpressionStack & operands = *stack;
  operands.push(Parenthesis(operands.pop()));
}
|
||||
|
||||
/* Reduces an addition: pops the right operand, then the left operand, and
 * pushes Addition(left, right). Requires at least two expressions on the
 * stack. The token is not read. */
static void plusReduction(Parser::ExpressionStack * stack, const Token & token) {
  assert(stack->size() >= 2);
  // The right operand was pushed last, so it comes off first.
  Expression right = stack->pop();
  Expression left = stack->pop();
  stack->push(Addition(left, right));
}
|
||||
|
||||
/* Reduces a minus token.
 * - Tagged UnaryMinus: pops one expression and pushes its Opposite.
 * - Otherwise: pops the right operand, then the left operand, and pushes
 *   Subtraction(left, right).
 * NOTE(review): unlike the other reductions in this file, this function is not
 * declared static - confirm whether external linkage is intended. */
void minusReduction(Parser::ExpressionStack * stack, const Token & token) {
  if (!Parser::TokenHasTag(token, Parser::TokenTag::UnaryMinus)) {
    // Binary minus: the right operand was pushed last, so it comes off first.
    Expression right = stack->pop();
    Expression left = stack->pop();
    stack->push(Subtraction(left, right));
    return;
  }
  // Unary minus: negate the single topmost expression.
  stack->push(Opposite(stack->pop()));
}
|
||||
|
||||
/* Reduces a multiplication: pops the right operand, then the left operand, and
 * pushes Multiplication(left, right). The token is not read. */
static void timesReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off first.
  Expression right = stack->pop();
  Expression left = stack->pop();
  stack->push(Multiplication(left, right));
}
|
||||
|
||||
/* Reduces a division: pops the denominator (right operand), then the numerator
 * (left operand), and pushes Division(left, right). The token is not read. */
static void slashReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off first.
  Expression right = stack->pop();
  Expression left = stack->pop();
  stack->push(Division(left, right));
}
|
||||
|
||||
/* Reduces a power: pops the exponent (right operand), then the base (left
 * operand), and pushes Power(left, right). The token is not read. */
static void powerReduction(Parser::ExpressionStack * stack, const Token & token) {
  // The right operand was pushed last, so it comes off first.
  Expression right = stack->pop();
  Expression left = stack->pop();
  stack->push(Power(left, right));
}
|
||||
|
||||
/* Replaces the topmost expression by its SquareRoot: the operand is popped and
 * the wrapped result is pushed in its place. The token is not read. */
static void squareRootReduction(Parser::ExpressionStack * stack, const Token & token) {
  Parser::ExpressionStack & operands = *stack;
  operands.push(SquareRoot(operands.pop()));
}
|
||||
|
||||
/* Pushes the expression carried by a number token onto the expression stack.
 * Nothing is popped. */
static void numberReduction(Parser::ExpressionStack * stack, const Token & token) {
  Expression value = token.expression();
  stack->push(value);
}
|
||||
|
||||
/* Pushes the expression carried by an identifier token onto the expression
 * stack. Identifiers tagged IdentifierIsFunction are detected but not yet
 * treated differently. */
static void identifierReduction(Parser::ExpressionStack * stack, const Token & token) {
  const bool isFunction = Parser::TokenHasTag(token, Parser::TokenTag::IdentifierIsFunction);
  if (isFunction) {
    //TODO
  }
  Expression value = token.expression();
  stack->push(value);
}
|
||||
|
||||
/* Placeholder reduction for the comma token: currently does nothing and leaves
 * the expression stack untouched. */
static void commaReduction(Parser::ExpressionStack * stack, const Token & token) {
  // TODO: implement together with function support.
}
|
||||
|
||||
/* Replaces the topmost expression by its Factorial: the operand is popped and
 * the wrapped result is pushed in its place. The token is not read. */
static void bangReduction(Parser::ExpressionStack * stack, const Token & token) {
  Parser::ExpressionStack & operands = *stack;
  operands.push(Factorial(operands.pop()));
}
|
||||
|
||||
/* Dispatch table mapping a token type to the function that reduces it.
 * Parser::reduce() indexes this array with static_cast<int>(token.type()), so
 * the entries must stay in the same order as the Token::Type enumerators named
 * in the trailing comments. */
const Parser::Reduction Parser::sReductions[] = {
  &nopReduction,         // EndOfStream
  &equalReduction,       // Equal
  &nopReduction, //&stoReduction, // Sto
  &nopReduction,         // RightBracket
  &parenthesisReduction, // LeftBracket
  &nopReduction,         // RightBrace
  &parenthesisReduction, // LeftBrace
  &nopReduction,         // RightParenthesis
  &parenthesisReduction, // LeftParenthesis
  &plusReduction,        // Plus
  &minusReduction,       // Minus
  &timesReduction,       // Times
  &slashReduction,       // Slash
  &powerReduction,       // Power
  &squareRootReduction,  // SquareRoot
  &bangReduction,        // Bang
  &numberReduction,      // Number
  &identifierReduction,  // Identifier
  &commaReduction,       // Comma
  &nopReduction          // Undefined
};
|
||||
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
#ifndef POINCARE_PARSING_STACK_H
|
||||
#define POINCARE_PARSING_STACK_H
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
/* Fixed-capacity LIFO container storing at most maxSize items of type T in an
 * inline array.
 *
 * The fill level is tracked with an index rather than a pointer into the
 * array, so an implicitly copied Stack refers to its own storage instead of
 * the storage of the object it was copied from. */
template <class T, int maxSize>
class Stack {
public:
  Stack() : m_size(0) {}

  /* Pushes a copy of item on top of the stack.
   * Returns 0 on success, or -1 (leaving the stack unchanged) when maxSize
   * items are already stored. */
  int push(T item) {
    assert(m_size >= 0);
    if (m_size >= maxSize) {
      return -1;
    }
    m_stack[m_size++] = item;
    return 0;
  }

  /* Removes the topmost item and returns it. The stack must not be empty. */
  T pop() {
    assert(m_size > 0);
    return m_stack[--m_size];
  }

  /* Returns a copy of the topmost item without removing it. The stack must
   * not be empty; a completely full stack is a valid state. */
  T top() const {
    assert(m_size > 0);
    // <= rather than <: when the stack is full, m_size equals maxSize.
    assert(m_size <= maxSize);
    return m_stack[m_size - 1];
  }

  /* True when no item is stored. */
  bool isEmpty() const { return m_size == 0; }

  /* Number of items currently stored. */
  int size() const { return m_size; }

private:
  T m_stack[maxSize];
  int m_size; // Number of stored items; the next push writes m_stack[m_size].
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,16 +1,17 @@
|
||||
#ifndef POINCARE_PARSING_TOKEN_H
|
||||
#define POINCARE_PARSING_TOKEN_H
|
||||
|
||||
#include <poincare/expression.h>
|
||||
#include <poincare/expression.h> // size_t
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
class Token {
|
||||
public:
|
||||
enum class Type : uint8_t { // Ordered from lower to higher precedence
|
||||
enum class Type {
|
||||
// Ordered from lower to higher precedence to make parser's job easier
|
||||
EndOfStream, // Must be the first
|
||||
Equal,
|
||||
Sto,
|
||||
Store,
|
||||
RightBracket,
|
||||
LeftBracket,
|
||||
RightBrace,
|
||||
@@ -18,13 +19,11 @@ public:
|
||||
RightParenthesis,
|
||||
LeftParenthesis,
|
||||
Plus,
|
||||
Minus, // Subtraction
|
||||
Minus,
|
||||
Times,
|
||||
Slash,
|
||||
// Opposite, unary
|
||||
Power,
|
||||
SquareRoot,
|
||||
// Implicit times: see Parser::reduce()
|
||||
Bang,
|
||||
Number,
|
||||
Identifier,
|
||||
@@ -32,7 +31,7 @@ public:
|
||||
Undefined
|
||||
};
|
||||
|
||||
Token(Type type = Type::Undefined) : m_type(type), m_tag(0) {};
|
||||
Token(Type type = Type::Undefined) : m_type(type) {};
|
||||
|
||||
Type type() const { return m_type; }
|
||||
bool is(Type t) const { return m_type == t; }
|
||||
@@ -40,17 +39,18 @@ public:
|
||||
bool isLeftGroupingToken() const {
|
||||
return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
|
||||
}
|
||||
|
||||
uint8_t tag() const { return m_tag; }
|
||||
void setTag(uint8_t t) { m_tag = t; }
|
||||
|
||||
Expression expression() const { return m_expression; }
|
||||
void setExpression(Expression expression) { m_expression = expression; }
|
||||
bool isRightGroupingToken() const {
|
||||
return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
|
||||
}
|
||||
const char * text() const { return m_text; }
|
||||
void setText(const char * text) { m_text = text; }
|
||||
size_t length() const { return m_length; }
|
||||
void setLength(size_t length) { m_length = length; }
|
||||
|
||||
private:
|
||||
Type m_type;
|
||||
uint8_t m_tag;
|
||||
Expression m_expression;
|
||||
const char * m_text;
|
||||
size_t m_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
#include "tokenizer.h"
|
||||
|
||||
#include <poincare/rational.h> // FIXME: Change me for Number
|
||||
#include <poincare/symbol.h>
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
bool Tokenizer::canPopChar(char c) {
|
||||
@@ -27,7 +24,7 @@ Token Tokenizer::popNumber() {
|
||||
const char * integerPartText = m_text;
|
||||
size_t integerPartLength = popInteger();
|
||||
|
||||
const char * decimalPartText = m_text;
|
||||
/* const char * decimalPartText = m_text;
|
||||
size_t decimalPartLength = 0;
|
||||
if (canPopChar('.')) {
|
||||
decimalPartLength = popInteger();
|
||||
@@ -49,9 +46,11 @@ Token Tokenizer::popNumber() {
|
||||
}
|
||||
|
||||
Token result(Token::Type::Number);
|
||||
//result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
|
||||
// FIXME!!!!
|
||||
result.setExpression(Rational(1));
|
||||
//TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
|
||||
return result;*/
|
||||
Token result(Token::Type::Number);
|
||||
result.setText(integerPartText);
|
||||
result.setLength(integerPartLength);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -68,8 +67,7 @@ Token Tokenizer::popIdentifier() {
|
||||
c = popChar();
|
||||
}
|
||||
Token result(Token::Type::Identifier);
|
||||
//result.setExpression(Identifier(text, length));
|
||||
result.setExpression(Symbol('a')); // FIXME
|
||||
//TODO result.setExpression(Identifier(text, length));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -118,7 +116,7 @@ Token Tokenizer::popToken() { // associative array?
|
||||
return Token(Token::Type::Number);
|
||||
}
|
||||
if (canPopChar('\x90')) {
|
||||
return Token(Token::Type::Sto);
|
||||
return Token(Token::Type::Store);
|
||||
}
|
||||
if (canPopChar('\x91')) {
|
||||
return Token(Token::Type::SquareRoot);
|
||||
@@ -129,7 +127,7 @@ Token Tokenizer::popToken() { // associative array?
|
||||
if (isLetter(c)) {
|
||||
return popIdentifier();
|
||||
}
|
||||
return Token();
|
||||
return Token(); // TODO error
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -9,14 +9,14 @@ class Tokenizer {
|
||||
public:
|
||||
Tokenizer(const char * input) : m_text(input) {};
|
||||
Token popToken();
|
||||
const char * text() const { return m_text; };
|
||||
private:
|
||||
const char popChar() { return *++m_text; };
|
||||
const char currentChar() const { return *m_text; };
|
||||
const char popChar() { return *++m_text; }
|
||||
const char currentChar() const { return *m_text; }
|
||||
bool canPopChar(char c);
|
||||
size_t popInteger();
|
||||
Token popNumber();
|
||||
Token popIdentifier();
|
||||
|
||||
const char * m_text;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user