[poincare] Use a recursive-descent-alike parser

This commit is contained in:
Ruben Dashyan
2018-10-02 10:35:14 +02:00
committed by Émilie Feral
parent d0f2a54d6d
commit de0efe69f2
5 changed files with 190 additions and 143 deletions

View File

@@ -2,115 +2,155 @@
namespace Poincare {
/* Handler for a Number token: yields the expression stored on the current
 * token. A number opens an operand, so nothing may precede it. */
Expression Parser::parseNumber(Expression leftHandSide) {
  assert(leftHandSide.isUninitialized());
  Expression number = m_currentToken.expression();
  return number;
}
/* Handler for a binary '+': leftHandSide is the already-parsed left operand,
 * the right operand is parsed here. */
Expression Parser::parsePlus(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Stopping at Plus keeps a following '+' out of the right operand:
  // addition is left-associative.
  Expression rightHandSide = parseUntil(Token::Type::Plus);
  return Addition(leftHandSide, rightHandSide);
}
/* Handler for a binary '*': leftHandSide is the already-parsed left factor,
 * the right factor is parsed here. */
Expression Parser::parseTimes(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Stopping at Times keeps a following '*' out of the right factor:
  // multiplication is left-associative.
  Expression rightHandSide = parseUntil(Token::Type::Times);
  return Multiplication(leftHandSide, rightHandSide);
}
/* Handler for a binary '/': leftHandSide is the already-parsed numerator,
 * the denominator is parsed here. */
Expression Parser::parseSlash(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  /* Division is left-associative, so the denominator stops at the operator's
   * own type, like the other left-associative handlers. Only Power is handled
   * as right-associative by the precedence test, so it must not be used as
   * the stopping type of a left-associative operator. */
  return Division(leftHandSide, parseUntil(Token::Type::Slash));
}
/* Handler for '-'. With a left operand it is a subtraction; without one it
 * is a unary opposite. */
Expression Parser::parseMinus(Expression leftHandSide) {
  const bool hasLeftOperand = !leftHandSide.isUninitialized();
  if (hasLeftOperand) {
    // Subtraction is left-associative.
    return Subtraction(leftHandSide, parseUntil(Token::Type::Minus));
  }
  // Unary minus: the operand extends over everything binding tighter than Times.
  return Opposite(parseUntil(Token::Type::Times));
}
/* Handler for '^': leftHandSide is the already-parsed base, the exponent is
 * parsed here. */
Expression Parser::parsePower(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // With Power as stopping type a following '^' is absorbed into the
  // exponent: power is right-associative.
  Expression exponent = parseUntil(Token::Type::Power);
  return Power(leftHandSide, exponent);
}
/* Handler for '(': parses the enclosed expression, consumes the matching ')'
 * and wraps the result in a Parenthesis. */
Expression Parser::parseLeftParenthesis(Expression leftHandSide) {
  assert(leftHandSide.isUninitialized());
  Expression enclosed = parseUntil(Token::Type::RightParenthesis);
  // The group must be closed by a right parenthesis.
  assert(m_nextToken.type() == Token::Type::RightParenthesis);
  // Step over the closing parenthesis.
  m_currentToken = m_nextToken;
  m_nextToken = popToken();
  return Parenthesis(enclosed);
}
/* Handler for the square-root sign, a prefix operator: nothing may have been
 * parsed before it. */
Expression Parser::parseSquareRoot(Expression leftHandSide) {
  assert(leftHandSide.isUninitialized());
  // FIXME what is the precedence of SquareRoot?
  Expression radicand = parseUntil(Token::Type::Bang);
  return SquareRoot(radicand);
}
/* Handler for '!', a postfix operator: wraps the already-parsed operand in a
 * Factorial and reads no further token. */
Expression Parser::parseBang(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  Expression operand = leftHandSide;
  return Factorial(operand);
}
/*Expression Parser::parseIdentifier() {
* Identifier, Symbol, Comma
* If an Identifier token is not followed by a LeftParenthesis
* Symbol(const char name);
* It must have length 1
* Otherwise
* Function()? *
return leftHandSide; // FIXME
}*/
/* Handler for '=': leftHandSide is the already-parsed left member, the right
 * member is parsed here. */
Expression Parser::parseEqual(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  Expression rightMember = parseUntil(Token::Type::Equal);
  return Equal(leftHandSide, rightMember);
}
/*Expression Parser::parseStore(Expression leftHandSide) {
assert(!leftHandSide.isUninitialized());
Expression symbol = parseIdentifier(leftHandSide); // FIXME Symbol
// TODO assert(m_nextToken == EndOfStream);
return Store(leftHandSide, static_cast<Symbol>(symbol));
}*/
/* Identity handler: returns leftHandSide unchanged. It fills the slots of the
 * dispatch table for token types that have no dedicated handler. */
Expression Parser::noParse(Expression leftHandSide) { // FIXME nullptr?
return leftHandSide;
}
typedef Expression (Parser::*TokenParser)(Expression leftHandSide);
TokenParser tokenParsers[] = {
&Parser::noParse, //EndOfStream
&Parser::parseEqual,
&Parser::noParse, //Store, FIXME
&Parser::noParse, //RightBracket,
&Parser::noParse, //RightBrace,
&Parser::noParse, //RightParenthesis,
&Parser::parsePlus,
&Parser::parseMinus,
&Parser::parseTimes,
&Parser::parseSlash,
&Parser::parsePower,
&Parser::parseSquareRoot,
&Parser::parseBang,
&Parser::noParse, //LeftBracket, FIXME
&Parser::noParse, //LeftBrace, FIXME
&Parser::parseLeftParenthesis,
&Parser::parseNumber,
&Parser::noParse, //Identifier, FIXME
&Parser::noParse, //Comma, FIXME
&Parser::noParse //Undefined
};
/* Entry point: parses the whole input and returns the resulting expression.
 * The recursive-descent routine is run with EndOfStream as stopping type, so
 * it consumes tokens until the end of the stream is the next token. */
Expression Parser::parse() {
  return parseUntil(Token::Type::EndOfStream);
}
/* Core loop of the parser: as long as canPopToken accepts a token for the
 * given stoppingType, the handler matching the current token type is applied
 * to the expression built so far. Returns the resulting expression, which
 * must not be empty. */
Expression Parser::parseUntil(Token::Type stoppingType) {
  Expression parsed;
  for (;;) {
    if (!canPopToken(stoppingType)) {
      break;
    }
    // canPopToken has updated m_currentToken; pick its handler in the table.
    const int handlerIndex = static_cast<int>(m_currentToken.type());
    TokenParser handler = tokenParsers[handlerIndex];
    parsed = (this->*handler)(parsed);
  }
  assert(!parsed.isUninitialized());
  return parsed;
}
/* Tells whether a multiplication sign is implied between two consecutive
 * tokens of types t1 and t2, as in "2(3)" or "(1)(2)".
 * t1 must be able to end an operand and t2 must be able to start one; a
 * square-root sign starts an operand too, so that a number followed by a
 * root is a product. */
static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
  //TODO if (t1 == Token::Type::Identifier && t2 == Token::Type::LeftParenthesis) t1 should be parsed as a function
  const bool leftEndsOperand =
    t1 == Token::Type::RightParenthesis ||
    t1 == Token::Type::Number ||
    t1 == Token::Type::Identifier;
  const bool rightStartsOperand =
    t2 == Token::Type::LeftParenthesis ||
    t2 == Token::Type::Number ||
    t2 == Token::Type::Identifier ||
    t2 == Token::Type::SquareRoot;
  return leftEndsOperand && rightStartsOperand;
}
Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {
if (currentToken.type() == Token::Type::EndOfStream) {
return leftHandSide;
}
Token nextToken = popToken();
if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
// TODO implicit multiplication
}
// If currentToken constitutes an operand (FIXME: find a better description)
if (currentToken.type() == Token::Type::Number) {
//assert(leftHandSide.isUndefined());
return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
}
/*if (currentToken.isLeftGroupingToken()) {
assert(leftHandSide.isUndefined());
return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
}*/
// If currentToken does not require rightHandSide
if (currentToken.type() == Token::Type::Bang) {
return shift(Factorial(leftHandSide), nextToken);
}
/*if (currentToken.type() == Token::Type::Identifier) {
if (nextToken.type() == Token::Type::LeftParenthesis) {
// FIXME return shift(Function(rightHandSide), nextToken);
} else {
return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
}
}*/
// If currentToken requires a rightHandSide expression
// First, build rightHandSide
Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
if (comparePrecedence(currentToken, stoppingType)) {
return leftHandSide;
}
// Then construct the whole expression and continue
if (currentToken.type() == Token::Type::Plus) {
return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
}
if (currentToken.type() == Token::Type::Minus) {
if (leftHandSide.isUndefined()) {
return shift(Opposite(rightHandSide), nextToken, stoppingType);
} else {
return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
}
}
if (currentToken.type() == Token::Type::Times) {
return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::Slash) {
return shift(Division(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::Power) {
return shift(Power(leftHandSide, rightHandSide), nextToken);
}
if (currentToken.type() == Token::Type::SquareRoot) {
//assert(leftHandSide.isUndefined());
return shift(SquareRoot(rightHandSide), nextToken);
}
// TODO remaining tokens: comma, equal, store
/* Decides whether a token of type nextTokenType may be consumed while parsing
 * with the given stoppingType. Precedence follows the order of the token
 * types: EndOfStream < RightParenthesis < Plus < Times < Power.
 *  - a strictly greater type is always consumed;
 *  - an equal type is consumed only for Power, which makes that operator
 *    right-associative;
 *  - any other case stops the parsing. */
static inline bool comparePrecedence(Token::Type nextTokenType, Token::Type stoppingType) {
  if (nextTokenType > stoppingType) {
    return true;
  }
  const bool repeatedPower =
    nextTokenType == stoppingType && stoppingType == Token::Type::Power;
  return repeatedPower && nextTokenType != Token::Type::EndOfStream;
}
bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
/* Returns true if nextToken is of higher precedence than currentToken TODO */
return
// First, parse what is after the left grouping token
// until the corresponding right grouping token appears
/*!nextToken.isLeftGroupingToken()
&&
// If nextToken is an unary minus
!(nextToken.is(Token::Type::Minus)
&&
TokenHasTag(nextToken, TokenTag::UnaryMinus))
&&
(
(
currentToken.is(Token::Type::Minus) &&
TokenHasTag(currentToken, TokenTag::UnaryMinus) &&
(nextToken.type() < Token::Type::Power)
) ||*/
// A token with higher precedence should be reduced first
(currentToken.type() > stoppingType) /*||
// Handle left-associative operators
(
(currentToken.type() == nextToken.type()) &&
TokenIsLeftAssociative(currentToken)
)
)*/
;
/* Tries to make a new token current for a parse bounded by stoppingType.
 * Returns true when m_currentToken has been updated and must be handled,
 * false when the parse has to stop here.
 *
 * Two cases produce a new current token:
 *  - an implicit multiplication between the current and the next token: a
 *    Times token is synthesized and the next token is left in place;
 *  - a next token with enough precedence: it becomes the current token and a
 *    fresh token is read from the stream. */
bool Parser::canPopToken(Token::Type stoppingType) {
  if (tokenTypesCanBeImplicitlyMultiplied(m_currentToken.type(), m_nextToken.type())) {
    /* An implicit multiplication binds like an explicit one: it may only be
     * consumed if Times has precedence over stoppingType. Otherwise, in
     * "2^3(4)" the product would be absorbed into the exponent. When refused
     * the parse stops; falling through would pop the operand itself and hand
     * it to a handler that expects no left-hand side. An enclosing parse
     * with a lower stoppingType picks the multiplication up. */
    if (!comparePrecedence(Token::Type::Times, stoppingType)) {
      return false;
    }
    m_currentToken = Token(Token::Type::Times);
    return true;
  }
  if (comparePrecedence(m_nextToken.type(), stoppingType)) {
    m_currentToken = m_nextToken;
    m_nextToken = popToken();
    return true;
  }
  return false;
}
}

View File

@@ -2,29 +2,49 @@
#define POINCARE_PARSING_PARSER_H
#include "tokenizer.h"
#include <poincare/expression.h>
#include <poincare/addition.h>
#include <poincare/division.h>
#include <poincare/equal.h>
#include <poincare/factorial.h>
#include <poincare/multiplication.h>
#include <poincare/opposite.h>
#include <poincare/parenthesis.h>
#include <poincare/number.h>
#include <poincare/power.h>
#include <poincare/square_root.h>
#include <poincare/store.h>
#include <poincare/subtraction.h>
#include <poincare/symbol.h>
// matrix ? with brackets
// braces ?
namespace Poincare {
/* Parser turning the token stream of its Tokenizer base into an Expression.
 * It keeps a current token, being handled, and a one-token lookahead. */
class Parser : public Tokenizer {
public:
  /* The lookahead is primed with the first token of the input; the current
   * token starts as Undefined since nothing has been consumed yet.
   * A single constructor is kept: two constructors with the same signature
   * cannot coexist. */
  Parser(const char * input) :
    Tokenizer(input),
    m_currentToken(Token(Token::Type::Undefined)),
    m_nextToken(popToken()) {}
  // Parses the whole input and returns the resulting expression.
  Expression parse();

  /* Token handlers, one per token type. Each receives the expression parsed
   * so far (uninitialized if there is none) and returns the expression
   * extended with the current token. They are public because the dispatch
   * table of the implementation file takes their addresses. */
  Expression parseNumber(Expression leftHandSide);
  Expression parsePlus(Expression leftHandSide);
  Expression parseTimes(Expression leftHandSide);
  Expression parseSlash(Expression leftHandSide);
  Expression parseMinus(Expression leftHandSide);
  Expression parsePower(Expression leftHandSide);
  Expression parseLeftParenthesis(Expression leftHandSide);
  Expression parseSquareRoot(Expression leftHandSide);
  Expression parseBang(Expression leftHandSide);
  Expression parseEqual(Expression leftHandSide);
  Expression noParse(Expression leftHandSide);
private:
  // NOTE(review): shift and comparePrecedence look like leftovers of the
  // former shift-based implementation - confirm whether they are still needed.
  Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
  bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
  // Parses tokens for as long as canPopToken accepts them for stoppingType.
  Expression parseUntil(Token::Type stoppingType);
  // Updates m_currentToken if a token may be consumed for stoppingType.
  bool canPopToken(Token::Type stoppingType);
  Token m_currentToken; // token being handled
  Token m_nextToken;    // one-token lookahead
};
}

View File

@@ -13,11 +13,8 @@ public:
Equal,
Store,
RightBracket,
LeftBracket,
RightBrace,
LeftBrace,
RightParenthesis,
LeftParenthesis,
Plus,
Minus,
Times,
@@ -25,6 +22,9 @@ public:
Power,
SquareRoot,
Bang,
LeftBracket,
LeftBrace,
LeftParenthesis,
Number,
Identifier,
Comma,
@@ -36,21 +36,11 @@ public:
// Type of this token.
Type type() const { return m_type; }
// True when this token is of type t.
bool is(Type t) const { return m_type == t; }
// True when this token is of type EndOfStream.
bool isEndOfStream() const { return is(Type::EndOfStream); }
bool isLeftGroupingToken() const {
return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
}
bool isRightGroupingToken() const {
return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
}
// Pointer stored as this token's text.
const char * text() const { return m_text; }
// Stores text as this token's text pointer (the characters are not copied).
void setText(const char * text) { m_text = text; }
// Length stored for this token's text.
size_t length() const { return m_length; }
// Stores length as this token's text length.
void setLength(size_t length) { m_length = length; }
// Expression attached to this token.
Expression expression() const { return m_expression; }
// Attaches expression e to this token.
void setExpression(Expression e) { m_expression = e; }
private:
Type m_type;
const char * m_text;
size_t m_length;
Expression m_expression;
};
}

View File

@@ -10,7 +10,7 @@ bool Tokenizer::canPopChar(char c) {
return false;
}
size_t Tokenizer::popInteger() {
size_t Tokenizer::popDigits() {
size_t length = 0;
char c = currentChar();
while (c >= '0' && c <= '9') {
@@ -22,12 +22,13 @@ size_t Tokenizer::popInteger() {
Token Tokenizer::popNumber() {
const char * integerPartText = m_text;
size_t integerPartLength = popInteger();
size_t integerPartLength = popDigits();
/* const char * decimalPartText = m_text;
const char * decimalPartText = m_text;
size_t decimalPartLength = 0;
if (canPopChar('.')) {
decimalPartLength = popInteger();
decimalPartText = m_text;
decimalPartLength = popDigits();
}
if (integerPartLength == 0 && decimalPartLength == 0) {
@@ -39,18 +40,14 @@ Token Tokenizer::popNumber() {
bool exponentIsNegative = false;
if (canPopChar('e')) {
exponentIsNegative = canPopChar('-');
exponentPartLength = popInteger();
exponentPartLength = popDigits();
if (exponentPartLength == 0) {
return Token();
}
}
Token result(Token::Type::Number);
//TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
return result;*/
Token result(Token::Type::Number);
result.setText(integerPartText);
result.setLength(integerPartLength);
result.setExpression(Number::ParseNumber(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
return result;
}
@@ -59,7 +56,6 @@ static inline bool isLetter(char c) {
}
Token Tokenizer::popIdentifier() {
const char * text = m_text;
size_t length = 0;
char c = currentChar();
while (isLetter(c)) {
@@ -71,15 +67,18 @@ Token Tokenizer::popIdentifier() {
return result;
}
Token Tokenizer::popToken() { // associative array?
Token Tokenizer::popToken() {
const char c = currentChar();
if (canPopChar(0)) {
return Token(Token::Type::EndOfStream);
if ((c == '.') || (c >= '0' && c <= '9')) {
return popNumber();
}
if (isLetter(c)) {
return popIdentifier();
}
if (canPopChar('!')) {
return Token(Token::Type::Bang);
}
if (c >= '(' && (c <= '/' && c != '.')) {
if (c >= '(' && c <= '/' && c != '.') {
Token::Type typeForChar[] = {
Token::Type::LeftParenthesis,
Token::Type::RightParenthesis,
@@ -112,20 +111,17 @@ Token Tokenizer::popToken() { // associative array?
if (canPopChar('}')) {
return Token(Token::Type::RightBrace);
}
if (canPopChar('\x89')) {
if (canPopChar('\x89')) { // Ion::Charset::SmallPi
return Token(Token::Type::Number);
}
if (canPopChar('\x90')) {
if (canPopChar('\x90')) { // Ion::Charset::Store
return Token(Token::Type::Store);
}
if (canPopChar('\x91')) {
if (canPopChar('\x91')) { // Ion::Charset::Root
return Token(Token::Type::SquareRoot);
}
if ((c == '.') || (c >= '0' && c <= '9')) {
return popNumber();
}
if (isLetter(c)) {
return popIdentifier();
if (canPopChar(0)) {
return Token(Token::Type::EndOfStream);
}
return Token(); // TODO error
}

View File

@@ -2,6 +2,7 @@
#define POINCARE_PARSING_TOKENIZER_H
#include "token.h"
#include <poincare/number.h>
namespace Poincare {
@@ -13,7 +14,7 @@ private:
const char popChar() { return *++m_text; }
const char currentChar() const { return *m_text; }
bool canPopChar(char c);
size_t popInteger();
size_t popDigits();
Token popNumber();
Token popIdentifier();