mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-03-19 05:40:38 +01:00
[poincare] Use a recursive-descent-alike parser
This commit is contained in:
committed by
Émilie Feral
parent
d0f2a54d6d
commit
de0efe69f2
@@ -2,115 +2,155 @@
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
/* Handler for Number tokens. A number opens an operand, so nothing may have
 * been parsed on its left yet; the result is the Expression carried by the
 * current token. */
Expression Parser::parseNumber(Expression leftHandSide) {
  assert(leftHandSide.isUninitialized());
  Expression number = m_currentToken.expression();
  return number;
}
|
||||
|
||||
/* Handler for Plus tokens. leftHandSide is the operand parsed so far and must
 * exist; the right operand is whatever parseUntil yields with Plus as the
 * stopping type. */
Expression Parser::parsePlus(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Addition is left-associative.
  Expression rightHandSide = parseUntil(Token::Type::Plus);
  return Addition(leftHandSide, rightHandSide);
}
|
||||
|
||||
/* Handler for Times tokens. leftHandSide is the operand parsed so far and must
 * exist; the right operand is whatever parseUntil yields with Times as the
 * stopping type. */
Expression Parser::parseTimes(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Multiplication is left-associative.
  Expression rightHandSide = parseUntil(Token::Type::Times);
  return Multiplication(leftHandSide, rightHandSide);
}
|
||||
|
||||
/* Handler for Slash tokens. leftHandSide is the numerator parsed so far and
 * must exist; the denominator is whatever parseUntil yields with Power as the
 * stopping type.
 * NOTE(review): the stopping type is Power rather than Slash. With the
 * precedence helper used by canPopToken this looks equivalent to stopping at
 * Slash, provided no token type sits between Slash and Power - confirm. */
Expression Parser::parseSlash(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Division is left-associative.
  Expression denominator = parseUntil(Token::Type::Power);
  return Division(leftHandSide, denominator);
}
|
||||
|
||||
/* Handler for Minus tokens. The token is a binary subtraction when something
 * has already been parsed on its left, and a unary opposite otherwise. */
Expression Parser::parseMinus(Expression leftHandSide) {
  if (!leftHandSide.isUninitialized()) {
    // Binary minus. Subtraction is left-associative.
    Expression rightHandSide = parseUntil(Token::Type::Minus);
    return Subtraction(leftHandSide, rightHandSide);
  }
  // Unary minus: the operand is parsed with Times as the stopping type.
  Expression operand = parseUntil(Token::Type::Times);
  return Opposite(operand);
}
|
||||
|
||||
/* Handler for Power tokens. leftHandSide is the base parsed so far and must
 * exist; the exponent is whatever parseUntil yields with Power as the
 * stopping type. */
Expression Parser::parsePower(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  // Power is right-associative
  Expression exponent = parseUntil(Token::Type::Power);
  return Power(leftHandSide, exponent);
}
|
||||
|
||||
/* Handler for LeftParenthesis tokens. Parses the parenthesized content, then
 * consumes the closing parenthesis and wraps the content in a Parenthesis.
 * A missing closing parenthesis is only caught by the assertion. */
Expression Parser::parseLeftParenthesis(Expression leftHandSide) {
  // An opening parenthesis starts an operand: nothing may precede it here.
  assert(leftHandSide.isUninitialized());

  Expression content = parseUntil(Token::Type::RightParenthesis);

  // Step over the closing parenthesis, which parseUntil left as lookahead.
  assert(m_nextToken.type() == Token::Type::RightParenthesis);
  m_currentToken = m_nextToken;
  m_nextToken = popToken();

  return Parenthesis(content);
}
|
||||
|
||||
/* Handler for SquareRoot tokens. The root sign starts an operand, so nothing
 * may have been parsed on its left; its argument is whatever parseUntil
 * yields with Bang as the stopping type. */
Expression Parser::parseSquareRoot(Expression leftHandSide) {
  assert(leftHandSide.isUninitialized());
  // FIXME what is the precedence of SquareRoot?
  Expression radicand = parseUntil(Token::Type::Bang);
  return SquareRoot(radicand);
}
|
||||
|
||||
/* Handler for Bang tokens. The factorial is a postfix operator: it applies to
 * the operand already parsed on its left and needs no right-hand side. */
Expression Parser::parseBang(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  Expression factorial = Factorial(leftHandSide);
  return factorial;
}
|
||||
|
||||
/*Expression Parser::parseIdentifier() {
|
||||
* Identifier, Symbol, Comma
|
||||
* If an Identifier token is not followed by a LeftParenthesis
|
||||
* Symbol(const char name);
|
||||
* It must have length 1
|
||||
* Otherwise
|
||||
* Function()? *
|
||||
return leftHandSide; // FIXME
|
||||
}*/
|
||||
|
||||
/* Handler for Equal tokens. leftHandSide is the member parsed so far and must
 * exist; the right member is whatever parseUntil yields with Equal as the
 * stopping type. */
Expression Parser::parseEqual(Expression leftHandSide) {
  assert(!leftHandSide.isUninitialized());
  Expression rightHandSide = parseUntil(Token::Type::Equal);
  return Equal(leftHandSide, rightHandSide);
}
|
||||
|
||||
/*Expression Parser::parseStore(Expression leftHandSide) {
|
||||
assert(!leftHandSide.isUninitialized());
|
||||
Expression symbol = parseIdentifier(leftHandSide); // FIXME Symbol
|
||||
// TODO assert(m_nextToken == EndOfStream);
|
||||
return Store(leftHandSide, static_cast<Symbol>(symbol));
|
||||
}*/
|
||||
|
||||
/* Placeholder handler for token types that have no parsing rule (yet): the
 * expression parsed so far is handed back untouched.
 * FIXME nullptr? */
Expression Parser::noParse(Expression leftHandSide) {
  Expression unchanged = leftHandSide;
  return unchanged;
}
|
||||
|
||||
typedef Expression (Parser::*TokenParser)(Expression leftHandSide);
|
||||
|
||||
/* Dispatch table used by Parser::parseUntil: the handler for a token is found
 * at index static_cast<int>(token.type()). The entries therefore have to
 * follow the declaration order of Token::Type.
 * NOTE(review): the complete enum is not visible from here - confirm that it
 * has exactly these 20 enumerators, in this order.
 * The table is never modified and is only used in this file, hence const with
 * internal linkage. */
static TokenParser const tokenParsers[] = {
  &Parser::noParse,              // EndOfStream
  &Parser::parseEqual,           // Equal
  &Parser::noParse,              // Store, FIXME
  &Parser::noParse,              // RightBracket
  &Parser::noParse,              // RightBrace
  &Parser::noParse,              // RightParenthesis
  &Parser::parsePlus,            // Plus
  &Parser::parseMinus,           // Minus
  &Parser::parseTimes,           // Times
  &Parser::parseSlash,           // Slash
  &Parser::parsePower,           // Power
  &Parser::parseSquareRoot,      // SquareRoot
  &Parser::parseBang,            // Bang
  &Parser::noParse,              // LeftBracket, FIXME
  &Parser::noParse,              // LeftBrace, FIXME
  &Parser::parseLeftParenthesis, // LeftParenthesis
  &Parser::parseNumber,          // Number
  &Parser::noParse,              // Identifier, FIXME
  &Parser::noParse,              // Comma, FIXME
  &Parser::noParse               // Undefined
};
|
||||
|
||||
/* Parses the whole input and returns the resulting Expression.
 * The constructor has already loaded the first token into m_nextToken, so
 * parsing simply runs until EndOfStream. The former entry point called
 * shift() with a freshly popped token; it made this return unreachable and
 * would skip the token held in m_nextToken. */
Expression Parser::parse() {
  return parseUntil(Token::Type::EndOfStream);
}
|
||||
|
||||
/* Parses tokens for as long as canPopToken accepts them with respect to
 * stoppingType. Each accepted token is dispatched to its handler in
 * tokenParsers, which receives the expression built so far and returns the
 * new one. At least one token is expected to be consumed. */
Expression Parser::parseUntil(Token::Type stoppingType) {
  Expression result;
  while (canPopToken(stoppingType)) {
    // canPopToken has just updated m_currentToken: dispatch on its type.
    const int handlerIndex = static_cast<int>(m_currentToken.type());
    TokenParser handler = tokenParsers[handlerIndex];
    result = (this->*handler)(result);
  }
  assert(!result.isUninitialized());
  return result;
}
|
||||
|
||||
/* Tells whether two consecutive tokens of types t1 and t2 stand for an
 * implicit multiplication, as in "2(3)" or ")(".
 * - t1 must end an operand: RightParenthesis, Number or Identifier.
 * - t2 must start an operand: LeftParenthesis, Number, Identifier or
 *   SquareRoot.
 * The SquareRoot case used to sit in an expression statement after the return
 * and was therefore never evaluated. */
static inline bool tokenTypesCanBeImplicitlyMultiplied(Token::Type t1, Token::Type t2) {
  //TODO if (t1 == Token::Type::Identifier && t2 == Token::Type::LeftParenthesis) t1 should be parsed as a function
  const bool leftEndsOperand =
    t1 == Token::Type::RightParenthesis ||
    t1 == Token::Type::Number ||
    t1 == Token::Type::Identifier;
  const bool rightStartsOperand =
    t2 == Token::Type::LeftParenthesis ||
    t2 == Token::Type::Number ||
    t2 == Token::Type::Identifier ||
    t2 == Token::Type::SquareRoot;
  return leftEndsOperand && rightStartsOperand;
}
|
||||
|
||||
Expression Parser::shift(Expression leftHandSide, Token currentToken, Token::Type stoppingType) {
|
||||
|
||||
if (currentToken.type() == Token::Type::EndOfStream) {
|
||||
return leftHandSide;
|
||||
}
|
||||
|
||||
Token nextToken = popToken();
|
||||
|
||||
if (tokenTypesCanBeImplicitlyMultiplied(currentToken.type(), nextToken.type())) {
|
||||
// TODO implicit multiplication
|
||||
}
|
||||
|
||||
// If currentToken ... FIXME find a good description: constitutes an operand?
|
||||
if (currentToken.type() == Token::Type::Number) {
|
||||
//assert(leftHandSide.isUndefined());
|
||||
return shift(Number::ParseDigits(currentToken.text(), currentToken.length()), nextToken, stoppingType);
|
||||
}
|
||||
/*if (currentToken.isLeftGroupingToken()) {
|
||||
assert(leftHandSide.isUndefined());
|
||||
return shift(Parenthesis(), nextToken, Token::Type::RightParenthesis); // FIXME grouping tokens
|
||||
}*/
|
||||
|
||||
// If currentToken does not require rightHandSide
|
||||
if (currentToken.type() == Token::Type::Bang) {
|
||||
return shift(Factorial(leftHandSide), nextToken);
|
||||
}
|
||||
/*if (currentToken.type() == Token::Type::Identifier) {
|
||||
if (nextToken.type() == Token::Type::LeftParenthesis) {
|
||||
// FIXME return shift(Function(rightHandSide), nextToken);
|
||||
} else {
|
||||
return shift(Identifier(currentToken.text(), currentToken.length()), nextToken);
|
||||
}
|
||||
}*/
|
||||
|
||||
// If currentToken requires a rightHandSide expression
|
||||
|
||||
// First, build rightHandSide
|
||||
Expression rightHandSide = shift(Expression(), nextToken, currentToken.type());
|
||||
if (comparePrecedence(currentToken, stoppingType)) {
|
||||
return leftHandSide;
|
||||
}
|
||||
|
||||
// Then construct the whole expression and continue
|
||||
if (currentToken.type() == Token::Type::Plus) {
|
||||
return shift(Addition(leftHandSide, rightHandSide), nextToken, stoppingType);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Minus) {
|
||||
if (leftHandSide.isUndefined()) {
|
||||
return shift(Opposite(rightHandSide), nextToken, stoppingType);
|
||||
} else {
|
||||
return shift(Subtraction(leftHandSide, rightHandSide), nextToken, stoppingType);
|
||||
}
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Times) {
|
||||
return shift(Multiplication(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Slash) {
|
||||
return shift(Division(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::Power) {
|
||||
return shift(Power(leftHandSide, rightHandSide), nextToken);
|
||||
}
|
||||
if (currentToken.type() == Token::Type::SquareRoot) {
|
||||
//assert(leftHandSide.isUndefined());
|
||||
return shift(SquareRoot(rightHandSide), nextToken);
|
||||
}
|
||||
|
||||
// TODO remaining tokens: comma, equal, store
|
||||
/* Tells whether a token of type nextTokenType may be consumed while parsing
 * an operand that has to stop at stoppingType.
 * Token types are compared by their enumerator value, which encodes the
 * precedence: EndOfStream < RightParenthesis < Plus < Times < Power.
 * - A strictly greater type is always consumed.
 * - An equal type is consumed only for Power, which makes that operator
 *   right-associative; all other operators are left-associative.
 * The previous expression mixed || and && without grouping and carried a
 * trailing "!= EndOfStream" clause that could never change the result; the
 * truth table below is the same. */
static inline bool comparePrecedence(Token::Type nextTokenType, Token::Type stoppingType) {
  if (nextTokenType > stoppingType) {
    return true;
  }
  // Same precedence: keep going only for the right-associative operator.
  return nextTokenType == stoppingType && stoppingType == Token::Type::Power;
}
|
||||
|
||||
bool Parser::comparePrecedence(Token currentToken, Token::Type stoppingType) const {
|
||||
/* Returns true if nextToken is of higher precedence than currentToken TODO */
|
||||
return
|
||||
// First, parse what is after the left grouping token
|
||||
// until the corresponding right grouping token appears
|
||||
/*!nextToken.isLeftGroupingToken()
|
||||
&&
|
||||
// If nextToken is an unary minus
|
||||
!(nextToken.is(Token::Type::Minus)
|
||||
&&
|
||||
TokenHasTag(nextToken, TokenTag::UnaryMinus))
|
||||
&&
|
||||
(
|
||||
(
|
||||
currentToken.is(Token::Type::Minus) &&
|
||||
TokenHasTag(currentToken, TokenTag::UnaryMinus) &&
|
||||
(nextToken.type() < Token::Type::Power)
|
||||
) ||*/
|
||||
// A token with higher precedence should be reduced first
|
||||
(currentToken.type() > stoppingType) /*||
|
||||
// Handle left-associative operators
|
||||
(
|
||||
(currentToken.type() == nextToken.type()) &&
|
||||
TokenIsLeftAssociative(currentToken)
|
||||
)
|
||||
)*/
|
||||
;
|
||||
/* Decides whether parsing may proceed with one more token and, if so, makes
 * that token the current one. Returns false when the next token has to be
 * left to the caller.
 * Two cases advance:
 * - the current and next tokens form an implicit multiplication: a Times
 *   token is synthesized as current token and the next token stays in place
 *   (stoppingType is not consulted in this case);
 * - the next token passes comparePrecedence against stoppingType: it becomes
 *   the current token and a new lookahead is popped. */
bool Parser::canPopToken(Token::Type stoppingType) {
  const bool implicitTimes = tokenTypesCanBeImplicitlyMultiplied(m_currentToken.type(), m_nextToken.type());
  if (implicitTimes) {
    m_currentToken = Token(Token::Type::Times);
    return true;
  }
  if (!comparePrecedence(m_nextToken.type(), stoppingType)) {
    return false;
  }
  m_currentToken = m_nextToken;
  m_nextToken = popToken();
  return true;
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,29 +2,49 @@
|
||||
#define POINCARE_PARSING_PARSER_H
|
||||
|
||||
#include "tokenizer.h"
|
||||
#include <poincare/expression.h>
|
||||
|
||||
#include <poincare/addition.h>
|
||||
#include <poincare/division.h>
|
||||
#include <poincare/equal.h>
|
||||
#include <poincare/factorial.h>
|
||||
#include <poincare/multiplication.h>
|
||||
#include <poincare/opposite.h>
|
||||
#include <poincare/parenthesis.h>
|
||||
#include <poincare/number.h>
|
||||
#include <poincare/power.h>
|
||||
#include <poincare/square_root.h>
|
||||
#include <poincare/store.h>
|
||||
#include <poincare/subtraction.h>
|
||||
#include <poincare/symbol.h>
|
||||
// matrix ? with brackets
|
||||
// braces ?
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
class Parser : public Tokenizer {
|
||||
public:
|
||||
Parser(const char * input) : Tokenizer(input) {}
|
||||
Parser(const char * input) :
|
||||
Tokenizer(input),
|
||||
m_currentToken(Token(Token::Type::Undefined)),
|
||||
m_nextToken(popToken()) {}
|
||||
Expression parse();
|
||||
|
||||
Expression parseNumber(Expression leftHandSide);
|
||||
Expression parsePlus(Expression leftHandSide);
|
||||
Expression parseTimes(Expression leftHandSide);
|
||||
Expression parseSlash(Expression leftHandSide);
|
||||
Expression parseMinus(Expression leftHandSide);
|
||||
Expression parsePower(Expression leftHandSide);
|
||||
Expression parseLeftParenthesis(Expression leftHandSide);
|
||||
Expression parseSquareRoot(Expression leftHandSide);
|
||||
Expression parseBang(Expression leftHandSide);
|
||||
Expression parseEqual(Expression leftHandSide);
|
||||
Expression noParse(Expression leftHandSide);
|
||||
private:
|
||||
Expression shift(Expression leftHandSide, Token lookahead, Token::Type stoppingType = Token::Type::EndOfStream);
|
||||
bool comparePrecedence(Token currentToken, Token::Type stoppingType) const;
|
||||
Expression parseUntil(Token::Type stoppingType);
|
||||
bool canPopToken(Token::Type stoppingType);
|
||||
|
||||
Token m_currentToken;
|
||||
Token m_nextToken;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -13,11 +13,8 @@ public:
|
||||
Equal,
|
||||
Store,
|
||||
RightBracket,
|
||||
LeftBracket,
|
||||
RightBrace,
|
||||
LeftBrace,
|
||||
RightParenthesis,
|
||||
LeftParenthesis,
|
||||
Plus,
|
||||
Minus,
|
||||
Times,
|
||||
@@ -25,6 +22,9 @@ public:
|
||||
Power,
|
||||
SquareRoot,
|
||||
Bang,
|
||||
LeftBracket,
|
||||
LeftBrace,
|
||||
LeftParenthesis,
|
||||
Number,
|
||||
Identifier,
|
||||
Comma,
|
||||
@@ -36,21 +36,11 @@ public:
|
||||
Type type() const { return m_type; }
|
||||
bool is(Type t) const { return m_type == t; }
|
||||
bool isEndOfStream() const { return is(Type::EndOfStream); }
|
||||
bool isLeftGroupingToken() const {
|
||||
return is(Type::LeftBracket) || is(Type::LeftParenthesis) || is(Type::LeftBrace);
|
||||
}
|
||||
bool isRightGroupingToken() const {
|
||||
return is(Type::RightBracket) || is(Type::RightParenthesis) || is(Type::RightBrace);
|
||||
}
|
||||
const char * text() const { return m_text; }
|
||||
void setText(const char * text) { m_text = text; }
|
||||
size_t length() const { return m_length; }
|
||||
void setLength(size_t length) { m_length = length; }
|
||||
|
||||
Expression expression() const { return m_expression; }
|
||||
void setExpression(Expression e) { m_expression = e; }
|
||||
private:
|
||||
Type m_type;
|
||||
const char * m_text;
|
||||
size_t m_length;
|
||||
Expression m_expression;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ bool Tokenizer::canPopChar(char c) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t Tokenizer::popInteger() {
|
||||
size_t Tokenizer::popDigits() {
|
||||
size_t length = 0;
|
||||
char c = currentChar();
|
||||
while (c >= '0' && c <= '9') {
|
||||
@@ -22,12 +22,13 @@ size_t Tokenizer::popInteger() {
|
||||
|
||||
Token Tokenizer::popNumber() {
|
||||
const char * integerPartText = m_text;
|
||||
size_t integerPartLength = popInteger();
|
||||
size_t integerPartLength = popDigits();
|
||||
|
||||
/* const char * decimalPartText = m_text;
|
||||
const char * decimalPartText = m_text;
|
||||
size_t decimalPartLength = 0;
|
||||
if (canPopChar('.')) {
|
||||
decimalPartLength = popInteger();
|
||||
decimalPartText = m_text;
|
||||
decimalPartLength = popDigits();
|
||||
}
|
||||
|
||||
if (integerPartLength == 0 && decimalPartLength == 0) {
|
||||
@@ -39,18 +40,14 @@ Token Tokenizer::popNumber() {
|
||||
bool exponentIsNegative = false;
|
||||
if (canPopChar('e')) {
|
||||
exponentIsNegative = canPopChar('-');
|
||||
exponentPartLength = popInteger();
|
||||
exponentPartLength = popDigits();
|
||||
if (exponentPartLength == 0) {
|
||||
return Token();
|
||||
}
|
||||
}
|
||||
|
||||
Token result(Token::Type::Number);
|
||||
//TODO result.setExpression(Number(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
|
||||
return result;*/
|
||||
Token result(Token::Type::Number);
|
||||
result.setText(integerPartText);
|
||||
result.setLength(integerPartLength);
|
||||
result.setExpression(Number::ParseNumber(integerPartText, integerPartLength, decimalPartText, decimalPartLength, exponentIsNegative, exponentPartText, exponentPartLength));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -59,7 +56,6 @@ static inline bool isLetter(char c) {
|
||||
}
|
||||
|
||||
Token Tokenizer::popIdentifier() {
|
||||
const char * text = m_text;
|
||||
size_t length = 0;
|
||||
char c = currentChar();
|
||||
while (isLetter(c)) {
|
||||
@@ -71,15 +67,18 @@ Token Tokenizer::popIdentifier() {
|
||||
return result;
|
||||
}
|
||||
|
||||
Token Tokenizer::popToken() { // associative array?
|
||||
Token Tokenizer::popToken() {
|
||||
const char c = currentChar();
|
||||
if (canPopChar(0)) {
|
||||
return Token(Token::Type::EndOfStream);
|
||||
if ((c == '.') || (c >= '0' && c <= '9')) {
|
||||
return popNumber();
|
||||
}
|
||||
if (isLetter(c)) {
|
||||
return popIdentifier();
|
||||
}
|
||||
if (canPopChar('!')) {
|
||||
return Token(Token::Type::Bang);
|
||||
}
|
||||
if (c >= '(' && (c <= '/' && c != '.')) {
|
||||
if (c >= '(' && c <= '/' && c != '.') {
|
||||
Token::Type typeForChar[] = {
|
||||
Token::Type::LeftParenthesis,
|
||||
Token::Type::RightParenthesis,
|
||||
@@ -112,20 +111,17 @@ Token Tokenizer::popToken() { // associative array?
|
||||
if (canPopChar('}')) {
|
||||
return Token(Token::Type::RightBrace);
|
||||
}
|
||||
if (canPopChar('\x89')) {
|
||||
if (canPopChar('\x89')) { // Ion::Charset::SmallPi
|
||||
return Token(Token::Type::Number);
|
||||
}
|
||||
if (canPopChar('\x90')) {
|
||||
if (canPopChar('\x90')) { // Ion::Charset::Store
|
||||
return Token(Token::Type::Store);
|
||||
}
|
||||
if (canPopChar('\x91')) {
|
||||
if (canPopChar('\x91')) { // Ion::Charset::Root
|
||||
return Token(Token::Type::SquareRoot);
|
||||
}
|
||||
if ((c == '.') || (c >= '0' && c <= '9')) {
|
||||
return popNumber();
|
||||
}
|
||||
if (isLetter(c)) {
|
||||
return popIdentifier();
|
||||
if (canPopChar(0)) {
|
||||
return Token(Token::Type::EndOfStream);
|
||||
}
|
||||
return Token(); // TODO error
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#define POINCARE_PARSING_TOKENIZER_H
|
||||
|
||||
#include "token.h"
|
||||
#include <poincare/number.h>
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
@@ -13,7 +14,7 @@ private:
|
||||
const char popChar() { return *++m_text; }
|
||||
const char currentChar() const { return *m_text; }
|
||||
bool canPopChar(char c);
|
||||
size_t popInteger();
|
||||
size_t popDigits();
|
||||
Token popNumber();
|
||||
Token popIdentifier();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user