mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-03-26 17:20:53 +01:00
[poincare] Clean parsing with unicodes
This commit is contained in:
@@ -361,12 +361,14 @@ void Parser::parseSequence(Expression & leftHandSide, const char name, Token::Ty
|
||||
if (m_status != Status::Progress) {
|
||||
} else if (!popTokenIfType(rightDelimiter)) {
|
||||
m_status = Status::Error; // Right delimiter missing.
|
||||
} else if (rank.isIdenticalTo(Symbol::Builder("n",1))) {
|
||||
char sym[5] = {name, '(', 'n', ')', 0};
|
||||
leftHandSide = Symbol::Builder(sym, 4);
|
||||
} else if (rank.isIdenticalTo(Addition::Builder(Symbol::Builder("n",1),Rational::Builder("1")))) {
|
||||
char sym[7] = {name, '(', 'n', '+', '1', ')', 0};
|
||||
leftHandSide = Symbol::Builder(sym, 6);
|
||||
} else if (rank.isIdenticalTo(Symbol::Builder('n'))) {
|
||||
constexpr int symbolNameSize = 5;
|
||||
char sym[symbolNameSize] = {name, '(', 'n', ')', 0};
|
||||
leftHandSide = Symbol::Builder(sym, symbolNameSize);
|
||||
} else if (rank.isIdenticalTo(Addition::Builder(Symbol::Builder('n'), Rational::Builder("1")))) {
|
||||
constexpr int symbolNameSize = 7;
|
||||
char sym[symbolNameSize] = {name, '(', 'n', '+', '1', ')', 0};
|
||||
leftHandSide = Symbol::Builder(sym, symbolNameSize);
|
||||
} else {
|
||||
m_status = Status::Error; // Unexpected parameter.
|
||||
}
|
||||
@@ -383,8 +385,12 @@ void Parser::parseSpecialIdentifier(Expression & leftHandSide) {
|
||||
} else if (m_currentToken.compareTo(Unreal::Name()) == 0) {
|
||||
leftHandSide = Unreal::Builder();
|
||||
} else if (m_currentToken.compareTo("u_") == 0 || m_currentToken.compareTo("v_") == 0) { // Special case for sequences (e.g. "u_{n}")
|
||||
/* We know that m_currentToken.text()[0] is either 'u' or 'v', so we do not
|
||||
* need to pass a code point to parseSequence. */
|
||||
parseSequence(leftHandSide, m_currentToken.text()[0], Token::LeftBrace, Token::RightBrace);
|
||||
} else if (m_currentToken.compareTo("u") == 0 || m_currentToken.compareTo("v") == 0) { // Special case for sequences (e.g. "u(n)")
|
||||
/* We know that m_currentToken.text()[0] is either 'u' or 'v', so we do not
|
||||
* need to pass a code point to parseSequence. */
|
||||
parseSequence(leftHandSide, m_currentToken.text()[0], Token::LeftParenthesis, Token::RightParenthesis);
|
||||
} else if (m_currentToken.compareTo("log_") == 0) { // Special case for the log function (e.g. "log_{2}(8)")
|
||||
if (!popTokenIfType(Token::LeftBrace)) {
|
||||
@@ -426,7 +432,7 @@ void Parser::parseCustomIdentifier(Expression & leftHandSide, const char * name,
|
||||
return;
|
||||
}
|
||||
parameter = parameter.childAtIndex(0);
|
||||
if (parameter.type() == ExpressionNode::Type::Symbol && strncmp(static_cast<SymbolAbstract&>(parameter).name(),name, length) == 0) {
|
||||
if (parameter.type() == ExpressionNode::Type::Symbol && strncmp(static_cast<SymbolAbstract&>(parameter).name(), name, length) == 0) {
|
||||
m_status = Status::Error; // Function and variable must have distinct names.
|
||||
} else if (!popTokenIfType(Token::RightParenthesis)) {
|
||||
m_status = Status::Error; // Right parenthesis missing.
|
||||
@@ -487,8 +493,7 @@ void Parser::parseMatrix(Expression & leftHandSide, Token::Type stoppingType) {
|
||||
return;
|
||||
}
|
||||
if ((numberOfRows == 0 && (numberOfColumns = row.numberOfChildren()) == 0)
|
||||
||
|
||||
(numberOfColumns != row.numberOfChildren())) {
|
||||
|| (numberOfColumns != row.numberOfChildren())) {
|
||||
m_status = Status::Error; // Incorrect matrix.
|
||||
return;
|
||||
} else {
|
||||
|
||||
@@ -14,16 +14,22 @@ static inline bool isDigit(const CodePoint c) {
|
||||
|
||||
const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
|
||||
UTF8Decoder decoder(m_text);
|
||||
const char * currentPointer = m_text;
|
||||
const char * nextPointer = decoder.nextCodePointPointer();
|
||||
CodePoint firstCodePoint = decoder.nextCodePoint();
|
||||
size_t numberOfBytesForCodePoint = UTF8Decoder::CharSizeOfCodePoint(firstCodePoint);
|
||||
size_t numberOfBytesForCodePoint = nextPointer - currentPointer;
|
||||
if (firstCodePoint != KDCodePointNull) {
|
||||
currentPointer = nextPointer;
|
||||
nextPointer = decoder.nextCodePointPointer();
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
while (codePoint.isCombining()) {
|
||||
numberOfBytesForCodePoint+= UTF8Decoder::CharSizeOfCodePoint(codePoint);
|
||||
numberOfBytesForCodePoint+= nextPointer - currentPointer;
|
||||
currentPointer = nextPointer;
|
||||
nextPointer = decoder.nextCodePointPointer();
|
||||
codePoint = decoder.nextCodePoint();
|
||||
}
|
||||
}
|
||||
// TODO handle combined code points?
|
||||
// TODO handle combined code points? For now the combining codepoints get dropped.
|
||||
bool shouldPop = popTest(firstCodePoint, context);
|
||||
if (testResult != nullptr) {
|
||||
*testResult = shouldPop;
|
||||
@@ -181,7 +187,9 @@ Token Tokenizer::popToken() {
|
||||
if (c == KDCodePointSquareRoot) {
|
||||
Token result(Token::Identifier);
|
||||
// TODO compute size manually?
|
||||
result.setString(start, UTF8Decoder::CharSizeOfCodePoint(KDCodePointSquareRoot));
|
||||
constexpr int squareRootCharLength = 3;
|
||||
assert(UTF8Decoder::CharSizeOfCodePoint(KDCodePointSquareRoot) == squareRootCharLength);
|
||||
result.setString(start, squareRootCharLength);
|
||||
return result;
|
||||
}
|
||||
if (c == KDCodePointEmpty) {
|
||||
|
||||
Reference in New Issue
Block a user