[poincare] Parsing: Unit token does not accept '_' char except at first

position. This makes it possible to parse "_km_s" as Multiplication(Unit(km),
Unit(s)).
This commit is contained in:
Émilie Feral
2020-02-04 10:56:36 +01:00
committed by Léa Saviot
parent f91ca59ca3
commit 5cb26b19a5
3 changed files with 17 additions and 6 deletions

View File

@@ -38,7 +38,7 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) {
return length;
}
size_t Tokenizer::popIdentifier() {
size_t Tokenizer::popIdentifier(CodePoint additionalAcceptedCodePoint) {
/* TODO handle combined code points? For now combining code points will
* trigger a syntax error.
* This method is used to parse any identifier, reserved or custom, or even
@@ -47,8 +47,8 @@ size_t Tokenizer::popIdentifier() {
* instance input '2πx' without any danger.
*/
return popWhile([](CodePoint c, CodePoint context) {
return c.isDecimalDigit() || c.isLatinLetter() || c == '_' || c.isGreekCapitalLetter() || (c.isGreekSmallLetter() && c != UCodePointGreekSmallLetterPi);
});
return c.isDecimalDigit() || c.isLatinLetter() || (c != UCodePointNull && c == context) || c.isGreekCapitalLetter() || (c.isGreekSmallLetter() && c != UCodePointGreekSmallLetterPi);
}, additionalAcceptedCodePoint);
}
size_t Tokenizer::popDigits() {
@@ -162,7 +162,7 @@ Token Tokenizer::popToken() {
* reserved or custom identifier, popIdentifier is called in both cases.
*/
Token result(Token::Unit);
result.setString(start + 1, popIdentifier()); // + 1 for the underscore
result.setString(start + 1, popIdentifier(UCodePointNull)); // + 1 for the underscore
return result;
}
if (c.isLatinLetter() ||
@@ -170,7 +170,7 @@ Token Tokenizer::popToken() {
c.isGreekSmallLetter()) // Greek small letter pi is matched earlier
{
Token result(Token::Identifier);
result.setString(start, UTF8Decoder::CharSizeOfCodePoint(c) + popIdentifier()); // We already popped 1 code point
result.setString(start, UTF8Decoder::CharSizeOfCodePoint(c) + popIdentifier('_')); // We already popped 1 code point
return result;
}
if ('(' <= c && c <= '/') {

View File

@@ -23,7 +23,7 @@ private:
size_t popDigits();
size_t popBinaryDigits();
size_t popHexadecimalDigits();
size_t popIdentifier();
size_t popIdentifier(CodePoint additionalAcceptedCodePoint);
Token popNumber();
const char * m_text;

View File

@@ -304,10 +304,21 @@ QUIZ_CASE(poincare_parsing_units) {
}
}
// Non-existing units are not parsable
assert_text_not_parsable("_n");
assert_text_not_parsable("_a");
// Any identifier starting with '_' is tokenized as a unit
assert_tokenizes_as_unit("_m");
assert_tokenizes_as_unit("_A");
// Can parse implicit multiplication with units
Expression kilometer = Expression::Parse("_km", nullptr);
Expression second = Expression::Parse("_s", nullptr);
assert_parsed_expression_is("_kmπ", Multiplication::Builder(kilometer, Constant::Builder(UCodePointGreekSmallLetterPi)));
assert_parsed_expression_is("π_km", Multiplication::Builder(Constant::Builder(UCodePointGreekSmallLetterPi), kilometer));
assert_parsed_expression_is("_s_km", Multiplication::Builder(second, kilometer));
assert_parsed_expression_is("3_s", Multiplication::Builder(BasedInteger::Builder(3), second));
}
QUIZ_CASE(poincare_parsing_identifiers) {