From 9b0b4e30964bcf48b6f91a5e388622583f0346ee Mon Sep 17 00:00:00 2001 From: Ruben Dashyan Date: Fri, 24 Jan 2020 14:30:42 +0100 Subject: [PATCH] [ion/unicode] Define and use CodePoint const methods isLowerCaseLetter, isUpperCaseLetter, isLetter, isDigit. Remove similar ones from UTF8Helper and from Poincare::Tokenizer. --- apps/code/python_toolbox.cpp | 6 +++--- apps/code/script.cpp | 4 ++-- apps/shared/function.cpp | 11 ++++------- ion/include/ion/unicode/code_point.h | 12 ++++++++++++ ion/include/ion/unicode/utf8_helper.h | 4 ---- ion/src/shared/unicode/utf8_helper.cpp | 16 ---------------- poincare/src/parsing/tokenizer.cpp | 18 +++++------------- 7 files changed, 26 insertions(+), 45 deletions(-) diff --git a/apps/code/python_toolbox.cpp b/apps/code/python_toolbox.cpp index fffcdc795..c00be0489 100644 --- a/apps/code/python_toolbox.cpp +++ b/apps/code/python_toolbox.cpp @@ -414,7 +414,7 @@ bool PythonToolbox::handleEvent(Ion::Events::Event event) { } if (event.hasText() && strlen(event.text()) == 1 ) { char c = event.text()[0]; - if (UTF8Helper::CodePointIsLetter(c)) { + if (CodePoint(c).isLetter()) { scrollToLetter(c); return true; } @@ -475,7 +475,7 @@ int PythonToolbox::maxNumberOfDisplayedRows() { } void PythonToolbox::scrollToLetter(char letter) { - assert(UTF8Helper::CodePointIsLetter(letter)); + assert(CodePoint(letter).isLetter()); /* We look for a child MessageTree that starts with the wanted letter. If we * do not find one, we scroll to the first child MessageTree that starts with * a letter higher than the wanted letter. 
*/ @@ -487,7 +487,7 @@ void PythonToolbox::scrollToLetter(char letter) { index = i; break; } - if (index < 0 && l >= lowerLetter && UTF8Helper::CodePointIsLowerCaseLetter(l)) { + if (index < 0 && l >= lowerLetter && CodePoint(l).isLowerCaseLetter()) { index = i; } } diff --git a/apps/code/script.cpp b/apps/code/script.cpp index 5ebb87c96..32de62ad1 100644 --- a/apps/code/script.cpp +++ b/apps/code/script.cpp @@ -48,7 +48,7 @@ bool Script::nameCompliant(const char * name) { * problems with case sensitivity. */ UTF8Decoder decoder(name); CodePoint c = decoder.nextCodePoint(); - if (c == UCodePointNull || !(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || c == '.')) { + if (c == UCodePointNull || !(c.isLowerCaseLetter() || c == '_' || c == '.')) { /* The name cannot be empty. Its first letter must be in [a-z_] or the * extension dot. */ return false; @@ -57,7 +57,7 @@ bool Script::nameCompliant(const char * name) { if (c == '.' && strcmp(decoder.stringPosition(), ScriptStore::k_scriptExtension) == 0) { return true; } - if (!(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || UTF8Helper::CodePointIsNumber(c))) { + if (!(c.isLowerCaseLetter() || c == '_' || c.isDigit())) { return false; } c = decoder.nextCodePoint(); diff --git a/apps/shared/function.cpp b/apps/shared/function.cpp index 14ed1df0a..687ee3f58 100644 --- a/apps/shared/function.cpp +++ b/apps/shared/function.cpp @@ -1,7 +1,6 @@ #include "function.h" #include "poincare_helpers.h" #include "poincare/src/parsing/parser.h" -#include #include #include #include @@ -16,7 +15,7 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError * UTF8Decoder decoder(baseName); CodePoint c = decoder.nextCodePoint(); - if (UTF8Helper::CodePointIsNumber(c)) { + if (c.isDigit()) { // The name cannot start with a number if (error != nullptr) { *error = NameNotCompliantError::NameCannotStartWithNumber; @@ -26,11 +25,9 @@ bool Function::BaseNameCompliant(const char * baseName, 
NameNotCompliantError * // The name should only have allowed characters while (c != UCodePointNull) { - if (!(UTF8Helper::CodePointIsUpperCaseLetter(c) - || UTF8Helper::CodePointIsLowerCaseLetter(c) - || UTF8Helper::CodePointIsNumber(c)) - || c == '_') - { + // FIXME '_' should be accepted but not as first character + // TODO Factor this piece of code with similar one in the Parser + if (!(c.isLetter() || c.isDigit()) || c == '_') { if (error != nullptr) { *error = NameNotCompliantError::CharacterNotAllowed; } diff --git a/ion/include/ion/unicode/code_point.h b/ion/include/ion/unicode/code_point.h index 837ead4cd..7079545c8 100644 --- a/ion/include/ion/unicode/code_point.h +++ b/ion/include/ion/unicode/code_point.h @@ -9,6 +9,18 @@ public: constexpr CodePoint(uint32_t c) : m_code(c) {} constexpr operator uint32_t() const { return m_code; } + bool isLowerCaseLetter() const { + return 'a' <= m_code && m_code <= 'z'; + } + bool isUpperCaseLetter() const { + return 'A' <= m_code && m_code <= 'Z'; + } + bool isLetter() const { + return isLowerCaseLetter() || isUpperCaseLetter(); + } + bool isDigit() const { + return '0' <= m_code && m_code <= '9'; + } bool isCombining() const { return (m_code >= 0x300 && m_code <= 0x036F); } diff --git a/ion/include/ion/unicode/utf8_helper.h b/ion/include/ion/unicode/utf8_helper.h index ee0584a5b..2c90d7096 100644 --- a/ion/include/ion/unicode/utf8_helper.h +++ b/ion/include/ion/unicode/utf8_helper.h @@ -69,10 +69,6 @@ const char * PerformAtCodePoints( bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c); bool CodePointIs(const char * location, CodePoint c); -bool CodePointIsLetter(CodePoint c); -bool CodePointIsLowerCaseLetter(CodePoint c); -bool CodePointIsUpperCaseLetter(CodePoint c); -bool CodePointIsNumber(CodePoint c); // Shift the buffer and return the number of bytes removed. 
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c = nullptr); diff --git a/ion/src/shared/unicode/utf8_helper.cpp b/ion/src/shared/unicode/utf8_helper.cpp index bddee8d06..76a3fbd72 100644 --- a/ion/src/shared/unicode/utf8_helper.cpp +++ b/ion/src/shared/unicode/utf8_helper.cpp @@ -267,22 +267,6 @@ bool CodePointIs(const char * location, CodePoint c) { return decoder.nextCodePoint() == c; } -bool CodePointIsLetter(CodePoint c) { - return CodePointIsLowerCaseLetter(c) || CodePointIsUpperCaseLetter(c); -} - -bool CodePointIsLowerCaseLetter(CodePoint c) { - return c >= 'a' && c <= 'z'; -} - -bool CodePointIsUpperCaseLetter(CodePoint c) { - return c >= 'A' && c <= 'Z'; -} - -bool CodePointIsNumber(CodePoint c) { - return c >= '0' && c <= '9'; -} - int RemovePreviousGlyph(const char * text, char * location, CodePoint * c) { if (location <= text) { assert(location == text); diff --git a/poincare/src/parsing/tokenizer.cpp b/poincare/src/parsing/tokenizer.cpp index 67f838d72..f27812b8f 100644 --- a/poincare/src/parsing/tokenizer.cpp +++ b/poincare/src/parsing/tokenizer.cpp @@ -6,14 +6,6 @@ namespace Poincare { -static inline bool isLetter(const CodePoint c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); -} - -static inline bool isDigit(const CodePoint c) { - return '0' <= c && c <= '9'; -} - const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) { UTF8Decoder decoder(m_text); CodePoint c = decoder.nextCodePoint(); @@ -55,11 +47,11 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) { size_t Tokenizer::popIdentifier() { /* TODO handle combined code points? For now combining code points will * trigger a syntax error. 
*/ - return popWhile([](CodePoint c, CodePoint context) { return isLetter(c) || isDigit(c) || c == context; }, '_'); + return popWhile([](CodePoint c, CodePoint context) { return c.isLetter() || c.isDigit() || c == context; }, '_'); } size_t Tokenizer::popDigits() { - return popWhile([](CodePoint c, CodePoint context) { return isDigit(c); }); + return popWhile([](CodePoint c, CodePoint context) { return c.isDigit(); }); } size_t Tokenizer::popBinaryDigits() { @@ -67,7 +59,7 @@ size_t Tokenizer::popBinaryDigits() { } size_t Tokenizer::popHexadecimalDigits() { - return popWhile([](CodePoint c, CodePoint context) { return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); }); + return popWhile([](CodePoint c, CodePoint context) { return c.isDigit() || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); }); } Token Tokenizer::popNumber() { @@ -141,7 +133,7 @@ Token Tokenizer::popToken() { /* If the next code point is the start of a number, we do not want to pop it * because popNumber needs this code point. */ bool nextCodePointIsNeitherDotNorDigit = true; - const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !isDigit(cp); }, '.', &nextCodePointIsNeitherDotNorDigit); + const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !cp.isDigit(); }, '.', &nextCodePointIsNeitherDotNorDigit); // According to c, recognize the Token::Type. if (!nextCodePointIsNeitherDotNorDigit) { @@ -152,7 +144,7 @@ Token Tokenizer::popToken() { result.setString(start + 1, popIdentifier()); return result; } - if (isLetter(c)) { + if (c.isLetter()) { Token result(Token::Identifier); result.setString(start, 1 + popIdentifier()); // We already popped 1 code point return result;