[ion/unicode] Define and use CodePoint const methods

Add isLowerCaseLetter, isUpperCaseLetter, isLetter and isDigit as const methods of CodePoint. Remove the similar helpers from UTF8Helper and from Poincare::Tokenizer.
2026-03-18 21:30:38 +01:00 · 2020-01-24 14:30:42 +01:00
parent 7665ad6fe0
commit 9b0b4e3096
7 changed files with 26 additions and 45 deletions
--- a/apps/code/python_toolbox.cpp
+++ b/apps/code/python_toolbox.cpp
@@ -414,7 +414,7 @@ bool PythonToolbox::handleEvent(Ion::Events::Event event) {
  }
  if (event.hasText() && strlen(event.text()) == 1 ) {
    char c = event.text()[0];
-    if (UTF8Helper::CodePointIsLetter(c)) {
+    if (CodePoint(c).isLetter()) {
      scrollToLetter(c);
      return true;
    }
@@ -475,7 +475,7 @@ int PythonToolbox::maxNumberOfDisplayedRows() {
 }

 void PythonToolbox::scrollToLetter(char letter) {
-  assert(UTF8Helper::CodePointIsLetter(letter));
+  assert(CodePoint(letter).isLetter());
  /* We look for a child MessageTree that starts with the wanted letter. If we
   * do not find one, we scroll to the first child MessageTree that starts with
   * a letter higher than the wanted letter. */
@@ -487,7 +487,7 @@ void PythonToolbox::scrollToLetter(char letter) {
      index = i;
      break;
    }
-    if (index < 0 && l >= lowerLetter && UTF8Helper::CodePointIsLowerCaseLetter(l)) {
+    if (index < 0 && l >= lowerLetter && CodePoint(l).isLowerCaseLetter()) {
      index = i;
    }
  }
--- a/apps/code/script.cpp
+++ b/apps/code/script.cpp
@@ -48,7 +48,7 @@ bool Script::nameCompliant(const char * name) {
   * problems with case sensitivity. */
  UTF8Decoder decoder(name);
  CodePoint c = decoder.nextCodePoint();
-  if (c == UCodePointNull || !(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || c == '.')) {
+  if (c == UCodePointNull || !(c.isLowerCaseLetter() || c == '_' || c == '.')) {
    /* The name cannot be empty. Its first letter must be in [a-z_] or the
     * extension dot. */
    return false;
@@ -57,7 +57,7 @@ bool Script::nameCompliant(const char * name) {
    if (c == '.' && strcmp(decoder.stringPosition(), ScriptStore::k_scriptExtension) == 0) {
      return true;
    }
-    if (!(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || UTF8Helper::CodePointIsNumber(c))) {
+    if (!(c.isLowerCaseLetter() || c == '_' || c.isDigit())) {
      return false;
    }
    c = decoder.nextCodePoint();
--- a/apps/shared/function.cpp
+++ b/apps/shared/function.cpp
@@ -1,7 +1,6 @@
 #include "function.h"
 #include "poincare_helpers.h"
 #include "poincare/src/parsing/parser.h"
-#include <ion/unicode/utf8_helper.h>
 #include <ion/unicode/utf8_decoder.h>
 #include <string.h>
 #include <cmath>
@@ -16,7 +15,7 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *

  UTF8Decoder decoder(baseName);
  CodePoint c = decoder.nextCodePoint();
-  if (UTF8Helper::CodePointIsNumber(c)) {
+  if (c.isDigit()) {
    // The name cannot start with a number
    if (error != nullptr) {
      *error = NameNotCompliantError::NameCannotStartWithNumber;
@@ -26,11 +25,9 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *

  // The name should only have allowed characters
  while (c != UCodePointNull) {
-    if (!(UTF8Helper::CodePointIsUpperCaseLetter(c)
-        || UTF8Helper::CodePointIsLowerCaseLetter(c)
-        || UTF8Helper::CodePointIsNumber(c))
-        || c == '_')
-    {
+    // FIXME '_' should be accepted but not as first character
+    // TODO Factor this piece of code with similar one in the Parser
+    if (!(c.isLetter() || c.isDigit()) || c == '_') {
      if (error != nullptr) {
        *error = NameNotCompliantError::CharacterNotAllowed;
      }
--- a/ion/include/ion/unicode/code_point.h
+++ b/ion/include/ion/unicode/code_point.h
@@ -9,6 +9,18 @@ public:
  constexpr CodePoint(uint32_t c) : m_code(c) {}
  constexpr operator uint32_t() const { return m_code; }

+  bool isLowerCaseLetter() const { // True when the code point lies in 'a'..'z' (inclusive)
+    return 'a' <= m_code && m_code <= 'z';
+  }
+  bool isUpperCaseLetter() const { // True when the code point lies in 'A'..'Z' (inclusive)
+    return 'A' <= m_code && m_code <= 'Z';
+  }
+  bool isLetter() const { // True when either of the two case checks above holds
+    return isLowerCaseLetter() || isUpperCaseLetter();
+  }
+  bool isDigit() const { // True when the code point lies in '0'..'9' (inclusive)
+    return '0' <= m_code && m_code <= '9';
+  }
  bool isCombining() const {
    return (m_code >= 0x300 && m_code <= 0x036F);
  }
--- a/ion/include/ion/unicode/utf8_helper.h
+++ b/ion/include/ion/unicode/utf8_helper.h
@@ -69,10 +69,6 @@ const char * PerformAtCodePoints(

 bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c);
 bool CodePointIs(const char * location, CodePoint c);
-bool CodePointIsLetter(CodePoint c);
-bool CodePointIsLowerCaseLetter(CodePoint c);
-bool CodePointIsUpperCaseLetter(CodePoint c);
-bool CodePointIsNumber(CodePoint c);

 // Shift the buffer and return the number of bytes removed.
 int RemovePreviousGlyph(const char * text, char * location, CodePoint * c = nullptr);
--- a/ion/src/shared/unicode/utf8_helper.cpp
+++ b/ion/src/shared/unicode/utf8_helper.cpp
@@ -267,22 +267,6 @@ bool CodePointIs(const char * location, CodePoint c) {
  return decoder.nextCodePoint() == c;
 }

-bool CodePointIsLetter(CodePoint c) {
-  return CodePointIsLowerCaseLetter(c) || CodePointIsUpperCaseLetter(c);
-}
-
-bool CodePointIsLowerCaseLetter(CodePoint c) {
-  return c >= 'a' && c <= 'z';
-}
-
-bool CodePointIsUpperCaseLetter(CodePoint c) {
-  return c >= 'A' && c <= 'Z';
-}
-
-bool CodePointIsNumber(CodePoint c) {
-  return c >= '0' && c <= '9';
-}
-
 int RemovePreviousGlyph(const char * text, char * location, CodePoint * c) {
  if (location <= text) {
    assert(location == text);
--- a/poincare/src/parsing/tokenizer.cpp
+++ b/poincare/src/parsing/tokenizer.cpp
@@ -6,14 +6,6 @@

 namespace Poincare {

-static inline bool isLetter(const CodePoint c) {
-  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
-}
-
-static inline bool isDigit(const CodePoint c) {
-  return '0' <= c && c <= '9';
-}
-
 const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
  UTF8Decoder decoder(m_text);
  CodePoint c = decoder.nextCodePoint();
@@ -55,11 +47,11 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) {
 size_t Tokenizer::popIdentifier() {
  /* TODO handle combined code points? For now combining code points will
   * trigger a syntax error. */
-  return popWhile([](CodePoint c, CodePoint context) { return isLetter(c) || isDigit(c) || c == context; }, '_');
+  return popWhile([](CodePoint c, CodePoint context) { return c.isLetter() || c.isDigit() || c == context; }, '_');
 }

 size_t Tokenizer::popDigits() {
-  return popWhile([](CodePoint c, CodePoint context) { return isDigit(c); });
+  return popWhile([](CodePoint c, CodePoint context) { return c.isDigit(); });
 }

 size_t Tokenizer::popBinaryDigits() {
@@ -67,7 +59,7 @@ size_t Tokenizer::popBinaryDigits() {
 }

 size_t Tokenizer::popHexadecimalDigits() {
-  return popWhile([](CodePoint c, CodePoint context) { return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
+  return popWhile([](CodePoint c, CodePoint context) { return c.isDigit() || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
 }

 Token Tokenizer::popNumber() {
@@ -141,7 +133,7 @@ Token Tokenizer::popToken() {
  /* If the next code point is the start of a number, we do not want to pop it
   * because popNumber needs this code point. */
  bool nextCodePointIsNeitherDotNorDigit = true;
-  const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !isDigit(cp); }, '.', &nextCodePointIsNeitherDotNorDigit);
+  const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !cp.isDigit(); }, '.', &nextCodePointIsNeitherDotNorDigit);

  // According to c, recognize the Token::Type.
  if (!nextCodePointIsNeitherDotNorDigit) {
@@ -152,7 +144,7 @@ Token Tokenizer::popToken() {
    result.setString(start + 1, popIdentifier());
    return result;
  }
-  if (isLetter(c)) {
+  if (c.isLetter()) {
    Token result(Token::Identifier);
    result.setString(start, 1 + popIdentifier()); // We already popped 1 code point
    return result;