[escher/ion] Standard methods in UTF8Helper used in text_area

String manipulations need to be done using a UTF-8 decoder, but some manipulations can be optimized when the code points we manipulate are only one char long. These optimizations are implemented inside the UTF8Helper methods.
2026-03-22 15:20:39 +01:00 · 2019-01-24 11:06:07 +01:00
parent e6051fae42
commit aac78ca32a
6 changed files with 153 additions and 52 deletions
--- a/apps/solver/equation.cpp
+++ b/apps/solver/equation.cpp
@@ -50,7 +50,7 @@ Expression Equation::standardForm(Context * context) const {
 }

 bool Equation::containsIComplex() const {
-  return UTF8Helper::CodePointSearch(text(), UCodePointMathematicalBoldSmallI) != nullptr;
+  return *(UTF8Helper::CodePointSearch(text(), UCodePointMathematicalBoldSmallI)) != 0;
 }

 void Equation::tidyStandardForm() {
--- a/escher/include/escher/text_area.h
+++ b/escher/include/escher/text_area.h
@@ -30,7 +30,7 @@ protected:
      m_buffer = buffer;
      m_bufferSize = bufferSize;
    }
-    const char * text() const { return const_cast<const char *>(m_buffer); }
+    const char * text() const { return m_buffer; }

    class Line {
    public:
--- a/escher/src/text_area.cpp
+++ b/escher/src/text_area.cpp
@@ -99,8 +99,8 @@ void TextArea::setText(char * textBuffer, size_t textBufferSize) {
 bool TextArea::insertTextWithIndentation(const char * textBuffer, const char * location) {
  // Compute the indentation
  int indentation = indentationBeforeCursor();
-  const char * previousChar = cursorLocation()-1;
-  if (previousChar >= const_cast<TextArea *>(this)->contentView()->text() && *previousChar == ':') {
+  const char * buffer = contentView()->text();
+  if (cursorLocation() > buffer && UTF8Helper::PreviousCodePointIs(buffer, cursorLocation(), ':')) {
    indentation += k_indentationSpaces;
  }

@@ -117,26 +117,29 @@ bool TextArea::insertTextWithIndentation(const char * textBuffer, const char * l
  // Insert the indentation
  UTF8Helper::PerformAtCodePoints(
      textBuffer, '\n',
-      [](char * codePointLocation, void * text, int indentation){
+      [](char * codePointLocation, void * text, int indentation) {
        ((Text *)text)->insertSpacesAtLocation(indentation, codePointLocation);
      },
+      [](char * c1, void * c2, int c3) { },
      (void *)(contentView()->getText()),
      indentation);
  return true;
 }

 int TextArea::indentationBeforeCursor() const {
-  const char * p = cursorLocation()-1;
  int indentationSize = 0;
-  // No need to use the UTF8Decoder here, be cause we look for an ASCII char.
-  while (p >= const_cast<TextArea *>(this)->contentView()->text() && *p != '\n') {
-    if (*p == ' ') {
-      indentationSize++;
-    } else {
-      indentationSize = 0;
-    }
-    p--;
-  }
+  /* Compute the number of spaces at the beginning of the line. Increase the
+   * indentation size when encountering spaces, reset it to 0 when encountering
+   * another code point, until reaching the beginning of the line. */
+  UTF8Helper::PerformAtCodePoints(const_cast<TextArea *>(this)->contentView()->text(), ' ',
+      [](char * codePointLocation, void * indentationSize, int context){
+        int * castedSize = (int *) indentationSize;
+        *castedSize = *castedSize + 1;
+      },
+      [](char * codePointLocation, void * indentationSize, int context){
+        *((int *) indentationSize) = 0;
+      },
+      &indentationSize, 0, '\n', false, cursorLocation());
  return indentationSize;
 }

@@ -208,6 +211,7 @@ CodePoint TextArea::Text::removeCodePoint(const char * * position) {

  // Shift the buffer
  int codePointSize = *position - newCursorLocation;
+  assert(codePointSize == UTF8Decoder::CharSizeOfCodePoint(deletedCodePoint));
  assert(newCursorLocation >= m_buffer);
  for (size_t i = newCursorLocation - m_buffer; i < m_bufferSize; i++) {
    m_buffer[i] = m_buffer[i + codePointSize];
@@ -269,11 +273,7 @@ TextArea::Text::Line::Line(const char * text) :
  m_charLength(0)
 {
  if (m_text != nullptr) {
-    // No need to use the UTF8Decoder here, because we look for an ASCII char.
-    while (*text != 0 && *text != '\n') {
-      text++;
-    }
-    m_charLength = text - m_text;
+    m_charLength = UTF8Helper::CodePointSearch(text, '\n') - m_text;
  }
 }

@@ -284,14 +284,18 @@ KDCoordinate TextArea::Text::Line::glyphWidth(const KDFont * const font) const {
 bool TextArea::Text::Line::contains(const char * c) const {
  return (c >= m_text)
    && ((c < m_text + m_charLength)
-        || (c == m_text + m_charLength && (*c == 0 || *c == '\n'))) ;
+        || (c == m_text + m_charLength
+          && (UTF8Helper::CodePointIs(c, 0)
+            || UTF8Helper::CodePointIs(c, '\n')))) ;
 }

 /* TextArea::Text::LineIterator */

 TextArea::Text::LineIterator & TextArea::Text::LineIterator::operator++() {
  const char * last = m_line.text() + m_line.charLength();
-  m_line = Line(*last == 0 ? nullptr : last+1);
+  assert(UTF8Helper::CodePointIs(last, 0) || UTF8Helper::CodePointIs(last, '\n'));
+  assert(UTF8Decoder::CharSizeOfCodePoint('\n') == 1);
+  m_line = Line(UTF8Helper::CodePointIs(last, 0) ? nullptr : last + 1);
  return *this;
 }

@@ -376,14 +380,18 @@ bool TextArea::TextArea::ContentView::insertTextAtLocation(const char * text, co
  bool lineBreak = false;

  // Scan for \n and 0
-  const char * textScanner = text;
-  while (*textScanner != 0) {
-    textScanner++;
-    lineBreak |= *textScanner == '\n';
-  }
-  assert(*textScanner == 0);
-  m_text.insertText(text, textScanner - text, const_cast<char *>(location));
-  reloadRectFromPosition(location/*-1 TODO  LEA */, lineBreak);
+  const char * nullLocation = UTF8Helper::PerformAtCodePoints(
+      text, '\n',
+      [](char * codePointLocation, void * lineBreak, int indentation) {
+        *((bool *)lineBreak) = true;
+      },
+      [](char * c1, void * c2, int c3) { },
+      &lineBreak,
+      0);
+
+  assert(UTF8Helper::CodePointIs(nullLocation, 0));
+  m_text.insertText(text, nullLocation - text, const_cast<char *>(location));
+  reloadRectFromPosition(location, lineBreak);
  return true;
 }

@@ -416,7 +424,7 @@ bool TextArea::ContentView::removeStartOfLine() {
    assert(cursorLocation() == text());
    return false;
  }
-  size_t removedLine = m_text.removeRemainingLine(cursorLocation(), -1); //TODO LEA Before : cursorLocation()-1
+  size_t removedLine = m_text.removeRemainingLine(cursorLocation(), -1);
  if (removedLine > 0) {
    assert(cursorLocation() >= text() + removedLine);
    setCursorLocation(cursorLocation() - removedLine);
@@ -442,6 +450,7 @@ KDRect TextArea::ContentView::glyphFrameAtPosition(const char * position) const
    y++;
  }
  assert(found);
+  (void) found;

  return KDRect(
    x,
--- a/ion/include/ion/unicode/utf8_helper.h
+++ b/ion/include/ion/unicode/utf8_helper.h
@@ -9,16 +9,39 @@ namespace UTF8Helper {
 // Returns the number of occurences of a code point in a string
 int CountOccurrences(const char * s, CodePoint c);

-// Returns the first occurence of a code point in a string
+/* Returns the first occurrence of a code point in a string, or the position
+ * of the null terminating char if the code point is not found. */
 const char * CodePointSearch(const char * s, CodePoint c);

 /* Copy src into dst while removing all code points c. Also update an index
 * that should be lower if code points where removed before it. */
 void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * indexToDUpdate = nullptr);

-// Perform an action each time a code point is found
+/* Perform actionCodePoint each time code point c is found and
+ * actionOtherCodePoint for other code points, until stoppingCodePoint or a
+ * string boundary is reached. goingRight tells whether we decode towards the
+ * right or the left. If goingRight is false, initialPosition must be provided,
+ * and no action is done for *(initialPosition), even if it matches c.
+ * The return value is the first address for which we did not perform an action.
+ *
+ *                                         x = actionCodePoint is performed
+ *                                         o = actionOtherCodePoint is performed
+ * Going right == true:                    s = stoppingCodePoint
+ *     o  o  o  o  x  x  o  o  x  o
+ *    |  |  |  |  |c |c |  |  |c |  |s |  |  |c |
+ *    ^start of string
+ *
+ * Going right == false:
+ *     o  o  o  o  x  x  o  o
+ *    |  |c |  |s |c |c |  |  |c |  |  |c |  |  |
+ *    ^start of string        ^initialPosition
+ *
+ * */
 typedef void (*CodePointAction)(char * codePointLocation, void * contextPointer, int contextInt);
-void PerformAtCodePoints(const char * s, CodePoint c, CodePointAction action, void * contextPointer, int contextInt);
+const char *  PerformAtCodePoints(const char * string, CodePoint c, CodePointAction actionCodePoint, CodePointAction actionOtherCodePoint, void * contextPointer, int contextInt, CodePoint stoppingCodePoint = UCodePointNull, bool goingRight = true, const char * initialPosition = nullptr);
+
+bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c);
+bool CodePointIs(const char * location, CodePoint c);

 };

--- a/ion/src/shared/unicode/utf8_helper.cpp
+++ b/ion/src/shared/unicode/utf8_helper.cpp
@@ -34,6 +34,13 @@ int CountOccurrences(const char * s, CodePoint c) {
 }

 const char * CodePointSearch(const char * s, CodePoint c) {
+  if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
+    const char * result = s;
+    while (*result != 0 && *result != c) {
+      result++;
+    }
+    return result;
+  }
  UTF8Decoder decoder(s);
  const char * currentPointer = s;
  CodePoint codePoint = decoder.nextCodePoint();
@@ -43,10 +50,7 @@ const char * CodePointSearch(const char * s, CodePoint c) {
    codePoint = decoder.nextCodePoint();
    nextPointer = decoder.stringPosition();
  }
-  if (codePoint == c) {
-    return currentPointer;
-  }
-  return nullptr;
+  return currentPointer;
 }

 void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) {
@@ -74,30 +78,95 @@ void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePo
  }
 }

-void PerformAtCodePoints(const char * s, CodePoint c, CodePointAction action, void * contextPointer, int contextInt) {
-  if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
-    /* The code point is one char long, so it is equal to its char translation.
-     * We can do a classic char search. */
-    const char * i = s;
-    while (*i != 0) {
-      if (*i == c) {
-        action(const_cast<char *>(i), contextPointer, contextInt);
+const char * PerformAtCodePoints(const char * s, CodePoint c, CodePointAction actionCodePoint, CodePointAction actionOtherCodePoint, void * contextPointer, int contextInt, CodePoint stoppingCodePoint, bool goingRight, const char * initialPosition) {
+  /* If we are decoding towards the left, we must have a starting position. If
+   * we are decoding towards the right, the starting position is the start of
+   * string. */
+  assert((goingRight && initialPosition == nullptr)
+      || (!goingRight && initialPosition != nullptr));
+
+  if (UTF8Decoder::CharSizeOfCodePoint(c) == 1 && UTF8Decoder::CharSizeOfCodePoint(stoppingCodePoint) == 1) {
+    /* The code points are one char long, so they are equal to their char
+     * translations. We can do a classic char search. */
+    if (goingRight) {
+      const char * i = s;
+      while (*i != stoppingCodePoint && *i != 0) {
+        if (*i == c) {
+          actionCodePoint(const_cast<char *>(i), contextPointer, contextInt);
+        } else {
+          actionOtherCodePoint(const_cast<char *>(i), contextPointer, contextInt);
+        }
+        i++;
      }
-      i++;
+      return i;
    }
-  } else {
-    // The code point is more than one char long, we use a UTF8Decoder.
+    const char * i = initialPosition - 1;
+    while (i >= s && *i != stoppingCodePoint) {
+      if (*i == c) {
+        actionCodePoint(const_cast<char *>(i), contextPointer, contextInt);
+      } else {
+        actionOtherCodePoint(const_cast<char *>(i), contextPointer, contextInt);
+      }
+      i--;
+    }
+    return i;
+  }
+  // The code point is more than one char long, we use a UTF8Decoder.
+  if (goingRight) {
    UTF8Decoder decoder(s);
    const char * codePointPointer = decoder.stringPosition();
    CodePoint codePoint = decoder.nextCodePoint();
-    while (codePoint != UCodePointNull) {
+    while (codePoint != stoppingCodePoint && codePoint != UCodePointNull) {
      if (codePoint == c) {
-        action(const_cast<char *>(codePointPointer), contextPointer, contextInt);
+        actionCodePoint(const_cast<char *>(codePointPointer), contextPointer, contextInt);
+      } else {
+        actionOtherCodePoint(const_cast<char *>(codePointPointer), contextPointer, contextInt);
      }
      codePointPointer = decoder.stringPosition();
      codePoint = decoder.nextCodePoint();
    }
+    return codePointPointer;
  }
+  assert(!goingRight);
+  if (initialPosition <= s) {
+    return initialPosition;
+  }
+  UTF8Decoder decoder(s, initialPosition);
+  CodePoint codePoint = decoder.previousCodePoint();
+  const char * codePointPointer = decoder.stringPosition();
+  while (codePointPointer >= s && codePoint != stoppingCodePoint) {
+    if (codePoint == c) {
+      actionCodePoint(const_cast<char *>(codePointPointer), contextPointer, contextInt);
+    } else {
+      actionOtherCodePoint(const_cast<char *>(codePointPointer), contextPointer, contextInt);
+    }
+    if (codePointPointer > s) {
+      codePoint = decoder.previousCodePoint();
+      codePointPointer = decoder.stringPosition();
+    } else {
+      /* If the current pointer is s, we cannot continue decoding. Decreasing s
+       * will stop the while loop. */
+      codePointPointer = s-1;
+    }
+  }
+  return codePointPointer;
+}
+
+bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c) {
+  assert(location > buffer);
+  if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
+    return *(location -1) == c;
+  }
+  UTF8Decoder decoder(buffer, location);
+  return decoder.previousCodePoint() == c;
+}
+
+bool CodePointIs(const char * location, CodePoint c) {
+  if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
+    return *(location) == c;
+  }
+  UTF8Decoder decoder(location);
+  return decoder.nextCodePoint() == c;
 }

 };
--- a/poincare/src/expression.cpp
+++ b/poincare/src/expression.cpp
@@ -343,7 +343,7 @@ void Expression::SetEncounteredComplex(bool encounterComplex) {
 }

 Preferences::ComplexFormat Expression::UpdatedComplexFormatWithTextInput(Preferences::ComplexFormat complexFormat, const char * textInput) {
-  if (complexFormat == Preferences::ComplexFormat::Real && UTF8Helper::CodePointSearch(textInput, UCodePointMathematicalBoldSmallI) != nullptr) {
+  if (complexFormat == Preferences::ComplexFormat::Real && *(UTF8Helper::CodePointSearch(textInput, UCodePointMathematicalBoldSmallI)) != 0) {
    return Preferences::ComplexFormat::Cartesian;
  }
  return complexFormat;