[escher] Fix text inputs so they use UTF8

2026-01-18 16:27:34 +01:00 · 2019-01-18 16:41:39 +01:00
parent 242bcda631
commit 5142c071df
24 changed files with 510 additions and 360 deletions
--- a/kandinsky/include/kandinsky/font.h
+++ b/kandinsky/include/kandinsky/font.h
@@ -32,7 +32,10 @@ public:
  static constexpr const KDFont * LargeFont = &privateLargeFont;
  static constexpr const KDFont * SmallFont = &privateSmallFont;

-  KDSize stringSize(const char * text) const;
+  KDSize stringSize(const char * text) const {
+    return stringSizeUntil(text, nullptr);
+  }
+  KDSize stringSizeUntil(const char * text, const char * limit) const;

  union GlyphBuffer {
  public:
--- a/kandinsky/include/kandinsky/unicode/utf8_decoder.h
+++ b/kandinsky/include/kandinsky/unicode/utf8_decoder.h
@@ -1,8 +1,9 @@
 #ifndef KANDINSKY_UNICODE_UTF8_DECODER_H
 #define KANDINSKY_UNICODE_UTF8_DECODER_H

-#include <stddef.h>
 #include "code_point.h"
+#include <stddef.h>
+#include <assert.h>

 /* UTF-8 encodes all valid code points using at most 4 bytes (= 28 bits), the
 * lowest codes being equal to ASCII codes. There are less than 2^21 different
@@ -17,13 +18,20 @@

 class UTF8Decoder {
 public:
-  UTF8Decoder(const char * string) : m_string(string) {}
+  UTF8Decoder(const char * string, const char * initialPosition = nullptr) :
+    m_string(string),
+    m_stringPosition(initialPosition == nullptr ? string : initialPosition) // Start at the beginning of string unless a position is given
+  {
+    assert(m_string != nullptr);
+  }
  CodePoint nextCodePoint();
-  const char * stringPosition() const { return m_string; }
+  CodePoint previousCodePoint(); // Steps the position back over one code point and returns that code point
+  const char * stringPosition() const { return m_stringPosition; }
  static size_t CharSizeOfCodePoint(CodePoint c);
  static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
 private:
-  const char * m_string;
+  const char * const m_string; // Start of the string; lower bound when decoding backwards
+  const char * m_stringPosition; // Current decoding position inside the string
 };

 #endif
--- a/kandinsky/include/kandinsky/unicode/utf8_helper.h
+++ b/kandinsky/include/kandinsky/unicode/utf8_helper.h
@@ -10,7 +10,7 @@ const char * CodePointSearch(const char * s, CodePoint c);
 /* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
 * It also updates an index that should be lower if code points where removed
 * before it. */
-void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToDUpdate = nullptr);
+void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate = nullptr);

 };

--- a/kandinsky/src/font.cpp
+++ b/kandinsky/src/font.cpp
@@ -5,25 +5,26 @@

 constexpr static int k_tabCharacterWidth = 4;

-KDSize KDFont::stringSize(const char * text) const {
+KDSize KDFont::stringSizeUntil(const char * text, const char * limit) const {
  if (text == nullptr) {
    return KDSizeZero;
  }
  KDSize stringSize = KDSize(0, m_glyphSize.height());

  UTF8Decoder decoder(text);
+  const char * currentStringPosition = decoder.stringPosition();
  CodePoint codePoint = decoder.nextCodePoint();
-  while (codePoint != KDCodePointNull) {
+  while (codePoint != KDCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
    KDSize cSize = KDSize(m_glyphSize.width(), 0);
    if (codePoint == KDCodePointLineFeed) {
      cSize = KDSize(0, m_glyphSize.height());
-      codePoint = decoder.nextCodePoint();
    } else if (codePoint == KDCodePointTabulation) {
-      cSize = KDSize(k_tabCharacterWidth*m_glyphSize.width(), 0);
+      cSize = KDSize(k_tabCharacterWidth * m_glyphSize.width(), 0);
    } else if (codePoint.isCombining()) {
      cSize = KDSizeZero;
    }
-    stringSize = KDSize(stringSize.width()+cSize.width(), stringSize.height()+cSize.height());
+    stringSize = KDSize(stringSize.width() + cSize.width(), stringSize.height() + cSize.height());
+    currentStringPosition = decoder.stringPosition();
    codePoint = decoder.nextCodePoint();
  }
  return stringSize;
--- a/kandinsky/src/unicode/utf8_decoder.cpp
+++ b/kandinsky/src/unicode/utf8_decoder.cpp
@@ -16,15 +16,45 @@ static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
 }

 CodePoint UTF8Decoder::nextCodePoint() {
-  int leadingOnes = leading_ones(*m_string);
-  uint32_t result = last_k_bits(*m_string++, 8-leadingOnes-1);
-  for (int i=0; i<(leadingOnes-1); i++) {
+  assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0); // Do not decode past the terminating null char
+  int leadingOnes = leading_ones(*m_stringPosition);
+  uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
+  for (int i = 0; i < leadingOnes - 1; i++) {
    result <<= 6;
-    result += (*m_string++ & 0x3F);
+    result += (*m_stringPosition++ & 0x3F);
  }
  return CodePoint(result);
 }

+CodePoint UTF8Decoder::previousCodePoint() {
+  assert(m_stringPosition > m_string);
+  if (leading_ones(*(m_stringPosition - 1)) == 0) {
+    // The current code point is one char long
+    m_stringPosition--;
+    return *m_stringPosition;
+  }
+  // The current code point spans over multiple chars
+  uint32_t result = 0;
+  int i = 0;
+  int leadingOnes = 1;
+  m_stringPosition--;
+  assert(leading_ones(*m_stringPosition) == 1);
+  while (leadingOnes == 1) { // Walk back over the trailing chars, least significant 6 bits first
+    assert(m_stringPosition > m_string);
+    result += (*m_stringPosition & 0x3F) << (6 * i);
+    i++;
+    m_stringPosition--;
+    leadingOnes = leading_ones(*m_stringPosition);
+  }
+
+  assert(i <= 3);
+  assert(leadingOnes > 1 && leadingOnes <= 4);
+  assert(m_stringPosition >= m_string);
+
+  result += static_cast<uint32_t>(last_k_bits(*m_stringPosition, 8-leadingOnes-1)) << (6 * i); // First char holds the most significant bits
+  return CodePoint(result);
+}
+
 size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
  constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
  char buffer[bufferSize];
--- a/kandinsky/src/unicode/utf8_helper.cpp
+++ b/kandinsky/src/unicode/utf8_helper.cpp
@@ -23,7 +23,7 @@ const char * CodePointSearch(const char * s, CodePoint c) {
  return nullptr;
 }

-void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate) {
+void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) {
  UTF8Decoder decoder(src);
  const char * currentPointer = src;
  const char * maxPointer = src + strlen(src) + 1;
@@ -38,9 +38,9 @@ void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePo
      int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
      memcpy(dst + bufferIndex, currentPointer, copySize);
      bufferIndex+= copySize;
-    } else if (indexToUpdate != nullptr && currentPointer - src < *indexToUpdate) {
-      assert(*indexToUpdate >= codePointCharSize);
-      *indexToUpdate-= codePointCharSize;
+    } else if (pointerToUpdate != nullptr && currentPointer < *pointerToUpdate) {
+      assert(*pointerToUpdate - src >= codePointCharSize);
+      *pointerToUpdate = *pointerToUpdate - codePointCharSize;
    }
    currentPointer = nextPointer;
    codePoint = decoder.nextCodePoint();