[ion/kandinsky] Move unicode to ion

2026-01-18 16:27:34 +01:00 · 2019-01-23 11:00:33 +01:00
parent 6b6d3d7fe2
commit bbb10f1993
72 changed files with 241 additions and 226 deletions
--- a/kandinsky/Makefile
+++ b/kandinsky/Makefile
@@ -13,8 +13,6 @@ src += $(addprefix kandinsky/src/,\
  ion_context.cpp \
  point.cpp \
  rect.cpp \
-  unicode/utf8_decoder.cpp\
-  unicode/utf8_helper.cpp\
 )

 src += $(addprefix kandinsky/fonts/, \
@@ -26,7 +24,6 @@ tests += $(addprefix kandinsky/test/,\
  color.cpp\
  font.cpp\
  rect.cpp\
-  utf8_decoder.cpp\
 )

 RASTERIZER_CFLAGS := -std=c99 `pkg-config freetype2 --cflags`
--- a/kandinsky/include/kandinsky/font.h
+++ b/kandinsky/include/kandinsky/font.h
@@ -5,7 +5,7 @@
 #include <stddef.h>
 #include <kandinsky/size.h>
 #include <kandinsky/coordinate.h>
-#include <kandinsky/unicode/code_point.h>
+#include <ion/unicode/code_point.h>
 #include "palette.h"

 /* We use UTF-8 encoding. This means that a character is encoded as a code point
--- a/kandinsky/include/kandinsky/unicode/code_point.h
+++ b/kandinsky/include/kandinsky/unicode/code_point.h
@@ -1,53 +0,0 @@
-#ifndef KANDINSKY_UNICODE_CODE_POINT_H
-#define KANDINSKY_UNICODE_CODE_POINT_H
-
-#include <stdint.h>
-
-class CodePoint { // Thin value wrapper around a 32-bit code point
-public:
-  constexpr static int MaxCodePointCharLength = sizeof(uint32_t) / sizeof(char); // Buffer size (in chars) large enough to encode any code point
-  constexpr CodePoint(uint32_t c) : m_code(c) {} // Not explicit: integer literals convert implicitly to CodePoint
-  constexpr operator uint32_t() const { return m_code; } // Lets a CodePoint be compared and shifted like a plain integer
-
-  bool isCombining() const { // True for U+0300..U+036F (Combining Diacritical Marks)
-    return (m_code >= 0x300 && m_code <= 0x036F);
-  }
-private:
-  uint32_t m_code; // The raw code point value
-};
-
-
-// TODO LEA Remove unneeded values
-static constexpr CodePoint KDCodePointNull                     = 0x0; // String terminator
-static constexpr CodePoint KDCodePointTabulation               = 0x9; // '\t'
-static constexpr CodePoint KDCodePointLineFeed                 = 0xa; // '\n'
-
-/* 0x11, 0x12, 0x13 and 0x14 are the control characters DEVICE CONTROL ONE to
- * FOUR. They are not used as such, so we repurpose them as internal markers. */
-static constexpr CodePoint KDCodePointEmpty                    = 0x11; // Used to be parsed into EmptyExpression
-static constexpr CodePoint KDCodePointLeftSuperscript          = 0x12; // Used to parse Power
-static constexpr CodePoint KDCodePointRightSuperscript         = 0x13; // Used to parse Power
-static constexpr CodePoint KDCodePointUnknownX                 = 0x14; // Used to store expressions
-
-static constexpr CodePoint KDCodePointSpace                    = 0x20;    // ' '
-static constexpr CodePoint KDCodePointDegree                   = 0xb0;    // °
-static constexpr CodePoint KDCodePointMiddleDot                = 0xb7;    // ·
-static constexpr CodePoint KDCodePointMultiplicationSign       = 0xd7;    // ×
-static constexpr CodePoint KDCodePointGreekCapitalLetterGamma  = 0x393;   // Γ
-static constexpr CodePoint KDCodePointGreekCapitalLetterDelta  = 0x394;   // Δ
-static constexpr CodePoint KDCodePointGreekSmallLetterTheta    = 0x3b8;   // θ
-static constexpr CodePoint KDCodePointGreekSmallLetterLambda   = 0x3bb;   // λ
-static constexpr CodePoint KDCodePointGreekSmallLetterPi       = 0x3c0;   // π
-static constexpr CodePoint KDCodePointGreekSmallLetterSigma    = 0x3c3;   // σ
-static constexpr CodePoint KDCodePointLatinLetterSmallCapitalE = 0x1d07;  // ᴇ
-static constexpr CodePoint KDCodePointScriptSmallE             = 0x212f;  // ℯ
-static constexpr CodePoint KDCodePointRightwardsArrow          = 0x2192;  // →
-static constexpr CodePoint KDCodePointNArySummation            = 0x2211;  // ∑
-static constexpr CodePoint KDCodePointSquareRoot               = 0x221a;  // √
-static constexpr CodePoint KDCodePointIntegral                 = 0x222b;  // ∫
-static constexpr CodePoint KDCodePointAlmostEqualTo            = 0x2248;  // ≈
-static constexpr CodePoint KDCodePointLessThanOrEqualTo        = 0x2264;  // ≤
-static constexpr CodePoint KDCodePointGreaterThanOrEqualTo     = 0x2265;  // ≥
-static constexpr CodePoint KDCodePointMathematicalBoldSmallI   = 0x1d422; // 𝐢
-
-#endif
--- a/kandinsky/include/kandinsky/unicode/utf8_decoder.h
+++ b/kandinsky/include/kandinsky/unicode/utf8_decoder.h
@@ -1,37 +0,0 @@
-#ifndef KANDINSKY_UNICODE_UTF8_DECODER_H
-#define KANDINSKY_UNICODE_UTF8_DECODER_H
-
-#include "code_point.h"
-#include <stddef.h>
-#include <assert.h>
-
-/* UTF-8 encodes every valid code point using at most 4 bytes (which carry up
- * to 21 payload bits), the lowest codes being equal to ASCII codes. There are
- * fewer than 2^21 valid code points.
- *
- * The encoding is the following:
- * For code points between ...   ->  The corresponding bits are ...
- * 0 and 7F         -> 0xxxxxxx
- * 80 and 7FF       -> 110xxxxx 10xxxxxx
- * 800 and FFFF     -> 1110xxxx 10xxxxxx 10xxxxxx
- * 10000 and 10FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
-
-class UTF8Decoder { // Cursor that walks a UTF-8 string one code point at a time, in either direction
-public:
-  UTF8Decoder(const char * string, const char * initialPosition = nullptr) : // Cursor starts at initialPosition, or at the start of string when it is nullptr
-    m_string(string),
-    m_stringPosition(initialPosition == nullptr ? string : initialPosition)
-  {
-    assert(m_string != nullptr);
-  }
-  CodePoint nextCodePoint(); // Decodes the code point at the cursor and moves the cursor past it
-  CodePoint previousCodePoint(); // Moves the cursor back by one code point and returns that code point
-  const char * stringPosition() const { return m_stringPosition; } // Current cursor, as a pointer into the string
-  static size_t CharSizeOfCodePoint(CodePoint c); // Number of chars in the UTF-8 encoding of c
-  static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize); // Encodes c into buffer (at most bufferSize chars, no terminator) and returns the number of chars written
-private:
-  const char * const m_string; // Start of the string; the cursor must not move before it
-  const char * m_stringPosition; // Cursor
-};
-
-#endif
--- a/kandinsky/include/kandinsky/unicode/utf8_helper.h
+++ b/kandinsky/include/kandinsky/unicode/utf8_helper.h
@@ -1,17 +0,0 @@
-#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
-#define KANDINSKY_UNICODE_UTF8_HELPER_H
-
-#include "code_point.h"
-#include <stddef.h>
-
-namespace UTF8Helper {
-
-const char * CodePointSearch(const char * s, CodePoint c); // Pointer to the first occurrence of c in s, or nullptr if there is none
-/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
- * If indexToDUpdate is given, the pointer it refers to is moved back by the
- * encoded size of c for each code point c that was removed before it. */
-void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * indexToDUpdate = nullptr);
-
-};
-
-#endif
--- a/kandinsky/src/context_text.cpp
+++ b/kandinsky/src/context_text.cpp
@@ -1,7 +1,7 @@
 #include <assert.h>
 #include <kandinsky/context.h>
 #include <kandinsky/font.h>
-#include <kandinsky/unicode/utf8_decoder.h>
+#include <ion/unicode/utf8_decoder.h>

 constexpr static int k_tabCharacterWidth = 4;

@@ -14,11 +14,11 @@ KDPoint KDContext::drawString(const char * text, KDPoint p, const KDFont * font,

  UTF8Decoder decoder(text);
  CodePoint codePoint = decoder.nextCodePoint();
-  while (codePoint != KDCodePointNull) {
-    if (codePoint == KDCodePointLineFeed) {
+  while (codePoint != UCodePointNull) {
+    if (codePoint == UCodePointLineFeed) {
      position = KDPoint(0, position.y() + glyphSize.height());
      codePoint = decoder.nextCodePoint();
-    } else if (codePoint == KDCodePointTabulation) {
+    } else if (codePoint == UCodePointTabulation) {
      position = position.translatedBy(KDPoint(k_tabCharacterWidth * glyphSize.width(), 0));
      codePoint = decoder.nextCodePoint();
    } else {
--- a/kandinsky/src/font.cpp
+++ b/kandinsky/src/font.cpp
@@ -1,7 +1,7 @@
 #include <assert.h>
 #include <kandinsky/font.h>
 #include <ion.h>
-#include <kandinsky/unicode/utf8_decoder.h>
+#include <ion/unicode/utf8_decoder.h>

 constexpr static int k_tabCharacterWidth = 4;

@@ -14,11 +14,11 @@ KDSize KDFont::stringSizeUntil(const char * text, const char * limit) const {
  UTF8Decoder decoder(text);
  const char * currentStringPosition = decoder.stringPosition();
  CodePoint codePoint = decoder.nextCodePoint();
-  while (codePoint != KDCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
+  while (codePoint != UCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
    KDSize cSize = KDSize(m_glyphSize.width(), 0);
-    if (codePoint == KDCodePointLineFeed) {
+    if (codePoint == UCodePointLineFeed) {
      cSize = KDSize(0, m_glyphSize.height());
-    } else if (codePoint == KDCodePointTabulation) {
+    } else if (codePoint == UCodePointTabulation) {
      cSize = KDSize(k_tabCharacterWidth * m_glyphSize.width(), 0);
    } else if (codePoint.isCombining()) {
      cSize = KDSizeZero;
--- a/kandinsky/src/unicode/utf8_decoder.cpp
+++ b/kandinsky/src/unicode/utf8_decoder.cpp
@@ -1,91 +0,0 @@
-#include <kandinsky/unicode/utf8_decoder.h>
-#include <assert.h>
-
-static inline int leading_ones(uint8_t value) {
-  // Count the consecutive 1 bits, starting from the most significant bit.
-  int count = 0;
-  for (; count < 8 && (value & 0x80); count++) {
-    value = value << 1;
-  }
-  assert(count < 8); // 0xFF never occurs in valid UTF-8
-  return count; // Always returns a value, even when asserts are compiled out
-}
-
-static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) { // Keeps only the `bits` least significant bits of value
-  return (value & ((1<<bits)-1));
-}
-
-CodePoint UTF8Decoder::nextCodePoint() { // Decodes the code point at the cursor and moves the cursor past it
-  assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0); // The cursor must not already be past the terminator
-  int leadingOnes = leading_ones(*m_stringPosition); // 0 for a one-char code point, otherwise the number of chars in the sequence
-  uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1); // Payload bits of the first char
-  for (int i = 0; i < leadingOnes - 1; i++) {
-    result <<= 6; // Each following char contributes its 6 low bits
-    result += (*m_stringPosition++ & 0x3F);
-  }
-  return CodePoint(result);
-}
-
-CodePoint UTF8Decoder::previousCodePoint() { // Moves the cursor back by one code point and returns that code point
-  assert(m_stringPosition > m_string);
-  if (leading_ones(*(m_stringPosition - 1)) == 0) {
-    // The previous code point is one char long
-    m_stringPosition--;
-    return *m_stringPosition;
-  }
-  // The previous code point spans over multiple chars: walk back over its 10xxxxxx chars
-  uint32_t result = 0;
-  int i = 0;
-  int leadingOnes = 1;
-  m_stringPosition--;
-  assert(leading_ones(*m_stringPosition) == 1);
-  while (leadingOnes == 1) {
-    assert(m_stringPosition > m_string);
-    result += (*m_stringPosition & 0x3F) << (6 * i); // The i-th char from the end holds bits [6i, 6i+5]
-    i++;
-    m_stringPosition--;
-    leadingOnes = leading_ones(*m_stringPosition);
-  }
-  // The cursor is now on the first char of the sequence, which has 2 to 4 leading ones
-  assert(i <= 3);
-  assert(leadingOnes > 1 && leadingOnes <= 4);
-  assert(m_stringPosition >= m_string);
-  // The first char holds the most significant bits: place them above the i chunks already read
-  result += static_cast<uint32_t>(last_k_bits(*m_stringPosition, 8-leadingOnes-1)) << (6 * i);
-  return CodePoint(result);
-}
-
-size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) { // Number of chars in the UTF-8 encoding of c
-  constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
-  char buffer[bufferSize]; // Scratch space: only the returned length is of interest
-  return CodePointToChars(c, buffer, bufferSize);
-}
-
-size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) { // Writes the UTF-8 encoding of c into buffer (no terminator) and returns the number of chars written
-  if (bufferSize <= 0) {
-    return 0;
-  }
-  size_t i = 0;
-  if (c <= 0x7F) { // 1 char: 0xxxxxxx
-    buffer[i++] = c;
-  } else if (c <= 0x7FF) { // 2 chars: 110xxxxx 10xxxxxx
-    buffer[i++] = 0b11000000 | (c >> 6);
-    if (bufferSize <= i) { return i; } // Buffer full: stop with a truncated sequence
-    buffer[i++] = 0b10000000 | (c & 0b111111);
-  } else if (c <= 0xFFFF) { // 3 chars: 1110xxxx 10xxxxxx 10xxxxxx
-    buffer[i++] = 0b11100000 | (c >> 12);
-    if (bufferSize <= i) { return i; }
-    buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
-    if (bufferSize <= i) { return i; }
-    buffer[i++] = 0b10000000 | (c & 0b111111);
-  } else { // 4 chars: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-    buffer[i++] = 0b11110000 | (c >> 18);
-    if (bufferSize <= i) { return i; }
-    buffer[i++] = 0b10000000 | ((c >> 12) & 0b111111);
-    if (bufferSize <= i) { return i; }
-    buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
-    if (bufferSize <= i) { return i; }
-    buffer[i++] = 0b10000000 | (c & 0b111111);
-  }
-  return i;
-}
--- a/kandinsky/src/unicode/utf8_helper.cpp
+++ b/kandinsky/src/unicode/utf8_helper.cpp
@@ -1,51 +0,0 @@
-#include <kandinsky/unicode/utf8_helper.h>
-#include <kandinsky/unicode/utf8_decoder.h>
-#include <string.h>
-#include <assert.h>
-
-namespace UTF8Helper {
-
-static inline int min(int x, int y) { return x < y ? x : y; } // Smaller of x and y
-
-const char * CodePointSearch(const char * s, CodePoint c) { // Pointer to the first occurrence of c in s, or nullptr if there is none
-  UTF8Decoder decoder(s);
-  const char * currentPointer = s;
-  CodePoint codePoint = decoder.nextCodePoint();
-  const char * nextPointer = decoder.stringPosition();
-  while (codePoint != KDCodePointNull && codePoint != c) {
-    currentPointer = nextPointer; // Start of the code point that is decoded next
-    codePoint = decoder.nextCodePoint();
-    nextPointer = decoder.stringPosition();
-  }
-  if (codePoint == c) {
-    return currentPointer;
-  }
-  return nullptr;
-}
-
-void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) { // Copies src into dst without the code points c; *pointerToUpdate (if given) is shifted back for each c removed before it
-  UTF8Decoder decoder(src);
-  const char * currentPointer = src;
-  const char * maxPointer = src + strlen(src) + 1; // One past the terminator, which is copied like any other code point
-  CodePoint codePoint = decoder.nextCodePoint();
-  const char * nextPointer = decoder.stringPosition();
-  size_t bufferIndex = 0;
-  size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
-  // Copy code point by code point, skipping c. Stops early, without terminating dst, when dst is full
-  while (currentPointer < maxPointer && bufferIndex < dstSize) {
-    if (codePoint != c) {
-      int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
-      memcpy(dst + bufferIndex, currentPointer, copySize);
-      bufferIndex+= copySize;
-    } else if (pointerToUpdate != nullptr && currentPointer < *pointerToUpdate) {
-      assert(*pointerToUpdate - src >= codePointCharSize);
-      *pointerToUpdate = *pointerToUpdate - codePointCharSize;
-    }
-    if (codePoint == KDCodePointNull) { break; } // Terminator handled: decoding further would read past the end of src
-    currentPointer = nextPointer;
-    codePoint = decoder.nextCodePoint();
-    nextPointer = decoder.stringPosition();
-  }
-}
-
-};