mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-18 16:27:34 +01:00
[ion/kandinsky] Move unicode to ion
This commit is contained in:
@@ -13,8 +13,6 @@ src += $(addprefix kandinsky/src/,\
|
||||
ion_context.cpp \
|
||||
point.cpp \
|
||||
rect.cpp \
|
||||
unicode/utf8_decoder.cpp\
|
||||
unicode/utf8_helper.cpp\
|
||||
)
|
||||
|
||||
src += $(addprefix kandinsky/fonts/, \
|
||||
@@ -26,7 +24,6 @@ tests += $(addprefix kandinsky/test/,\
|
||||
color.cpp\
|
||||
font.cpp\
|
||||
rect.cpp\
|
||||
utf8_decoder.cpp\
|
||||
)
|
||||
|
||||
RASTERIZER_CFLAGS := -std=c99 `pkg-config freetype2 --cflags`
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <stddef.h>
|
||||
#include <kandinsky/size.h>
|
||||
#include <kandinsky/coordinate.h>
|
||||
#include <kandinsky/unicode/code_point.h>
|
||||
#include <ion/unicode/code_point.h>
|
||||
#include "palette.h"
|
||||
|
||||
/* We use UTF-8 encoding. This means that a character is encoded as a code point
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
#ifndef KANDINSKY_UNICODE_CODE_POINT_H
|
||||
#define KANDINSKY_UNICODE_CODE_POINT_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* A Unicode code point, stored as a 32-bit value. It converts implicitly from
 * and to uint32_t, so it can be built from and compared with integer
 * literals. */
class CodePoint {
public:
  // Upper bound on the number of chars needed to store one code point
  constexpr static int MaxCodePointCharLength = sizeof(uint32_t) / sizeof(char);
  constexpr CodePoint(uint32_t c) : m_code(c) {}
  constexpr operator uint32_t() const { return m_code; }

  /* Returns true if the code point lies in the range U+0300 to U+036F
   * (bounds included). */
  constexpr bool isCombining() const {
    return (m_code >= 0x0300 && m_code <= 0x036F);
  }
private:
  uint32_t m_code;
};
|
||||
|
||||
|
||||
// TODO LEA Remove unneeded values
|
||||
static constexpr CodePoint KDCodePointNull = 0x0;
|
||||
static constexpr CodePoint KDCodePointTabulation = 0x9;
|
||||
static constexpr CodePoint KDCodePointLineFeed = 0xa;
|
||||
|
||||
/* 0x11, 0x12, 0x13, 0x14 represent DEVICE CONTROL ONE TO FOUR. They are not
|
||||
* used, so we can use them for another purpose */
|
||||
static constexpr CodePoint KDCodePointEmpty = 0x11; // Used to be parsed into EmptyExpression
|
||||
static constexpr CodePoint KDCodePointLeftSuperscript = 0x12; // Used to parse Power
|
||||
static constexpr CodePoint KDCodePointRightSuperscript = 0x13; // Used to parse Power
|
||||
static constexpr CodePoint KDCodePointUnknownX = 0x14; // Used to store expressions
|
||||
|
||||
static constexpr CodePoint KDCodePointSpace = 0x20; //
|
||||
static constexpr CodePoint KDCodePointDegree = 0xb0; // °
|
||||
static constexpr CodePoint KDCodePointMiddleDot = 0xb7; // ·
|
||||
static constexpr CodePoint KDCodePointMultiplicationSign = 0xd7; // ×
|
||||
static constexpr CodePoint KDCodePointGreekCapitalLetterGamma = 0x393; // Γ
|
||||
static constexpr CodePoint KDCodePointGreekCapitalLetterDelta = 0x394; // Δ
|
||||
static constexpr CodePoint KDCodePointGreekSmallLetterTheta = 0x3b8; // θ
|
||||
static constexpr CodePoint KDCodePointGreekSmallLetterLambda = 0x3bb; // λ
|
||||
static constexpr CodePoint KDCodePointGreekSmallLetterPi = 0x3c0; // π
|
||||
static constexpr CodePoint KDCodePointGreekSmallLetterSigma = 0x3c3; // σ
|
||||
static constexpr CodePoint KDCodePointLatinLetterSmallCapitalE = 0x1d07; // ᴇ
|
||||
static constexpr CodePoint KDCodePointScriptSmallE = 0x212f; // ℯ
|
||||
static constexpr CodePoint KDCodePointRightwardsArrow = 0x2192; // →
|
||||
static constexpr CodePoint KDCodePointNArySummation = 0x2211; // ∑
|
||||
static constexpr CodePoint KDCodePointSquareRoot = 0x221a; // √
|
||||
static constexpr CodePoint KDCodePointIntegral = 0x222b; // ∫
|
||||
static constexpr CodePoint KDCodePointAlmostEqualTo = 0x2248; // ≈
|
||||
static constexpr CodePoint KDCodePointLessThanOrEqualTo = 0x2264; // ≤
|
||||
static constexpr CodePoint KDCodePointGreaterThanOrEqualTo = 0x2265; // ≥
|
||||
static constexpr CodePoint KDCodePointMathematicalBoldSmallI = 0x1d422; // 𝐢
|
||||
|
||||
#endif
|
||||
@@ -1,37 +0,0 @@
|
||||
#ifndef KANDINSKY_UNICODE_UTF8_DECODER_H
|
||||
#define KANDINSKY_UNICODE_UTF8_DECODER_H
|
||||
|
||||
#include "code_point.h"
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* UTF-8 encodes all valid code points using at most 4 bytes (= 32 bits), the
|
||||
* lowest codes being equal to ASCII codes. There are less than 2^21 different
|
||||
* UTF-8 valid code points.
|
||||
*
|
||||
* The encoding is the following:
|
||||
* For code points between ... -> The corresponding bits are ...
|
||||
* 0 and 7F -> 0xxxxxxx
|
||||
* 80 and 7FF -> 110xxxxx 10xxxxxx
|
||||
* 800 and FFFF -> 1110xxxx 10xxxxxx 10xxxxxx
|
||||
* 10000 and 10FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
|
||||
class UTF8Decoder {
|
||||
public:
|
||||
UTF8Decoder(const char * string, const char * initialPosition = nullptr) :
|
||||
m_string(string),
|
||||
m_stringPosition(initialPosition == nullptr ? string : initialPosition)
|
||||
{
|
||||
assert(m_string != nullptr);
|
||||
}
|
||||
CodePoint nextCodePoint();
|
||||
CodePoint previousCodePoint();
|
||||
const char * stringPosition() const { return m_stringPosition; }
|
||||
static size_t CharSizeOfCodePoint(CodePoint c);
|
||||
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
|
||||
private:
|
||||
const char * const m_string;
|
||||
const char * m_stringPosition;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,17 +0,0 @@
|
||||
#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
|
||||
#define KANDINSKY_UNICODE_UTF8_HELPER_H
|
||||
|
||||
#include "code_point.h"
|
||||
#include <stddef.h>
|
||||
|
||||
namespace UTF8Helper {
|
||||
|
||||
const char * CodePointSearch(const char * s, CodePoint c);
|
||||
/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
|
||||
* It also updates an index that should be lower if code points where removed
|
||||
* before it. */
|
||||
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * indexToDUpdate = nullptr);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <assert.h>
|
||||
#include <kandinsky/context.h>
|
||||
#include <kandinsky/font.h>
|
||||
#include <kandinsky/unicode/utf8_decoder.h>
|
||||
#include <ion/unicode/utf8_decoder.h>
|
||||
|
||||
constexpr static int k_tabCharacterWidth = 4;
|
||||
|
||||
@@ -14,11 +14,11 @@ KDPoint KDContext::drawString(const char * text, KDPoint p, const KDFont * font,
|
||||
|
||||
UTF8Decoder decoder(text);
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
while (codePoint != KDCodePointNull) {
|
||||
if (codePoint == KDCodePointLineFeed) {
|
||||
while (codePoint != UCodePointNull) {
|
||||
if (codePoint == UCodePointLineFeed) {
|
||||
position = KDPoint(0, position.y() + glyphSize.height());
|
||||
codePoint = decoder.nextCodePoint();
|
||||
} else if (codePoint == KDCodePointTabulation) {
|
||||
} else if (codePoint == UCodePointTabulation) {
|
||||
position = position.translatedBy(KDPoint(k_tabCharacterWidth * glyphSize.width(), 0));
|
||||
codePoint = decoder.nextCodePoint();
|
||||
} else {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <assert.h>
|
||||
#include <kandinsky/font.h>
|
||||
#include <ion.h>
|
||||
#include <kandinsky/unicode/utf8_decoder.h>
|
||||
#include <ion/unicode/utf8_decoder.h>
|
||||
|
||||
constexpr static int k_tabCharacterWidth = 4;
|
||||
|
||||
@@ -14,11 +14,11 @@ KDSize KDFont::stringSizeUntil(const char * text, const char * limit) const {
|
||||
UTF8Decoder decoder(text);
|
||||
const char * currentStringPosition = decoder.stringPosition();
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
while (codePoint != KDCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
|
||||
while (codePoint != UCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
|
||||
KDSize cSize = KDSize(m_glyphSize.width(), 0);
|
||||
if (codePoint == KDCodePointLineFeed) {
|
||||
if (codePoint == UCodePointLineFeed) {
|
||||
cSize = KDSize(0, m_glyphSize.height());
|
||||
} else if (codePoint == KDCodePointTabulation) {
|
||||
} else if (codePoint == UCodePointTabulation) {
|
||||
cSize = KDSize(k_tabCharacterWidth * m_glyphSize.width(), 0);
|
||||
} else if (codePoint.isCombining()) {
|
||||
cSize = KDSizeZero;
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
#include <kandinsky/unicode/utf8_decoder.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* Returns the number of consecutive set bits of value, counted from the most
 * significant bit. For a UTF-8 leading byte this is the length of the encoded
 * sequence (0 meaning a single-byte code point); 1 means a continuation
 * byte. */
static inline int leading_ones(uint8_t value) {
  int count = 0;
  while (count < 8 && (value & (0x80 >> count))) {
    count++;
  }
  /* 0xFF is not expected here. Still return a value so that builds without
   * asserts do not fall off the end of a non-void function. */
  assert(count < 8);
  return count;
}
|
||||
|
||||
/* Returns value with only its `bits` least significant bits kept. Asking for
 * 8 bits or more returns value unchanged, which avoids shifting by an
 * out-of-range amount. */
static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
  if (bits >= 8) {
    return value;
  }
  return value & static_cast<uint8_t>((1u << bits) - 1u);
}
|
||||
|
||||
/* Decodes the code point starting at the current position and moves the
 * position past it. If the string ends in the middle of a multi-byte sequence,
 * the null code point is returned and the position is left on the
 * terminator. */
CodePoint UTF8Decoder::nextCodePoint() {
  // We should not read after the end of the string
  assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0);
  const int leadingOnes = leading_ones(*m_stringPosition);
  // The leading byte holds the most significant bits of the code point
  uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
  for (int i = 0; i < leadingOnes - 1; i++) {
    const char continuationByte = *m_stringPosition;
    if (continuationByte == 0) {
      /* The code point is truncated, which can happen when it was written in
       * a buffer that was too small. Do not read past the terminator. */
      return CodePoint(0);
    }
    m_stringPosition++;
    // Each continuation byte brings 6 more bits
    result <<= 6;
    result += (continuationByte & 0x3F);
  }
  return CodePoint(result);
}
|
||||
|
||||
/* Moves the position back to the first byte of the code point that ends right
 * before the current position, and returns that code point. */
CodePoint UTF8Decoder::previousCodePoint() {
  assert(m_stringPosition > m_string);
  m_stringPosition--;
  if (leading_ones(*m_stringPosition) == 0) {
    // The current code point is one char long
    return *m_stringPosition;
  }
  // The current code point spans over multiple chars
  assert(leading_ones(*m_stringPosition) == 1);
  uint32_t result = 0;
  int i = 0;
  int leadingOnes = 1;
  // Walk back over the continuation bytes, least significant bits first
  while (leadingOnes == 1) {
    assert(m_stringPosition > m_string);
    result += static_cast<uint32_t>(*m_stringPosition & 0x3F) << (6 * i);
    i++;
    m_stringPosition--;
    leadingOnes = leading_ones(*m_stringPosition);
  }

  assert(i <= 3);
  assert(leadingOnes > 1 && leadingOnes <= 4);
  assert(m_stringPosition >= m_string);

  /* The leading byte holds the most significant bits: they go above the
   * 6 * i bits already gathered from the continuation bytes. */
  result += static_cast<uint32_t>(last_k_bits(*m_stringPosition, 8-leadingOnes-1)) << (6 * i);
  return CodePoint(result);
}
|
||||
|
||||
/* Returns the number of chars of the UTF-8 encoding of c, by encoding it into
 * a scratch buffer and discarding the result. */
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
  char scratch[CodePoint::MaxCodePointCharLength];
  return CodePointToChars(c, scratch, CodePoint::MaxCodePointCharLength);
}
|
||||
|
||||
/* Writes the UTF-8 encoding of c in buffer and returns the number of chars
 * written. At most bufferSize chars are written: if the buffer is too small,
 * the encoding is truncated. No null terminator is appended. */
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
  if (bufferSize <= 0) {
    return 0;
  }
  const uint32_t code = c;
  const size_t capacity = static_cast<size_t>(bufferSize);

  // Pick the length of the encoding and the marker bits of its first byte
  size_t length = 4;
  uint32_t leadingMarker = 0xF0;
  if (code <= 0x7F) {
    length = 1;
    leadingMarker = 0x00;
  } else if (code <= 0x7FF) {
    length = 2;
    leadingMarker = 0xC0;
  } else if (code <= 0xFFFF) {
    length = 3;
    leadingMarker = 0xE0;
  }

  // The first byte holds the bits above the 6 * (length - 1) lowest ones
  size_t written = 0;
  buffer[written++] = static_cast<char>(leadingMarker | (code >> (6 * (length - 1))));
  // Each following byte holds the next 6 bits, most significant first
  while (written < length && written < capacity) {
    const size_t shift = 6 * (length - 1 - written);
    buffer[written++] = static_cast<char>(0x80 | ((code >> shift) & 0x3F));
  }
  return written;
}
|
||||
@@ -1,51 +0,0 @@
|
||||
#include <kandinsky/unicode/utf8_helper.h>
|
||||
#include <kandinsky/unicode/utf8_decoder.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
namespace UTF8Helper {
|
||||
|
||||
// Returns the smaller of two ints
static inline int min(int x, int y) {
  if (y < x) {
    return y;
  }
  return x;
}
|
||||
|
||||
const char * CodePointSearch(const char * s, CodePoint c) {
|
||||
UTF8Decoder decoder(s);
|
||||
const char * currentPointer = s;
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
const char * nextPointer = decoder.stringPosition();
|
||||
while (codePoint != KDCodePointNull && codePoint != c) {
|
||||
currentPointer = nextPointer;
|
||||
codePoint = decoder.nextCodePoint();
|
||||
nextPointer = decoder.stringPosition();
|
||||
}
|
||||
if (codePoint == c) {
|
||||
return currentPointer;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) {
|
||||
UTF8Decoder decoder(src);
|
||||
const char * currentPointer = src;
|
||||
const char * maxPointer = src + strlen(src) + 1;
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
const char * nextPointer = decoder.stringPosition();
|
||||
size_t bufferIndex = 0;
|
||||
size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
|
||||
|
||||
// Remove CodePoint c
|
||||
while (currentPointer < maxPointer && bufferIndex < dstSize) {
|
||||
if (codePoint != c) {
|
||||
int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
|
||||
memcpy(dst + bufferIndex, currentPointer, copySize);
|
||||
bufferIndex+= copySize;
|
||||
} else if (pointerToUpdate != nullptr && currentPointer < *pointerToUpdate) {
|
||||
assert(*pointerToUpdate - src >= codePointCharSize);
|
||||
*pointerToUpdate = *pointerToUpdate - codePointCharSize;
|
||||
}
|
||||
currentPointer = nextPointer;
|
||||
codePoint = decoder.nextCodePoint();
|
||||
nextPointer = decoder.stringPosition();
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
Reference in New Issue
Block a user