[ion/kandinsky] Move unicode to ion

This commit is contained in:
Léa Saviot
2019-01-23 11:00:33 +01:00
committed by Émilie Feral
parent 519e12fd26
commit b6eb663f53
72 changed files with 241 additions and 226 deletions

View File

@@ -13,8 +13,6 @@ src += $(addprefix kandinsky/src/,\
ion_context.cpp \
point.cpp \
rect.cpp \
unicode/utf8_decoder.cpp\
unicode/utf8_helper.cpp\
)
src += $(addprefix kandinsky/fonts/, \
@@ -26,7 +24,6 @@ tests += $(addprefix kandinsky/test/,\
color.cpp\
font.cpp\
rect.cpp\
utf8_decoder.cpp\
)
RASTERIZER_CFLAGS := -std=c99 `pkg-config freetype2 --cflags`

View File

@@ -5,7 +5,7 @@
#include <stddef.h>
#include <kandinsky/size.h>
#include <kandinsky/coordinate.h>
#include <kandinsky/unicode/code_point.h>
#include <ion/unicode/code_point.h>
#include "palette.h"
/* We use UTF-8 encoding. This means that a character is encoded as a code point

View File

@@ -1,53 +0,0 @@
#ifndef KANDINSKY_UNICODE_CODE_POINT_H
#define KANDINSKY_UNICODE_CODE_POINT_H
#include <stdint.h>
/* A Unicode code point, stored as its 32-bit numeric value. The class
 * converts implicitly from and to uint32_t so that code points can be written
 * as plain integer literals and compared with the usual operators. */
class CodePoint {
public:
  // Size in chars of the scratch buffers used to encode one code point
  constexpr static int MaxCodePointCharLength = sizeof(uint32_t) / sizeof(char);
  // Implicit on purpose: allows "CodePoint c = 0x3c0;"
  constexpr CodePoint(uint32_t c) : m_code(c) {}
  constexpr operator uint32_t() const { return m_code; }
  /* Returns true if the code point lies in the U+0300..U+036F range (Combining
   * Diacritical Marks). constexpr so that it can also be evaluated at compile
   * time on constexpr code points. */
  constexpr bool isCombining() const {
    return (m_code >= 0x0300 && m_code <= 0x036F);
  }
private:
  uint32_t m_code;
};
/* Named constants for the code points referenced by name in the code base.
 * Each trailing comment shows the glyph of the code point, or explains what
 * the value is used for. */
// TODO LEA Remove unneeded values
static constexpr CodePoint KDCodePointNull = 0x0;
static constexpr CodePoint KDCodePointTabulation = 0x9;
static constexpr CodePoint KDCodePointLineFeed = 0xa;
/* 0x11, 0x12, 0x13, 0x14 represent DEVICE CONTROL ONE TO FOUR. They are not
* used, so we can use them for another purpose */
static constexpr CodePoint KDCodePointEmpty = 0x11; // Used to be parsed into EmptyExpression
static constexpr CodePoint KDCodePointLeftSuperscript = 0x12; // Used to parse Power
static constexpr CodePoint KDCodePointRightSuperscript = 0x13; // Used to parse Power
static constexpr CodePoint KDCodePointUnknownX = 0x14; // Used to store expressions
static constexpr CodePoint KDCodePointSpace = 0x20; // (space)
static constexpr CodePoint KDCodePointDegree = 0xb0; // °
static constexpr CodePoint KDCodePointMiddleDot = 0xb7; // ·
static constexpr CodePoint KDCodePointMultiplicationSign = 0xd7; // ×
static constexpr CodePoint KDCodePointGreekCapitalLetterGamma = 0x393; // Γ
static constexpr CodePoint KDCodePointGreekCapitalLetterDelta = 0x394; // Δ
static constexpr CodePoint KDCodePointGreekSmallLetterTheta = 0x3b8; // θ
static constexpr CodePoint KDCodePointGreekSmallLetterLambda = 0x3bb; // λ
static constexpr CodePoint KDCodePointGreekSmallLetterPi = 0x3c0; // π
static constexpr CodePoint KDCodePointGreekSmallLetterSigma = 0x3c3; // σ
static constexpr CodePoint KDCodePointLatinLetterSmallCapitalE = 0x1d07; // ᴇ
static constexpr CodePoint KDCodePointScriptSmallE = 0x212f; // ℯ
static constexpr CodePoint KDCodePointRightwardsArrow = 0x2192; // →
static constexpr CodePoint KDCodePointNArySummation = 0x2211; // ∑
static constexpr CodePoint KDCodePointSquareRoot = 0x221a; // √
static constexpr CodePoint KDCodePointIntegral = 0x222b; // ∫
static constexpr CodePoint KDCodePointAlmostEqualTo = 0x2248; // ≈
static constexpr CodePoint KDCodePointLessThanOrEqualTo = 0x2264; // ≤
static constexpr CodePoint KDCodePointGreaterThanOrEqualTo = 0x2265; // ≥
static constexpr CodePoint KDCodePointMathematicalBoldSmallI = 0x1d422; // 𝐢
#endif

View File

@@ -1,37 +0,0 @@
#ifndef KANDINSKY_UNICODE_UTF8_DECODER_H
#define KANDINSKY_UNICODE_UTF8_DECODER_H
#include "code_point.h"
#include <stddef.h>
#include <assert.h>
/* UTF-8 encodes all valid code points using at most 4 bytes (= 32 bits, of
* which at most 21 carry code point data), the lowest codes being equal to
* ASCII codes. There are fewer than 2^21 valid Unicode code points.
*
* The encoding is the following:
* For code points between ... -> The corresponding bits are ...
* 0 and 7F -> 0xxxxxxx
* 80 and 7FF -> 110xxxxx 10xxxxxx
* 800 and FFFF -> 1110xxxx 10xxxxxx 10xxxxxx
* 10000 and 10FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
/* Cursor over a null-terminated UTF-8 string that can decode the code point
 * located after or before its current position. */
class UTF8Decoder {
public:
  /* string is the start of the text and must not be null. Decoding starts at
   * initialPosition when it is given, at the start of the text otherwise. */
  UTF8Decoder(const char * string, const char * initialPosition = nullptr) :
    m_string(string),
    m_stringPosition(initialPosition != nullptr ? initialPosition : string)
  {
    assert(m_string != nullptr);
  }

  // Decode the code point at the current position and move past it
  CodePoint nextCodePoint();
  // Move back to the start of the preceding code point and decode it
  CodePoint previousCodePoint();
  // Current position of the cursor inside the string
  const char * stringPosition() const { return m_stringPosition; }

  // Number of chars of the UTF-8 encoding of c
  static size_t CharSizeOfCodePoint(CodePoint c);
  /* Write the UTF-8 encoding of c in buffer, using at most bufferSize chars.
   * Returns the number of chars written. No null terminator is appended. */
  static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);

private:
  const char * const m_string;   // Start of the text
  const char * m_stringPosition; // Next char to decode
};
#endif

View File

@@ -1,17 +0,0 @@
#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
#define KANDINSKY_UNICODE_UTF8_HELPER_H
#include "code_point.h"
#include <stddef.h>
namespace UTF8Helper {

/* Returns a pointer to the first occurrence of the code point c in the
 * null-terminated UTF-8 string s, or nullptr if c is not found. */
const char * CodePointSearch(const char * s, CodePoint c);

/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
 * dstSize is the size of dst in chars. If pointerToUpdate is not null, the
 * pointer it designates (a position inside src) is moved back when code points
 * were removed before it. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate = nullptr);

}
#endif

View File

@@ -1,7 +1,7 @@
#include <assert.h>
#include <kandinsky/context.h>
#include <kandinsky/font.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <ion/unicode/utf8_decoder.h>
constexpr static int k_tabCharacterWidth = 4;
@@ -14,11 +14,11 @@ KDPoint KDContext::drawString(const char * text, KDPoint p, const KDFont * font,
UTF8Decoder decoder(text);
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull) {
if (codePoint == KDCodePointLineFeed) {
while (codePoint != UCodePointNull) {
if (codePoint == UCodePointLineFeed) {
position = KDPoint(0, position.y() + glyphSize.height());
codePoint = decoder.nextCodePoint();
} else if (codePoint == KDCodePointTabulation) {
} else if (codePoint == UCodePointTabulation) {
position = position.translatedBy(KDPoint(k_tabCharacterWidth * glyphSize.width(), 0));
codePoint = decoder.nextCodePoint();
} else {

View File

@@ -1,7 +1,7 @@
#include <assert.h>
#include <kandinsky/font.h>
#include <ion.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <ion/unicode/utf8_decoder.h>
constexpr static int k_tabCharacterWidth = 4;
@@ -14,11 +14,11 @@ KDSize KDFont::stringSizeUntil(const char * text, const char * limit) const {
UTF8Decoder decoder(text);
const char * currentStringPosition = decoder.stringPosition();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
while (codePoint != UCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
KDSize cSize = KDSize(m_glyphSize.width(), 0);
if (codePoint == KDCodePointLineFeed) {
if (codePoint == UCodePointLineFeed) {
cSize = KDSize(0, m_glyphSize.height());
} else if (codePoint == KDCodePointTabulation) {
} else if (codePoint == UCodePointTabulation) {
cSize = KDSize(k_tabCharacterWidth * m_glyphSize.width(), 0);
} else if (codePoint.isCombining()) {
cSize = KDSizeZero;

View File

@@ -1,91 +0,0 @@
#include <kandinsky/unicode/utf8_decoder.h>
#include <assert.h>
/* Returns the number of consecutive 1 bits at the most significant end of
 * value. In UTF-8, this is 0 for a single-char code point, 1 for a
 * continuation char and the length of the sequence for a leading char. */
static inline int leading_ones(uint8_t value) {
  for (int i = 0; i < 8; i++) {
    if (!(value & 0x80)) {
      return i;
    }
    value = value << 1;
  }
  // Only reached for 0xFF, which never appears in valid UTF-8
  assert(false);
  /* When asserts are compiled out, still return a value: flowing off the end
   * of a non-void function is undefined behaviour. */
  return 8;
}
// Keeps only the 'bits' least significant bits of value
static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
  const int lowBitsMask = (1 << bits) - 1;
  return value & lowBitsMask;
}
/* Decodes the code point that starts at the current position and moves the
 * position to the char following it. */
CodePoint UTF8Decoder::nextCodePoint() {
  /* We must not decode past the end of the string: unless we are at the very
   * start, the previous char cannot be the null terminator. */
  assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0);
  // The leading char tells how many chars the code point spans
  int leadingOnes = leading_ones(*m_stringPosition);
  // Payload of the leading char: what follows the leading ones and the 0 bit
  uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
  // Each continuation char (10xxxxxx) brings 6 more bits
  for (int i = 0; i < leadingOnes - 1; i++) {
    result <<= 6;
    result += (*m_stringPosition++ & 0x3F);
  }
  return CodePoint(result);
}
/* Moves the position back to the first char of the code point that ends just
 * before it, and returns that code point. Must not be called when the
 * position is at the start of the string. */
CodePoint UTF8Decoder::previousCodePoint() {
  assert(m_stringPosition > m_string);
  if (leading_ones(*(m_stringPosition - 1)) == 0) {
    // The current code point is one char long
    m_stringPosition--;
    return *m_stringPosition;
  }
  // The current code point spans over multiple chars
  uint32_t result = 0;
  int i = 0;
  int leadingOnes = 1;
  m_stringPosition--;
  assert(leading_ones(*m_stringPosition) == 1);
  /* Walk back over the continuation chars (10xxxxxx). The last one holds the
   * 6 least significant bits, the one before the next 6 bits, and so on. */
  while (leadingOnes == 1) {
    assert(m_stringPosition > m_string);
    result += (*m_stringPosition & 0x3F) << (6 * i);
    i++;
    m_stringPosition--;
    leadingOnes = leading_ones(*m_stringPosition);
  }
  assert(i <= 3);
  assert(leadingOnes > 1 && leadingOnes <= 4);
  assert(m_stringPosition >= m_string);
  /* The leading char holds the most significant bits: they go above the
   * 6 * i bits gathered from the continuation chars. */
  result += last_k_bits(*m_stringPosition, 8-leadingOnes-1) << (6 * i);
  return CodePoint(result);
}
/* Returns the number of chars of the UTF-8 encoding of c, obtained by
 * encoding it into a scratch buffer. */
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
  char scratch[CodePoint::MaxCodePointCharLength];
  return CodePointToChars(c, scratch, CodePoint::MaxCodePointCharLength);
}
/* Writes the UTF-8 encoding of c at the start of buffer and returns the number
 * of chars written. At most bufferSize chars are written: if the buffer is too
 * small, the encoding is truncated. No null terminator is appended. */
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
  if (bufferSize <= 0) {
    return 0;
  }
  const uint32_t code = c;
  // Length of the full encoding and marker bits of its leading char
  int encodedLength = 4;
  uint32_t leadingMarker = 0xF0;
  if (code <= 0x7F) {
    encodedLength = 1;
    leadingMarker = 0x00;
  } else if (code <= 0x7FF) {
    encodedLength = 2;
    leadingMarker = 0xC0;
  } else if (code <= 0xFFFF) {
    encodedLength = 3;
    leadingMarker = 0xE0;
  }
  const size_t capacity = static_cast<size_t>(bufferSize);
  size_t written = 0;
  // Leading char: marker bits followed by the most significant payload bits
  buffer[written++] = leadingMarker | (code >> (6 * (encodedLength - 1)));
  // Continuation chars: 10xxxxxx, from most to least significant 6-bit group
  for (int shift = 6 * (encodedLength - 2); shift >= 0 && written < capacity; shift -= 6) {
    buffer[written++] = 0x80 | ((code >> shift) & 0x3F);
  }
  return written;
}

View File

@@ -1,51 +0,0 @@
#include <kandinsky/unicode/utf8_helper.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <string.h>
#include <assert.h>
namespace UTF8Helper {

/* Returns a pointer to the first occurrence of the code point c in the
 * null-terminated UTF-8 string s, or nullptr if c does not occur. Searching
 * for the null code point returns a pointer to the terminator. */
const char * CodePointSearch(const char * s, CodePoint c) {
  UTF8Decoder decoder(s);
  const char * currentPointer = s;
  CodePoint codePoint = decoder.nextCodePoint();
  while (codePoint != KDCodePointNull && codePoint != c) {
    // The decoder now points at the start of the code point decoded next
    currentPointer = decoder.stringPosition();
    codePoint = decoder.nextCodePoint();
  }
  if (codePoint == c) {
    return currentPointer;
  }
  return nullptr;
}

/* Copies the null-terminated UTF-8 string src into dst, skipping every
 * occurrence of the code point c. dst holds dstSize chars; the result is
 * always null-terminated (unless dstSize is 0, in which case nothing is
 * written) and, if dst is too small, it is truncated between two code points.
 * If pointerToUpdate is not null, *pointerToUpdate is a position inside src:
 * it is moved back by the size of each removed code point located before it,
 * so that its offset from src designates the same text in dst. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) {
  if (dstSize == 0) {
    return;
  }
  UTF8Decoder decoder(src);
  const char * currentPointer = src;
  CodePoint codePoint = decoder.nextCodePoint();
  const char * nextPointer = decoder.stringPosition();
  size_t bufferIndex = 0;
  const size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
  /* Removed code points are compared with the position the pointer had on
   * entry: comparing with the already shifted pointer would miss some of the
   * code points removed before it. */
  const char * const initialPointerToUpdate = pointerToUpdate != nullptr ? *pointerToUpdate : nullptr;

  /* Stop at the null terminator: decoding further would read past the end of
   * src. The terminator is written after the loop. */
  while (codePoint != KDCodePointNull) {
    if (codePoint != c) {
      const size_t copySize = nextPointer - currentPointer;
      if (copySize >= dstSize - bufferIndex) {
        // No room left for the whole code point and the null terminator
        break;
      }
      memcpy(dst + bufferIndex, currentPointer, copySize);
      bufferIndex += copySize;
    } else if (pointerToUpdate != nullptr && currentPointer < initialPointerToUpdate) {
      assert(static_cast<size_t>(*pointerToUpdate - src) >= codePointCharSize);
      *pointerToUpdate = *pointerToUpdate - codePointCharSize;
    }
    currentPointer = nextPointer;
    codePoint = decoder.nextCodePoint();
    nextPointer = decoder.stringPosition();
  }
  // bufferIndex < dstSize always holds here
  dst[bufferIndex] = 0;
}

}