[escher] Fix text inputs so they use UTF8

This commit is contained in:
Léa Saviot
2019-01-18 16:41:39 +01:00
committed by Émilie Feral
parent 242bcda631
commit 5142c071df
24 changed files with 510 additions and 360 deletions

View File

@@ -32,7 +32,10 @@ public:
static constexpr const KDFont * LargeFont = &privateLargeFont;
static constexpr const KDFont * SmallFont = &privateSmallFont;
KDSize stringSize(const char * text) const;
/* Size of the whole text: forwards to stringSizeUntil with no limit. */
KDSize stringSize(const char * text) const {
return stringSizeUntil(text, nullptr);
}
/* Size of text, measuring only the code points that start before limit.
 * A null limit measures up to the null code point. */
KDSize stringSizeUntil(const char * text, const char * limit) const;
union GlyphBuffer {
public:

View File

@@ -1,8 +1,9 @@
#ifndef KANDINSKY_UNICODE_UTF8_DECODER_H
#define KANDINSKY_UNICODE_UTF8_DECODER_H
#include <stddef.h>
#include "code_point.h"
#include <stddef.h>
#include <assert.h>
/* UTF-8 encodes all valid code points using at most 4 bytes (= 32 bits), the
* lowest codes being equal to ASCII codes. There are fewer than 2^21 different
@@ -17,13 +18,20 @@
/* Walks a UTF-8 encoded string one code point at a time, forwards or
 * backwards, keeping track of the current position in the string. */
class UTF8Decoder {
public:
UTF8Decoder(const char * string) : m_string(string) {}
/* string must not be null (asserted). Decoding starts at initialPosition, or
 * at the beginning of string when initialPosition is nullptr. */
UTF8Decoder(const char * string, const char * initialPosition = nullptr) :
m_string(string),
m_stringPosition(initialPosition == nullptr ? string : initialPosition)
{
assert(m_string != nullptr);
}
// Returns the code point at the current position and moves past it
CodePoint nextCodePoint();
const char * stringPosition() const { return m_string; }
/* Moves back to the first char of the preceding code point and returns that
 * code point. The position must be after the beginning of the string. */
CodePoint previousCodePoint();
// Current position in the string
const char * stringPosition() const { return m_stringPosition; }
static size_t CharSizeOfCodePoint(CodePoint c);
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
private:
const char * m_string;
// Beginning of the string; previousCodePoint never moves before it
const char * const m_string;
// Where the next call to nextCodePoint or previousCodePoint starts from
const char * m_stringPosition;
};
#endif

View File

@@ -10,7 +10,7 @@ const char * CodePointSearch(const char * s, CodePoint c);
/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
* It also updates an index that should be lower if code points were removed
* before it. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToDUpdate = nullptr);
/* pointerToUpdate, when not null, points to a position inside src; it is moved
 * back by the char size of every removed code point located before it. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate = nullptr);
};

View File

@@ -5,25 +5,26 @@
constexpr static int k_tabCharacterWidth = 4;
KDSize KDFont::stringSize(const char * text) const {
KDSize KDFont::stringSizeUntil(const char * text, const char * limit) const {
if (text == nullptr) {
return KDSizeZero;
}
KDSize stringSize = KDSize(0, m_glyphSize.height());
UTF8Decoder decoder(text);
const char * currentStringPosition = decoder.stringPosition();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull) {
while (codePoint != KDCodePointNull && (limit == nullptr || currentStringPosition < limit)) {
KDSize cSize = KDSize(m_glyphSize.width(), 0);
if (codePoint == KDCodePointLineFeed) {
cSize = KDSize(0, m_glyphSize.height());
codePoint = decoder.nextCodePoint();
} else if (codePoint == KDCodePointTabulation) {
cSize = KDSize(k_tabCharacterWidth*m_glyphSize.width(), 0);
cSize = KDSize(k_tabCharacterWidth * m_glyphSize.width(), 0);
} else if (codePoint.isCombining()) {
cSize = KDSizeZero;
}
stringSize = KDSize(stringSize.width()+cSize.width(), stringSize.height()+cSize.height());
stringSize = KDSize(stringSize.width() + cSize.width(), stringSize.height() + cSize.height());
currentStringPosition = decoder.stringPosition();
codePoint = decoder.nextCodePoint();
}
return stringSize;

View File

@@ -16,15 +16,45 @@ static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
}
/* Decodes the code point that starts at the current position and moves the
 * position just past it. Asserts that the position is either the beginning of
 * the string or not preceded by a null char, i.e. that the decoder has not
 * already stepped past the end of the string. The first char provides the
 * high bits of the result, then leadingOnes - 1 further chars each contribute
 * their 6 low bits. */
CodePoint UTF8Decoder::nextCodePoint() {
int leadingOnes = leading_ones(*m_string);
uint32_t result = last_k_bits(*m_string++, 8-leadingOnes-1);
for (int i=0; i<(leadingOnes-1); i++) {
assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0);
int leadingOnes = leading_ones(*m_stringPosition);
uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
for (int i = 0; i < leadingOnes - 1; i++) {
result <<= 6;
result += (*m_string++ & 0x3F);
result += (*m_stringPosition++ & 0x3F);
}
return CodePoint(result);
}
/* Moves the position back to the first char of the code point that ends just
 * before the current position, and returns that code point.
 * The position must be strictly after the beginning of the string. */
CodePoint UTF8Decoder::previousCodePoint() {
  assert(m_stringPosition > m_string);
  if (leading_ones(*(m_stringPosition - 1)) == 0) {
    // The previous code point is one char long
    m_stringPosition--;
    return *m_stringPosition;
  }
  // The previous code point spans over multiple chars
  uint32_t result = 0;
  int i = 0; // Number of continuation chars read so far
  int leadingOnes = 1;
  m_stringPosition--;
  assert(leading_ones(*m_stringPosition) == 1);
  /* Walk backwards over the continuation chars (exactly one leading 1). The
   * i-th char met holds bits [6*i, 6*i+5] of the code point. */
  while (leadingOnes == 1) {
    assert(m_stringPosition > m_string);
    result += (*m_stringPosition & 0x3F) << (6 * i);
    i++;
    m_stringPosition--;
    leadingOnes = leading_ones(*m_stringPosition);
  }
  assert(i <= 3);
  assert(leadingOnes > 1 && leadingOnes <= 4);
  assert(m_stringPosition >= m_string);
  /* The first char of the sequence holds the highest bits: they must be
   * shifted above the 6 * i bits gathered from the continuation chars. */
  result += last_k_bits(*m_stringPosition, 8 - leadingOnes - 1) << (6 * i);
  return CodePoint(result);
}
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
char buffer[bufferSize];

View File

@@ -23,7 +23,7 @@ const char * CodePointSearch(const char * s, CodePoint c) {
return nullptr;
}
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate) {
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, const char * * pointerToUpdate) {
UTF8Decoder decoder(src);
const char * currentPointer = src;
const char * maxPointer = src + strlen(src) + 1;
@@ -38,9 +38,9 @@ void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePo
int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
memcpy(dst + bufferIndex, currentPointer, copySize);
bufferIndex+= copySize;
} else if (indexToUpdate != nullptr && currentPointer - src < *indexToUpdate) {
assert(*indexToUpdate >= codePointCharSize);
*indexToUpdate-= codePointCharSize;
} else if (pointerToUpdate != nullptr && currentPointer < *pointerToUpdate) {
assert(*pointerToUpdate - src >= codePointCharSize);
*pointerToUpdate = *pointerToUpdate - codePointCharSize;
}
currentPointer = nextPointer;
codePoint = decoder.nextCodePoint();