[unicode] Use the UTF8Decoder to scan const char *

This commit is contained in:
Léa Saviot
2019-01-16 17:03:30 +01:00
committed by Émilie Feral
parent 65e5adafac
commit 41afa92f10
14 changed files with 189 additions and 65 deletions

View File

@@ -14,6 +14,7 @@ src += $(addprefix kandinsky/src/,\
point.cpp \
rect.cpp \
unicode/utf8_decoder.cpp\
unicode/utf8_helper.cpp\
)
src += $(addprefix kandinsky/fonts/, \

View File

@@ -18,7 +18,10 @@
class UTF8Decoder {
public:
UTF8Decoder(const char * string) : m_string(string) {}
/* TODO: Rename methods? nextCodePoint increases m_string but
* nextCodePointPointer does not */
CodePoint nextCodePoint();
const char * nextCodePointPointer();
static size_t CharSizeOfCodePoint(CodePoint c);
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
private:

View File

@@ -0,0 +1,17 @@
#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
#define KANDINSKY_UNICODE_UTF8_HELPER_H
#include "code_point.h"
#include <stddef.h>
namespace UTF8Helper {

/* CodePointSearch scans the null-terminated UTF-8 string s and returns a
 * pointer to the first occurrence of code point c, or nullptr if the end of
 * the string is reached without finding it. */
const char * CodePointSearch(const char * s, CodePoint c);

/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
 * At most dstSize bytes of dst are written.
 * It also updates an index that should be lower if code points were removed
 * before it: when indexToUpdate is not null, the pointed value is decreased by
 * the encoded size of c for every removed occurrence located before it. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate = nullptr);

}
#endif

View File

@@ -25,6 +25,10 @@ CodePoint UTF8Decoder::nextCodePoint() {
return CodePoint(result);
}
const char * UTF8Decoder::nextCodePointPointer() {
return m_string + leading_ones(*m_string);
}
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
char buffer[bufferSize];
@@ -32,21 +36,29 @@ size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
}
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
assert(bufferSize >= CodePoint::MaxCodePointCharLength);
if (bufferSize <= 0) {
return 0;
}
size_t i = 0;
if (c <= 0x7F) {
buffer[i++] = c;
} else if (c <= 0x7FF) {
buffer[i++] = 0b11000000 | (c >> 6);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
} else if (c <= 0xFFFF) {
buffer[i++] = 0b11100000 | (c >> 12);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
} else {
buffer[i++] = 0b11110000 | (c >> 18);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 12) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
}
return i;

View File

@@ -0,0 +1,51 @@
#include <kandinsky/unicode/utf8_helper.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <string.h>
#include <assert.h>
namespace UTF8Helper {
// Returns the smaller of x and y (y when both are equal).
static inline int min(int x, int y) {
  if (x < y) {
    return x;
  }
  return y;
}
const char * CodePointSearch(const char * s, CodePoint c) {
UTF8Decoder decoder(s);
const char * currentPointer = s;
const char * nextPointer = decoder.nextCodePointPointer();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull && codePoint != c) {
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
if (codePoint == c) {
return currentPointer;
}
return nullptr;
}
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate) {
UTF8Decoder decoder(src);
const char * currentPointer = src;
const char * nextPointer = decoder.nextCodePointPointer();
const char * maxPointer = src + strlen(src) + 1;
CodePoint codePoint = decoder.nextCodePoint();
size_t bufferIndex = 0;
size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
// Remove CodePoint c
while (currentPointer < maxPointer && bufferIndex < dstSize) {
if (codePoint != c) {
int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
memcpy(dst + bufferIndex, currentPointer, copySize);
bufferIndex+= copySize;
} else if (indexToUpdate != nullptr && currentPointer - src < *indexToUpdate) {
assert(*indexToUpdate >= codePointCharSize);
*indexToUpdate-= codePointCharSize;
}
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
}
};