mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-18 16:27:34 +01:00
[unicode] Use the UTF8Decoder to scan const char *
This commit is contained in:
@@ -14,6 +14,7 @@ src += $(addprefix kandinsky/src/,\
|
||||
point.cpp \
|
||||
rect.cpp \
|
||||
unicode/utf8_decoder.cpp\
|
||||
unicode/utf8_helper.cpp\
|
||||
)
|
||||
|
||||
src += $(addprefix kandinsky/fonts/, \
|
||||
|
||||
@@ -18,7 +18,10 @@
|
||||
class UTF8Decoder {
|
||||
public:
|
||||
UTF8Decoder(const char * string) : m_string(string) {}
|
||||
/* TODO: Rename methods? nextCodePoint increases m_string but
|
||||
* nextCodePointPointer does not */
|
||||
CodePoint nextCodePoint();
|
||||
const char * nextCodePointPointer();
|
||||
static size_t CharSizeOfCodePoint(CodePoint c);
|
||||
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
|
||||
private:
|
||||
|
||||
17
kandinsky/include/kandinsky/unicode/utf8_helper.h
Normal file
17
kandinsky/include/kandinsky/unicode/utf8_helper.h
Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
|
||||
#define KANDINSKY_UNICODE_UTF8_HELPER_H
|
||||
|
||||
#include "code_point.h"
|
||||
#include <stddef.h>
|
||||
|
||||
namespace UTF8Helper {
|
||||
|
||||
const char * CodePointSearch(const char * s, CodePoint c);
|
||||
/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
|
||||
* It also updates an index that should be lower if code points where removed
|
||||
* before it. */
|
||||
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToDUpdate = nullptr);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -25,6 +25,10 @@ CodePoint UTF8Decoder::nextCodePoint() {
|
||||
return CodePoint(result);
|
||||
}
|
||||
|
||||
const char * UTF8Decoder::nextCodePointPointer() {
|
||||
return m_string + leading_ones(*m_string);
|
||||
}
|
||||
|
||||
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
|
||||
constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
|
||||
char buffer[bufferSize];
|
||||
@@ -32,21 +36,29 @@ size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
|
||||
}
|
||||
|
||||
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
|
||||
assert(bufferSize >= CodePoint::MaxCodePointCharLength);
|
||||
if (bufferSize <= 0) {
|
||||
return 0;
|
||||
}
|
||||
size_t i = 0;
|
||||
if (c <= 0x7F) {
|
||||
buffer[i++] = c;
|
||||
} else if (c <= 0x7FF) {
|
||||
buffer[i++] = 0b11000000 | (c >> 6);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | (c & 0b111111);
|
||||
} else if (c <= 0xFFFF) {
|
||||
buffer[i++] = 0b11100000 | (c >> 12);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | (c & 0b111111);
|
||||
} else {
|
||||
buffer[i++] = 0b11110000 | (c >> 18);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | ((c >> 12) & 0b111111);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
|
||||
if (bufferSize <= i) { return i; }
|
||||
buffer[i++] = 0b10000000 | (c & 0b111111);
|
||||
}
|
||||
return i;
|
||||
|
||||
51
kandinsky/src/unicode/utf8_helper.cpp
Normal file
51
kandinsky/src/unicode/utf8_helper.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#include <kandinsky/unicode/utf8_helper.h>
|
||||
#include <kandinsky/unicode/utf8_decoder.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
namespace UTF8Helper {
|
||||
|
||||
static inline int min(int x, int y) { return x < y ? x : y; }
|
||||
|
||||
const char * CodePointSearch(const char * s, CodePoint c) {
|
||||
UTF8Decoder decoder(s);
|
||||
const char * currentPointer = s;
|
||||
const char * nextPointer = decoder.nextCodePointPointer();
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
while (codePoint != KDCodePointNull && codePoint != c) {
|
||||
currentPointer = nextPointer;
|
||||
nextPointer = decoder.nextCodePointPointer();
|
||||
codePoint = decoder.nextCodePoint();
|
||||
}
|
||||
if (codePoint == c) {
|
||||
return currentPointer;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate) {
|
||||
UTF8Decoder decoder(src);
|
||||
const char * currentPointer = src;
|
||||
const char * nextPointer = decoder.nextCodePointPointer();
|
||||
const char * maxPointer = src + strlen(src) + 1;
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
size_t bufferIndex = 0;
|
||||
size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
|
||||
|
||||
// Remove CodePoint c
|
||||
while (currentPointer < maxPointer && bufferIndex < dstSize) {
|
||||
if (codePoint != c) {
|
||||
int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
|
||||
memcpy(dst + bufferIndex, currentPointer, copySize);
|
||||
bufferIndex+= copySize;
|
||||
} else if (indexToUpdate != nullptr && currentPointer - src < *indexToUpdate) {
|
||||
assert(*indexToUpdate >= codePointCharSize);
|
||||
*indexToUpdate-= codePointCharSize;
|
||||
}
|
||||
currentPointer = nextPointer;
|
||||
nextPointer = decoder.nextCodePointPointer();
|
||||
codePoint = decoder.nextCodePoint();
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
Reference in New Issue
Block a user