mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-19 00:37:25 +01:00
[kandinsky] UTF8Decoder::CodePointToChars
This commit is contained in:
@@ -1,12 +1,25 @@
|
||||
#ifndef KANDINSKY_UNICODE_UTF8DECODER_H
|
||||
#define KANDINSKY_UNICODE_UTF8DECODER_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "code_point.h"
|
||||
|
||||
/* UTF-8 encodes all valid code points using at most 4 bytes (= 32 bits), the
 * lowest codes being equal to ASCII codes. There are fewer than 2^21 different
 * valid UTF-8 code points.
 *
 * The encoding is the following:
 * For code points between ... ->  The corresponding bits are ...
 * 0 and 7F                    ->  0xxxxxxx
 * 80 and 7FF                  ->  110xxxxx 10xxxxxx
 * 800 and FFFF                ->  1110xxxx 10xxxxxx 10xxxxxx
 * 10000 and 10FFFF            ->  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
|
||||
class UTF8Decoder {
|
||||
public:
|
||||
UTF8Decoder(const char * string) : m_string(string) {}
|
||||
CodePoint nextCodePoint();
|
||||
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
|
||||
private:
|
||||
const char * m_string;
|
||||
};
|
||||
|
||||
@@ -24,3 +24,24 @@ CodePoint UTF8Decoder::nextCodePoint() {
|
||||
}
|
||||
return CodePoint(result);
|
||||
}
|
||||
|
||||
/* Writes the UTF-8 encoding of code point c at the start of buffer and returns
 * the number of chars written (1 to 4). No null terminator is appended.
 *
 * bufferSize is the capacity of buffer, in chars. It is only verified by an
 * assertion, which requires room for sizeof(CodePoint) chars whatever the
 * actual length of the encoding; nothing is checked when assertions are
 * compiled out.
 *
 * Values above 0xFFFF all take the 4-byte form: code points beyond 0x10FFFF
 * are not rejected here. */
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
  /* bufferSize is signed while sizeof yields a size_t. Compared directly, a
   * negative size would be converted to a huge unsigned value and slip through
   * the check, so reject negative sizes before casting explicitly. */
  assert(bufferSize >= 0 && static_cast<size_t>(bufferSize) >= sizeof(CodePoint)/sizeof(char));
  size_t i = 0;
  if (c <= 0x7F) {
    // 1 byte: 0xxxxxxx (plain ASCII)
    buffer[i++] = c;
  } else if (c <= 0x7FF) {
    // 2 bytes: 110xxxxx 10xxxxxx
    buffer[i++] = 0b11000000 | (c >> 6);
    buffer[i++] = 0b10000000 | (c & 0b111111);
  } else if (c <= 0xFFFF) {
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    buffer[i++] = 0b11100000 | (c >> 12);
    buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
    buffer[i++] = 0b10000000 | (c & 0b111111);
  } else {
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    buffer[i++] = 0b11110000 | (c >> 18);
    buffer[i++] = 0b10000000 | ((c >> 12) & 0b111111);
    buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
    buffer[i++] = 0b10000000 | (c & 0b111111);
  }
  return i;
}
|
||||
|
||||
Reference in New Issue
Block a user