mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-19 00:37:25 +01:00
[unicode] Clean decoder use
This commit is contained in:
@@ -119,22 +119,20 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char
|
||||
* basis. This can work, however the MicroPython lexer won't accept a line
|
||||
* starting with a whitespace. So we're discarding leading whitespaces
|
||||
* beforehand. */
|
||||
UTF8Decoder decoder(text);
|
||||
const char * p = decoder.stringPosition();
|
||||
CodePoint c = decoder.nextCodePoint();
|
||||
while (p < text + byteLength && c == ' ') {
|
||||
p = decoder.stringPosition();
|
||||
c = decoder.nextCodePoint();
|
||||
const char * firstNonSpace = UTF8Helper::NotCodePointSearch(text, ' ');
|
||||
if (UTF8Helper::CodePointIs(firstNonSpace, UCodePointNull)) {
|
||||
nlr_pop();
|
||||
return;
|
||||
}
|
||||
|
||||
mp_lexer_t * lex = mp_lexer_new_from_str_len(0, p, byteLength - (p - text), 0);
|
||||
mp_lexer_t * lex = mp_lexer_new_from_str_len(0, firstNonSpace, byteLength - (firstNonSpace - text), 0);
|
||||
LOG_DRAW("Pop token %d\n", lex->tok_kind);
|
||||
|
||||
const char * tokenFrom = p;
|
||||
const char * tokenFrom = firstNonSpace;
|
||||
size_t tokenLength = 0;
|
||||
while (lex->tok_kind != MP_TOKEN_NEWLINE && lex->tok_kind != MP_TOKEN_END) {
|
||||
|
||||
tokenFrom = p + lex->tok_column - 1;
|
||||
tokenFrom = firstNonSpace + lex->tok_column - 1;
|
||||
tokenLength = TokenLength(lex);
|
||||
LOG_DRAW("Draw \"%.*s\" for token %d\n", tokenLength, tokenFrom, lex->tok_kind);
|
||||
drawStringAt(ctx, line,
|
||||
@@ -151,7 +149,7 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char
|
||||
|
||||
tokenFrom += tokenLength;
|
||||
if (tokenFrom < text + byteLength) {
|
||||
LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), p, tokenFrom);
|
||||
LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), firstNonSpace, tokenFrom);
|
||||
drawStringAt(ctx, line,
|
||||
UTF8Helper::GlyphOffsetAtCodePoint(text, tokenFrom),
|
||||
tokenFrom,
|
||||
|
||||
@@ -57,10 +57,10 @@ bool TextArea::handleEvent(Ion::Events::Event event) {
|
||||
decoder.previousCodePoint();
|
||||
return setCursorLocation(decoder.stringPosition());
|
||||
} else if (event == Ion::Events::Right) {
|
||||
if (*cursorLocation() == 0) {
|
||||
if (UTF8Helper::CodePointIs(cursorLocation(), UCodePointNull)) {
|
||||
return false;
|
||||
}
|
||||
UTF8Decoder decoder(text(), cursorLocation());
|
||||
UTF8Decoder decoder(cursorLocation());
|
||||
decoder.nextCodePoint();
|
||||
return setCursorLocation(decoder.stringPosition());
|
||||
} else if (event == Ion::Events::Up) {
|
||||
@@ -216,7 +216,9 @@ CodePoint TextArea::Text::removeCodePoint(char * * position) {
|
||||
|
||||
size_t TextArea::Text::removeRemainingLine(const char * location, int direction) {
|
||||
assert(m_buffer != nullptr);
|
||||
assert(location >= m_buffer && location < m_buffer + m_bufferSize);
|
||||
assert(location >= m_buffer && location <= m_buffer + m_bufferSize);
|
||||
assert(direction > 0 || location > m_buffer);
|
||||
assert(direction < 0 || location < m_buffer + m_bufferSize);
|
||||
|
||||
UTF8Decoder decoder(m_buffer, location);
|
||||
const char * codePointPosition = decoder.stringPosition();
|
||||
|
||||
@@ -440,7 +440,7 @@ bool TextField::privateHandleMoveEvent(Ion::Events::Event event) {
|
||||
}
|
||||
if (event == Ion::Events::Right && isEditing() && cursorLocation() < m_contentView.draftTextBuffer() + draftTextLength()) {
|
||||
assert(isEditing());
|
||||
UTF8Decoder decoder(m_contentView.draftTextBuffer(), cursorLocation());
|
||||
UTF8Decoder decoder(cursorLocation());
|
||||
decoder.nextCodePoint();
|
||||
return setCursorLocation(decoder.stringPosition());
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ const char * CodePointSearch(const char * s, CodePoint c);
|
||||
|
||||
/* Returns the first occurrence of a code point that is not c in a string,
|
||||
* stopping at the null-terminating char or the start of string. */
|
||||
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition);
|
||||
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft = false, const char * initialPosition = nullptr);
|
||||
|
||||
/* Copy src into dst while removing all code points c. Also update an index
|
||||
* that should be lower if code points were removed before it. Ensure null-
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
static inline int leading_ones(uint8_t value) {
|
||||
for (int i=0; i<8; i++) {
|
||||
if (!(value & 0x80)) {
|
||||
assert(i <= 4);
|
||||
return i;
|
||||
}
|
||||
value = value << 1;
|
||||
@@ -17,7 +18,7 @@ static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
|
||||
}
|
||||
|
||||
CodePoint UTF8Decoder::nextCodePoint() {
|
||||
assert(m_stringPosition == m_stringPosition || *(m_stringPosition - 1) != 0);
|
||||
assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0);
|
||||
int leadingOnes = leading_ones(*m_stringPosition);
|
||||
uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
|
||||
for (int i = 0; i < leadingOnes - 1; i++) {
|
||||
|
||||
@@ -9,6 +9,7 @@ static inline int minInt(int x, int y) { return x < y ? x : y; }
|
||||
static inline size_t minSizeT(size_t x, size_t y) { return x < y ? x : y; }
|
||||
|
||||
int CountOccurrences(const char * s, CodePoint c) {
|
||||
assert(c != UCodePointNull);
|
||||
int count = 0;
|
||||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||||
/* The code point is one char long, so it is equal to its char translation.
|
||||
@@ -55,6 +56,7 @@ const char * CodePointSearch(const char * s, CodePoint c) {
|
||||
}
|
||||
|
||||
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition) {
|
||||
// TODO LEA: optimize for one byte long c?
|
||||
if (goingLeft) {
|
||||
assert(initialPosition != nullptr);
|
||||
if (initialPosition == s) {
|
||||
|
||||
@@ -15,19 +15,13 @@ static inline bool isDigit(const CodePoint c) {
|
||||
|
||||
const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
|
||||
UTF8Decoder decoder(m_text);
|
||||
const char * currentPointer = m_text;
|
||||
CodePoint firstCodePoint = decoder.nextCodePoint();
|
||||
const char * nextPointer = decoder.stringPosition();
|
||||
size_t numberOfBytesForCodePoint = nextPointer - currentPointer;
|
||||
const char * nextTextPosition = decoder.stringPosition();
|
||||
if (firstCodePoint != UCodePointNull) {
|
||||
currentPointer = nextPointer;
|
||||
CodePoint codePoint = decoder.nextCodePoint();
|
||||
nextPointer = decoder.stringPosition();
|
||||
while (codePoint.isCombining()) {
|
||||
numberOfBytesForCodePoint+= nextPointer - currentPointer;
|
||||
currentPointer = nextPointer;
|
||||
nextTextPosition = decoder.stringPosition();
|
||||
codePoint = decoder.nextCodePoint();
|
||||
nextPointer = decoder.stringPosition();
|
||||
}
|
||||
}
|
||||
// TODO handle combined code points? For now the combining codepoints get dropped.
|
||||
@@ -36,7 +30,7 @@ const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, boo
|
||||
*testResult = shouldPop;
|
||||
}
|
||||
if (shouldPop) {
|
||||
m_text+= numberOfBytesForCodePoint;
|
||||
m_text = nextTextPosition;
|
||||
}
|
||||
return firstCodePoint;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user