[unicode] Clean decoder use

This commit is contained in:
Léa Saviot
2019-01-30 11:48:31 +01:00
committed by Émilie Feral
parent fd407ce3d9
commit 7b5636f298
7 changed files with 22 additions and 25 deletions

View File

@@ -119,22 +119,20 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char
* basis. This can work, however the MicroPython lexer won't accept a line
* starting with a whitespace. So we're discarding leading whitespaces
* beforehand. */
UTF8Decoder decoder(text);
const char * p = decoder.stringPosition();
CodePoint c = decoder.nextCodePoint();
while (p < text + byteLength && c == ' ') {
p = decoder.stringPosition();
c = decoder.nextCodePoint();
const char * firstNonSpace = UTF8Helper::NotCodePointSearch(text, ' ');
if (UTF8Helper::CodePointIs(firstNonSpace, UCodePointNull)) {
nlr_pop();
return;
}
mp_lexer_t * lex = mp_lexer_new_from_str_len(0, p, byteLength - (p - text), 0);
mp_lexer_t * lex = mp_lexer_new_from_str_len(0, firstNonSpace, byteLength - (firstNonSpace - text), 0);
LOG_DRAW("Pop token %d\n", lex->tok_kind);
const char * tokenFrom = p;
const char * tokenFrom = firstNonSpace;
size_t tokenLength = 0;
while (lex->tok_kind != MP_TOKEN_NEWLINE && lex->tok_kind != MP_TOKEN_END) {
tokenFrom = p + lex->tok_column - 1;
tokenFrom = firstNonSpace + lex->tok_column - 1;
tokenLength = TokenLength(lex);
LOG_DRAW("Draw \"%.*s\" for token %d\n", tokenLength, tokenFrom, lex->tok_kind);
drawStringAt(ctx, line,
@@ -151,7 +149,7 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char
tokenFrom += tokenLength;
if (tokenFrom < text + byteLength) {
LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), p, tokenFrom);
LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), firstNonSpace, tokenFrom);
drawStringAt(ctx, line,
UTF8Helper::GlyphOffsetAtCodePoint(text, tokenFrom),
tokenFrom,

View File

@@ -57,10 +57,10 @@ bool TextArea::handleEvent(Ion::Events::Event event) {
decoder.previousCodePoint();
return setCursorLocation(decoder.stringPosition());
} else if (event == Ion::Events::Right) {
if (*cursorLocation() == 0) {
if (UTF8Helper::CodePointIs(cursorLocation(), UCodePointNull)) {
return false;
}
UTF8Decoder decoder(text(), cursorLocation());
UTF8Decoder decoder(cursorLocation());
decoder.nextCodePoint();
return setCursorLocation(decoder.stringPosition());
} else if (event == Ion::Events::Up) {
@@ -216,7 +216,9 @@ CodePoint TextArea::Text::removeCodePoint(char * * position) {
size_t TextArea::Text::removeRemainingLine(const char * location, int direction) {
assert(m_buffer != nullptr);
assert(location >= m_buffer && location < m_buffer + m_bufferSize);
assert(location >= m_buffer && location <= m_buffer + m_bufferSize);
assert(direction > 0 || location > m_buffer);
assert(direction < 0 || location < m_buffer + m_bufferSize);
UTF8Decoder decoder(m_buffer, location);
const char * codePointPosition = decoder.stringPosition();

View File

@@ -440,7 +440,7 @@ bool TextField::privateHandleMoveEvent(Ion::Events::Event event) {
}
if (event == Ion::Events::Right && isEditing() && cursorLocation() < m_contentView.draftTextBuffer() + draftTextLength()) {
assert(isEditing());
UTF8Decoder decoder(m_contentView.draftTextBuffer(), cursorLocation());
UTF8Decoder decoder(cursorLocation());
decoder.nextCodePoint();
return setCursorLocation(decoder.stringPosition());
}

View File

@@ -15,7 +15,7 @@ const char * CodePointSearch(const char * s, CodePoint c);
/* Returns the first occurrence of a code point that is not c in a string,
* stopping at the null-terminating char or the start of string. */
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition);
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft = false, const char * initialPosition = nullptr);
/* Copy src into dst while removing all code points c. Also update an index
* that should be lower if code points were removed before it. Ensure null-

View File

@@ -4,6 +4,7 @@
static inline int leading_ones(uint8_t value) {
for (int i=0; i<8; i++) {
if (!(value & 0x80)) {
assert(i <= 4);
return i;
}
value = value << 1;
@@ -17,7 +18,7 @@ static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
}
CodePoint UTF8Decoder::nextCodePoint() {
assert(m_stringPosition == m_stringPosition || *(m_stringPosition - 1) != 0);
assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0);
int leadingOnes = leading_ones(*m_stringPosition);
uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1);
for (int i = 0; i < leadingOnes - 1; i++) {

View File

@@ -9,6 +9,7 @@ static inline int minInt(int x, int y) { return x < y ? x : y; }
static inline size_t minSizeT(size_t x, size_t y) { return x < y ? x : y; }
int CountOccurrences(const char * s, CodePoint c) {
assert(c != UCodePointNull);
int count = 0;
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
/* The code point is one char long, so it is equal to its char translation.
@@ -55,6 +56,7 @@ const char * CodePointSearch(const char * s, CodePoint c) {
}
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition) {
// TODO LEA: optimize for one byte long c?
if (goingLeft) {
assert(initialPosition != nullptr);
if (initialPosition == s) {

View File

@@ -15,19 +15,13 @@ static inline bool isDigit(const CodePoint c) {
const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
UTF8Decoder decoder(m_text);
const char * currentPointer = m_text;
CodePoint firstCodePoint = decoder.nextCodePoint();
const char * nextPointer = decoder.stringPosition();
size_t numberOfBytesForCodePoint = nextPointer - currentPointer;
const char * nextTextPosition = decoder.stringPosition();
if (firstCodePoint != UCodePointNull) {
currentPointer = nextPointer;
CodePoint codePoint = decoder.nextCodePoint();
nextPointer = decoder.stringPosition();
while (codePoint.isCombining()) {
numberOfBytesForCodePoint+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextTextPosition = decoder.stringPosition();
codePoint = decoder.nextCodePoint();
nextPointer = decoder.stringPosition();
}
}
// TODO handle combined code points? For now the combining codepoints get dropped.
@@ -36,7 +30,7 @@ const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, boo
*testResult = shouldPop;
}
if (shouldPop) {
m_text+= numberOfBytesForCodePoint;
m_text = nextTextPosition;
}
return firstCodePoint;
}