diff --git a/apps/code/python_text_area.cpp b/apps/code/python_text_area.cpp index b38700d23..c7e958aab 100644 --- a/apps/code/python_text_area.cpp +++ b/apps/code/python_text_area.cpp @@ -119,22 +119,20 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char * basis. This can work, however the MicroPython lexer won't accept a line * starting with a whitespace. So we're discarding leading whitespaces * beforehand. */ - UTF8Decoder decoder(text); - const char * p = decoder.stringPosition(); - CodePoint c = decoder.nextCodePoint(); - while (p < text + byteLength && c == ' ') { - p = decoder.stringPosition(); - c = decoder.nextCodePoint(); + const char * firstNonSpace = UTF8Helper::NotCodePointSearch(text, ' '); + if (UTF8Helper::CodePointIs(firstNonSpace, UCodePointNull)) { + nlr_pop(); + return; } - mp_lexer_t * lex = mp_lexer_new_from_str_len(0, p, byteLength - (p - text), 0); + mp_lexer_t * lex = mp_lexer_new_from_str_len(0, firstNonSpace, byteLength - (firstNonSpace - text), 0); LOG_DRAW("Pop token %d\n", lex->tok_kind); - const char * tokenFrom = p; + const char * tokenFrom = firstNonSpace; size_t tokenLength = 0; while (lex->tok_kind != MP_TOKEN_NEWLINE && lex->tok_kind != MP_TOKEN_END) { - tokenFrom = p + lex->tok_column - 1; + tokenFrom = firstNonSpace + lex->tok_column - 1; tokenLength = TokenLength(lex); LOG_DRAW("Draw \"%.*s\" for token %d\n", tokenLength, tokenFrom, lex->tok_kind); drawStringAt(ctx, line, @@ -151,7 +149,7 @@ void PythonTextArea::ContentView::drawLine(KDContext * ctx, int line, const char tokenFrom += tokenLength; if (tokenFrom < text + byteLength) { - LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), p, tokenFrom); + LOG_DRAW("Draw comment \"%.*s\" from %d\n", byteLength - (tokenFrom - text), firstNonSpace, tokenFrom); drawStringAt(ctx, line, UTF8Helper::GlyphOffsetAtCodePoint(text, tokenFrom), tokenFrom, diff --git a/escher/src/text_area.cpp b/escher/src/text_area.cpp index 9e41714d9..3478e06ba 100644 --- a/escher/src/text_area.cpp +++ b/escher/src/text_area.cpp @@ -57,10 +57,10 @@ bool TextArea::handleEvent(Ion::Events::Event event) { decoder.previousCodePoint(); return setCursorLocation(decoder.stringPosition()); } else if (event == Ion::Events::Right) { - if (*cursorLocation() == 0) { + if (UTF8Helper::CodePointIs(cursorLocation(), UCodePointNull)) { return false; } - UTF8Decoder decoder(text(), cursorLocation()); + UTF8Decoder decoder(cursorLocation()); decoder.nextCodePoint(); return setCursorLocation(decoder.stringPosition()); } else if (event == Ion::Events::Up) { @@ -216,7 +216,9 @@ CodePoint TextArea::Text::removeCodePoint(char * * position) { size_t TextArea::Text::removeRemainingLine(const char * location, int direction) { assert(m_buffer != nullptr); - assert(location >= m_buffer && location < m_buffer + m_bufferSize); + assert(location >= m_buffer && location <= m_buffer + m_bufferSize); + assert(direction > 0 || location > m_buffer); + assert(direction < 0 || location < m_buffer + m_bufferSize); UTF8Decoder decoder(m_buffer, location); const char * codePointPosition = decoder.stringPosition(); diff --git a/escher/src/text_field.cpp b/escher/src/text_field.cpp index 2ee9ef894..0ef6931e6 100644 --- a/escher/src/text_field.cpp +++ b/escher/src/text_field.cpp @@ -440,7 +440,7 @@ bool TextField::privateHandleMoveEvent(Ion::Events::Event event) { } if (event == Ion::Events::Right && isEditing() && cursorLocation() < m_contentView.draftTextBuffer() + draftTextLength()) { assert(isEditing()); - UTF8Decoder decoder(m_contentView.draftTextBuffer(), cursorLocation()); + UTF8Decoder decoder(cursorLocation()); decoder.nextCodePoint(); return setCursorLocation(decoder.stringPosition()); } diff --git a/ion/include/ion/unicode/utf8_helper.h b/ion/include/ion/unicode/utf8_helper.h index 4a7f27985..337c38f96 100644 --- a/ion/include/ion/unicode/utf8_helper.h +++ b/ion/include/ion/unicode/utf8_helper.h @@ -15,7 +15,7 @@ const char * CodePointSearch(const char * s, CodePoint c); /* Returns the first occurence of a code point that is not c in a string, * stopping at the null-terminating char or the start of string. */ -const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition); +const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft = false, const char * initialPosition = nullptr); /* Copy src into dst while removing all code points c. Also update an index * that should be lower if code points where removed before it. Ensure null- diff --git a/ion/src/shared/unicode/utf8_decoder.cpp b/ion/src/shared/unicode/utf8_decoder.cpp index c95279bb1..45e92ad49 100644 --- a/ion/src/shared/unicode/utf8_decoder.cpp +++ b/ion/src/shared/unicode/utf8_decoder.cpp @@ -4,6 +4,7 @@ static inline int leading_ones(uint8_t value) { for (int i=0; i<8; i++) { if (!(value & 0x80)) { + assert(i <= 4); return i; } value = value << 1; @@ -17,7 +18,7 @@ static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) { } CodePoint UTF8Decoder::nextCodePoint() { - assert(m_stringPosition == m_stringPosition || *(m_stringPosition - 1) != 0); + assert(m_stringPosition == m_string || *(m_stringPosition - 1) != 0); int leadingOnes = leading_ones(*m_stringPosition); uint32_t result = last_k_bits(*m_stringPosition++, 8-leadingOnes-1); for (int i = 0; i < leadingOnes - 1; i++) { diff --git a/ion/src/shared/unicode/utf8_helper.cpp b/ion/src/shared/unicode/utf8_helper.cpp index 2123229bc..384db546b 100644 --- a/ion/src/shared/unicode/utf8_helper.cpp +++ b/ion/src/shared/unicode/utf8_helper.cpp @@ -9,6 +9,7 @@ static inline int minInt(int x, int y) { return x < y ? x : y; } static inline size_t minSizeT(size_t x, size_t y) { return x < y ? x : y; } int CountOccurrences(const char * s, CodePoint c) { + assert(c != UCodePointNull); int count = 0; if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) { /* The code point is one char long, so it is equal to its char translation. @@ -55,6 +56,7 @@ const char * CodePointSearch(const char * s, CodePoint c) { } const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition) { + // TODO LEA: optimize for one byte long c? if (goingLeft) { assert(initialPosition != nullptr); if (initialPosition == s) { diff --git a/poincare/src/parsing/tokenizer.cpp b/poincare/src/parsing/tokenizer.cpp index 878e442ec..10ec5ef36 100644 --- a/poincare/src/parsing/tokenizer.cpp +++ b/poincare/src/parsing/tokenizer.cpp @@ -15,19 +15,13 @@ static inline bool isDigit(const CodePoint c) { const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) { UTF8Decoder decoder(m_text); - const char * currentPointer = m_text; CodePoint firstCodePoint = decoder.nextCodePoint(); - const char * nextPointer = decoder.stringPosition(); - size_t numberOfBytesForCodePoint = nextPointer - currentPointer; + const char * nextTextPosition = decoder.stringPosition(); if (firstCodePoint != UCodePointNull) { - currentPointer = nextPointer; CodePoint codePoint = decoder.nextCodePoint(); - nextPointer = decoder.stringPosition(); while (codePoint.isCombining()) { - numberOfBytesForCodePoint+= nextPointer - currentPointer; - currentPointer = nextPointer; + nextTextPosition = decoder.stringPosition(); codePoint = decoder.nextCodePoint(); - nextPointer = decoder.stringPosition(); } } // TODO handle combined code points? For now the combining codepoints get dropped. @@ -36,7 +30,7 @@ const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, boo *testResult = shouldPop; } if (shouldPop) { - m_text+= numberOfBytesForCodePoint; + m_text = nextTextPosition; } return firstCodePoint; }