[unicode] Use the UTF8Decoder to scan const char *

This commit is contained in:
Léa Saviot
2019-01-16 17:03:30 +01:00
committed by Émilie Feral
parent 65e5adafac
commit 41afa92f10
14 changed files with 189 additions and 65 deletions

View File

@@ -1,5 +1,6 @@
#include "toolbox_helpers.h"
#include <apps/i18n.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <string.h>
#include <assert.h>
@@ -7,17 +8,24 @@ namespace Shared {
namespace ToolboxHelpers {
int CursorIndexInCommandText(const char * text) {
// TODO LEA
size_t textLength = strlen(text);
for (size_t i = 0; i < textLength; i++) {
if (text[i] == '(' || text[i] == '\'') {
return i + 1;
UTF8Decoder decoder(text);
size_t index = 0;
const char * currentPointer = text;
const char * nextPointer = decoder.nextCodePointPointer();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull) {
if (codePoint == '(' || codePoint == '\'') {
return index + 1;
}
if (text[i] == ']') {
return i;
if (codePoint == '[') {
return index;
}
index+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
return textLength;
return index;
}
void TextToInsertForCommandMessage(I18n::Message message, char * buffer, int bufferSize, bool replaceArgsWithEmptyChar) {
@@ -49,7 +57,7 @@ void TextToInsertForCommandText(const char * command, char * buffer, int bufferS
buffer[currentNewTextIndex++] = command[i];
} else {
if (replaceArgsWithEmptyChar && !argumentAlreadyReplaced) {
// TODO LEA buffer[currentNewTextIndex++] = Ion::Charset::Empty;
currentNewTextIndex += UTF8Decoder::CodePointToChars(KDCodePointEmpty, buffer + currentNewTextIndex, bufferSize - currentNewTextIndex);
argumentAlreadyReplaced = true;
}
}

View File

@@ -1,9 +1,9 @@
#include "equation.h"
#include <poincare/equal.h>
#include <poincare/undefined.h>
#include <poincare/unreal.h>
#include <poincare/rational.h>
#include <kandinsky/unicode/utf8_helper.h>
using namespace Poincare;
@@ -50,7 +50,7 @@ Expression Equation::standardForm(Context * context) const {
}
bool Equation::containsIComplex() const {
return false; //TODO LEA strchr(text(), KDCodePointMathematicalBoldSmallI) != nullptr;
return UTF8Helper::CodePointSearch(text(), KDCodePointMathematicalBoldSmallI) != nullptr;
}
void Equation::tidyStandardForm() {

View File

@@ -9,6 +9,7 @@
#include <assert.h>
#include <poincare/matrix_layout.h>
#include <poincare/layout_helper.h>
#include <kandinsky/unicode/utf8_decoder.h>
using namespace Poincare;
using namespace Shared;
@@ -199,7 +200,7 @@ bool VariableBoxController::selectLeaf(int selectedRow) {
assert(nameLength < nameToHandleMaxSize);
nameToHandle[nameLength++] = '(';
assert(nameLength < nameToHandleMaxSize);
// TODO LEA nameToHandle[nameLength++] = Ion::Charset::Empty;
nameLength+= UTF8Decoder::CodePointToChars(KDCodePointEmpty, nameToHandle+nameLength, nameToHandleMaxSize - nameLength);
assert(nameLength < nameToHandleMaxSize);
nameToHandle[nameLength++] = ')';
assert(nameLength < nameToHandleMaxSize);

View File

@@ -1,6 +1,7 @@
#include <escher/text_area.h>
#include <escher/clipboard.h>
#include <escher/text_input_helpers.h>
#include <kandinsky/unicode/utf8_helper.h>
#include <stddef.h>
#include <assert.h>
@@ -24,21 +25,15 @@ bool TextArea::handleEventWithText(const char * text, bool indentation, bool for
size_t cursorIndexInCommand = TextInputHelpers::CursorIndexInCommand(text);
size_t eventTextSize = min(strlen(text) + 1, TextField::maxBufferSize());
char buffer[TextField::maxBufferSize()];
size_t bufferIndex = 0;
constexpr int bufferSize = TextField::maxBufferSize();
char buffer[bufferSize];
// Remove EmptyChars
for (size_t i = bufferIndex; i < eventTextSize; i++) {
/* TODO LEA
if (text[i] != Ion::Charset::Empty) {
buffer[bufferIndex++] = text[i];
} else if (i < cursorIndexInCommand) {
cursorIndexInCommand--;
} */
}
// Remove the Empty code points
UTF8Helper::CopyAndRemoveCodePoint(buffer, bufferSize, text, KDCodePointEmpty, &cursorIndexInCommand);
// Insert the text
if ((indentation && insertTextWithIndentation(buffer, cursorLocation())) || insertTextAtLocation(buffer, cursorLocation())) {
// Set the cursor location
if (forceCursorRightOfText) {
nextCursorLocation += strlen(buffer);
} else {

View File

@@ -1,6 +1,7 @@
#include <escher/text_field.h>
#include <escher/text_input_helpers.h>
#include <escher/clipboard.h>
#include <kandinsky/unicode/utf8_helper.h>
#include <assert.h>
/* TextField::ContentView */
@@ -405,29 +406,21 @@ bool TextField::privateHandleMoveEvent(Ion::Events::Event event) {
}
bool TextField::handleEventWithText(const char * eventText, bool indentation, bool forceCursorRightOfText) {
//TODO LEA
size_t previousTextLength = strlen(text());
size_t eventTextLength = strlen(eventText);
if (!isEditing()) {
setEditing(true);
}
if (eventTextLength == 0) {
if (eventText[0] == 0) {
setCursorLocation(0);
return m_delegate->textFieldDidHandleEvent(this, true, previousTextLength != 0);
}
size_t eventTextSize = min(eventTextLength + 1, TextField::maxBufferSize());
char buffer[TextField::maxBufferSize()];
int newBufferIndex = 0;
// Remove EmptyChars
/* TODO for (size_t i = 0; i < eventTextSize; i++) {
if (eventText[i] != Ion::Charset::Empty) {
buffer[newBufferIndex++] = eventText[i];
}
}*/
// Remove the Empty code points
constexpr int bufferSize = TextField::maxBufferSize();
char buffer[bufferSize];
UTF8Helper::CopyAndRemoveCodePoint(buffer, bufferSize, eventText, KDCodePointEmpty);
int nextCursorLocation = draftTextLength();
if (insertTextAtLocation(buffer, cursorLocation())) {

View File

@@ -1,18 +1,35 @@
#include <escher/text_input_helpers.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <string.h>
namespace TextInputHelpers {
size_t CursorIndexInCommand(const char * text) {
// TODO LEA
size_t index = 0;
while (text[index] != 0) {
if (text[index] == '\'' && text[index+1] == '\'') {
return index + 1;
} /* TODO else if (text[index] == Ion::Charset::Empty) {
UTF8Decoder decoder(text);
const char * currentPointer = text;
const char * nextPointer = decoder.nextCodePointPointer();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull) {
if (codePoint == KDCodePointEmpty) {
return index;
}*/
index++;
}
//TODO make sure changing empty / ' order was OK
if (codePoint == '\'') {
index+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
if (codePoint == '\'') {
return index;
}
// Continue because we already incremented codePoint
continue;
}
index+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
return index;
}

View File

@@ -14,6 +14,7 @@ src += $(addprefix kandinsky/src/,\
point.cpp \
rect.cpp \
unicode/utf8_decoder.cpp\
unicode/utf8_helper.cpp\
)
src += $(addprefix kandinsky/fonts/, \

View File

@@ -18,7 +18,10 @@
class UTF8Decoder {
public:
UTF8Decoder(const char * string) : m_string(string) {}
/* TODO: Rename methods? nextCodePoint increases m_string but
* nextCodePointPointer does not */
CodePoint nextCodePoint();
const char * nextCodePointPointer();
static size_t CharSizeOfCodePoint(CodePoint c);
static size_t CodePointToChars(CodePoint c, char * buffer, int bufferSize);
private:

View File

@@ -0,0 +1,17 @@
#ifndef KANDINSKY_UNICODE_UTF8_HELPER_H
#define KANDINSKY_UNICODE_UTF8_HELPER_H
#include "code_point.h"
#include <stddef.h>
namespace UTF8Helper {

/* CodePointSearch returns a pointer to the first occurrence of the code point
 * c in the UTF-8 encoded string s, or nullptr if c is not found before the
 * end of the string. */
const char * CodePointSearch(const char * s, CodePoint c);

/* CopyAndRemoveCodePoint copies src into dst while removing all code points c.
 * At most dstSize chars are written to dst.
 * It also updates an index that should be lower if code points were removed
 * before it: when indexToUpdate is not nullptr, the pointed index (a char
 * offset in src) is decreased by the char size of c for every removed code
 * point located before it. */
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate = nullptr);

}
#endif

View File

@@ -25,6 +25,10 @@ CodePoint UTF8Decoder::nextCodePoint() {
return CodePoint(result);
}
const char * UTF8Decoder::nextCodePointPointer() {
return m_string + leading_ones(*m_string);
}
size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
constexpr int bufferSize = CodePoint::MaxCodePointCharLength;
char buffer[bufferSize];
@@ -32,21 +36,29 @@ size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
}
size_t UTF8Decoder::CodePointToChars(CodePoint c, char * buffer, int bufferSize) {
assert(bufferSize >= CodePoint::MaxCodePointCharLength);
if (bufferSize <= 0) {
return 0;
}
size_t i = 0;
if (c <= 0x7F) {
buffer[i++] = c;
} else if (c <= 0x7FF) {
buffer[i++] = 0b11000000 | (c >> 6);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
} else if (c <= 0xFFFF) {
buffer[i++] = 0b11100000 | (c >> 12);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
} else {
buffer[i++] = 0b11110000 | (c >> 18);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 12) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | ((c >> 6) & 0b111111);
if (bufferSize <= i) { return i; }
buffer[i++] = 0b10000000 | (c & 0b111111);
}
return i;

View File

@@ -0,0 +1,51 @@
#include <kandinsky/unicode/utf8_helper.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <string.h>
#include <assert.h>
namespace UTF8Helper {
// Returns the smaller of the two ints (y when they compare equal).
static inline int min(int x, int y) {
  if (x < y) {
    return x;
  }
  return y;
}
const char * CodePointSearch(const char * s, CodePoint c) {
UTF8Decoder decoder(s);
const char * currentPointer = s;
const char * nextPointer = decoder.nextCodePointPointer();
CodePoint codePoint = decoder.nextCodePoint();
while (codePoint != KDCodePointNull && codePoint != c) {
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
if (codePoint == c) {
return currentPointer;
}
return nullptr;
}
void CopyAndRemoveCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c, size_t * indexToUpdate) {
UTF8Decoder decoder(src);
const char * currentPointer = src;
const char * nextPointer = decoder.nextCodePointPointer();
const char * maxPointer = src + strlen(src) + 1;
CodePoint codePoint = decoder.nextCodePoint();
size_t bufferIndex = 0;
size_t codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
// Remove CodePoint c
while (currentPointer < maxPointer && bufferIndex < dstSize) {
if (codePoint != c) {
int copySize = min(nextPointer - currentPointer, dstSize - bufferIndex);
memcpy(dst + bufferIndex, currentPointer, copySize);
bufferIndex+= copySize;
} else if (indexToUpdate != nullptr && currentPointer - src < *indexToUpdate) {
assert(*indexToUpdate >= codePointCharSize);
*indexToUpdate-= codePointCharSize;
}
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
}
};

View File

@@ -6,6 +6,7 @@
#include <poincare/symbol.h>
#include <poincare/variable_context.h>
#include <ion.h>
#include <kandinsky/unicode/utf8_helper.h>
#include <cmath>
#include <float.h>
@@ -342,10 +343,9 @@ void Expression::SetEncounteredComplex(bool encounterComplex) {
}
Preferences::ComplexFormat Expression::UpdatedComplexFormatWithTextInput(Preferences::ComplexFormat complexFormat, const char * textInput) {
/* TODO LEA if (complexFormat == Preferences::ComplexFormat::Real && strchr(textInput, KDCodePointMathematicalBoldSmallI) != nullptr) {
if (complexFormat == Preferences::ComplexFormat::Real && UTF8Helper::CodePointSearch(textInput, KDCodePointMathematicalBoldSmallI) != nullptr) {
return Preferences::ComplexFormat::Cartesian;
}
*/
return complexFormat;
}

View File

@@ -9,6 +9,7 @@
#include <poincare/nth_root_layout.h>
#include <poincare/right_parenthesis_layout.h>
#include <poincare/vertical_offset_layout.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <stdio.h>
namespace Poincare {
@@ -136,47 +137,54 @@ void LayoutCursor::addXNTCodePointLayout() {
}
void LayoutCursor::insertText(const char * text) {
// TODO LEA
#if 0
int textLength = strlen(text);
if (textLength <= 0) {
return;
}
Layout newChild;
Layout pointedChild;
for (int i = 0; i < textLength; i++) {
if (text[i] == //TODO Ion::Charset::Empty) {
UTF8Decoder decoder(text);
CodePoint codePoint = decoder.nextCodePoint();
if (codePoint == KDCodePointNull) {
return;
}
assert(!codePoint.isCombining());
while (codePoint != KDCodePointNull) {
if (codePoint == KDCodePointEmpty) {
codePoint = decoder.nextCodePoint();
assert(!codePoint.isCombining());
continue;
}
if (text[i] == //TODO Ion::Charset::MultiplicationSign) {
if (codePoint == KDCodePointMultiplicationSign) {
newChild = CodePointLayout::Builder(KDCodePointMiddleDot);
} else*/ if (text[i] == '(') {
} else if (codePoint == '(') {
newChild = LeftParenthesisLayout::Builder();
if (pointedChild.isUninitialized()) {
pointedChild = newChild;
}
} else if (text[i] == ')') {
} else if (codePoint == ')') {
newChild = RightParenthesisLayout::Builder();
}
/* We never insert text with brackets for now. Removing this code saves the
* binary file 2K. */
#if 0
else if (text[i] == '[') {
else if (codePoint == '[') {
newChild = LeftSquareBracketLayout();
} else if (text[i] == ']') {
} else if (codePoint == ']') {
newChild = RightSquareBracketLayout();
}
#endif
else {
newChild = CodePointLayout::Builder(text[i]);
newChild = CodePointLayout::Builder(codePoint);
}
m_layout.addSibling(this, newChild, true);
// Get the next code point
codePoint = decoder.nextCodePoint();
while (codePoint.isCombining()) {
codePoint = decoder.nextCodePoint();
}
}
if (!pointedChild.isUninitialized() && !pointedChild.parent().isUninitialized()) {
m_layout = pointedChild;
m_position = Position::Right;
}
#endif
}
void LayoutCursor::addLayoutAndMoveCursor(Layout l) {

View File

@@ -4,6 +4,7 @@
#include <poincare/left_parenthesis_layout.h>
#include <poincare/right_parenthesis_layout.h>
#include <poincare/vertical_offset_layout.h>
#include <kandinsky/unicode/utf8_decoder.h>
#include <assert.h>
namespace Poincare {
@@ -56,9 +57,26 @@ Layout LayoutHelper::Parentheses(Layout layout, bool cloneLayout) {
HorizontalLayout LayoutHelper::String(const char * buffer, int bufferLen, const KDFont * font) {
assert(bufferLen > 0);
HorizontalLayout resultLayout = HorizontalLayout::Builder();
/* TODO LEA */
for (int i = 0; i < bufferLen; i++) {
resultLayout.addChildAtIndex(CodePointLayout::Builder(buffer[i], font), i, i, nullptr);
UTF8Decoder decoder(buffer);
const char * currentPointer = buffer;
const char * nextPointer = decoder.nextCodePointPointer();
CodePoint codePoint = decoder.nextCodePoint();
assert(!codePoint.isCombining());
int layoutIndex = 0;
int bufferIndex = 0;
while (codePoint != KDCodePointNull && bufferIndex < bufferLen) {
resultLayout.addChildAtIndex(CodePointLayout::Builder(codePoint, font), layoutIndex, layoutIndex, nullptr);
layoutIndex++;
bufferIndex+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
while (codePoint.isCombining()) {
bufferIndex+= nextPointer - currentPointer;
currentPointer = nextPointer;
nextPointer = decoder.nextCodePointPointer();
codePoint = decoder.nextCodePoint();
}
}
return resultLayout;
}