mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-20 01:08:15 +01:00
To check whether an ExternalText could be written with Epsilon's fonts, UTF8Helper made a reference to Kandinsky, which is prohibited. This check is now done in Escher, before dispatching the event. Change-Id: I55e9db1ba43c3115775499db47b90a6bdd7cc7b3
493 lines
18 KiB
C++
493 lines
18 KiB
C++
#include <ion/unicode/utf8_helper.h>
|
||
#include <ion/unicode/utf8_decoder.h>
|
||
#include <kandinsky/font.h>
|
||
#include <string.h>
|
||
#include <assert.h>
|
||
#include <algorithm>
|
||
|
||
namespace UTF8Helper {
|
||
|
||
int CountOccurrences(const char * s, CodePoint c) {
|
||
assert(c != UCodePointNull);
|
||
int count = 0;
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||
/* The code point is one char long, so it is equal to its char translation.
|
||
* We can do a classic char search. */
|
||
const char * i = s;
|
||
while (*i != 0) {
|
||
if (*i == c) {
|
||
count++;
|
||
}
|
||
i++;
|
||
}
|
||
} else {
|
||
// The code point is more than one char long, we use a UTF8Decoder.
|
||
UTF8Decoder decoder(s);
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
while (codePoint != UCodePointNull) {
|
||
if (codePoint == c) {
|
||
count++;
|
||
}
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
}
|
||
return count;
|
||
}
|
||
|
||
const char * CodePointSearch(const char * s, CodePoint c, const char * stoppingPosition) {
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||
const char * result = s;
|
||
while (*result != 0 && *result != c && (stoppingPosition == nullptr || result != stoppingPosition)) {
|
||
result++;
|
||
}
|
||
return result;
|
||
}
|
||
UTF8Decoder decoder(s);
|
||
const char * currentPointer = s;
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
const char * nextPointer = decoder.stringPosition();
|
||
while (codePoint != UCodePointNull && codePoint != c && (stoppingPosition == nullptr || currentPointer < stoppingPosition)) {
|
||
currentPointer = nextPointer;
|
||
codePoint = decoder.nextCodePoint();
|
||
nextPointer = decoder.stringPosition();
|
||
}
|
||
return currentPointer;
|
||
}
|
||
|
||
bool HasCodePoint(const char * s, CodePoint c, const char * stoppingPosition) {
|
||
assert(c != 0);
|
||
const char * resultPosition = CodePointSearch(s, c, stoppingPosition);
|
||
return *resultPosition != 0 && (stoppingPosition == nullptr || resultPosition < stoppingPosition);
|
||
}
|
||
|
||
const char * NotCodePointSearch(const char * s, CodePoint c, bool goingLeft, const char * initialPosition) {
|
||
if (goingLeft && initialPosition == s) {
|
||
return s;
|
||
}
|
||
assert(goingLeft || initialPosition == nullptr);
|
||
assert(!goingLeft || initialPosition != nullptr);
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||
/* The code points are one char long, so they are equal to their char
|
||
* translations. We can do a classic char search. */
|
||
const char * codePointPointer = goingLeft ? initialPosition - 1 : s;
|
||
while ((goingLeft ?codePointPointer > s : *codePointPointer != 0) && *codePointPointer == c) {
|
||
codePointPointer += goingLeft ? -1 : 1;
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
if (goingLeft) {
|
||
UTF8Decoder decoder(s, initialPosition);
|
||
CodePoint codePoint = decoder.previousCodePoint();
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
while (codePointPointer > s && codePoint == c) {
|
||
codePoint = decoder.previousCodePoint();
|
||
codePointPointer = decoder.stringPosition();
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
UTF8Decoder decoder(s);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
while (codePoint != UCodePointNull && codePoint == c) {
|
||
codePointPointer = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
|
||
bool CopyAndRemoveCodePoints(char * dst, size_t dstSize, const char * src, CodePoint * codePoints, int numberOfCodePoints) {
|
||
UTF8Decoder decoder(src);
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
if (dstSize <= 0) {
|
||
return codePoint == UCodePointNull;
|
||
}
|
||
assert(numberOfCodePoints >= 1);
|
||
const char * currentPointer = src;
|
||
const char * nextPointer = decoder.stringPosition();
|
||
size_t bufferIndex = 0;
|
||
|
||
// Remove CodePoint c
|
||
while (codePoint != UCodePointNull && bufferIndex < dstSize) {
|
||
bool remove = false;
|
||
for (int i = 0; i < numberOfCodePoints; i++) {
|
||
if (codePoint == codePoints[i]) {
|
||
remove = true;
|
||
break;
|
||
}
|
||
}
|
||
if (!remove) {
|
||
size_t copySize = nextPointer - currentPointer;
|
||
if (copySize > dstSize - 1 - bufferIndex) {
|
||
// Copying the current code point to the buffer would overflow the buffer
|
||
break;
|
||
}
|
||
memcpy(dst + bufferIndex, currentPointer, copySize);
|
||
bufferIndex+= copySize;
|
||
}
|
||
currentPointer = nextPointer;
|
||
codePoint = decoder.nextCodePoint();
|
||
nextPointer = decoder.stringPosition();
|
||
}
|
||
*(dst + bufferIndex) = 0;
|
||
return codePoint == UCodePointNull;
|
||
}
|
||
|
||
void RemoveCodePoint(char * buffer, CodePoint c, const char * * pointerToUpdate, const char * stoppingPosition) {
|
||
constexpr int patternMaxSize = CodePoint::MaxCodePointCharLength + 1; // +1 for null terminating char
|
||
char pattern[patternMaxSize];
|
||
int codePointCharSize = UTF8Decoder::CharSizeOfCodePoint(c);
|
||
UTF8Decoder::CodePointToChars(c, pattern, codePointCharSize);
|
||
pattern[codePointCharSize] = '\0';
|
||
TextPair pair(pattern, "");
|
||
TryAndReplacePatternsInStringByPatterns(buffer, strlen(buffer), &pair, 1, true, pointerToUpdate, stoppingPosition);
|
||
}
|
||
|
||
/* Slides the null-terminated string text by slidingSize chars, in place.
 * - slidingSize > 0: the string (with its null terminating char) is moved
 *   slidingSize chars to the right. The first slidingSize chars of text are
 *   left untouched, for the caller to overwrite.
 * - slidingSize < 0: the first -slidingSize chars are dropped and the rest of
 *   the string is moved to the beginning of text.
 * - slidingSize == 0: nothing is moved.
 * Returns false, leaving text untouched, if the resulting length would be
 * negative or greater than textMaxLength. Returns true otherwise. */
bool SlideStringByNumberOfChar(char * text, int slidingSize, size_t textMaxLength) {
  const size_t lenText = strlen(text);
  if (slidingSize < 0) {
    /* Work on the magnitude as an unsigned value: lenText + slidingSize would
     * be computed in size_t and could never compare below 0. */
    const size_t removedSize = static_cast<size_t>(-static_cast<long long>(slidingSize));
    if (removedSize > lenText || lenText - removedSize > textMaxLength) {
      return false;
    }
    // Move only the remaining chars and the null terminating char.
    memmove(text, text + removedSize, lenText - removedSize + 1);
    return true;
  }
  const size_t addedSize = static_cast<size_t>(slidingSize);
  // Overflow-free form of lenText + addedSize > textMaxLength
  if (addedSize > textMaxLength || lenText > textMaxLength - addedSize) {
    return false;
  }
  if (addedSize > 0) {
    memmove(text + addedSize, text, lenText + 1);
  }
  // In case slidingSize = 0, there is nothing to do
  return true;
}
|
||
|
||
/* Replaces the first chars of a string by other ones. If the sizes are different
|
||
* the rest of the string will be moved right after the replacement chars.
|
||
* If successful returns true.*/
|
||
static bool replaceFirstCharsByPattern(char * text, size_t lengthOfPatternToRemove, const char * replacementPattern, size_t textMaxLength) {
|
||
size_t lengthOfReplacementPattern = strlen(replacementPattern);
|
||
if (lengthOfPatternToRemove <= strlen(text) && SlideStringByNumberOfChar(text, lengthOfReplacementPattern-lengthOfPatternToRemove, textMaxLength)) {
|
||
for (size_t i = 0; i < lengthOfReplacementPattern; i++) {
|
||
text[i] = replacementPattern[i];
|
||
}
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
void TryAndReplacePatternsInStringByPatterns(char * text, int textMaxLength, TextPair * textPairs, int numberOfPairs, bool firstToSecond, const char * * pointerToUpdate, const char * stoppingPosition) {
|
||
size_t i = 0;
|
||
size_t iPrev = 0;
|
||
size_t textLength = strlen(text);
|
||
size_t lengthOfParenthesisExtention = strlen("(\x11)");
|
||
while(i < textLength) {
|
||
iPrev = i;
|
||
bool didReplace = false;
|
||
for (int j = 0; j < numberOfPairs; j++) {
|
||
TextPair p = textPairs[j];
|
||
size_t firstStringLength = strlen(p.firstString());
|
||
size_t secondStringLength = strlen(p.secondString());
|
||
/* Instead of storing TextPair("√(\x11)", "sqrt(\x11)") for the keyboard
|
||
* events and TextPair("√", "sqrt") for the copy paste, we store just the
|
||
* first and register it as "function". Therefore we can decide to remove
|
||
* the (\x11) part or not depending on the application. This process is
|
||
* repeated for all 4 function keys usable in python (√, ℯ, ln, log)*/
|
||
if (p.removeParenthesesExtention()) {
|
||
firstStringLength -= lengthOfParenthesisExtention;
|
||
secondStringLength -= lengthOfParenthesisExtention;
|
||
}
|
||
char firstString[TextPair::k_maxLength];
|
||
char secondString[TextPair::k_maxLength];
|
||
// Getting rid of the eventual (\x11) part
|
||
strlcpy((char *)firstString, p.firstString(), firstStringLength+1);
|
||
strlcpy((char *)secondString, p.secondString(), secondStringLength+1);
|
||
|
||
char * matchedString = firstToSecond ? firstString : secondString;
|
||
size_t matchedStringLength = strlen(matchedString);
|
||
char * replacingString = firstToSecond ? secondString : firstString;
|
||
size_t replacingStringLength = strlen(replacingString);
|
||
|
||
if (strncmp(&text[i], matchedString, matchedStringLength) == 0) {
|
||
didReplace = replaceFirstCharsByPattern(&text[i], matchedStringLength, replacingString, textMaxLength);
|
||
if (didReplace) {
|
||
int delta = replacingStringLength - matchedStringLength;
|
||
textLength += delta;
|
||
if (pointerToUpdate != nullptr && &text[i] < *pointerToUpdate) {
|
||
// We still have to update the pointer as the modification cursor has not yet exceeded it.
|
||
*pointerToUpdate = *pointerToUpdate + delta;
|
||
}
|
||
if (stoppingPosition != nullptr) {
|
||
stoppingPosition = stoppingPosition + delta;
|
||
}
|
||
if (replacingStringLength != 0) {
|
||
i += replacingStringLength - 1;
|
||
/* When working with multiple TextPairs at the same time, it can be
|
||
* usefull to go back by one char. That is the case for empty matrixes
|
||
* Indeed, in the string ",,]", ",," is replaced by ",\x11,".
|
||
* The ",]" pattern right after would be missed if not for the -1.*/
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if (iPrev == i && !didReplace) {
|
||
// In case no pattern matched with the text, we go to the next char.
|
||
i++;
|
||
}
|
||
if ((stoppingPosition != nullptr) && (&text[i] >= stoppingPosition)) {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
size_t CopyUntilCodePoint(char * dst, size_t dstSize, const char * src, CodePoint c) {
|
||
UTF8Decoder decoder(src);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
while (codePoint != UCodePointNull && codePoint != c) {
|
||
codePointPointer = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
assert(codePointPointer >= src);
|
||
size_t copySize = std::min(dstSize - 1, static_cast<size_t>(codePointPointer - src));
|
||
assert(UTF8Helper::CodePointIs(src + copySize, 0) || UTF8Helper::CodePointIs(src + copySize, c));
|
||
memmove(dst, src, copySize);
|
||
assert(copySize < dstSize);
|
||
dst[copySize] = 0;
|
||
return copySize;
|
||
}
|
||
|
||
const char * PerformAtCodePoints(const char * s, CodePoint c, CodePointAction actionCodePoint, CodePointAction actionOtherCodePoint, void * contextPointer, int contextInt1, int contextInt2, CodePoint stoppingCodePoint, bool goingRight, const char * initialPosition, const char * stoppingPosition) {
|
||
/* If we are decoding towards the left, we must have a starting position. If
|
||
* we are decoding towards the right, the starting position is the start of
|
||
* string. */
|
||
assert((goingRight && initialPosition == nullptr)
|
||
|| (!goingRight && initialPosition != nullptr));
|
||
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1 && UTF8Decoder::CharSizeOfCodePoint(stoppingCodePoint) == 1) {
|
||
/* The code points are one char long, so they are equal to their char
|
||
* translations. We can do a classic char search. */
|
||
if (goingRight) {
|
||
const char * i = s;
|
||
while (*i != stoppingCodePoint && *i != 0 && i != stoppingPosition) {
|
||
if (*i == c) {
|
||
actionCodePoint(i - s, contextPointer, contextInt1, contextInt2);
|
||
} else {
|
||
// FIXME we are stopping at every char, not every code point -> it does not make any bug for now
|
||
actionOtherCodePoint(i - s, contextPointer, contextInt1, contextInt2);
|
||
}
|
||
i++;
|
||
}
|
||
return i;
|
||
}
|
||
const char * i = initialPosition - 1;
|
||
while (i >= s && *i != stoppingCodePoint && i != stoppingPosition) {
|
||
if (*i == c) {
|
||
actionCodePoint(i - s, contextPointer, contextInt1, contextInt2);
|
||
} else {
|
||
actionOtherCodePoint(i - s, contextPointer, contextInt1, contextInt2);
|
||
}
|
||
i--;
|
||
}
|
||
return i;
|
||
}
|
||
// The code point is more than one char long, we use a UTF8Decoder.
|
||
if (goingRight) {
|
||
UTF8Decoder decoder(s);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
while (codePoint != stoppingCodePoint && codePoint != UCodePointNull && codePointPointer != stoppingPosition) {
|
||
if (codePoint == c) {
|
||
actionCodePoint(codePointPointer - s, contextPointer, contextInt1, contextInt2);
|
||
} else {
|
||
actionOtherCodePoint(codePointPointer - s, contextPointer, contextInt1, contextInt2);
|
||
}
|
||
codePointPointer = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
assert(!goingRight);
|
||
if (initialPosition <= s) {
|
||
return initialPosition;
|
||
}
|
||
UTF8Decoder decoder(s, initialPosition);
|
||
CodePoint codePoint = decoder.previousCodePoint();
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
while (codePointPointer >= s && codePoint != stoppingCodePoint && codePointPointer != stoppingPosition) {
|
||
if (codePoint == c) {
|
||
actionCodePoint(codePointPointer - s, contextPointer, contextInt1, contextInt2);
|
||
} else {
|
||
actionOtherCodePoint(codePointPointer - s, contextPointer, contextInt1, contextInt2);
|
||
}
|
||
if (codePointPointer > s) {
|
||
codePoint = decoder.previousCodePoint();
|
||
codePointPointer = decoder.stringPosition();
|
||
} else {
|
||
/* If the current pointer is s, we cannot continue decoding. Decreasing s
|
||
* will stop the while loop. */
|
||
codePointPointer = s-1;
|
||
}
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
|
||
/* Returns the code point that ends right before location in buffer, or
 * UCodePointNull if location is the beginning of buffer. */
CodePoint PreviousCodePoint(const char * buffer, const char * location) {
  if (location != buffer) {
    UTF8Decoder decoder(buffer, location);
    return decoder.previousCodePoint();
  }
  // Nothing stands before the beginning of the buffer.
  return UCodePointNull;
}
|
||
|
||
// Returns the code point decoded from the chars starting at location.
CodePoint CodePointAtLocation(const char * location) {
  UTF8Decoder decoder(location);
  CodePoint decoded = decoder.nextCodePoint();
  return decoded;
}
|
||
|
||
bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c) {
|
||
assert(location > buffer);
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||
return *(location -1) == c;
|
||
}
|
||
return PreviousCodePoint(buffer, location) == c;
|
||
}
|
||
|
||
bool CodePointIs(const char * location, CodePoint c) {
|
||
if (UTF8Decoder::CharSizeOfCodePoint(c) == 1) {
|
||
return *(location) == c;
|
||
}
|
||
return CodePointAtLocation(location) == c;
|
||
}
|
||
|
||
/* Returns true if c ends a word: a space, a line feed or the null code
 * point. */
bool CodePointIsEndOfWord(CodePoint c) {
  if (c == UCodePointNull) {
    return true;
  }
  return c == ' ' || c == '\n';
}
|
||
|
||
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c) {
|
||
if (location <= text) {
|
||
assert(location == text);
|
||
return 0;
|
||
}
|
||
|
||
// Find the previous glyph
|
||
UTF8Decoder decoder(text, location);
|
||
const char * previousGlyphPos = decoder.previousGlyphPosition();
|
||
if (c != nullptr) {
|
||
*c = decoder.nextCodePoint();
|
||
}
|
||
|
||
// Shift the buffer
|
||
int shiftedSize = location - previousGlyphPos;
|
||
char * iterator = const_cast<char *>(previousGlyphPos);
|
||
assert(iterator >= text);
|
||
do {
|
||
*iterator = *(iterator + shiftedSize);
|
||
iterator++;
|
||
} while (*(iterator - 1) != 0); // Stop shifting after writing a null terminating char.
|
||
|
||
return shiftedSize;
|
||
}
|
||
|
||
const char * CodePointAtGlyphOffset(const char * buffer, int position) {
|
||
assert(buffer != nullptr);
|
||
if (position < 0) {
|
||
return buffer;
|
||
}
|
||
|
||
UTF8Decoder decoder(buffer);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
int glyphIndex = 0;
|
||
while (codePoint != UCodePointNull) {
|
||
if (glyphIndex == position) {
|
||
assert(!codePoint.isCombining());
|
||
return codePointPointer;
|
||
}
|
||
if (!codePoint.isCombining()) {
|
||
glyphIndex++;
|
||
}
|
||
codePointPointer = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
|
||
size_t GlyphOffsetAtCodePoint(const char * buffer, const char * position) {
|
||
assert(position >= buffer);
|
||
|
||
UTF8Decoder decoder(buffer);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
size_t glyphIndex = 0;
|
||
while (codePoint != UCodePointNull) {
|
||
if (codePointPointer == position) {
|
||
assert(!codePoint.isCombining());
|
||
return glyphIndex;
|
||
}
|
||
if (!codePoint.isCombining()) {
|
||
glyphIndex++;
|
||
}
|
||
codePointPointer = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
return glyphIndex;
|
||
}
|
||
|
||
size_t StringGlyphLength(const char * s, int maxSize) {
|
||
if (maxSize == 0) {
|
||
return 0;
|
||
}
|
||
UTF8Decoder decoder(s);
|
||
CodePoint codePoint = 0;
|
||
size_t glyphIndex = 0;
|
||
while (maxSize < 0 || ((decoder.stringPosition() - s) < maxSize)) {
|
||
codePoint = decoder.nextCodePoint();
|
||
if (codePoint == UCodePointNull) {
|
||
break;
|
||
}
|
||
if (!codePoint.isCombining()) {
|
||
glyphIndex++;
|
||
}
|
||
}
|
||
return glyphIndex;
|
||
}
|
||
|
||
const char * BeginningOfWord(const char * text, const char * word) {
|
||
if (text == word) {
|
||
return text;
|
||
}
|
||
UTF8Decoder decoder(text, word);
|
||
const char * codePointPointer = decoder.stringPosition();
|
||
CodePoint codePoint = decoder.previousCodePoint();
|
||
while (!CodePointIsEndOfWord(codePoint)) {
|
||
codePointPointer = decoder.stringPosition();
|
||
if (codePointPointer == text) {
|
||
break;
|
||
}
|
||
codePoint = decoder.previousCodePoint();
|
||
}
|
||
return codePointPointer;
|
||
}
|
||
|
||
const char * EndOfWord(const char * word) {
|
||
UTF8Decoder decoder(word);
|
||
CodePoint codePoint = decoder.nextCodePoint();
|
||
const char * result = word;
|
||
while (!CodePointIsEndOfWord(codePoint)) {
|
||
result = decoder.stringPosition();
|
||
codePoint = decoder.nextCodePoint();
|
||
}
|
||
return result;
|
||
}
|
||
|
||
void countGlyphsInLine(const char * text, int * before, int * after, const char * beforeLocation, const char *afterLocation) {
|
||
UTF8Helper::CodePointAction countGlyph = [](int, void * glyphCount, int, int) {
|
||
int * castedCount = (int *) glyphCount;
|
||
*castedCount = *castedCount + 1;
|
||
};
|
||
// Count glyphs before
|
||
UTF8Helper::PerformAtCodePoints(text, UCodePointLineFeed, nullptr, countGlyph, before, 0, 0, UCodePointLineFeed, false, beforeLocation);
|
||
if (afterLocation == nullptr) {
|
||
afterLocation = beforeLocation;
|
||
}
|
||
// Count glyphs after
|
||
UTF8Helper::PerformAtCodePoints(afterLocation, UCodePointLineFeed, nullptr, countGlyph, after, 0, 0, UCodePointLineFeed);
|
||
}
|
||
|
||
}
|