mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-19 00:37:25 +01:00
363 lines
14 KiB
C++
363 lines
14 KiB
C++
#include <quiz.h>
|
||
#include <ion/unicode/utf8_decoder.h>
|
||
#include <ion/unicode/utf8_helper.h>
|
||
#include <string.h>
|
||
|
||
// Test helper: asserts that UTF8Helper::CountOccurrences(string, c) is equal
// to the expected count `result`.
void assert_occurrences_count(const char * string, CodePoint c, int result) {
  const bool countMatches = (UTF8Helper::CountOccurrences(string, c) == result);
  quiz_assert(countMatches);
}
|
||
|
||
// Checks occurrence counts on ASCII-only strings and on a string that also
// contains non-ASCII code points.
QUIZ_CASE(ion_utf8_helper_count_occurrences) {
  // ASCII-only strings
  assert_occurrences_count("1234", '1', 1);
  assert_occurrences_count("2232", '2', 3);

  // Same mixed string, searched for two different non-ASCII code points
  const char * mixed = "2π2∑32∑";
  assert_occurrences_count(mixed, UCodePointGreekSmallLetterPi, 1);
  assert_occurrences_count(mixed, UCodePointNArySummation, 2);
}
|
||
|
||
// Test helper: asserts that UTF8Helper::CodePointSearch(string, c) returns
// exactly the pointer `result`.
void assert_code_point_searched_is(const char * string, CodePoint c, const char * result) {
  auto found = UTF8Helper::CodePointSearch(string, c);
  quiz_assert(found == result);
}
|
||
|
||
// Checks the pointer returned by a code point search for a code point at the
// start of the string, one right after it, and one that is absent.
QUIZ_CASE(ion_utf8_helper_code_point_search) {
  const char * text = "2π2∑32∑";

  // Searched code point is the first one: the start of the string is expected
  assert_code_point_searched_is(text, '2', text);

  // π directly follows the one-byte '2'
  assert_code_point_searched_is(text, UCodePointGreekSmallLetterPi, text + 1);

  // 'a' is absent: the address of the null terminator is expected
  const char * terminator = text + strlen(text);
  assert_code_point_searched_is(text, 'a', terminator);
}
|
||
|
||
// Test helper: asserts that
// UTF8Helper::NotCodePointSearch(string, c, goingLeft, initialPosition)
// returns exactly the pointer `result`.
void assert_not_code_point_searched_is(const char * string, CodePoint c, bool goingLeft, const char * initialPosition, const char * result) {
  auto found = UTF8Helper::NotCodePointSearch(string, c, goingLeft, initialPosition);
  quiz_assert(found == result);
}
|
||
|
||
// Checks NotCodePointSearch in both directions, on ASCII and non-ASCII code
// points.
QUIZ_CASE(ion_utf8_helper_not_code_point_search) {
  // Going right, without an initial position
  {
    const char * text = "2π2∑32∑";
    assert_not_code_point_searched_is(text, '2', false, nullptr, text + 1);
  }
  {
    // Every code point is the excluded one: the end of the string is expected
    const char * text = "aaaaa";
    assert_not_code_point_searched_is(text, 'a', false, nullptr, text + 5);
  }
  {
    // The expected position is just past the leading π
    const char * text = "π234";
    const char * afterPi = text + UTF8Decoder::CharSizeOfCodePoint(UCodePointGreekSmallLetterPi);
    assert_not_code_point_searched_is(text, UCodePointGreekSmallLetterPi, false, nullptr, afterPi);
  }

  // Going left, starting from offset 5
  {
    const char * text = "aaaaa";
    const char * start = text + 5;
    assert_not_code_point_searched_is(text, 'a', true, start, text);
    assert_not_code_point_searched_is(text, 'b', true, start, text + 4);
  }
  {
    const char * text = "234π";
    assert_not_code_point_searched_is(text, UCodePointGreekSmallLetterPi, true, text + 5, text + 2);
  }
}
|
||
|
||
// Test helper: calls UTF8Helper::CopyAndRemoveCodePoints(dst, dstSize, src, c,
// numberOfCodePoints), then asserts that dst holds `result`, null terminator
// included. It first asserts that dstSize is large enough to hold `result`.
void assert_copy_and_remove_code_points_gives(char * dst, size_t dstSize, const char * src, CodePoint * c, int numberOfCodePoints, const char * result) {
  // Number of bytes to compare: the expected string plus its null terminator
  const size_t expectedSize = strlen(result) + 1;
  quiz_assert(dstSize >= expectedSize);

  UTF8Helper::CopyAndRemoveCodePoints(dst, dstSize, src, c, numberOfCodePoints);

  // Byte-by-byte comparison, one assertion per byte
  const char * expected = result;
  for (const char * actual = dst; actual < dst + expectedSize; actual++, expected++) {
    quiz_assert(*actual == *expected);
  }
}
|
||
// Size in bytes of the scratch buffers declared as `char buffer[bufferSize]`
// in this file. It is a compile-time constant because it is used as an array
// bound: with a non-constant bound those arrays would be variable-length
// arrays, which are a compiler extension rather than standard C++.
static constexpr int bufferSize = 100;
|
||
// Checks CopyAndRemoveCodePoints through
// assert_copy_and_remove_code_points_gives, on ASCII and non-ASCII code
// points, with several destination sizes, and with more than one code point
// to remove.
QUIZ_CASE(ion_utf8_copy_and_remove_code_point) {
  char buffer[bufferSize];

  // Code point at the start of the source
  {
    CodePoint removed[] = {'1'};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "12345", removed, 1, "2345");
  }

  // Code point in the middle of the source
  {
    CodePoint removed[] = {'2'};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "12345", removed, 1, "1345");
  }

  // Code point occurring several times
  {
    CodePoint removed[] = {'2'};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "2123224252", removed, 1, "1345");
  }

  // Code point absent from the source: the source is expected unchanged
  {
    CodePoint removed[] = {'6'};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "12345", removed, 1, "12345");
  }

  // Non-ASCII code point, occurring once
  {
    CodePoint removed[] = {UCodePointLatinLetterSmallCapitalE};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "12ᴇ4", removed, 1, "124");
  }

  // Non-ASCII code point, occurring several times in a row
  {
    CodePoint removed[] = {UCodePointLatinLetterSmallCapitalE};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "12ᴇᴇᴇ4", removed, 1, "124");
  }

  // The buffer size is too small to hold the source: with sizes 6 and 7 the
  // trailing ᴇ is expected to be dropped, with size 8 it is expected to fit
  {
    const char * source = "1234ᴇ";
    CodePoint removed[] = {'5'};
    assert_copy_and_remove_code_points_gives(buffer, 6, source, removed, 1, "1234");
    assert_copy_and_remove_code_points_gives(buffer, 7, source, removed, 1, "1234");
    assert_copy_and_remove_code_points_gives(buffer, 8, source, removed, 1, "1234ᴇ");
  }

  // Size 7 is enough once '4' has been removed
  {
    CodePoint removed[] = {'4'};
    assert_copy_and_remove_code_points_gives(buffer, 7, "1234ᴇ", removed, 1, "123ᴇ");
  }

  // Remove several code points
  {
    CodePoint removed[] = {'4', UCodePointLatinLetterSmallCapitalE};
    assert_copy_and_remove_code_points_gives(buffer, bufferSize, "1234ᴇ3", removed, 2, "1233");
  }
}
|
||
|
||
// Test helper: calls UTF8Helper::RemoveCodePoint(buffer, c, indexToUpdate,
// stoppingPosition), then asserts that *indexToUpdate equals
// indexToUpdateResult and that buffer now holds `result`.
void assert_remove_code_point_gives(char * buffer, CodePoint c, const char * * indexToUpdate, const char * stoppingPosition, const char * indexToUpdateResult, const char * result) {
  UTF8Helper::RemoveCodePoint(buffer, c, indexToUpdate, stoppingPosition);

  const bool indexIsCorrect = (*indexToUpdate == indexToUpdateResult);
  quiz_assert(indexIsCorrect);

  const bool textIsCorrect = (strcmp(buffer, result) == 0);
  quiz_assert(textIsCorrect);
}
|
||
|
||
// Checks RemoveCodePoint through assert_remove_code_point_gives: each scenario
// fills the buffer, places a cursor inside it, and states the expected cursor
// and text after the removal.
QUIZ_CASE(ion_utf8_remove_code_point) {
  char buffer[bufferSize];

  // Code point absent from the text: text and cursor are expected unchanged
  {
    const char * source = "2345";
    strlcpy(buffer, source, strlen(source) + 1);
    CodePoint removed = '1';
    const char * cursor = buffer + 3;
    const char * expectedCursor = cursor;
    assert_remove_code_point_gives(buffer, removed, &cursor, nullptr, expectedCursor, "2345");
  }

  // One occurrence before the cursor: the cursor is expected to move back by
  // the size of one code point
  {
    const char * source = "12345";
    strlcpy(buffer, source, strlen(source) + 1);
    CodePoint removed = '2';
    const char * cursor = buffer + 3;
    const char * expectedCursor = cursor - UTF8Decoder::CharSizeOfCodePoint(removed);
    assert_remove_code_point_gives(buffer, removed, &cursor, nullptr, expectedCursor, "1345");
  }

  // Several occurrences and a stopping position at offset 8: the final '2' is
  // expected to remain in the text
  {
    const char * source = "2123224252";
    strlcpy(buffer, source, strlen(source) + 1);
    CodePoint removed = '2';
    const char * cursor = buffer + 5;
    const char * expectedCursor = cursor - 3*UTF8Decoder::CharSizeOfCodePoint(removed);
    assert_remove_code_point_gives(buffer, removed, &cursor, buffer+8, expectedCursor, "13452");
  }

  // Non-ASCII code point, cursor at the end of the text
  {
    const char * source = "12ᴇ4";
    strlcpy(buffer, source, strlen(source) + 1);
    CodePoint removed = UCodePointLatinLetterSmallCapitalE;
    const char * cursor = buffer + strlen(source);
    const char * expectedCursor = cursor - UTF8Decoder::CharSizeOfCodePoint(removed);
    assert_remove_code_point_gives(buffer, removed, &cursor, nullptr, expectedCursor, "124");
  }

  // Several consecutive non-ASCII occurrences, cursor right after the first
  {
    const char * source = "12ᴇᴇᴇ4";
    strlcpy(buffer, source, strlen(source) + 1);
    CodePoint removed = UCodePointLatinLetterSmallCapitalE;
    const char * cursor = buffer + 2 + UTF8Decoder::CharSizeOfCodePoint(removed);
    const char * expectedCursor = buffer + 2;
    assert_remove_code_point_gives(buffer, removed, &cursor, nullptr, expectedCursor, "124");
  }
}
|
||
|
||
void assert_slide_string_by_number_of_char_gives(const char * string, int slidingSize, bool successResult, const char * stringResult = nullptr) {
|
||
char buffer[bufferSize];
|
||
strlcpy(buffer, string, bufferSize);
|
||
bool success = UTF8Helper::SlideStringByNumberOfChar((char *)buffer, slidingSize, bufferSize);
|
||
quiz_assert(success == successResult);
|
||
if (successResult) {
|
||
quiz_assert(strncmp(buffer, stringResult, bufferSize) == 0);
|
||
}
|
||
}
|
||
|
||
|
||
// Checks SlideStringByNumberOfChar through
// assert_slide_string_by_number_of_char_gives, with positive and negative
// sliding sizes, including sizes for which a failure is expected.
QUIZ_CASE(ion_utf8_move_string_from_index_by_number_of_char) {
  // Positive sliding sizes
  const char * string1 = "12345";
  assert_slide_string_by_number_of_char_gives(string1, 1, true, "112345");

  const char * string2 = "(1+3)";
  assert_slide_string_by_number_of_char_gives(string2, 3, true, "(1+(1+3)");
  /* strlen returns an unsigned size_t. Convert it to int before mixing it with
   * signed values, so that the sliding sizes below are computed with signed
   * arithmetic instead of relying on unsigned wrap-around followed by a
   * narrowing conversion back to int. */
  const int string2Length = static_cast<int>(strlen(string2));
  assert_slide_string_by_number_of_char_gives(string2, bufferSize - string2Length/2, false);

  // Negative sliding sizes
  const char * string3 = "exp(3+4)";
  const int string3Length = static_cast<int>(strlen(string3));
  assert_slide_string_by_number_of_char_gives(string3, -3, true, "(3+4)");
  // Sliding by more than the string length is expected to fail
  assert_slide_string_by_number_of_char_gives(string3, -(string3Length + 3), false);
  // Sliding by -8, the length of string3, is expected to give an empty string
  assert_slide_string_by_number_of_char_gives(string3, -8, true, "");
}
|
||
|
||
void assert_try_and_replace_pattern_in_string_by_pattern_gives(char * buffer, int bufferSize, UTF8Helper::TextPair * textPairs, int numberOfPairs, bool firstToSecond, const char * stringResult, const char ** indexToUpdate = nullptr, const char * indexToUpdateResult = nullptr, const char * stoppingPosition = nullptr) {
|
||
UTF8Helper::TryAndReplacePatternsInStringByPatterns(buffer, bufferSize, textPairs, numberOfPairs, firstToSecond, indexToUpdate, stoppingPosition);
|
||
quiz_assert(strncmp(buffer, stringResult, bufferSize) == 0);
|
||
if (indexToUpdateResult != nullptr) {
|
||
quiz_assert(*indexToUpdate == indexToUpdateResult);
|
||
}
|
||
}
|
||
|
||
// Checks TryAndReplacePatternsInStringByPatterns with two text pairs, in both
// replacement directions, with and without a stopping position.
QUIZ_CASE(ion_utf8_try_and_replace_pattern_in_string_by_pattern) {
  constexpr int numberOfPairs = 2;
  constexpr UTF8Helper::TextPair textPairs[numberOfPairs] = {
    UTF8Helper::TextPair("12", "2.3"),
    UTF8Helper::TextPair("exp", "ln"),
  };
  // The assertion helper takes a non-const pointer to the pairs
  UTF8Helper::TextPair * pairs = const_cast<UTF8Helper::TextPair *>(&textPairs[0]);

  char buffer[bufferSize];

  // First to second, stopping position left to its default
  {
    strlcpy(buffer, "1234512", bufferSize);
    const char * cursor = buffer + 3;
    const char * expectedCursor = cursor + 1;
    assert_try_and_replace_pattern_in_string_by_pattern_gives(buffer, bufferSize, pairs, numberOfPairs, true, "2.33452.3", &cursor, expectedCursor);
  }

  // First to second, stopping at offset 5: the trailing "12" is expected to
  // be left untouched
  {
    strlcpy(buffer, "exp(2.3)12", bufferSize);
    const char * cursor = buffer + 3;
    const char * expectedCursor = cursor - 1;
    const char * stop = buffer + 5;
    assert_try_and_replace_pattern_in_string_by_pattern_gives(buffer, bufferSize, pairs, numberOfPairs, true, "ln(2.3)12", &cursor, expectedCursor, stop);
  }

  // Second to first, stopping at offset 7: the trailing "ln" is expected to
  // be left untouched
  {
    strlcpy(buffer, "12*ln(7)+ln", bufferSize);
    const char * cursor = buffer + 7;
    const char * expectedCursor = cursor + 1;
    const char * stop = buffer + 7;
    assert_try_and_replace_pattern_in_string_by_pattern_gives(buffer, bufferSize, pairs, numberOfPairs, false, "12*exp(7)+ln", &cursor, expectedCursor, stop);
  }
}
|
||
|
||
// Test helper: asserts that UTF8Helper::CopyUntilCodePoint(dst, dstSize, src,
// c) returns returnedResult, then that dst holds `result`.
void assert_string_copy_until_code_point_gives(char * dst, size_t dstSize, const char * src, CodePoint c, const char * result, size_t returnedResult) {
  const bool returnValueMatches = (UTF8Helper::CopyUntilCodePoint(dst, dstSize, src, c) == returnedResult);
  quiz_assert(returnValueMatches);

  const bool textMatches = (strcmp(dst, result) == 0);
  quiz_assert(textMatches);
}
|
||
|
||
// Checks CopyUntilCodePoint when the stop code point is first in the source,
// in the middle of it, and absent from it.
QUIZ_CASE(ion_utf8_helper_copy_until_code_point) {
  char buffer[bufferSize];
  const char * source = "1234";

  // Stop code point is first: an empty copy and a return value of 0 are expected
  {
    CodePoint stop = '1';
    const size_t expectedReturn = 0;
    assert_string_copy_until_code_point_gives(buffer, bufferSize, source, stop, "", expectedReturn);
  }

  // Stop code point in the middle: only what precedes it is expected
  {
    CodePoint stop = '3';
    const size_t expectedReturn = 2;
    assert_string_copy_until_code_point_gives(buffer, bufferSize, source, stop, "12", expectedReturn);
  }

  // Stop code point absent: the whole source is expected
  {
    CodePoint stop = '6';
    const size_t expectedReturn = 4;
    assert_string_copy_until_code_point_gives(buffer, bufferSize, source, stop, source, expectedReturn);
  }
}
|
||
|
||
// Checks PreviousCodePointIs on an ASCII string and on a string ending with a
// non-ASCII code point.
QUIZ_CASE(ion_utf8_helper_previous_code_point_is) {
  // From offset 2 of "1234", the previous code point is '2' and not '4'
  const char * ascii = "1234";
  const char * afterTwo = ascii + 2;
  quiz_assert(UTF8Helper::PreviousCodePointIs(ascii, afterTwo, '2'));
  quiz_assert(!UTF8Helper::PreviousCodePointIs(ascii, afterTwo, '4'));

  // From the end of the string, the previous code point is the trailing ∑
  const char * withSum = "1234∑";
  const char * end = withSum + strlen(withSum);
  quiz_assert(UTF8Helper::PreviousCodePointIs(withSum, end, UCodePointNArySummation));
}
|
||
|
||
// Checks CodePointIs on the first code point of an ASCII string and of a
// string starting with a non-ASCII code point.
QUIZ_CASE(ion_utf8_helper_code_point_is) {
  // "34" starts with '3', not with '4'
  const char * ascii = "34";
  const bool startsWithThree = UTF8Helper::CodePointIs(ascii, '3');
  quiz_assert(startsWithThree);
  const bool startsWithFour = UTF8Helper::CodePointIs(ascii, '4');
  quiz_assert(!startsWithFour);

  // Leading non-ASCII code point
  const char * withSum = "∑1234";
  quiz_assert(UTF8Helper::CodePointIs(withSum, UCodePointNArySummation));
}
|
||
|
||
void assert_string_remove_previous_glyph_gives(const char * text, char * location, const char * result, int returnedResult, CodePoint returnedCodePoint) {
|
||
CodePoint c = 0;
|
||
quiz_assert(UTF8Helper::RemovePreviousGlyph(text, location, &c) == returnedResult);
|
||
quiz_assert(c == returnedCodePoint);
|
||
quiz_assert(strcmp(text, result) == 0);
|
||
}
|
||
|
||
// Checks RemovePreviousGlyph on a glyph made of a base letter followed by a
// combining code point, and on a plain ASCII glyph.
QUIZ_CASE(ion_utf8_helper_remove_previous_glyph) {
  char buffer[bufferSize];

  // 3é4, where é is written as 'e' (0x65) followed by the bytes 0xCC 0x81
  // (UTF-8 encoding of U+0301, the combining acute accent)
  {
    buffer[0] = '3';
    buffer[1] = 0x65;
    buffer[2] = 0xCC;
    buffer[3] = 0x81;
    buffer[4] = '4';
    buffer[5] = 0;
    const size_t textLength = 5;
    // Location of the '4', right after the accented glyph
    char * location = buffer + textLength - 1;
    const int expectedReturn = static_cast<int>(textLength - 2);
    CodePoint expectedCodePoint = 'e';
    assert_string_remove_previous_glyph_gives(buffer, location, "34", expectedReturn, expectedCodePoint);
  }

  // Plain ASCII text, location at the end of the text
  {
    const char * source = "345";
    const size_t textLength = strlen(source);
    strlcpy(buffer, source, textLength+1);
    char * location = buffer + textLength;
    int expectedReturn = UTF8Decoder::CharSizeOfCodePoint('5');
    CodePoint expectedCodePoint = '5';
    assert_string_remove_previous_glyph_gives(buffer, location, "34", expectedReturn, expectedCodePoint);
  }
}
|
||
|
||
// Checks CodePointAtGlyphOffset on an ASCII string and on a string containing
// non-ASCII code points.
QUIZ_CASE(ion_utf8_code_point_at_glyph_offset) {
  // ASCII: glyph offset i is expected at byte offset i
  const char * ascii = "abc";
  for (int offset = 0; offset < 3; offset++) {
    quiz_assert(UTF8Helper::CodePointAtGlyphOffset(ascii, offset) == ascii + offset);
  }

  // Glyph offset 4 is the 't', located two bytes before the end of the string
  const char * mixed = "a∑∫𝐢ty";
  const char * expected = mixed + strlen(mixed) - 2;
  quiz_assert(UTF8Helper::CodePointAtGlyphOffset(mixed, 4) == expected);
}
|
||
|
||
// Checks GlyphOffsetAtCodePoint on an ASCII string and on a string containing
// non-ASCII code points.
QUIZ_CASE(ion_utf8_glyph_offset_at_code_point) {
  // ASCII: byte offset i is expected to be glyph offset i
  const char * ascii = "abc";
  for (int offset = 0; offset < 3; offset++) {
    quiz_assert(UTF8Helper::GlyphOffsetAtCodePoint(ascii, ascii + offset) == offset);
  }

  // The 't', two bytes before the end of the string, is at glyph offset 4
  const char * mixed = "a∑∫𝐢ty";
  const char * location = mixed + strlen(mixed) - 2;
  quiz_assert(UTF8Helper::GlyphOffsetAtCodePoint(mixed, location) == 4);
}
|
||
|
||
// Test helper: asserts that UTF8Helper::StringGlyphLength(string, maxSize) is
// equal to `result`.
void assert_string_glyph_length_is(const char * string, int maxSize, size_t result) {
  const bool lengthMatches = (UTF8Helper::StringGlyphLength(string, maxSize) == result);
  quiz_assert(lengthMatches);
}
|
||
|
||
// Checks StringGlyphLength on ASCII and non-ASCII strings, with a maxSize of
// -1 or a positive maxSize, and on a malformed UTF-8 string.
QUIZ_CASE(ion_utf8_helper_string_glyph_length) {
  // maxSize of -1: every string below is expected to count 3 glyphs
  assert_string_glyph_length_is("123", -1, 3);
  assert_string_glyph_length_is("1ᴇ3", -1, 3);
  assert_string_glyph_length_is("∑∫𝐢", -1, 3);

  // Positive maxSize smaller than the string
  assert_string_glyph_length_is("123", 2, 2);

  // Malformed utf-8 string: two 0xFF bytes follow "abc"
  uint8_t malformed[] = {'a', 'b', 'c', 0b11111111, 0b11111111, 0};
  assert_string_glyph_length_is(reinterpret_cast<const char *>(malformed), 3, 3);
}
|
||
|
||
|
||
// Test helper: asserts that UTF8Helper::BeginningOfWord(text, word) returns
// exactly the pointer beginningOfWord.
void assert_beginning_of_word_is(const char * text, const char * word, const char * beginningOfWord) {
  auto found = UTF8Helper::BeginningOfWord(text, word);
  quiz_assert(found == beginningOfWord);
}
|
||
|
||
// Checks BeginningOfWord from several positions of a sentence containing
// spaces, a '+' and a line break.
QUIZ_CASE(ion_utf8_helper_beginning_of_word) {
  const char * sentence = "01 34+ \n89";

  // Offsets 0, 1 and 2 are all expected to map to the start of the sentence
  for (int offset = 0; offset <= 2; offset++) {
    assert_beginning_of_word_is(sentence, sentence + offset, sentence);
  }

  // From the '+' at offset 5, the expected beginning is offset 3
  assert_beginning_of_word_is(sentence, sentence + 5, sentence + 3);

  // Offset 8, right after the line break, is expected to map to itself
  assert_beginning_of_word_is(sentence, sentence + 8, sentence + 8);
}
|
||
|
||
// Test helper: asserts that UTF8Helper::EndOfWord(word) returns exactly the
// pointer endOfWord.
void assert_end_of_word_is(const char * word, const char * endOfWord) {
  auto found = UTF8Helper::EndOfWord(word);
  quiz_assert(found == endOfWord);
}
|
||
|
||
// Checks EndOfWord from several positions of a sentence containing spaces and
// a '+'.
QUIZ_CASE(ion_utf8_helper_end_of_word) {
  const char * sentence = "01 34+ 789";
  const char * firstSpace = sentence + 2;

  // From the start of the sentence, and from the first space itself, the
  // expected end is the first space
  assert_end_of_word_is(sentence, firstSpace);
  assert_end_of_word_is(firstSpace, firstSpace);

  // From offset 3, the expected end is offset 6
  assert_end_of_word_is(sentence + 3, sentence + 6);

  // From offset 8, the expected end is offset 10, the null terminator
  assert_end_of_word_is(sentence + 8, sentence + 10);
}
|