diff --git a/ion/include/ion/unicode/utf8_decoder.h b/ion/include/ion/unicode/utf8_decoder.h
index 432b5c072..ca7efe5e2 100644
--- a/ion/include/ion/unicode/utf8_decoder.h
+++ b/ion/include/ion/unicode/utf8_decoder.h
@@ -38,6 +38,10 @@ public:
   }
   CodePoint nextCodePoint();
   CodePoint previousCodePoint();
+  // Move past the current code point and the combining code points after it; returns the new position
+  const char * nextGlyphPosition();
+  // Move back over the preceding combining code points and the code point before them; returns the new position
+  const char * previousGlyphPosition();
   const char * stringPosition() const { return m_stringPosition; }
   static size_t CharSizeOfCodePoint(CodePoint c);
   static size_t CodePointToChars(CodePoint c, char * buffer, size_t bufferSize);
diff --git a/ion/src/shared/unicode/utf8_decoder.cpp b/ion/src/shared/unicode/utf8_decoder.cpp
index af0d59d46..ea02ba7b3 100644
--- a/ion/src/shared/unicode/utf8_decoder.cpp
+++ b/ion/src/shared/unicode/utf8_decoder.cpp
@@ -63,6 +63,36 @@ CodePoint UTF8Decoder::previousCodePoint() {
   return CodePoint(result);
 }
 
+const char * UTF8Decoder::nextGlyphPosition() {
+  // Precondition: the decoder is neither on nor just past a null terminator
+  assert(*m_stringPosition != 0 && (m_stringPosition == m_string || *(m_stringPosition - 1) != 0));
+  // Skip the code point under the cursor: only the ones after it are inspected
+  nextCodePoint();
+  const char * resultGlyphPosition = m_stringPosition;
+  // Keep absorbing the following code points for as long as they are combining
+  CodePoint followingCodePoint = nextCodePoint();
+  while (followingCodePoint != UCodePointNull && followingCodePoint.isCombining()) {
+    resultGlyphPosition = m_stringPosition;
+    followingCodePoint = nextCodePoint();
+  }
+  // The look-ahead consumed one code point too many: rewind to the recorded position
+  m_stringPosition = resultGlyphPosition;
+  return resultGlyphPosition;
+}
+
+const char * UTF8Decoder::previousGlyphPosition() {
+  // Precondition: the decoder is not at the start of the string
+  assert(m_stringPosition > m_string);
+  CodePoint previousCP = previousCodePoint();
+  const char * resultGlyphPosition = m_stringPosition;
+  // Keep stepping back while the code point just read is combining and the start is not reached
+  while (m_stringPosition > m_string && previousCP.isCombining()) {
+    previousCP = previousCodePoint();
+    resultGlyphPosition = m_stringPosition;
+  }
+  return resultGlyphPosition;
+}
+
 size_t UTF8Decoder::CharSizeOfCodePoint(CodePoint c) {
   if (c <= 0x7F) {
     return 1;
diff --git a/ion/test/utf8_decoder.cpp b/ion/test/utf8_decoder.cpp
index 26248502a..5eb482509 100644
--- a/ion/test/utf8_decoder.cpp
+++ b/ion/test/utf8_decoder.cpp
@@ -1,5 +1,6 @@
 #include <quiz.h>
 #include <ion/unicode/utf8_decoder.h>
+#include <ion/unicode/utf8_helper.h>
 
 void assert_decodes_to(const char * string, CodePoint c) {
   UTF8Decoder d(string);
@@ -12,6 +13,20 @@ void assert_previous_code_point_is_to(const char * string, const char * stringPo
   quiz_assert(d.previousCodePoint() == c);
 }
 
+// Checks the code point read right after one nextGlyphPosition() call made from the start of string
+void assert_code_point_at_next_glyph_position_is(const char * string, CodePoint c) {
+  UTF8Decoder d(string);
+  d.nextGlyphPosition();
+  quiz_assert(d.nextCodePoint() == c);
+}
+
+// Checks the code point read right after one previousGlyphPosition() call made from stringPosition
+void assert_code_point_at_previous_glyph_position_is(const char * string, const char * stringPosition, CodePoint c) {
+  UTF8Decoder d(string, stringPosition);
+  d.previousGlyphPosition();
+  quiz_assert(d.nextCodePoint() == c);
+}
+
 QUIZ_CASE(ion_utf8_decode_forward) {
   assert_decodes_to("\x20", 0x20);
   assert_decodes_to("\xC2\xA2", 0xA2);
@@ -25,3 +40,18 @@ QUIZ_CASE(ion_utf8_decode_backwards) {
   assert_previous_code_point_is_to(a, a+4, *(a+3));
   assert_previous_code_point_is_to(a, a+6, *(a+5));
 }
+
+QUIZ_CASE(ion_utf8_decoder_next_glyph) {
+  // 'a', then 'e' followed by U+0301 (combining acute accent), then 'i'
+  const char * string = u8"a\u0065\u0301i";
+  assert_code_point_at_next_glyph_position_is(string, 'e');
+  assert_code_point_at_next_glyph_position_is(string+1, 'i');
+}
+
+QUIZ_CASE(ion_utf8_decoder_previous_glyph) {
+  // 'a', then 'e' followed by U+0301 (combining acute accent), then 'i'
+  const char * string = u8"a\u0065\u0301i";
+  const char * iPosition = UTF8Helper::CodePointSearch(string, 'i');
+  assert_code_point_at_previous_glyph_position_is(string, iPosition, 'e');
+  assert_code_point_at_previous_glyph_position_is(string, string+1, 'a');
+}