[kandinsky] Assume UTF-8 encoding for strings

This commit is contained in:
Romain Goyet
2018-10-31 10:28:57 +01:00
committed by Émilie Feral
parent 52ae2a2643
commit 8106ba6371
6 changed files with 237 additions and 58 deletions

View File

@@ -2,13 +2,15 @@
#define KANDINSKY_FONT_H
#include <stdint.h>
#include <stddef.h>
#include <kandinsky/size.h>
#include <kandinsky/coordinate.h>
#include <kandinsky/unicode/codepoint.h>
#include "palette.h"
class KDFont {
private:
static constexpr int k_bitsPerPixel = 4;
static constexpr int k_bitsPerPixel = 4; // TODO: Should be generated by the rasterizer
static const KDFont privateLargeFont;
static const KDFont privateSmallFont;
public:
@@ -17,38 +19,57 @@ public:
KDSize stringSize(const char * text) const;
union GlyphBuffer {
public:
GlyphBuffer() {} // Don't initialize either buffer
KDColor * colorBuffer() { return m_colors; }
uint8_t * greyscaleBuffer() { return m_greyscales; }
uint8_t * secondaryGreyscaleBuffer() { return m_greyscales + k_maxGlyphPixelCount; }
private:
private:
static constexpr int k_maxGlyphPixelCount = 180; //TODO: Should be generated by the rasterizer
uint8_t m_greyscales[2*k_maxGlyphPixelCount];
KDColor m_colors[k_maxGlyphPixelCount];
};
using GlyphIndex = uint8_t;
class CodepointIndexPair {
public:
constexpr CodepointIndexPair(Codepoint c, GlyphIndex i) : m_codepoint(c), m_glyphIndex(i) {}
Codepoint codepoint() const { return m_codepoint; }
GlyphIndex glyphIndex() const { return m_glyphIndex; }
private:
Codepoint m_codepoint;
GlyphIndex m_glyphIndex;
};
GlyphIndex indexForCodepoint(Codepoint c) const;
void setGlyphGreyscalesForCodepoint(Codepoint codepoint, GlyphBuffer * glyphBuffer) const;
void accumulateGlyphGreyscalesForCodepoint(Codepoint codepoint, GlyphBuffer * glyphBuffer) const;
using RenderPalette = KDPalette<(1<<k_bitsPerPixel)>;
void fetchGlyphForChar(char c, const RenderPalette * renderPalette, KDColor * pixelBuffer) const;
void colorizeGlyphBuffer(const RenderPalette * renderPalette, GlyphBuffer * glyphBuffer) const;
RenderPalette renderPalette(KDColor textColor, KDColor backgroundColor) const {
return RenderPalette::Gradient(textColor, backgroundColor);
}
KDSize glyphSize() const { return m_glyphSize; }
constexpr KDFont(KDCoordinate glyphWidth, KDCoordinate glyphHeight, const uint16_t * glyphDataOffset, const uint8_t * data) :
m_glyphSize(glyphWidth, glyphHeight), m_glyphDataOffset(glyphDataOffset), m_data(data) { }
constexpr KDFont(size_t tableLength, const CodepointIndexPair * table, KDCoordinate glyphWidth, KDCoordinate glyphHeight, const uint16_t * glyphDataOffset, const uint8_t * data) :
m_tableLength(tableLength), m_table(table), m_glyphSize(glyphWidth, glyphHeight), m_glyphDataOffset(glyphDataOffset), m_data(data) { }
private:
void fetchGreyscaleGlyphForChar(char c, uint8_t * greyscaleBuffer) const;
void fetchGreyscaleGlyphAtIndex(GlyphIndex index, uint8_t * greyscaleBuffer) const;
const uint8_t * compressedGlyphData(char c) const {
return m_data + m_glyphDataOffset[charAsIndex(c)];
}
uint16_t compressedGlyphDataSize(char c) const {
return m_glyphDataOffset[charAsIndex(c)+1] - m_glyphDataOffset[charAsIndex(c)];
}
uint8_t charAsIndex(char c) const {
// FIXME: This is most likely false for chars greater than 127
return static_cast<uint8_t>(c) - k_magicCharOffsetValue;
}
int signedCharAsIndex(char c) const {
int cInt = c;
if (cInt < 0) {
/* A char casted as int takes its value between -127 and +128, but we want
* a positive value. -127 is thus 129, -126 is 130, etc. */
cInt=128+(cInt-(-127)+1);
}
return cInt - k_magicCharOffsetValue;
const uint8_t * compressedGlyphData(GlyphIndex index) const {
return m_data + m_glyphDataOffset[index];
}
uint16_t compressedGlyphDataSize(GlyphIndex index) const {
return m_glyphDataOffset[index+1] - m_glyphDataOffset[index];
}
size_t m_tableLength;
const CodepointIndexPair * m_table;
KDSize m_glyphSize;
const uint16_t * m_glyphDataOffset;
const uint8_t * m_data;

View File

@@ -0,0 +1,23 @@
#ifndef KANDINSKY_UNICODE_CODEPOINT_H
#define KANDINSKY_UNICODE_CODEPOINT_H
#include <stdint.h>
/* A single Unicode code point, stored on 32 bits.
 *
 * The class converts implicitly from and to its numeric value so that code
 * points can be compared and used in arithmetic like plain integers. The
 * conversion yields the full 32-bit value: converting to a 16-bit integer
 * would silently truncate every code point above U+FFFF. */
class Codepoint {
public:
  // Implicit on purpose: allows "Codepoint c = 0x41;"
  constexpr Codepoint(uint32_t c) : m_code(c) {}
  // Numeric value of the code point, without truncation
  constexpr operator uint32_t() const { return m_code; }
  // True when the code point lies in the range U+0300..U+036F
  constexpr bool isCombining() const {
    return (m_code >= 0x300 && m_code <= 0x036F);
  }
private:
  uint32_t m_code;
};
// Named code points: U+0000, U+0009 and U+000A
static constexpr Codepoint Null(0x0);
static constexpr Codepoint Tabulation(0x9);
static constexpr Codepoint LineFeed(0xA);
#endif

View File

@@ -0,0 +1,14 @@
#ifndef KANDINSKY_UNICODE_UTF8DECODER_H
#define KANDINSKY_UNICODE_UTF8DECODER_H
#include "codepoint.h"
/* Sequential reader turning the bytes of a C string into code points.
 *
 * The decoder does not copy the string: it only keeps a cursor into the
 * buffer it was given, and each call to nextCodepoint() moves that cursor
 * forward. */
class UTF8Decoder {
public:
  // Start decoding at the first byte of "string"
  UTF8Decoder(const char * string) :
    m_string(string)
  {
  }
  // Decode the code point under the cursor and advance the cursor
  Codepoint nextCodepoint();
private:
  const char * m_string; // Cursor: next byte to decode
};
#endif

View File

@@ -1,35 +1,44 @@
#include <assert.h>
#include <kandinsky/context.h>
#include <kandinsky/font.h>
#include <kandinsky/unicode/utf8decoder.h>
constexpr int maxGlyphPixelCount = 180;
constexpr static int k_tabCharacterWidth = 4;
KDPoint KDContext::drawString(const char * text, KDPoint p, const KDFont * font, KDColor textColor, KDColor backgroundColor, int maxLength) {
KDPoint position = p;
KDSize glyphSize = font->glyphSize();
KDFont::RenderPalette palette = font->renderPalette(textColor, backgroundColor);
KDColor glyph[maxGlyphPixelCount];
KDFont::GlyphBuffer glyphBuffer;
const char * end = text + maxLength;
while(*text != 0 && text != end) {
if (*text == '\n') {
UTF8Decoder decoder(text);
Codepoint codepoint = decoder.nextCodepoint();
while (codepoint != Null) {
if (codepoint == LineFeed) {
position = KDPoint(0, position.y() + glyphSize.height());
} else if (*text == '\t') {
codepoint = decoder.nextCodepoint();
} else if (codepoint == Tabulation) {
position = position.translatedBy(KDPoint(k_tabCharacterWidth * glyphSize.width(), 0));
codepoint = decoder.nextCodepoint();
} else {
// Fetch and draw glyph for current char
font->fetchGlyphForChar(*text, &palette, glyph);
assert(!codepoint.isCombining());
font->setGlyphGreyscalesForCodepoint(codepoint, &glyphBuffer);
codepoint = decoder.nextCodepoint();
while (codepoint.isCombining()) {
font->accumulateGlyphGreyscalesForCodepoint(codepoint, &glyphBuffer);
codepoint = decoder.nextCodepoint();
}
font->colorizeGlyphBuffer(&palette, &glyphBuffer);
// Flush accumulated content
fillRectWithPixels(
KDRect(position, glyphSize.width(), glyphSize.height()),
glyph,
glyph // It's OK to trash the content of glyph since we'll re-fetch it for the next char anyway
KDRect(position, glyphSize),
glyphBuffer.colorBuffer(),
glyphBuffer.colorBuffer() // It's OK to trash the content of the color buffer since we'll re-fetch it for the next char anyway
);
position = position.translatedBy(KDPoint(glyphSize.width(), 0));
}
text++;
}
return position;
}

View File

@@ -1,6 +1,7 @@
#include <assert.h>
#include <kandinsky/font.h>
#include <ion.h>
#include <kandinsky/unicode/utf8decoder.h>
constexpr static int k_tabCharacterWidth = 4;
@@ -9,39 +10,48 @@ KDSize KDFont::stringSize(const char * text) const {
return KDSizeZero;
}
KDSize stringSize = KDSize(0, m_glyphSize.height());
while (*text != 0) {
UTF8Decoder decoder(text);
Codepoint codepoint = decoder.nextCodepoint();
while (codepoint != Null) {
KDSize cSize = KDSize(m_glyphSize.width(), 0);
if (*text == '\t') {
cSize = KDSize(k_tabCharacterWidth*m_glyphSize.width(), 0);
}
if (*text == '\n') {
if (codepoint == LineFeed) {
cSize = KDSize(0, m_glyphSize.height());
codepoint = decoder.nextCodepoint();
} else if (codepoint == Tabulation) {
cSize = KDSize(k_tabCharacterWidth*m_glyphSize.width(), 0);
} else if (codepoint.isCombining()) {
cSize = KDSizeZero;
}
stringSize = KDSize(stringSize.width()+cSize.width(), stringSize.height()+cSize.height());
text++;
codepoint = decoder.nextCodepoint();
}
return stringSize;
}
void KDFont::fetchGreyscaleGlyphForChar(char c, uint8_t * greyscaleBuffer) const {
/* Overwrite the primary greyscale buffer of glyphBuffer with the glyph that
 * the font associates to codepoint. */
void KDFont::setGlyphGreyscalesForCodepoint(Codepoint codepoint, GlyphBuffer * glyphBuffer) const {
  const GlyphIndex glyph = indexForCodepoint(codepoint);
  uint8_t * destination = glyphBuffer->greyscaleBuffer();
  fetchGreyscaleGlyphAtIndex(glyph, destination);
}
/* Superimpose the glyph of codepoint onto the glyph already stored in the
 * primary greyscale buffer of glyphBuffer.
 *
 * The extra glyph is first fetched into the secondary greyscale buffer, then
 * merged into the primary one with a bitwise OR.
 *
 * NOTE(review): the merge spans width*height bytes, whereas
 * fetchGreyscaleGlyphAtIndex only asks for width*height*k_bitsPerPixel/8
 * bytes of output. Confirm whether the larger span is intended. */
void KDFont::accumulateGlyphGreyscalesForCodepoint(Codepoint codepoint, GlyphBuffer * glyphBuffer) const {
  uint8_t * overlay = glyphBuffer->secondaryGreyscaleBuffer();
  fetchGreyscaleGlyphAtIndex(indexForCodepoint(codepoint), overlay);
  uint8_t * base = glyphBuffer->greyscaleBuffer();
  int offset = 0;
  while (offset < m_glyphSize.width()*m_glyphSize.height()) {
    base[offset] = base[offset] | overlay[offset];
    offset++;
  }
}
void KDFont::fetchGreyscaleGlyphAtIndex(KDFont::GlyphIndex index, uint8_t * greyscaleBuffer) const {
Ion::decompress(
compressedGlyphData(c),
compressedGlyphData(index),
greyscaleBuffer,
compressedGlyphDataSize(c),
compressedGlyphDataSize(index),
m_glyphSize.width() * m_glyphSize.height() * k_bitsPerPixel/8
);
}
void KDFont::fetchGlyphForChar(char c, const KDFont::RenderPalette * renderPalette, KDColor * pixelBuffer) const {
int pixelCount = m_glyphSize.width() * m_glyphSize.height() - 1;
int charIndex = signedCharAsIndex(c);
if (charIndex < 0 || charIndex >= k_numberOfGlyphs) {
// There is no data for this glyph
for (int i = 0; i < pixelCount; i++) {
pixelBuffer[i] = KDColorBlack;
}
return;
}
void KDFont::colorizeGlyphBuffer(const RenderPalette * renderPalette, GlyphBuffer * glyphBuffer) const {
/* Since a greyscale value is smaller than a color value (see assertion), we
* can store the temporary greyscale values in the output pixel buffer.
* What's great is that now, if we fill the pixel buffer right-to-left with
@@ -49,8 +59,9 @@ void KDFont::fetchGlyphForChar(char c, const KDFont::RenderPalette * renderPalet
* the remaining grayscale values since those are smaller. So we can avoid a
* separate buffer for the temporary greyscale values. */
assert(k_bitsPerPixel < 8*sizeof(KDColor));
uint8_t * greyscaleBuffer = reinterpret_cast<uint8_t *>(pixelBuffer);
fetchGreyscaleGlyphForChar(c, greyscaleBuffer);
uint8_t * greyscaleBuffer = glyphBuffer->greyscaleBuffer();
KDColor * colorBuffer = glyphBuffer->colorBuffer();
uint8_t mask = (0xFF >> (8-k_bitsPerPixel));
int pixelIndex = pixelCount; // Let's start at the final pixel
@@ -62,7 +73,82 @@ void KDFont::fetchGlyphForChar(char c, const KDFont::RenderPalette * renderPalet
uint8_t greyscale = greyscaleByte & mask;
greyscaleByte = greyscaleByte >> k_bitsPerPixel;
assert(pixelIndex >= 0);
pixelBuffer[pixelIndex--] = renderPalette->colorAtIndex(greyscale);
colorBuffer[pixelIndex--] = renderPalette->colorAtIndex(greyscale);
}
}
}
/* Return the index of the glyph to use for code point c, or 0 when the lookup
 * table has no glyph for it.
 *
 * The lookup table (m_table, m_tableLength entries) is run-length encoded:
 * each (codepoint, glyphIndex) pair starts a run of consecutive code points
 * mapped to consecutive glyph indices. The length of a run is not stored; it
 * is deduced from the glyph index of the following pair. The last pair
 * therefore only matches its own code point.
 *
 * Both implementations presumably rely on the table being sorted by
 * increasing code point -- TODO confirm against the table generator.
 *
 * Two implementations are provided. USE_BINARY_SEARCH is defined to 0 below,
 * so the linear scan in the #else branch is the one that gets compiled.
 *
 * NOTE(review): USE_BINARY_SEARCH is #defined inside the function body and is
 * never #undef'd, so it remains visible in the rest of the translation unit.
 * NOTE(review): the linear scan dereferences m_table before looking at
 * m_tableLength, so it assumes a non-empty table. */
KDFont::GlyphIndex KDFont::indexForCodepoint(Codepoint c) const {
#define USE_BINARY_SEARCH 0
#if USE_BINARY_SEARCH
// Disabled implementation: bisection over [lowerBound, upperBound]
int lowerBound = 0;
int upperBound = m_tableLength;
while (true) {
int currentIndex = (lowerBound+upperBound)/2;
// printf("Considering %d in [%d,%d]\n", currentIndex, lowerBound, upperBound);
const CodepointIndexPair * currentPair = m_table + currentIndex;
// nextPair is nullptr when currentPair is the last entry of the table
const CodepointIndexPair * nextPair = currentIndex + 1 < m_tableLength ? currentPair + 1 : nullptr;
// printf("At this point, currentPair->codepoint() = %d and c = %d\n", currentPair->codepoint(), c);
if (currentPair->codepoint() == c) {
return currentPair->glyphIndex();
} else if (currentPair->codepoint() > c) {
// We need to look below
if (upperBound == currentIndex) {
// There's nothing below. Error out.
return 0;
}
upperBound = currentIndex;
continue;
} else if (nextPair == nullptr) {
// c is beyond the code point of the last entry: no glyph
return 0;
} else if (nextPair->codepoint() == c) {
return nextPair->glyphIndex();
} else if (nextPair->codepoint() < c) {
// We need to look above
if (lowerBound == currentIndex) {
// There's nothing above. Error out.
return 0;
}
lowerBound = currentIndex;
continue;
} else {
// At this point,
// currentPair->codepoint < c && nextPair != nullptr && nextPair->codepoint > c
// Yay, it's over!
// There can be an empty space between the currentPair and the nextPair
// e.g. currentPair(3,1) and nextPair(9, 4)
// means value at codepoints 3, 4, 5, 6, 7, 8, 9
// are glyph identifiers 1, ?, ?, ?, ?, ?, 4
// solved as 1, 2, 3, 0, 0, 0, 4
// Let's hunt down the zeroes
Codepoint lastCodepointOfCurrentPair = currentPair->codepoint() + (nextPair->glyphIndex() - currentPair->glyphIndex() - 1);
if (c > lastCodepointOfCurrentPair) {
return 0;
}
return currentPair->glyphIndex() + (c - currentPair->codepoint());
}
}
#else
// Active implementation: linear scan over consecutive pairs
const CodepointIndexPair * currentPair = m_table;
if (c < currentPair->codepoint()) {
// c is below the code point of the first entry: no glyph
return 0;
}
const CodepointIndexPair * endPair = m_table + m_tableLength - 1;
while (currentPair < endPair) {
const CodepointIndexPair * nextPair = currentPair + 1;
if (c < nextPair->codepoint()) {
// c lies between currentPair and nextPair: it is either inside the run
// started by currentPair, or in the gap that follows this run.
Codepoint lastCodepointOfCurrentPair = currentPair->codepoint() + (nextPair->glyphIndex() - currentPair->glyphIndex() - 1);
if (c > lastCodepointOfCurrentPair) {
return 0;
}
return currentPair->glyphIndex() + (c - currentPair->codepoint());
}
currentPair = nextPair;
}
// The length of the last run is unknown, so only an exact match is accepted
if (endPair->codepoint() == c) {
return endPair->glyphIndex();
}
return 0;
#endif
}

View File

@@ -0,0 +1,26 @@
#include <kandinsky/unicode/utf8decoder.h>
#include <assert.h>
/* Count the consecutive bits set to 1 starting from the most significant bit
 * of value. The result ranges from 0 (top bit clear) to 8 (value == 0xFF).
 *
 * Every input yields a value: 0xFF returns 8 instead of reaching an
 * assert(false) with no return statement behind it, which was undefined
 * behaviour once assertions are compiled out. */
static inline int leading_ones(uint8_t value) {
  int count = 0;
  while (count < 8 && (value & 0x80)) {
    value = static_cast<uint8_t>(value << 1);
    count++;
  }
  return count;
}
/* Keep only the "bits" least significant bits of value.
 *
 * Asking for 8 bits or more returns value unchanged. This also keeps the
 * shift below well defined: shifting by the width of int or more is
 * undefined behaviour. */
static inline uint8_t last_k_bits(uint8_t value, uint8_t bits) {
  if (bits >= 8) {
    return value;
  }
  return static_cast<uint8_t>(value & ((1u << bits) - 1u));
}
/* Decode the UTF-8 sequence under the cursor and move the cursor past it.
 *
 * Returns the decoded code point. Well-formed input decodes exactly as
 * before; the differences only concern the end of the string and malformed
 * input:
 *  - On the NUL terminator, code point 0 is returned and the cursor is NOT
 *    advanced, so extra calls keep returning 0 instead of reading past the
 *    end of the string.
 *  - A stray continuation byte (10xxxxxx) or a lead byte announcing more than
 *    4 bytes is consumed and reported as U+FFFD (replacement character).
 *  - A sequence cut short (next byte is not a continuation byte, which
 *    includes the NUL terminator) is reported as U+FFFD. The offending byte
 *    is left in place to be decoded by the next call.
 * Overlong encodings and surrogate values are not rejected. */
Codepoint UTF8Decoder::nextCodepoint() {
  constexpr uint32_t k_replacementCharacter = 0xFFFD;

  const uint8_t leadByte = static_cast<uint8_t>(*m_string);
  if (leadByte == 0) {
    // End of string: stay on the terminator
    return Codepoint(0);
  }
  m_string++;
  if (leadByte < 0x80) {
    // Single-byte (ASCII) sequence
    return Codepoint(leadByte);
  }
  if (leadByte < 0xC0 || leadByte >= 0xF8) {
    // Continuation byte in lead position, or 5+ leading ones: not a valid lead
    return Codepoint(k_replacementCharacter);
  }

  // From here on the lead byte has 2, 3 or 4 leading ones
  const int leadingOnes = leading_ones(leadByte);
  uint32_t result = last_k_bits(leadByte, 8-leadingOnes-1);
  for (int i=0; i<(leadingOnes-1); i++) {
    const uint8_t continuationByte = static_cast<uint8_t>(*m_string);
    if ((continuationByte & 0xC0) != 0x80) {
      // Truncated sequence: never step over a byte that is not ours
      return Codepoint(k_replacementCharacter);
    }
    result = (result << 6) | (continuationByte & 0x3F);
    m_string++;
  }
  return Codepoint(result);
}