[ion/unicode] Define and use CodePoint const methods

isLowerCaseLetter
isUpperCaseLetter
isLetter
isDigit

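Remove the equivalent free functions from UTF8Helper (CodePointIsLetter, CodePointIsLowerCaseLetter, CodePointIsUpperCaseLetter, CodePointIsNumber) and from Poincare::Tokenizer (isLetter, isDigit).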
This commit is contained in:
Ruben Dashyan
2020-01-24 14:30:42 +01:00
committed by Léa Saviot
parent 7665ad6fe0
commit 9b0b4e3096
7 changed files with 26 additions and 45 deletions

View File

@@ -414,7 +414,7 @@ bool PythonToolbox::handleEvent(Ion::Events::Event event) {
}
if (event.hasText() && strlen(event.text()) == 1 ) {
char c = event.text()[0];
if (UTF8Helper::CodePointIsLetter(c)) {
if (CodePoint(c).isLetter()) {
scrollToLetter(c);
return true;
}
@@ -475,7 +475,7 @@ int PythonToolbox::maxNumberOfDisplayedRows() {
}
void PythonToolbox::scrollToLetter(char letter) {
assert(UTF8Helper::CodePointIsLetter(letter));
assert(CodePoint(letter).isLetter());
/* We look for a child MessageTree that starts with the wanted letter. If we
* do not find one, we scroll to the first child MessageTree that starts with
* a letter higher than the wanted letter. */
@@ -487,7 +487,7 @@ void PythonToolbox::scrollToLetter(char letter) {
index = i;
break;
}
if (index < 0 && l >= lowerLetter && UTF8Helper::CodePointIsLowerCaseLetter(l)) {
if (index < 0 && l >= lowerLetter && CodePoint(l).isLowerCaseLetter()) {
index = i;
}
}

View File

@@ -48,7 +48,7 @@ bool Script::nameCompliant(const char * name) {
* problems with case sensitivity. */
UTF8Decoder decoder(name);
CodePoint c = decoder.nextCodePoint();
if (c == UCodePointNull || !(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || c == '.')) {
if (c == UCodePointNull || !(c.isLowerCaseLetter() || c == '_' || c == '.')) {
/* The name cannot be empty. Its first letter must be in [a-z_] or the
* extension dot. */
return false;
@@ -57,7 +57,7 @@ bool Script::nameCompliant(const char * name) {
if (c == '.' && strcmp(decoder.stringPosition(), ScriptStore::k_scriptExtension) == 0) {
return true;
}
if (!(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || UTF8Helper::CodePointIsNumber(c))) {
if (!(c.isLowerCaseLetter() || c == '_' || c.isDigit())) {
return false;
}
c = decoder.nextCodePoint();

View File

@@ -1,7 +1,6 @@
#include "function.h"
#include "poincare_helpers.h"
#include "poincare/src/parsing/parser.h"
#include <ion/unicode/utf8_helper.h>
#include <ion/unicode/utf8_decoder.h>
#include <string.h>
#include <cmath>
@@ -16,7 +15,7 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *
UTF8Decoder decoder(baseName);
CodePoint c = decoder.nextCodePoint();
if (UTF8Helper::CodePointIsNumber(c)) {
if (c.isDigit()) {
// The name cannot start with a number
if (error != nullptr) {
*error = NameNotCompliantError::NameCannotStartWithNumber;
@@ -26,11 +25,9 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *
// The name should only have allowed characters
while (c != UCodePointNull) {
if (!(UTF8Helper::CodePointIsUpperCaseLetter(c)
|| UTF8Helper::CodePointIsLowerCaseLetter(c)
|| UTF8Helper::CodePointIsNumber(c))
|| c == '_')
{
// FIXME '_' should be accepted but not as first character
// TODO Factor this piece of code with similar one in the Parser
if (!(c.isLetter() || c.isDigit()) || c == '_') {
if (error != nullptr) {
*error = NameNotCompliantError::CharacterNotAllowed;
}

View File

@@ -9,6 +9,18 @@ public:
constexpr CodePoint(uint32_t c) : m_code(c) {}
constexpr operator uint32_t() const { return m_code; }
bool isLowerCaseLetter() const {
return 'a' <= m_code && m_code <= 'z';
}
bool isUpperCaseLetter() const {
return 'A' <= m_code && m_code <= 'Z';
}
bool isLetter() const {
return isLowerCaseLetter() || isUpperCaseLetter();
}
bool isDigit() const {
return '0' <= m_code && m_code <= '9';
}
bool isCombining() const {
return (m_code >= 0x300 && m_code <= 0x036F);
}

View File

@@ -69,10 +69,6 @@ const char * PerformAtCodePoints(
bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c);
bool CodePointIs(const char * location, CodePoint c);
bool CodePointIsLetter(CodePoint c);
bool CodePointIsLowerCaseLetter(CodePoint c);
bool CodePointIsUpperCaseLetter(CodePoint c);
bool CodePointIsNumber(CodePoint c);
// Shift the buffer and return the number of bytes removed.
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c = nullptr);

View File

@@ -267,22 +267,6 @@ bool CodePointIs(const char * location, CodePoint c) {
return decoder.nextCodePoint() == c;
}
// Returns true if c is a lowercase or an uppercase letter, as decided by
// CodePointIsLowerCaseLetter and CodePointIsUpperCaseLetter.
bool CodePointIsLetter(CodePoint c) {
  if (CodePointIsLowerCaseLetter(c)) {
    return true;
  }
  return CodePointIsUpperCaseLetter(c);
}
// Returns true if c is in the range 'a'..'z' (bounds included).
bool CodePointIsLowerCaseLetter(CodePoint c) {
  if (c < 'a') {
    return false;
  }
  return c <= 'z';
}
// Returns true if c is in the range 'A'..'Z' (bounds included).
bool CodePointIsUpperCaseLetter(CodePoint c) {
  if (c < 'A') {
    return false;
  }
  return c <= 'Z';
}
// Returns true if c is in the range '0'..'9' (bounds included).
bool CodePointIsNumber(CodePoint c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c) {
if (location <= text) {
assert(location == text);

View File

@@ -6,14 +6,6 @@
namespace Poincare {
// Returns true if c is in 'a'..'z' or in 'A'..'Z' (bounds included).
static inline bool isLetter(const CodePoint c) {
  const bool isLowerCase = c >= 'a' && c <= 'z';
  const bool isUpperCase = c >= 'A' && c <= 'Z';
  return isLowerCase || isUpperCase;
}
// Returns true if c is in '0'..'9' (bounds included).
static inline bool isDigit(const CodePoint c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
UTF8Decoder decoder(m_text);
CodePoint c = decoder.nextCodePoint();
@@ -55,11 +47,11 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) {
// Pops the code points of an identifier: forwards to popWhile a predicate that
// accepts letters, digits and the context code point, with '_' passed as the
// context, and returns popWhile's result.
// NOTE(review): this listing looks like a diff with its +/- markers stripped;
// the first return is presumably the removed line (free isLetter/isDigit
// helpers) and the second its replacement (CodePoint methods) - confirm.
size_t Tokenizer::popIdentifier() {
/* TODO handle combined code points? For now combining code points will
* trigger a syntax error. */
return popWhile([](CodePoint c, CodePoint context) { return isLetter(c) || isDigit(c) || c == context; }, '_');
return popWhile([](CodePoint c, CodePoint context) { return c.isLetter() || c.isDigit() || c == context; }, '_');
}
// Pops decimal digits: forwards to popWhile a predicate that accepts only
// '0'..'9' and returns popWhile's result. No context code point is passed and
// the predicate ignores its context parameter.
// NOTE(review): this listing looks like a diff with its +/- markers stripped;
// the first return is presumably the removed line and the second its
// replacement - confirm.
size_t Tokenizer::popDigits() {
return popWhile([](CodePoint c, CodePoint context) { return isDigit(c); });
return popWhile([](CodePoint c, CodePoint context) { return c.isDigit(); });
}
size_t Tokenizer::popBinaryDigits() {
@@ -67,7 +59,7 @@ size_t Tokenizer::popBinaryDigits() {
}
// Pops hexadecimal digits: forwards to popWhile a predicate that accepts
// '0'..'9', 'A'..'F' and 'a'..'f', and returns popWhile's result. No context
// code point is passed and the predicate ignores its context parameter.
// NOTE(review): this listing looks like a diff with its +/- markers stripped;
// the first return is presumably the removed line and the second its
// replacement - confirm.
size_t Tokenizer::popHexadecimalDigits() {
return popWhile([](CodePoint c, CodePoint context) { return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
return popWhile([](CodePoint c, CodePoint context) { return c.isDigit() || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
}
Token Tokenizer::popNumber() {
@@ -141,7 +133,7 @@ Token Tokenizer::popToken() {
/* If the next code point is the start of a number, we do not want to pop it
* because popNumber needs this code point. */
bool nextCodePointIsNeitherDotNorDigit = true;
const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !isDigit(cp); }, '.', &nextCodePointIsNeitherDotNorDigit);
const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !cp.isDigit(); }, '.', &nextCodePointIsNeitherDotNorDigit);
// According to c, recognize the Token::Type.
if (!nextCodePointIsNeitherDotNorDigit) {
@@ -152,7 +144,7 @@ Token Tokenizer::popToken() {
result.setString(start + 1, popIdentifier());
return result;
}
if (isLetter(c)) {
if (c.isLetter()) {
Token result(Token::Identifier);
result.setString(start, 1 + popIdentifier()); // We already popped 1 code point
return result;