mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-03-18 21:30:38 +01:00
[ion/unicode] Define and use CodePoint const methods
New methods: isLowerCaseLetter, isUpperCaseLetter, isLetter, isDigit. Remove similar ones from UTF8Helper and from Poincare::Tokenizer.
This commit is contained in:
committed by
Léa Saviot
parent
7665ad6fe0
commit
9b0b4e3096
@@ -414,7 +414,7 @@ bool PythonToolbox::handleEvent(Ion::Events::Event event) {
|
||||
}
|
||||
if (event.hasText() && strlen(event.text()) == 1 ) {
|
||||
char c = event.text()[0];
|
||||
if (UTF8Helper::CodePointIsLetter(c)) {
|
||||
if (CodePoint(c).isLetter()) {
|
||||
scrollToLetter(c);
|
||||
return true;
|
||||
}
|
||||
@@ -475,7 +475,7 @@ int PythonToolbox::maxNumberOfDisplayedRows() {
|
||||
}
|
||||
|
||||
void PythonToolbox::scrollToLetter(char letter) {
|
||||
assert(UTF8Helper::CodePointIsLetter(letter));
|
||||
assert(CodePoint(letter).isLetter());
|
||||
/* We look for a child MessageTree that starts with the wanted letter. If we
|
||||
* do not find one, we scroll to the first child MessageTree that starts with
|
||||
* a letter higher than the wanted letter. */
|
||||
@@ -487,7 +487,7 @@ void PythonToolbox::scrollToLetter(char letter) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
if (index < 0 && l >= lowerLetter && UTF8Helper::CodePointIsLowerCaseLetter(l)) {
|
||||
if (index < 0 && l >= lowerLetter && CodePoint(l).isLowerCaseLetter()) {
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ bool Script::nameCompliant(const char * name) {
|
||||
* problems with case sensitivity. */
|
||||
UTF8Decoder decoder(name);
|
||||
CodePoint c = decoder.nextCodePoint();
|
||||
if (c == UCodePointNull || !(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || c == '.')) {
|
||||
if (c == UCodePointNull || !(c.isLowerCaseLetter() || c == '_' || c == '.')) {
|
||||
/* The name cannot be empty. Its first letter must be in [a-z_] or the
|
||||
* extension dot. */
|
||||
return false;
|
||||
@@ -57,7 +57,7 @@ bool Script::nameCompliant(const char * name) {
|
||||
if (c == '.' && strcmp(decoder.stringPosition(), ScriptStore::k_scriptExtension) == 0) {
|
||||
return true;
|
||||
}
|
||||
if (!(UTF8Helper::CodePointIsLowerCaseLetter(c) || c == '_' || UTF8Helper::CodePointIsNumber(c))) {
|
||||
if (!(c.isLowerCaseLetter() || c == '_' || c.isDigit())) {
|
||||
return false;
|
||||
}
|
||||
c = decoder.nextCodePoint();
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#include "function.h"
|
||||
#include "poincare_helpers.h"
|
||||
#include "poincare/src/parsing/parser.h"
|
||||
#include <ion/unicode/utf8_helper.h>
|
||||
#include <ion/unicode/utf8_decoder.h>
|
||||
#include <string.h>
|
||||
#include <cmath>
|
||||
@@ -16,7 +15,7 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *
|
||||
|
||||
UTF8Decoder decoder(baseName);
|
||||
CodePoint c = decoder.nextCodePoint();
|
||||
if (UTF8Helper::CodePointIsNumber(c)) {
|
||||
if (c.isDigit()) {
|
||||
// The name cannot start with a number
|
||||
if (error != nullptr) {
|
||||
*error = NameNotCompliantError::NameCannotStartWithNumber;
|
||||
@@ -26,11 +25,9 @@ bool Function::BaseNameCompliant(const char * baseName, NameNotCompliantError *
|
||||
|
||||
// The name should only have allowed characters
|
||||
while (c != UCodePointNull) {
|
||||
if (!(UTF8Helper::CodePointIsUpperCaseLetter(c)
|
||||
|| UTF8Helper::CodePointIsLowerCaseLetter(c)
|
||||
|| UTF8Helper::CodePointIsNumber(c))
|
||||
|| c == '_')
|
||||
{
|
||||
// FIXME '_' should be accepted but not as first character
|
||||
// TODO Factor this piece of code with similar one in the Parser
|
||||
if (!(c.isLetter() || c.isDigit()) || c == '_') {
|
||||
if (error != nullptr) {
|
||||
*error = NameNotCompliantError::CharacterNotAllowed;
|
||||
}
|
||||
|
||||
@@ -9,6 +9,18 @@ public:
|
||||
// Wraps the raw 32-bit value c. The constructor is not explicit, so integers and character literals convert to CodePoint implicitly.
constexpr CodePoint(uint32_t c) : m_code(c) {}
|
||||
// Implicit conversion back to the stored 32-bit value.
constexpr operator uint32_t() const { return m_code; }
|
||||
|
||||
// Returns true when the stored value lies in the inclusive range ['a', 'z'].
// Only unaccented ASCII lowercase letters match; any other code point yields false.
bool isLowerCaseLetter() const {
|
||||
return 'a' <= m_code && m_code <= 'z';
|
||||
}
|
||||
// Returns true when the stored value lies in the inclusive range ['A', 'Z'].
// Only unaccented ASCII uppercase letters match; any other code point yields false.
bool isUpperCaseLetter() const {
|
||||
return 'A' <= m_code && m_code <= 'Z';
|
||||
}
|
||||
// Returns true when the code point is either a lowercase or an uppercase letter,
// as defined by isLowerCaseLetter() and isUpperCaseLetter().
bool isLetter() const {
|
||||
return isLowerCaseLetter() || isUpperCaseLetter();
|
||||
}
|
||||
// Returns true when the stored value lies in the inclusive range ['0', '9'].
bool isDigit() const {
|
||||
return '0' <= m_code && m_code <= '9';
|
||||
}
|
||||
// Returns true when the stored value lies in the inclusive range [0x300, 0x36F].
// NOTE(review): this range matches the Unicode "Combining Diacritical Marks" block;
// combining characters located in other blocks are not detected by this test.
bool isCombining() const {
|
||||
return (m_code >= 0x300 && m_code <= 0x036F);
|
||||
}
|
||||
|
||||
@@ -69,10 +69,6 @@ const char * PerformAtCodePoints(
|
||||
|
||||
bool PreviousCodePointIs(const char * buffer, const char * location, CodePoint c);
|
||||
bool CodePointIs(const char * location, CodePoint c);
|
||||
bool CodePointIsLetter(CodePoint c);
|
||||
bool CodePointIsLowerCaseLetter(CodePoint c);
|
||||
bool CodePointIsUpperCaseLetter(CodePoint c);
|
||||
bool CodePointIsNumber(CodePoint c);
|
||||
|
||||
// Shift the buffer and return the number of bytes removed.
|
||||
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c = nullptr);
|
||||
|
||||
@@ -267,22 +267,6 @@ bool CodePointIs(const char * location, CodePoint c) {
|
||||
return decoder.nextCodePoint() == c;
|
||||
}
|
||||
|
||||
// Returns true when c is a lowercase or an uppercase letter (see the two helpers it calls).
// Per the commit message, this UTF8Helper function is removed in favour of the CodePoint const methods.
bool CodePointIsLetter(CodePoint c) {
|
||||
return CodePointIsLowerCaseLetter(c) || CodePointIsUpperCaseLetter(c);
|
||||
}
|
||||
|
||||
// Returns true when c lies in the inclusive range ['a', 'z'].
// Per the commit message, this UTF8Helper function is removed in favour of the CodePoint const methods.
bool CodePointIsLowerCaseLetter(CodePoint c) {
|
||||
return c >= 'a' && c <= 'z';
|
||||
}
|
||||
|
||||
// Returns true when c lies in the inclusive range ['A', 'Z'].
// Per the commit message, this UTF8Helper function is removed in favour of the CodePoint const methods.
bool CodePointIsUpperCaseLetter(CodePoint c) {
|
||||
return c >= 'A' && c <= 'Z';
|
||||
}
|
||||
|
||||
// Returns true when c lies in the inclusive range ['0', '9'].
// Per the commit message, this UTF8Helper function is removed in favour of the CodePoint const methods
// (call sites shown in this diff switch to isDigit()).
bool CodePointIsNumber(CodePoint c) {
|
||||
return c >= '0' && c <= '9';
|
||||
}
|
||||
|
||||
int RemovePreviousGlyph(const char * text, char * location, CodePoint * c) {
|
||||
if (location <= text) {
|
||||
assert(location == text);
|
||||
|
||||
@@ -6,14 +6,6 @@
|
||||
|
||||
namespace Poincare {
|
||||
|
||||
// File-local helper: returns true when c lies in ['a', 'z'] or in ['A', 'Z'].
// Per the commit message, this Tokenizer helper is removed in favour of the CodePoint const methods.
static inline bool isLetter(const CodePoint c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
|
||||
}
|
||||
|
||||
// File-local helper: returns true when c lies in the inclusive range ['0', '9'].
// Per the commit message, this Tokenizer helper is removed in favour of the CodePoint const methods.
static inline bool isDigit(const CodePoint c) {
|
||||
return '0' <= c && c <= '9';
|
||||
}
|
||||
|
||||
const CodePoint Tokenizer::nextCodePoint(PopTest popTest, CodePoint context, bool * testResult) {
|
||||
UTF8Decoder decoder(m_text);
|
||||
CodePoint c = decoder.nextCodePoint();
|
||||
@@ -55,11 +47,11 @@ size_t Tokenizer::popWhile(PopTest popTest, CodePoint context) {
|
||||
// Forwards to popWhile with a predicate that accepts letters, digits and the context
// code point, which is passed as '_'.
// NOTE(review): the two return lines below look like the pre- and post-change versions of
// the same statement in this diff view (free isLetter/isDigit helpers vs. CodePoint methods).
size_t Tokenizer::popIdentifier() {
|
||||
/* TODO handle combined code points? For now combining code points will
|
||||
* trigger a syntax error. */
|
||||
return popWhile([](CodePoint c, CodePoint context) { return isLetter(c) || isDigit(c) || c == context; }, '_');
|
||||
return popWhile([](CodePoint c, CodePoint context) { return c.isLetter() || c.isDigit() || c == context; }, '_');
|
||||
}
|
||||
|
||||
// Forwards to popWhile with a predicate that accepts only digits ('0'-'9'); the predicate
// ignores its context argument.
// NOTE(review): the two return lines below look like the pre- and post-change versions of
// the same statement in this diff view (free isDigit helper vs. CodePoint::isDigit).
size_t Tokenizer::popDigits() {
|
||||
return popWhile([](CodePoint c, CodePoint context) { return isDigit(c); });
|
||||
return popWhile([](CodePoint c, CodePoint context) { return c.isDigit(); });
|
||||
}
|
||||
|
||||
size_t Tokenizer::popBinaryDigits() {
|
||||
@@ -67,7 +59,7 @@ size_t Tokenizer::popBinaryDigits() {
|
||||
}
|
||||
|
||||
// Forwards to popWhile with a predicate that accepts hexadecimal digits: '0'-'9', 'A'-'F'
// and 'a'-'f'. The predicate ignores its context argument.
// NOTE(review): the two return lines below look like the pre- and post-change versions of
// the same statement in this diff view (free isDigit helper vs. CodePoint::isDigit).
size_t Tokenizer::popHexadecimalDigits() {
|
||||
return popWhile([](CodePoint c, CodePoint context) { return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
|
||||
return popWhile([](CodePoint c, CodePoint context) { return c.isDigit() || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); });
|
||||
}
|
||||
|
||||
Token Tokenizer::popNumber() {
|
||||
@@ -141,7 +133,7 @@ Token Tokenizer::popToken() {
|
||||
/* If the next code point is the start of a number, we do not want to pop it
|
||||
* because popNumber needs this code point. */
|
||||
bool nextCodePointIsNeitherDotNorDigit = true;
|
||||
const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !isDigit(cp); }, '.', &nextCodePointIsNeitherDotNorDigit);
|
||||
const CodePoint c = nextCodePoint([](CodePoint cp, CodePoint context) { return cp != context && !cp.isDigit(); }, '.', &nextCodePointIsNeitherDotNorDigit);
|
||||
|
||||
// According to c, recognize the Token::Type.
|
||||
if (!nextCodePointIsNeitherDotNorDigit) {
|
||||
@@ -152,7 +144,7 @@ Token Tokenizer::popToken() {
|
||||
result.setString(start + 1, popIdentifier());
|
||||
return result;
|
||||
}
|
||||
if (isLetter(c)) {
|
||||
if (c.isLetter()) {
|
||||
Token result(Token::Identifier);
|
||||
result.setString(start, 1 + popIdentifier()); // We already popped 1 code point
|
||||
return result;
|
||||
|
||||
Reference in New Issue
Block a user