diff --git a/apps/i18n.py b/apps/i18n.py index 93ec034c9..dd307b8ba 100644 --- a/apps/i18n.py +++ b/apps/i18n.py @@ -1,4 +1,6 @@ -#coding=utf-8 +# This script gathers all .i18n files and aggregates them as a pair of .h/.cpp files +# In practice, it enforces an NFKD normalization +# It works with Python 2 and Python 3 import sys import re @@ -6,42 +8,8 @@ import unicodedata import argparse import io -ion_special_characters = { - u'Δ': "Ion::Charset::CapitalDelta", - u'Σ': "Ion::Charset::CapitalSigma", - u'λ': "Ion::Charset::SmallLambda", - u'μ': "Ion::Charset::SmallMu", - u'σ': "Ion::Charset::SmallSigma", - u'≤': "Ion::Charset::LessEqual", - u'≈': "Ion::Charset::AlmostEqual", - u'ø': "Ion::Charset::Empty", - u'•': "Ion::Charset::MiddleDot" -} - -def ion_char(i18n_letter): - if i18n_letter == '\'': - return "'\\\''" - if ord(i18n_letter) < 128: - return "'" + i18n_letter + "'" - if i18n_letter in ion_special_characters: - return ion_special_characters[i18n_letter] - normalized = unicodedata.normalize("NFD", i18n_letter).encode('ascii', 'ignore') - #sys.stderr.write("Warning: Normalizing unicode character \"" + i18n_letter + "\" -> \"" + normalized + "\"\n") - return "'" + normalized.decode() + "'" - def source_definition(i18n_string): - ion_characters = [] - i = 0 - while i < len(i18n_string): - if i18n_string[i] == '\\': - i = i+1 - newChar = "'\\"+i18n_string[i]+"'" - ion_characters.append(newChar) - else: - ion_characters.append(ion_char(i18n_string[i])) - i = i+1 - ion_characters.append("0") - return "{" + ", ".join(ion_characters) + "}" + return (u"\"" + unicodedata.normalize("NFKD", i18n_string) + u"\"").encode("utf-8") def split_line(line): match = re.match(r"^(\w+)\s*=\s*\"(.*)\"$", line) @@ -78,7 +46,7 @@ def parse_files(files): return {"messages": sorted(messages), "universal_messages": sorted(universal_messages), "data": data} def print_header(data, path, locales): - f = open(path, 'w') + f = open(path, "w") f.write("#ifndef APPS_I18N_H\n") f.write("#define 
APPS_I18N_H\n\n") f.write("// This file is auto-generated by i18n.py\n\n") @@ -114,7 +82,7 @@ def print_header(data, path, locales): f.close() def print_implementation(data, path, locales): - f = open(path, 'w') + f = open(path, "w") f.write("#include \"i18n.h\"\n") f.write("#include \n") f.write("#include \n\n"); @@ -141,7 +109,11 @@ def print_implementation(data, path, locales): if not message in data["data"][locale]: sys.stderr.write("Error: Undefined key \"" + message + "\" for locale \"" + locale + "\"\n") sys.exit(-1) - f.write("constexpr static char " + locale + message + "[] = " + data["data"][locale][message] + ";\n") + f.write("constexpr static char " + locale + message + "[] = ") + f = open(path, "ab") # Re-open the file as binary to output raw UTF-8 bytes + f.write(data["data"][locale][message]) + f = open(path, "a") # Re-open the file as text + f.write(";\n") f.write("\n") f.write("constexpr static const char * messages[%d][%d] = {\n" % (len(data["messages"]), len(locales))) for message in data["messages"]: @@ -152,6 +124,18 @@ def print_implementation(data, path, locales): f.write("};\n\n") # Write the translate method + for message in data["universal_messages"]: + f.write("constexpr static char universal" + message + "[] = ") + f = open(path, "ab") # Re-open the file as binary to output raw UTF-8 bytes + f.write(data["data"]["universal"][message]) + f = open(path, "a") # Re-open the file as text + f.write(";\n") + f.write("\n") + f.write("constexpr static const char * universalMessages[%d] = {\n" % len(data["universal_messages"])) + for message in data["universal_messages"]: + f.write(" universal" + message + ",\n") + f.write("};\n") + f.write("\n") f.write("const char * translate(Message m, Language l) {\n") f.write(" assert(m != Message::LocalizedMessageMarker);\n") f.write(" int localizedMessageOffset = (int)Message::LocalizedMessageMarker+1;\n")