[i18n] Encode strings as NFKD-normalized UTF-8 strings

This commit is contained in:
Romain Goyet
2018-10-31 10:37:41 +01:00
committed by Émilie Feral
parent 39e01f1d4c
commit 6fac2120fe

View File

@@ -1,4 +1,6 @@
#coding=utf-8
# This script gathers all .i18n files and aggregates them into a pair of .h/.cpp files
# In practice, it enforces an NFKD normalization
# It works with Python 2 and Python 3
import sys
import re
@@ -6,42 +8,8 @@ import unicodedata
import argparse
import io
# Non-ASCII characters that have a dedicated code point in Ion's custom
# charset. These must map to their named constant instead of being run
# through Unicode normalization (which would strip them to nothing).
ion_special_characters = {
    u'Δ': "Ion::Charset::CapitalDelta",
    u'Σ': "Ion::Charset::CapitalSigma",
    u'λ': "Ion::Charset::SmallLambda",
    u'μ': "Ion::Charset::SmallMu",
    u'σ': "Ion::Charset::SmallSigma",
    u'≤': "Ion::Charset::LessEqual",    # glyph restored: was lost in extraction
    u'≈': "Ion::Charset::AlmostEqual",  # glyph restored: was lost in extraction
    u'ø': "Ion::Charset::Empty",
    u'·': "Ion::Charset::MiddleDot"     # glyph restored: was lost in extraction
}
def ion_char(i18n_letter):
    """Translate one message character into a C expression.

    Plain ASCII becomes a char literal, characters with a dedicated Ion
    charset constant become that constant, and any other character falls
    back to the ASCII base letter of its NFD decomposition.
    """
    # A single quote must be escaped inside a C char literal.
    if i18n_letter == "'":
        return "'\\\''"
    # Plain ASCII goes straight into a char literal.
    if ord(i18n_letter) < 128:
        return "'%c'" % i18n_letter
    # Characters with a dedicated constant in Ion's charset.
    special = ion_special_characters.get(i18n_letter)
    if special is not None:
        return special
    # Decompose (e.g. "é" -> "e" + combining acute) and drop everything
    # that is not ASCII, keeping only the base letter.
    stripped = unicodedata.normalize("NFD", i18n_letter).encode('ascii', 'ignore')
    return "'%s'" % stripped.decode()
def source_definition(i18n_string):
    """Return the C string literal for a message, as NFKD-normalized UTF-8 bytes.

    NOTE(review): the diff flattening had merged the removed pre-commit
    char-array implementation (dead code after its return) with the added
    one-line implementation; only the latter — matching the commit title —
    is kept here.

    The string is wrapped in double quotes and encoded to bytes, so it can
    be written verbatim into the generated file when opened in binary mode.
    """
    normalized = unicodedata.normalize("NFKD", i18n_string)
    return (u"\"" + normalized + u"\"").encode("utf-8")
def split_line(line):
match = re.match(r"^(\w+)\s*=\s*\"(.*)\"$", line)
@@ -78,7 +46,7 @@ def parse_files(files):
return {"messages": sorted(messages), "universal_messages": sorted(universal_messages), "data": data}
def print_header(data, path, locales):
f = open(path, 'w')
f = open(path, "w")
f.write("#ifndef APPS_I18N_H\n")
f.write("#define APPS_I18N_H\n\n")
f.write("// This file is auto-generated by i18n.py\n\n")
@@ -114,7 +82,7 @@ def print_header(data, path, locales):
f.close()
def print_implementation(data, path, locales):
f = open(path, 'w')
f = open(path, "w")
f.write("#include \"i18n.h\"\n")
f.write("#include <apps/global_preferences.h>\n")
f.write("#include <assert.h>\n\n");
@@ -141,7 +109,11 @@ def print_implementation(data, path, locales):
if not message in data["data"][locale]:
sys.stderr.write("Error: Undefined key \"" + message + "\" for locale \"" + locale + "\"\n")
sys.exit(-1)
f.write("constexpr static char " + locale + message + "[] = " + data["data"][locale][message] + ";\n")
f.write("constexpr static char " + locale + message + "[] = ")
f = open(path, "ab") # Re-open the file as binary to output raw UTF-8 bytes
f.write(data["data"][locale][message])
f = open(path, "a") # Re-open the file as text
f.write(";\n")
f.write("\n")
f.write("constexpr static const char * messages[%d][%d] = {\n" % (len(data["messages"]), len(locales)))
for message in data["messages"]:
@@ -152,6 +124,18 @@ def print_implementation(data, path, locales):
f.write("};\n\n")
# Write the translate method
for message in data["universal_messages"]:
f.write("constexpr static char universal" + message + "[] = ")
f = open(path, "ab") # Re-open the file as binary to output raw UTF-8 bytes
f.write(data["data"]["universal"][message])
f = open(path, "a") # Re-open the file as text
f.write(";\n")
f.write("\n")
f.write("constexpr static const char * universalMessages[%d] = {\n" % len(data["universal_messages"]))
for message in data["universal_messages"]:
f.write(" universal" + message + ",\n")
f.write("};\n")
f.write("\n")
f.write("const char * translate(Message m, Language l) {\n")
f.write(" assert(m != Message::LocalizedMessageMarker);\n")
f.write(" int localizedMessageOffset = (int)Message::LocalizedMessageMarker+1;\n")