Unicode: Extract EastAsianWidth property
This property is needed to properly implement the line breaking algorithm from UAX #14.

Task-number: QTBUG-97537
Pick-to: 6.3
Change-Id: Ia83cc553c9ef19fae33560721630849d2a95af84
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
40b4ad1866
commit
838a7a01f3
File diff suppressed because it is too large
Load Diff
@ -43,7 +43,8 @@ struct Properties {
|
||||
ushort joining : 3;
|
||||
signed short digitValue : 5;
|
||||
signed short mirrorDiff : 16;
|
||||
ushort unicodeVersion : 8; /* 5 used */
|
||||
ushort unicodeVersion : 5; /* 5 used */
|
||||
ushort eastAsianWidth : 3; /* 3 used */
|
||||
ushort nfQuickCheck : 8;
|
||||
#ifdef Q_OS_WASM
|
||||
unsigned char : 0; //wasm 64 packing trick
|
||||
@ -68,6 +69,15 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char16_t ucs2) noexcept;
|
||||
|
||||
static_assert(sizeof(Properties) == 20);
|
||||
|
||||
// Values of the Unicode East_Asian_Width property, named by the abbreviations
// used in EastAsianWidth.txt. The numeric value of an enumerator is what gets
// stored in the 3-bit Properties::eastAsianWidth bit-field.
// NOTE(review): this text looks identical to the generator's
// east_asian_width_string, so it is presumably generated output - confirm
// before editing it by hand.
enum class EastAsianWidth : unsigned int {
|
||||
A,
|
||||
F,
|
||||
H,
|
||||
N,
|
||||
Na,
|
||||
W,
|
||||
};
|
||||
|
||||
enum GraphemeBreakClass {
|
||||
GraphemeBreak_Any,
|
||||
GraphemeBreak_CR,
|
||||
@ -179,6 +189,10 @@ Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t usc4) noexcept;
|
||||
inline QStringView idnaMapping(QChar ch) noexcept
|
||||
{ return idnaMapping(ch.unicode()); }
|
||||
|
||||
// Returns the EastAsianWidth recorded in the property tables for the code
// point ucs4.
Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept;
|
||||
// Convenience overload: forwards ch.unicode() to the char32_t overload.
inline EastAsianWidth eastAsianWidth(QChar ch) noexcept
|
||||
{ return eastAsianWidth(ch.unicode()); }
|
||||
|
||||
} // namespace QUnicodeTables
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
2587
util/unicode/data/EastAsianWidth.txt
Normal file
2587
util/unicode/data/EastAsianWidth.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -59,6 +59,47 @@ static void initAgeMap()
|
||||
}
|
||||
}
|
||||
|
||||
// Source text of the EastAsianWidth enum, written verbatim into the generated
// header by main(). It must list the same enumerators, in the same order, as
// the EastAsianWidth enum this tool itself uses, because the tool emits the
// numeric enumerator values into the property table.
static const char *east_asian_width_string =
|
||||
R"(enum class EastAsianWidth : unsigned int {
|
||||
A,
|
||||
F,
|
||||
H,
|
||||
N,
|
||||
Na,
|
||||
W,
|
||||
};
|
||||
|
||||
)";
|
||||
|
||||
// Values of the Unicode East_Asian_Width property (UAX #11), named by the
// abbreviations used in the second field of EastAsianWidth.txt.
//
// The numeric values are emitted into the generated property table and read
// back through a static_cast, so this enum must stay identical (same
// enumerators, same order) to the copy in east_asian_width_string.
enum class EastAsianWidth : unsigned int {
    A,  // Ambiguous
    F,  // Fullwidth
    H,  // Halfwidth
    N,  // Neutral; also the default for code points not listed in the data file
    Na, // Narrow
    W,  // Wide
};

// The generated Properties struct stores this value in a 3-bit bit-field.
// W is the last enumerator; update this check when appending new values.
static_assert(static_cast<unsigned int>(EastAsianWidth::W) < (1u << 3),
              "EastAsianWidth must fit the 3-bit Properties::eastAsianWidth bit-field");
|
||||
|
||||
static QHash<QByteArray, EastAsianWidth> eastAsianWidthMap;
|
||||
|
||||
static void initEastAsianWidthMap()
|
||||
{
|
||||
constexpr struct W {
|
||||
EastAsianWidth width;
|
||||
const char *name;
|
||||
} widths[] = {
|
||||
{ EastAsianWidth::A, "A" },
|
||||
{ EastAsianWidth::F, "F" },
|
||||
{ EastAsianWidth::H, "H" },
|
||||
{ EastAsianWidth::N, "N" },
|
||||
{ EastAsianWidth::Na, "Na" },
|
||||
{ EastAsianWidth::W, "W" },
|
||||
};
|
||||
|
||||
for (auto &w : widths)
|
||||
eastAsianWidthMap.insert(w.name, w.width);
|
||||
}
|
||||
|
||||
static QHash<QByteArray, QChar::Category> categoryMap;
|
||||
|
||||
static void initCategoryMap()
|
||||
@ -849,7 +890,8 @@ static const char *property_string =
|
||||
" ushort joining : 3;\n"
|
||||
" signed short digitValue : 5;\n"
|
||||
" signed short mirrorDiff : 16;\n"
|
||||
" ushort unicodeVersion : 8; /* 5 used */\n"
|
||||
" ushort unicodeVersion : 5; /* 5 used */\n"
|
||||
" ushort eastAsianWidth : 3; /* 3 used */\n"
|
||||
" ushort nfQuickCheck : 8;\n" // could be narrowed
|
||||
"#ifdef Q_OS_WASM\n"
|
||||
" unsigned char : 0; //wasm 64 packing trick\n"
|
||||
@ -896,6 +938,10 @@ static const char *methods =
|
||||
"Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t usc4) noexcept;\n"
|
||||
"inline QStringView idnaMapping(QChar ch) noexcept\n"
|
||||
"{ return idnaMapping(ch.unicode()); }\n"
|
||||
"\n"
|
||||
"Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept;\n"
|
||||
"inline EastAsianWidth eastAsianWidth(QChar ch) noexcept\n"
|
||||
"{ return eastAsianWidth(ch.unicode()); }\n"
|
||||
"\n";
|
||||
|
||||
static const int SizeOfPropertiesStruct = 20;
|
||||
@ -918,6 +964,7 @@ struct PropertyFlags {
|
||||
&& direction == o.direction
|
||||
&& joining == o.joining
|
||||
&& age == o.age
|
||||
&& eastAsianWidth == o.eastAsianWidth
|
||||
&& digitValue == o.digitValue
|
||||
&& mirrorDiff == o.mirrorDiff
|
||||
&& lowerCaseDiff == o.lowerCaseDiff
|
||||
@ -945,6 +992,8 @@ struct PropertyFlags {
|
||||
QChar::JoiningType joining : 3;
|
||||
// from DerivedAge.txt
|
||||
QChar::UnicodeVersion age : 5;
|
||||
// From EastAsianWidth.txt
|
||||
EastAsianWidth eastAsianWidth = EastAsianWidth::N;
|
||||
int digitValue = -1;
|
||||
|
||||
int mirrorDiff : 16;
|
||||
@ -1483,6 +1532,52 @@ static void readDerivedAge()
|
||||
}
|
||||
}
|
||||
|
||||
static void readEastAsianWidth()
|
||||
{
|
||||
qDebug("Reading EastAsianWidth.txt");
|
||||
|
||||
QFile f("data/EastAsianWidth.txt");
|
||||
if (!f.exists() || !f.open(QFile::ReadOnly))
|
||||
qFatal("Couldn't find or read EastAsianWidth.txt");
|
||||
|
||||
while (!f.atEnd()) {
|
||||
QByteArray line = f.readLine().trimmed();
|
||||
|
||||
int comment = line.indexOf('#');
|
||||
line = (comment < 0 ? line : line.left(comment)).simplified();
|
||||
|
||||
if (line.isEmpty())
|
||||
continue;
|
||||
|
||||
QList<QByteArray> fields = line.split(';');
|
||||
Q_ASSERT(fields.size() == 2);
|
||||
|
||||
// That would be split(".."), but that API does not exist.
|
||||
const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
|
||||
QList<QByteArray> cl = codePoints.split('.');
|
||||
Q_ASSERT(cl.size() >= 1 && cl.size() <= 2);
|
||||
|
||||
const QByteArray widthString = fields[1].trimmed();
|
||||
if (!eastAsianWidthMap.contains(widthString)) {
|
||||
qFatal("Unhandled EastAsianWidth property value for %s: %s",
|
||||
qPrintable(codePoints), qPrintable(widthString));
|
||||
}
|
||||
auto width = eastAsianWidthMap.value(widthString);
|
||||
|
||||
bool ok;
|
||||
const int first = cl[0].toInt(&ok, 16);
|
||||
const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
|
||||
Q_ASSERT(ok);
|
||||
|
||||
for (int codepoint = first; codepoint <= last; ++codepoint) {
|
||||
UnicodeData &ud = UnicodeData::valueRef(codepoint);
|
||||
// Ensure that ranges don't overlap.
|
||||
Q_ASSERT(ud.p.eastAsianWidth == EastAsianWidth::N);
|
||||
ud.p.eastAsianWidth = width;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void readDerivedNormalizationProps()
|
||||
{
|
||||
qDebug("Reading DerivedNormalizationProps.txt");
|
||||
@ -2896,9 +2991,12 @@ static QByteArray createPropertyInfo()
|
||||
// " signed short mirrorDiff : 16;\n"
|
||||
out += QByteArray::number( p.mirrorDiff );
|
||||
out += ", ";
|
||||
// " ushort unicodeVersion : 8; /* 5 used */\n"
|
||||
// " ushort unicodeVersion : 5; /* 5 used */\n"
|
||||
out += QByteArray::number( p.age );
|
||||
out += ", ";
|
||||
// " ushort eastAsianWidth : 3; /* 3 used */\n"
|
||||
out += QByteArray::number( static_cast<unsigned int>(p.eastAsianWidth) );
|
||||
out += ", ";
|
||||
// " ushort nfQuickCheck : 8;\n"
|
||||
out += QByteArray::number( p.nfQuickCheck );
|
||||
out += ", ";
|
||||
@ -3003,6 +3101,11 @@ static QByteArray createPropertyInfo()
|
||||
"{\n"
|
||||
" return static_cast<IdnaStatus>(qGetProp(ucs4)->idnaStatus);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept\n"
|
||||
"{\n"
|
||||
" return static_cast<EastAsianWidth>(qGetProp(ucs4)->eastAsianWidth);\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
return out;
|
||||
@ -3458,6 +3561,7 @@ QByteArray createCasingInfo()
|
||||
int main(int, char **)
|
||||
{
|
||||
initAgeMap();
|
||||
initEastAsianWidthMap();
|
||||
initCategoryMap();
|
||||
initDecompositionMap();
|
||||
initDirectionMap();
|
||||
@ -3473,6 +3577,7 @@ int main(int, char **)
|
||||
readBidiMirroring();
|
||||
readArabicShaping();
|
||||
readDerivedAge();
|
||||
readEastAsianWidth();
|
||||
readDerivedNormalizationProps();
|
||||
readSpecialCasing();
|
||||
readCaseFolding();
|
||||
@ -3548,6 +3653,7 @@ int main(int, char **)
|
||||
f.write("namespace QUnicodeTables {\n\n");
|
||||
f.write(property_string);
|
||||
f.write(sizeOfPropertiesStructCheck);
|
||||
f.write(east_asian_width_string);
|
||||
f.write(grapheme_break_class_string);
|
||||
f.write(word_break_class_string);
|
||||
f.write(sentence_break_class_string);
|
||||
|
Loading…
x
Reference in New Issue
Block a user