Update the Unicode Data and Algorithms up to Unicode 6.3.0

* Mongolian and Phags-pa characters have been given a Joining_Type
  classification for contextual shaping. As a part of these additions,
  one Phags-pa character has the Joining_Type value of L (Left Joining),
  which no character had been assigned before.
* The unassigned code points in the Currency Symbols block have been
  given the Bidi_Class property value ET and the Line_Break property
  value PR, to help implementations support new currency symbols,
  when they are encoded.
* Hebrew letters and basic punctuation marks have been assigned
  the newly introduced Word_Break property values Hebrew_Letter,
  Single_Quote, and Double_Quote.
* The Bidi_Class property has been extended with four new values
  for directional isolates.
For more details, see http://www.unicode.org/versions/Unicode6.3.0/

Change-Id: Iad62d02edc58a8497898dcd6d6c70d5aece317ea
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
Konstantin Ritt 2014-01-12 21:14:25 +02:00 committed by The Qt Project
parent a6046be428
commit edfce46a6c
6 changed files with 199 additions and 66 deletions

View File

@ -185,8 +185,9 @@ QT_BEGIN_NAMESPACE
\value Unicode_6_0 Version 6.0
\value Unicode_6_1 Version 6.1
\value Unicode_6_2 Version 6.2
\value Unicode_6_3 Version 6.3 Since Qt 5.3
\value Unicode_Unassigned The value is not assigned to any character
in version 6.2 of Unicode.
in version 6.3 of Unicode.
\sa unicodeVersion(), currentUnicodeVersion()
*/
@ -408,14 +409,18 @@ QT_BEGIN_NAMESPACE
\value DirEN
\value DirES
\value DirET
\value DirFSI Since Qt 5.3
\value DirL
\value DirLRE
\value DirLRI Since Qt 5.3
\value DirLRO
\value DirNSM
\value DirON
\value DirPDF
\value DirPDI Since Qt 5.3
\value DirR
\value DirRLE
\value DirRLI Since Qt 5.3
\value DirRLO
\value DirS
\value DirWS

View File

@ -262,7 +262,8 @@ public:
enum Direction
{
DirL, DirR, DirEN, DirES, DirET, DirAN, DirCS, DirB, DirS, DirWS, DirON,
DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN
DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN,
DirLRI, DirRLI, DirFSI, DirPDI
};
enum Decomposition
@ -332,7 +333,8 @@ public:
Unicode_5_2,
Unicode_6_0,
Unicode_6_1,
Unicode_6_2
Unicode_6_2,
Unicode_6_3
};
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO

View File

@ -57,7 +57,7 @@ namespace QUnicodeTools {
// -----------------------------------------------------------------------------------------------------
//
// The text boundaries determination algorithm.
// See http://www.unicode.org/reports/tr29/tr29-21.html
// See http://www.unicode.org/reports/tr29/tr29-23.html
//
// -----------------------------------------------------------------------------------------------------
@ -112,26 +112,30 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
namespace WB {
enum Action {
NoBreak = 0,
Break = 1,
Lookup = 2
NoBreak,
Break,
Lookup,
LookupW
};
static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnicodeTables::WordBreak_ExtendNumLet + 1] = {
// Other CR LF Newline Extend RI Katakana ALetter MidNumLet MidLetter MidNum Numeric ExtendNumLet
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Other
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
{ Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , NoBreak }, // Katakana
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Lookup , Break , NoBreak, NoBreak }, // ALetter
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
// Other CR LF Newline Extend RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtendNumLet
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Other
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
{ Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak }, // Katakana
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak }, // HebrewLetter
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak }, // ALetter
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
};
} // namespace WB
@ -160,8 +164,8 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
if (qt_initcharattributes_default_algorithm_only) {
// as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
// which caused "hi.there" to be treated like if it were just a single word;
// by remapping those characters in the Unicode tables generator.
// this code is needed to pass the coverage tests; remove once the issue is fixed.
// we keep the pre-5.1 behavior by remapping these characters in the Unicode tables generator
// and this code is needed to pass the coverage tests; remove once the issue is fixed.
if (ucs4 == 0x002E) // FULL STOP
ncls = QUnicodeTables::WordBreak_MidNumLet;
else if (ucs4 == 0x003A) // COLON
@ -170,8 +174,17 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
#endif
uchar action = WB::breakTable[cls][ncls];
if (Q_UNLIKELY(action == WB::Lookup)) {
action = WB::Break;
switch (action) {
case WB::Break:
break;
case WB::NoBreak:
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
// WB4: X(Extend|Format)* -> X
continue;
}
break;
case WB::Lookup:
case WB::LookupW:
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
@ -184,20 +197,28 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
prop = QUnicodeTables::properties(ucs4);
QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend))
if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend)) {
// WB4: X(Extend|Format)* -> X
continue;
if (Q_LIKELY(tcls == cls)) {
}
if (Q_LIKELY(tcls == cls || (action == WB::LookupW && (tcls == QUnicodeTables::WordBreak_HebrewLetter
|| tcls == QUnicodeTables::WordBreak_ALetter)))) {
i = lookahead;
ncls = tcls;
action = WB::NoBreak;
}
break;
}
} else if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
// WB4: X(Extend|Format)* -> X
if (Q_LIKELY(action != WB::Break))
continue;
if (action != WB::NoBreak) {
action = WB::Break;
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_SingleQuote && cls == QUnicodeTables::WordBreak_HebrewLetter))
action = WB::NoBreak; // WB7a
}
break;
}
cls = ncls;
if (action == WB::Break) {
attributes[pos].wordBreak = true;
@ -208,6 +229,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
currentWordType = WordTypeHiraganaKatakana;
attributes[pos].wordStart = true;
break;
case QUnicodeTables::WordBreak_HebrewLetter:
case QUnicodeTables::WordBreak_ALetter:
case QUnicodeTables::WordBreak_Numeric:
currentWordType = WordTypeAlphaNumeric;
@ -327,7 +349,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
// -----------------------------------------------------------------------------------------------------
//
// The line breaking algorithm.
// See http://www.unicode.org/reports/tr14/tr14-30.html
// See http://www.unicode.org/reports/tr14/tr14-32.html
//
// -----------------------------------------------------------------------------------------------------

View File

@ -241,7 +241,8 @@ using namespace std;
static const char *directions[] = {
"DirL", "DirR", "DirEN", "DirES", "DirET", "DirAN", "DirCS", "DirB", "DirS", "DirWS", "DirON",
"DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN"
"DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN",
"DirLRI", "DirRLI", "DirFSI", "DirPDI"
};
#endif
@ -2536,7 +2537,8 @@ static inline bool nextCharJoins(const QString &string, int pos)
++pos;
if (pos == string.length())
return false;
return string.at(pos).joining() != QChar::OtherJoining;
// ### U+A872 has joining type L
return string.at(pos) == QChar(0xA872) || string.at(pos).joining() != QChar::OtherJoining;
}
static inline bool prevCharJoins(const QString &string, int pos)
@ -2551,13 +2553,9 @@ static inline bool prevCharJoins(const QString &string, int pos)
static inline bool isRetainableControlCode(QChar c)
{
return (c.unicode() == 0x202a // LRE
|| c.unicode() == 0x202b // LRE
|| c.unicode() == 0x202c // PDF
|| c.unicode() == 0x202d // LRO
|| c.unicode() == 0x202e // RLO
|| c.unicode() == 0x200e // LRM
|| c.unicode() == 0x200f); // RLM
return (c.unicode() >= 0x202a && c.unicode() <= 0x202e) // LRE, RLE, PDF, LRO, RLO
|| (c.unicode() >= 0x200e && c.unicode() <= 0x200f) // LRM, RLM
|| (c.unicode() >= 0x2066 && c.unicode() <= 0x2069); // LRI, RLI, FSI, PDI
}
static QString stringMidRetainingBidiCC(const QString &string,

View File

@ -450,6 +450,18 @@ void tst_QChar::category()
void tst_QChar::direction()
{
QVERIFY(QChar::direction(0x200E) == QChar::DirL);
QVERIFY(QChar::direction(0x200F) == QChar::DirR);
QVERIFY(QChar::direction(0x202A) == QChar::DirLRE);
QVERIFY(QChar::direction(0x202B) == QChar::DirRLE);
QVERIFY(QChar::direction(0x202C) == QChar::DirPDF);
QVERIFY(QChar::direction(0x202D) == QChar::DirLRO);
QVERIFY(QChar::direction(0x202E) == QChar::DirRLO);
QVERIFY(QChar::direction(0x2066) == QChar::DirLRI);
QVERIFY(QChar::direction(0x2067) == QChar::DirRLI);
QVERIFY(QChar::direction(0x2068) == QChar::DirFSI);
QVERIFY(QChar::direction(0x2069) == QChar::DirPDI);
QVERIFY(QChar('a').direction() == QChar::DirL);
QVERIFY(QChar('0').direction() == QChar::DirEN);
QVERIFY(QChar((ushort)0x627).direction() == QChar::DirAL);
@ -492,6 +504,9 @@ void tst_QChar::joining()
QVERIFY(QChar::joining(0xf0000u) == QChar::OtherJoining);
QVERIFY(QChar::joining(0xE0030u) == QChar::OtherJoining);
QVERIFY(QChar::joining(0x2FA17u) == QChar::OtherJoining);
// ### U+A872 has joining type L
QVERIFY(QChar::joining((uint)0xA872) == QChar::OtherJoining);
}
void tst_QChar::combiningClass()
@ -605,6 +620,11 @@ void tst_QChar::unicodeVersion()
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
QVERIFY(QChar(0x061c).unicodeVersion() == QChar::Unicode_6_3);
QVERIFY(QChar::unicodeVersion((ushort)0x061c) == QChar::Unicode_6_3);
QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
QVERIFY(QChar::unicodeVersion((ushort)0x09ff) == QChar::Unicode_Unassigned);
QVERIFY(QChar::unicodeVersion((uint)0x09ff) == QChar::Unicode_Unassigned);

View File

@ -77,6 +77,7 @@ static void initAgeMap()
{ QChar::Unicode_6_0, "6.0" },
{ QChar::Unicode_6_1, "6.1" },
{ QChar::Unicode_6_2, "6.2" },
{ QChar::Unicode_6_3, "6.3" },
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@ -176,34 +177,66 @@ static void initDecompositionMap()
}
static QHash<QByteArray, QChar::Direction> directionMap;
// Generator-local mirror of QChar::Direction. Every enumerator carries the
// value of its QChar counterpart; Dir_Unassigned is one extra trailing
// sentinel that does not correspond to any QChar::Direction value.
enum Direction {
    // strong types
    DirL = QChar::DirL,
    DirR = QChar::DirR,
    // weak and neutral types
    DirEN = QChar::DirEN,
    DirES = QChar::DirES,
    DirET = QChar::DirET,
    DirAN = QChar::DirAN,
    DirCS = QChar::DirCS,
    DirB = QChar::DirB,
    DirS = QChar::DirS,
    DirWS = QChar::DirWS,
    DirON = QChar::DirON,
    // embeddings, overrides and the remaining pre-6.3 values
    DirLRE = QChar::DirLRE,
    DirLRO = QChar::DirLRO,
    DirAL = QChar::DirAL,
    DirRLE = QChar::DirRLE,
    DirRLO = QChar::DirRLO,
    DirPDF = QChar::DirPDF,
    DirNSM = QChar::DirNSM,
    DirBN = QChar::DirBN,
    // directional isolates
    DirLRI = QChar::DirLRI,
    DirRLI = QChar::DirRLI,
    DirFSI = QChar::DirFSI,
    DirPDI = QChar::DirPDI,

    // sentinel: implicitly DirPDI + 1
    Dir_Unassigned
};
static QHash<QByteArray, Direction> directionMap;
static void initDirectionMap()
{
struct Dir {
QChar::Direction dir;
Direction dir;
const char *name;
} directions[] = {
{ QChar::DirL, "L" },
{ QChar::DirR, "R" },
{ QChar::DirEN, "EN" },
{ QChar::DirES, "ES" },
{ QChar::DirET, "ET" },
{ QChar::DirAN, "AN" },
{ QChar::DirCS, "CS" },
{ QChar::DirB, "B" },
{ QChar::DirS, "S" },
{ QChar::DirWS, "WS" },
{ QChar::DirON, "ON" },
{ QChar::DirLRE, "LRE" },
{ QChar::DirLRO, "LRO" },
{ QChar::DirAL, "AL" },
{ QChar::DirRLE, "RLE" },
{ QChar::DirRLO, "RLO" },
{ QChar::DirPDF, "PDF" },
{ QChar::DirNSM, "NSM" },
{ QChar::DirBN, "BN" },
{ QChar::DirL, 0 }
{ DirL, "L" },
{ DirR, "R" },
{ DirEN, "EN" },
{ DirES, "ES" },
{ DirET, "ET" },
{ DirAN, "AN" },
{ DirCS, "CS" },
{ DirB, "B" },
{ DirS, "S" },
{ DirWS, "WS" },
{ DirON, "ON" },
{ DirLRE, "LRE" },
{ DirLRO, "LRO" },
{ DirAL, "AL" },
{ DirRLE, "RLE" },
{ DirRLO, "RLO" },
{ DirPDF, "PDF" },
{ DirNSM, "NSM" },
{ DirBN, "BN" },
{ DirLRI, "LRI" },
{ DirRLI, "RLI" },
{ DirFSI, "FSI" },
{ DirPDI, "PDI" },
{ Dir_Unassigned, 0 }
};
Dir *d = directions;
while (d->name) {
@ -323,7 +356,10 @@ static const char *word_break_class_string =
" WordBreak_Extend,\n"
" WordBreak_RegionalIndicator,\n"
" WordBreak_Katakana,\n"
" WordBreak_HebrewLetter,\n"
" WordBreak_ALetter,\n"
" WordBreak_SingleQuote,\n"
" WordBreak_DoubleQuote,\n"
" WordBreak_MidNumLet,\n"
" WordBreak_MidLetter,\n"
" WordBreak_MidNum,\n"
@ -339,7 +375,10 @@ enum WordBreakClass {
WordBreak_Extend,
WordBreak_RegionalIndicator,
WordBreak_Katakana,
WordBreak_HebrewLetter,
WordBreak_ALetter,
WordBreak_SingleQuote,
WordBreak_DoubleQuote,
WordBreak_MidNumLet,
WordBreak_MidLetter,
WordBreak_MidNum,
@ -365,7 +404,10 @@ static void initWordBreak()
{ WordBreak_Extend, "Format" },
{ WordBreak_RegionalIndicator, "Regional_Indicator" },
{ WordBreak_Katakana, "Katakana" },
{ WordBreak_HebrewLetter, "Hebrew_Letter" },
{ WordBreak_ALetter, "ALetter" },
{ WordBreak_SingleQuote, "Single_Quote" },
{ WordBreak_DoubleQuote, "Double_Quote" },
{ WordBreak_MidNumLet, "MidNumLet" },
{ WordBreak_MidLetter, "MidLetter" },
{ WordBreak_MidNum, "MidNum" },
@ -815,6 +857,31 @@ static int appendToSpecialCaseMap(const QList<int> &map)
return pos;
}
// Returns true if \a ucs4 is one of the code points this generator treats as
// Default_Ignorable_Code_Point.
//
// The set is derived as
//   Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
//   - White_Space
//   - FFF9..FFFB (Annotation Characters)
//   - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)
static inline bool isDefaultIgnorable(uint ucs4)
{
    struct Range {
        uint first;
        uint last;
    };
    // Inclusive ranges, sorted in ascending order so the scan can stop early.
    static const Range ignorable[] = {
        { 0x00ad, 0x00ad },
        { 0x034f, 0x034f },
        { 0x115f, 0x1160 },
        { 0x17b4, 0x17b5 },
        { 0x180b, 0x180d },
        { 0x200b, 0x200f },
        { 0x202a, 0x202e },
        { 0x2060, 0x206f },
        { 0x3164, 0x3164 },
        { 0xfe00, 0xfe0f },
        { 0xfeff, 0xfeff },
        { 0xffa0, 0xffa0 },
        { 0xfff0, 0xfff8 },
        { 0x1d173, 0x1d17a },
        { 0xe0000, 0xe0fff }
    };
    const int rangeCount = int(sizeof(ignorable) / sizeof(ignorable[0]));

    for (int idx = 0; idx < rangeCount; ++idx) {
        if (ucs4 < ignorable[idx].first)
            return false; // below this and, by ordering, every later range
        if (ucs4 <= ignorable[idx].last)
            return true;
    }
    return false;
}
struct UnicodeData {
UnicodeData(int codepoint = 0) {
p.category = QChar::Other_NotAssigned; // Cn
@ -842,6 +909,17 @@ struct UnicodeData {
|| (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
p.direction = QChar::DirR;
}
// The unassigned code points that default to ET are in the range:
// [U+20A0..U+20CF]
else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
p.direction = QChar::DirET;
}
// The unassigned code points that default to BN have one of the following properties:
// Default_Ignorable_Code_Point
// Noncharacter_Code_Point
else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
p.direction = QChar::DirBN;
}
p.lineBreakClass = LineBreak_AL; // XX -> AL
// LineBreak.txt
@ -858,6 +936,11 @@ struct UnicodeData {
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
p.lineBreakClass = LineBreak_ID;
}
// The unassigned code points that default to "PR" comprise a range in the following block:
// [U+20A0..U+20CF]
else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
p.lineBreakClass = LineBreak_PR;
}
mirroredChar = 0;
decompositionType = QChar::NoDecomposition;
@ -1008,7 +1091,10 @@ static void readUnicodeData()
else
++combiningClassUsage[data.p.combiningClass];
data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
if (dir == Dir_Unassigned)
qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
data.p.direction = QChar::Direction(dir);
if (!properties[UD_UpperCase].isEmpty()) {
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
@ -1180,8 +1266,8 @@ static void readArabicShaping()
qFatal("unassigned or unhandled joining value: %s", l[2].constData());
if (joining == Joining_Left) {
// As of Unicode 6.3, U+A872 is the only character of joining type Left_Joining defined in Unicode.
qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", codepoint, l[2].constData());
qWarning("ACHTUNG!!! joining type '%s' has been met for U+%X; the current implementation needs to be revised!",
l[2].trimmed().constData(), codepoint);
}
UnicodeData &d = UnicodeData::valueRef(codepoint);