Update the Unicode Data and Algorithms up to Unicode 6.3.0
* Mongolian and Phags-pa characters have been given a Joining_Type classification for contextual shaping. As a part of these additions, one Phags-pa character has the Joining_Type value of L (Left Joining), which no character had been assigned before. * The unassigned code points in the Currency Symbols block have been given the Bidi_Class property value ET and the Line_Break property value PR, to help implementations support new currency symbols, when they are encoded. * Hebrew letters and basic punctuation marks have been assigned the newly introduced Word_Break property values Hebrew_Letter, Single_Quote, and Double_Quote. * The Bidi_Class property has been extended with four new values for directional isolates. For more details, see http://www.unicode.org/versions/Unicode6.3.0/ Change-Id: Iad62d02edc58a8497898dcd6d6c70d5aece317ea Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
parent
a6046be428
commit
edfce46a6c
@ -185,8 +185,9 @@ QT_BEGIN_NAMESPACE
|
|||||||
\value Unicode_6_0 Version 6.0
|
\value Unicode_6_0 Version 6.0
|
||||||
\value Unicode_6_1 Version 6.1
|
\value Unicode_6_1 Version 6.1
|
||||||
\value Unicode_6_2 Version 6.2
|
\value Unicode_6_2 Version 6.2
|
||||||
|
\value Unicode_6_3 Version 6.3 Since Qt 5.3
|
||||||
\value Unicode_Unassigned The value is not assigned to any character
|
\value Unicode_Unassigned The value is not assigned to any character
|
||||||
in version 6.2 of Unicode.
|
in version 6.3 of Unicode.
|
||||||
|
|
||||||
\sa unicodeVersion(), currentUnicodeVersion()
|
\sa unicodeVersion(), currentUnicodeVersion()
|
||||||
*/
|
*/
|
||||||
@ -408,14 +409,18 @@ QT_BEGIN_NAMESPACE
|
|||||||
\value DirEN
|
\value DirEN
|
||||||
\value DirES
|
\value DirES
|
||||||
\value DirET
|
\value DirET
|
||||||
|
\value DirFSI Since Qt 5.3
|
||||||
\value DirL
|
\value DirL
|
||||||
\value DirLRE
|
\value DirLRE
|
||||||
|
\value DirLRI Since Qt 5.3
|
||||||
\value DirLRO
|
\value DirLRO
|
||||||
\value DirNSM
|
\value DirNSM
|
||||||
\value DirON
|
\value DirON
|
||||||
\value DirPDF
|
\value DirPDF
|
||||||
|
\value DirPDI Since Qt 5.3
|
||||||
\value DirR
|
\value DirR
|
||||||
\value DirRLE
|
\value DirRLE
|
||||||
|
\value DirRLI Since Qt 5.3
|
||||||
\value DirRLO
|
\value DirRLO
|
||||||
\value DirS
|
\value DirS
|
||||||
\value DirWS
|
\value DirWS
|
||||||
|
@ -262,7 +262,8 @@ public:
|
|||||||
enum Direction
|
enum Direction
|
||||||
{
|
{
|
||||||
DirL, DirR, DirEN, DirES, DirET, DirAN, DirCS, DirB, DirS, DirWS, DirON,
|
DirL, DirR, DirEN, DirES, DirET, DirAN, DirCS, DirB, DirS, DirWS, DirON,
|
||||||
DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN
|
DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN,
|
||||||
|
DirLRI, DirRLI, DirFSI, DirPDI
|
||||||
};
|
};
|
||||||
|
|
||||||
enum Decomposition
|
enum Decomposition
|
||||||
@ -332,7 +333,8 @@ public:
|
|||||||
Unicode_5_2,
|
Unicode_5_2,
|
||||||
Unicode_6_0,
|
Unicode_6_0,
|
||||||
Unicode_6_1,
|
Unicode_6_1,
|
||||||
Unicode_6_2
|
Unicode_6_2,
|
||||||
|
Unicode_6_3
|
||||||
};
|
};
|
||||||
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO
|
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ namespace QUnicodeTools {
|
|||||||
// -----------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// The text boundaries determination algorithm.
|
// The text boundaries determination algorithm.
|
||||||
// See http://www.unicode.org/reports/tr29/tr29-21.html
|
// See http://www.unicode.org/reports/tr29/tr29-23.html
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
@ -112,26 +112,30 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
|
|||||||
namespace WB {
|
namespace WB {
|
||||||
|
|
||||||
enum Action {
|
enum Action {
|
||||||
NoBreak = 0,
|
NoBreak,
|
||||||
Break = 1,
|
Break,
|
||||||
Lookup = 2
|
Lookup,
|
||||||
|
LookupW
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnicodeTables::WordBreak_ExtendNumLet + 1] = {
|
static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnicodeTables::WordBreak_ExtendNumLet + 1] = {
|
||||||
// Other CR LF Newline Extend RI Katakana ALetter MidNumLet MidLetter MidNum Numeric ExtendNumLet
|
// Other CR LF Newline Extend RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtendNumLet
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Other
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Other
|
||||||
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
|
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
|
||||||
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
|
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
|
||||||
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
|
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
|
||||||
{ Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
|
{ Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , NoBreak }, // Katakana
|
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak }, // Katakana
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Lookup , Break , NoBreak, NoBreak }, // ALetter
|
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak }, // HebrewLetter
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
|
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak }, // ALetter
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
|
||||||
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
|
||||||
|
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
|
||||||
|
{ Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
|
||||||
|
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace WB
|
} // namespace WB
|
||||||
@ -160,8 +164,8 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
|||||||
if (qt_initcharattributes_default_algorithm_only) {
|
if (qt_initcharattributes_default_algorithm_only) {
|
||||||
// as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
|
// as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
|
||||||
// which caused "hi.there" to be treated as if it were just a single word;
|
// which caused "hi.there" to be treated as if it were just a single word;
|
||||||
// by remapping those characters in the Unicode tables generator.
|
// we keep the pre-5.1 behavior by remapping these characters in the Unicode tables generator
|
||||||
// this code is needed to pass the coverage tests; remove once the issue is fixed.
|
// and this code is needed to pass the coverage tests; remove once the issue is fixed.
|
||||||
if (ucs4 == 0x002E) // FULL STOP
|
if (ucs4 == 0x002E) // FULL STOP
|
||||||
ncls = QUnicodeTables::WordBreak_MidNumLet;
|
ncls = QUnicodeTables::WordBreak_MidNumLet;
|
||||||
else if (ucs4 == 0x003A) // COLON
|
else if (ucs4 == 0x003A) // COLON
|
||||||
@ -170,8 +174,17 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
uchar action = WB::breakTable[cls][ncls];
|
uchar action = WB::breakTable[cls][ncls];
|
||||||
if (Q_UNLIKELY(action == WB::Lookup)) {
|
switch (action) {
|
||||||
action = WB::Break;
|
case WB::Break:
|
||||||
|
break;
|
||||||
|
case WB::NoBreak:
|
||||||
|
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
|
||||||
|
// WB4: X(Extend|Format)* -> X
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case WB::Lookup:
|
||||||
|
case WB::LookupW:
|
||||||
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
|
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
|
||||||
ucs4 = string[lookahead];
|
ucs4 = string[lookahead];
|
||||||
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
|
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
|
||||||
@ -184,20 +197,28 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
|||||||
|
|
||||||
prop = QUnicodeTables::properties(ucs4);
|
prop = QUnicodeTables::properties(ucs4);
|
||||||
QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
|
QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
|
||||||
if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend))
|
|
||||||
|
if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend)) {
|
||||||
|
// WB4: X(Extend|Format)* -> X
|
||||||
continue;
|
continue;
|
||||||
if (Q_LIKELY(tcls == cls)) {
|
}
|
||||||
|
|
||||||
|
if (Q_LIKELY(tcls == cls || (action == WB::LookupW && (tcls == QUnicodeTables::WordBreak_HebrewLetter
|
||||||
|
|| tcls == QUnicodeTables::WordBreak_ALetter)))) {
|
||||||
i = lookahead;
|
i = lookahead;
|
||||||
ncls = tcls;
|
ncls = tcls;
|
||||||
action = WB::NoBreak;
|
action = WB::NoBreak;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
|
if (action != WB::NoBreak) {
|
||||||
// WB4: X(Extend|Format)* -> X
|
action = WB::Break;
|
||||||
if (Q_LIKELY(action != WB::Break))
|
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_SingleQuote && cls == QUnicodeTables::WordBreak_HebrewLetter))
|
||||||
continue;
|
action = WB::NoBreak; // WB7a
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
cls = ncls;
|
cls = ncls;
|
||||||
if (action == WB::Break) {
|
if (action == WB::Break) {
|
||||||
attributes[pos].wordBreak = true;
|
attributes[pos].wordBreak = true;
|
||||||
@ -208,6 +229,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
|||||||
currentWordType = WordTypeHiraganaKatakana;
|
currentWordType = WordTypeHiraganaKatakana;
|
||||||
attributes[pos].wordStart = true;
|
attributes[pos].wordStart = true;
|
||||||
break;
|
break;
|
||||||
|
case QUnicodeTables::WordBreak_HebrewLetter:
|
||||||
case QUnicodeTables::WordBreak_ALetter:
|
case QUnicodeTables::WordBreak_ALetter:
|
||||||
case QUnicodeTables::WordBreak_Numeric:
|
case QUnicodeTables::WordBreak_Numeric:
|
||||||
currentWordType = WordTypeAlphaNumeric;
|
currentWordType = WordTypeAlphaNumeric;
|
||||||
@ -327,7 +349,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
|
|||||||
// -----------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// The line breaking algorithm.
|
// The line breaking algorithm.
|
||||||
// See http://www.unicode.org/reports/tr14/tr14-30.html
|
// See http://www.unicode.org/reports/tr14/tr14-32.html
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -241,7 +241,8 @@ using namespace std;
|
|||||||
|
|
||||||
static const char *directions[] = {
|
static const char *directions[] = {
|
||||||
"DirL", "DirR", "DirEN", "DirES", "DirET", "DirAN", "DirCS", "DirB", "DirS", "DirWS", "DirON",
|
"DirL", "DirR", "DirEN", "DirES", "DirET", "DirAN", "DirCS", "DirB", "DirS", "DirWS", "DirON",
|
||||||
"DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN"
|
"DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN",
|
||||||
|
"DirLRI", "DirRLI", "DirFSI", "DirPDI"
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -2536,7 +2537,8 @@ static inline bool nextCharJoins(const QString &string, int pos)
|
|||||||
++pos;
|
++pos;
|
||||||
if (pos == string.length())
|
if (pos == string.length())
|
||||||
return false;
|
return false;
|
||||||
return string.at(pos).joining() != QChar::OtherJoining;
|
// ### U+A872 has joining type L
|
||||||
|
return string.at(pos) == QChar(0xA872) || string.at(pos).joining() != QChar::OtherJoining;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool prevCharJoins(const QString &string, int pos)
|
static inline bool prevCharJoins(const QString &string, int pos)
|
||||||
@ -2551,13 +2553,9 @@ static inline bool prevCharJoins(const QString &string, int pos)
|
|||||||
|
|
||||||
static inline bool isRetainableControlCode(QChar c)
|
static inline bool isRetainableControlCode(QChar c)
|
||||||
{
|
{
|
||||||
return (c.unicode() == 0x202a // LRE
|
return (c.unicode() >= 0x202a && c.unicode() <= 0x202e) // LRE, RLE, PDF, LRO, RLO
|
||||||
|| c.unicode() == 0x202b // RLE
|
|| (c.unicode() >= 0x200e && c.unicode() <= 0x200f) // LRM, RLM
|
||||||
|| c.unicode() == 0x202c // PDF
|
|| (c.unicode() >= 0x2066 && c.unicode() <= 0x2069); // LRI, RLI, FSI, PDI
|
||||||
|| c.unicode() == 0x202d // LRO
|
|
||||||
|| c.unicode() == 0x202e // RLO
|
|
||||||
|| c.unicode() == 0x200e // LRM
|
|
||||||
|| c.unicode() == 0x200f); // RLM
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static QString stringMidRetainingBidiCC(const QString &string,
|
static QString stringMidRetainingBidiCC(const QString &string,
|
||||||
|
@ -450,6 +450,18 @@ void tst_QChar::category()
|
|||||||
|
|
||||||
void tst_QChar::direction()
|
void tst_QChar::direction()
|
||||||
{
|
{
|
||||||
|
QVERIFY(QChar::direction(0x200E) == QChar::DirL);
|
||||||
|
QVERIFY(QChar::direction(0x200F) == QChar::DirR);
|
||||||
|
QVERIFY(QChar::direction(0x202A) == QChar::DirLRE);
|
||||||
|
QVERIFY(QChar::direction(0x202B) == QChar::DirRLE);
|
||||||
|
QVERIFY(QChar::direction(0x202C) == QChar::DirPDF);
|
||||||
|
QVERIFY(QChar::direction(0x202D) == QChar::DirLRO);
|
||||||
|
QVERIFY(QChar::direction(0x202E) == QChar::DirRLO);
|
||||||
|
QVERIFY(QChar::direction(0x2066) == QChar::DirLRI);
|
||||||
|
QVERIFY(QChar::direction(0x2067) == QChar::DirRLI);
|
||||||
|
QVERIFY(QChar::direction(0x2068) == QChar::DirFSI);
|
||||||
|
QVERIFY(QChar::direction(0x2069) == QChar::DirPDI);
|
||||||
|
|
||||||
QVERIFY(QChar('a').direction() == QChar::DirL);
|
QVERIFY(QChar('a').direction() == QChar::DirL);
|
||||||
QVERIFY(QChar('0').direction() == QChar::DirEN);
|
QVERIFY(QChar('0').direction() == QChar::DirEN);
|
||||||
QVERIFY(QChar((ushort)0x627).direction() == QChar::DirAL);
|
QVERIFY(QChar((ushort)0x627).direction() == QChar::DirAL);
|
||||||
@ -492,6 +504,9 @@ void tst_QChar::joining()
|
|||||||
QVERIFY(QChar::joining(0xf0000u) == QChar::OtherJoining);
|
QVERIFY(QChar::joining(0xf0000u) == QChar::OtherJoining);
|
||||||
QVERIFY(QChar::joining(0xE0030u) == QChar::OtherJoining);
|
QVERIFY(QChar::joining(0xE0030u) == QChar::OtherJoining);
|
||||||
QVERIFY(QChar::joining(0x2FA17u) == QChar::OtherJoining);
|
QVERIFY(QChar::joining(0x2FA17u) == QChar::OtherJoining);
|
||||||
|
|
||||||
|
// ### U+A872 has joining type L
|
||||||
|
QVERIFY(QChar::joining((uint)0xA872) == QChar::OtherJoining);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QChar::combiningClass()
|
void tst_QChar::combiningClass()
|
||||||
@ -605,6 +620,11 @@ void tst_QChar::unicodeVersion()
|
|||||||
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
|
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
|
||||||
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
|
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
|
||||||
|
|
||||||
|
QVERIFY(QChar(0x061c).unicodeVersion() == QChar::Unicode_6_3);
|
||||||
|
QVERIFY(QChar::unicodeVersion((ushort)0x061c) == QChar::Unicode_6_3);
|
||||||
|
QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
|
||||||
|
QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
|
||||||
|
|
||||||
QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
|
QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
|
||||||
QVERIFY(QChar::unicodeVersion((ushort)0x09ff) == QChar::Unicode_Unassigned);
|
QVERIFY(QChar::unicodeVersion((ushort)0x09ff) == QChar::Unicode_Unassigned);
|
||||||
QVERIFY(QChar::unicodeVersion((uint)0x09ff) == QChar::Unicode_Unassigned);
|
QVERIFY(QChar::unicodeVersion((uint)0x09ff) == QChar::Unicode_Unassigned);
|
||||||
|
@ -77,6 +77,7 @@ static void initAgeMap()
|
|||||||
{ QChar::Unicode_6_0, "6.0" },
|
{ QChar::Unicode_6_0, "6.0" },
|
||||||
{ QChar::Unicode_6_1, "6.1" },
|
{ QChar::Unicode_6_1, "6.1" },
|
||||||
{ QChar::Unicode_6_2, "6.2" },
|
{ QChar::Unicode_6_2, "6.2" },
|
||||||
|
{ QChar::Unicode_6_3, "6.3" },
|
||||||
{ QChar::Unicode_Unassigned, 0 }
|
{ QChar::Unicode_Unassigned, 0 }
|
||||||
};
|
};
|
||||||
AgeMap *d = ageMap;
|
AgeMap *d = ageMap;
|
||||||
@ -176,34 +177,66 @@ static void initDecompositionMap()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static QHash<QByteArray, QChar::Direction> directionMap;
|
enum Direction {
|
||||||
|
DirL = QChar::DirL,
|
||||||
|
DirR = QChar::DirR,
|
||||||
|
DirEN = QChar::DirEN,
|
||||||
|
DirES = QChar::DirES,
|
||||||
|
DirET = QChar::DirET,
|
||||||
|
DirAN = QChar::DirAN,
|
||||||
|
DirCS = QChar::DirCS,
|
||||||
|
DirB = QChar::DirB,
|
||||||
|
DirS = QChar::DirS,
|
||||||
|
DirWS = QChar::DirWS,
|
||||||
|
DirON = QChar::DirON,
|
||||||
|
DirLRE = QChar::DirLRE,
|
||||||
|
DirLRO = QChar::DirLRO,
|
||||||
|
DirAL = QChar::DirAL,
|
||||||
|
DirRLE = QChar::DirRLE,
|
||||||
|
DirRLO = QChar::DirRLO,
|
||||||
|
DirPDF = QChar::DirPDF,
|
||||||
|
DirNSM = QChar::DirNSM,
|
||||||
|
DirBN = QChar::DirBN,
|
||||||
|
DirLRI = QChar::DirLRI,
|
||||||
|
DirRLI = QChar::DirRLI,
|
||||||
|
DirFSI = QChar::DirFSI,
|
||||||
|
DirPDI = QChar::DirPDI
|
||||||
|
|
||||||
|
, Dir_Unassigned
|
||||||
|
};
|
||||||
|
|
||||||
|
static QHash<QByteArray, Direction> directionMap;
|
||||||
|
|
||||||
static void initDirectionMap()
|
static void initDirectionMap()
|
||||||
{
|
{
|
||||||
struct Dir {
|
struct Dir {
|
||||||
QChar::Direction dir;
|
Direction dir;
|
||||||
const char *name;
|
const char *name;
|
||||||
} directions[] = {
|
} directions[] = {
|
||||||
{ QChar::DirL, "L" },
|
{ DirL, "L" },
|
||||||
{ QChar::DirR, "R" },
|
{ DirR, "R" },
|
||||||
{ QChar::DirEN, "EN" },
|
{ DirEN, "EN" },
|
||||||
{ QChar::DirES, "ES" },
|
{ DirES, "ES" },
|
||||||
{ QChar::DirET, "ET" },
|
{ DirET, "ET" },
|
||||||
{ QChar::DirAN, "AN" },
|
{ DirAN, "AN" },
|
||||||
{ QChar::DirCS, "CS" },
|
{ DirCS, "CS" },
|
||||||
{ QChar::DirB, "B" },
|
{ DirB, "B" },
|
||||||
{ QChar::DirS, "S" },
|
{ DirS, "S" },
|
||||||
{ QChar::DirWS, "WS" },
|
{ DirWS, "WS" },
|
||||||
{ QChar::DirON, "ON" },
|
{ DirON, "ON" },
|
||||||
{ QChar::DirLRE, "LRE" },
|
{ DirLRE, "LRE" },
|
||||||
{ QChar::DirLRO, "LRO" },
|
{ DirLRO, "LRO" },
|
||||||
{ QChar::DirAL, "AL" },
|
{ DirAL, "AL" },
|
||||||
{ QChar::DirRLE, "RLE" },
|
{ DirRLE, "RLE" },
|
||||||
{ QChar::DirRLO, "RLO" },
|
{ DirRLO, "RLO" },
|
||||||
{ QChar::DirPDF, "PDF" },
|
{ DirPDF, "PDF" },
|
||||||
{ QChar::DirNSM, "NSM" },
|
{ DirNSM, "NSM" },
|
||||||
{ QChar::DirBN, "BN" },
|
{ DirBN, "BN" },
|
||||||
{ QChar::DirL, 0 }
|
{ DirLRI, "LRI" },
|
||||||
|
{ DirRLI, "RLI" },
|
||||||
|
{ DirFSI, "FSI" },
|
||||||
|
{ DirPDI, "PDI" },
|
||||||
|
{ Dir_Unassigned, 0 }
|
||||||
};
|
};
|
||||||
Dir *d = directions;
|
Dir *d = directions;
|
||||||
while (d->name) {
|
while (d->name) {
|
||||||
@ -323,7 +356,10 @@ static const char *word_break_class_string =
|
|||||||
" WordBreak_Extend,\n"
|
" WordBreak_Extend,\n"
|
||||||
" WordBreak_RegionalIndicator,\n"
|
" WordBreak_RegionalIndicator,\n"
|
||||||
" WordBreak_Katakana,\n"
|
" WordBreak_Katakana,\n"
|
||||||
|
" WordBreak_HebrewLetter,\n"
|
||||||
" WordBreak_ALetter,\n"
|
" WordBreak_ALetter,\n"
|
||||||
|
" WordBreak_SingleQuote,\n"
|
||||||
|
" WordBreak_DoubleQuote,\n"
|
||||||
" WordBreak_MidNumLet,\n"
|
" WordBreak_MidNumLet,\n"
|
||||||
" WordBreak_MidLetter,\n"
|
" WordBreak_MidLetter,\n"
|
||||||
" WordBreak_MidNum,\n"
|
" WordBreak_MidNum,\n"
|
||||||
@ -339,7 +375,10 @@ enum WordBreakClass {
|
|||||||
WordBreak_Extend,
|
WordBreak_Extend,
|
||||||
WordBreak_RegionalIndicator,
|
WordBreak_RegionalIndicator,
|
||||||
WordBreak_Katakana,
|
WordBreak_Katakana,
|
||||||
|
WordBreak_HebrewLetter,
|
||||||
WordBreak_ALetter,
|
WordBreak_ALetter,
|
||||||
|
WordBreak_SingleQuote,
|
||||||
|
WordBreak_DoubleQuote,
|
||||||
WordBreak_MidNumLet,
|
WordBreak_MidNumLet,
|
||||||
WordBreak_MidLetter,
|
WordBreak_MidLetter,
|
||||||
WordBreak_MidNum,
|
WordBreak_MidNum,
|
||||||
@ -365,7 +404,10 @@ static void initWordBreak()
|
|||||||
{ WordBreak_Extend, "Format" },
|
{ WordBreak_Extend, "Format" },
|
||||||
{ WordBreak_RegionalIndicator, "Regional_Indicator" },
|
{ WordBreak_RegionalIndicator, "Regional_Indicator" },
|
||||||
{ WordBreak_Katakana, "Katakana" },
|
{ WordBreak_Katakana, "Katakana" },
|
||||||
|
{ WordBreak_HebrewLetter, "Hebrew_Letter" },
|
||||||
{ WordBreak_ALetter, "ALetter" },
|
{ WordBreak_ALetter, "ALetter" },
|
||||||
|
{ WordBreak_SingleQuote, "Single_Quote" },
|
||||||
|
{ WordBreak_DoubleQuote, "Double_Quote" },
|
||||||
{ WordBreak_MidNumLet, "MidNumLet" },
|
{ WordBreak_MidNumLet, "MidNumLet" },
|
||||||
{ WordBreak_MidLetter, "MidLetter" },
|
{ WordBreak_MidLetter, "MidLetter" },
|
||||||
{ WordBreak_MidNum, "MidNum" },
|
{ WordBreak_MidNum, "MidNum" },
|
||||||
@ -815,6 +857,31 @@ static int appendToSpecialCaseMap(const QList<int> &map)
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isDefaultIgnorable(uint ucs4)
|
||||||
|
{
|
||||||
|
// Default_Ignorable_Code_Point:
|
||||||
|
// Generated from
|
||||||
|
// Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
|
||||||
|
// - White_Space - FFF9..FFFB (Annotation Characters)
|
||||||
|
// - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)
|
||||||
|
if (ucs4 <= 0xff)
|
||||||
|
return ucs4 == 0xad;
|
||||||
|
|
||||||
|
return ucs4 == 0x034f
|
||||||
|
|| (ucs4 >= 0x115f && ucs4 <= 0x1160)
|
||||||
|
|| (ucs4 >= 0x17b4 && ucs4 <= 0x17b5)
|
||||||
|
|| (ucs4 >= 0x180b && ucs4 <= 0x180d)
|
||||||
|
|| (ucs4 >= 0x200b && ucs4 <= 0x200f)
|
||||||
|
|| (ucs4 >= 0x202a && ucs4 <= 0x202e)
|
||||||
|
|| (ucs4 >= 0x2060 && ucs4 <= 0x206f)
|
||||||
|
|| ucs4 == 0x3164
|
||||||
|
|| (ucs4 >= 0xfe00 && ucs4 <= 0xfe0f)
|
||||||
|
|| ucs4 == 0xfeff
|
||||||
|
|| ucs4 == 0xffa0
|
||||||
|
|| (ucs4 >= 0xfff0 && ucs4 <= 0xfff8)
|
||||||
|
|| (ucs4 >= 0x1d173 && ucs4 <= 0xe0fff && (ucs4 <= 0x1d17a || ucs4 >= 0xe0000));
|
||||||
|
}
|
||||||
|
|
||||||
struct UnicodeData {
|
struct UnicodeData {
|
||||||
UnicodeData(int codepoint = 0) {
|
UnicodeData(int codepoint = 0) {
|
||||||
p.category = QChar::Other_NotAssigned; // Cn
|
p.category = QChar::Other_NotAssigned; // Cn
|
||||||
@ -842,6 +909,17 @@ struct UnicodeData {
|
|||||||
|| (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
|
|| (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
|
||||||
p.direction = QChar::DirR;
|
p.direction = QChar::DirR;
|
||||||
}
|
}
|
||||||
|
// The unassigned code points that default to ET are in the range:
|
||||||
|
// [U+20A0..U+20CF]
|
||||||
|
else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
|
||||||
|
p.direction = QChar::DirET;
|
||||||
|
}
|
||||||
|
// The unassigned code points that default to BN have one of the following properties:
|
||||||
|
// Default_Ignorable_Code_Point
|
||||||
|
// Noncharacter_Code_Point
|
||||||
|
else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
|
||||||
|
p.direction = QChar::DirBN;
|
||||||
|
}
|
||||||
|
|
||||||
p.lineBreakClass = LineBreak_AL; // XX -> AL
|
p.lineBreakClass = LineBreak_AL; // XX -> AL
|
||||||
// LineBreak.txt
|
// LineBreak.txt
|
||||||
@ -858,6 +936,11 @@ struct UnicodeData {
|
|||||||
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
|
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
|
||||||
p.lineBreakClass = LineBreak_ID;
|
p.lineBreakClass = LineBreak_ID;
|
||||||
}
|
}
|
||||||
|
// The unassigned code points that default to "PR" comprise a range in the following block:
|
||||||
|
// [U+20A0..U+20CF]
|
||||||
|
else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
|
||||||
|
p.lineBreakClass = LineBreak_PR;
|
||||||
|
}
|
||||||
|
|
||||||
mirroredChar = 0;
|
mirroredChar = 0;
|
||||||
decompositionType = QChar::NoDecomposition;
|
decompositionType = QChar::NoDecomposition;
|
||||||
@ -1008,7 +1091,10 @@ static void readUnicodeData()
|
|||||||
else
|
else
|
||||||
++combiningClassUsage[data.p.combiningClass];
|
++combiningClassUsage[data.p.combiningClass];
|
||||||
|
|
||||||
data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
|
Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
|
||||||
|
if (dir == Dir_Unassigned)
|
||||||
|
qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
|
||||||
|
data.p.direction = QChar::Direction(dir);
|
||||||
|
|
||||||
if (!properties[UD_UpperCase].isEmpty()) {
|
if (!properties[UD_UpperCase].isEmpty()) {
|
||||||
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
|
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
|
||||||
@ -1180,8 +1266,8 @@ static void readArabicShaping()
|
|||||||
qFatal("unassigned or unhandled joining value: %s", l[2].constData());
|
qFatal("unassigned or unhandled joining value: %s", l[2].constData());
|
||||||
|
|
||||||
if (joining == Joining_Left) {
|
if (joining == Joining_Left) {
|
||||||
// There are currently no characters of joining type Left_Joining defined in Unicode.
|
qWarning("ACHTUNG!!! joining type '%s' has been met for U+%X; the current implementation needs to be revised!",
|
||||||
qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", codepoint, l[2].constData());
|
l[2].trimmed().constData(), codepoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
UnicodeData &d = UnicodeData::valueRef(codepoint);
|
UnicodeData &d = UnicodeData::valueRef(codepoint);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user