Update the Unicode Data and Algorithms up to Unicode 6.3.0

* Mongolian and Phags-pa characters have been given a Joining_Type classification for contextual shaping. As a part of these additions, one Phags-pa character has the Joining_Type value of L (Left Joining), which no character had been assigned before.
* The unassigned code points in the Currency Symbols block have been given the Bidi_Class property value ET and the Line_Break property value PR, to help implementations support new currency symbols, when they are encoded.
* Hebrew letters and basic punctuation marks have been assigned the newly introduced Word_Break property values Hebrew_Letter, Single_Quote, and Double_Quote.
* The Bidi_Class property has been extended with four new values for directional isolates.

For more details, see http://www.unicode.org/versions/Unicode6.3.0/

Change-Id: Iad62d02edc58a8497898dcd6d6c70d5aece317ea
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
2014-01-12 21:14:25 +02:00 · 2014-01-12 21:14:25 +02:00 · edfce46a6c
commit edfce46a6c
parent a6046be428
6 changed files with 199 additions and 66 deletions
--- a/src/corelib/tools/qchar.cpp
+++ b/src/corelib/tools/qchar.cpp
@ -185,8 +185,9 @@ QT_BEGIN_NAMESPACE
    \value Unicode_6_0  Version 6.0
    \value Unicode_6_1  Version 6.1
    \value Unicode_6_2  Version 6.2
    \value Unicode_6_3  Version 6.3  Since Qt 5.3
    \value Unicode_Unassigned  The value is not assigned to any character
-                               in version 6.2 of Unicode.
+                               in version 6.3 of Unicode.
    \sa unicodeVersion(), currentUnicodeVersion()
 */
@ -408,14 +409,18 @@ QT_BEGIN_NAMESPACE
    \value DirEN
    \value DirES
    \value DirET
    \value DirFSI Since Qt 5.3
    \value DirL
    \value DirLRE
    \value DirLRI Since Qt 5.3
    \value DirLRO
    \value DirNSM
    \value DirON
    \value DirPDF
    \value DirPDI Since Qt 5.3
    \value DirR
    \value DirRLE
    \value DirRLI Since Qt 5.3
    \value DirRLO
    \value DirS
    \value DirWS
--- a/src/corelib/tools/qchar.h
+++ b/src/corelib/tools/qchar.h
@ -262,7 +262,8 @@ public:
    enum Direction
    {
        DirL, DirR, DirEN, DirES, DirET, DirAN, DirCS, DirB, DirS, DirWS, DirON,
-        DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN
+        DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN,
        DirLRI, DirRLI, DirFSI, DirPDI
    };
    enum Decomposition
@ -332,7 +333,8 @@ public:
        Unicode_5_2,
        Unicode_6_0,
        Unicode_6_1,
-        Unicode_6_2
+        Unicode_6_2,
        Unicode_6_3
    };
    // ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@ -57,7 +57,7 @@ namespace QUnicodeTools {
 // -----------------------------------------------------------------------------------------------------
 //
 // The text boundaries determination algorithm.
-// See http://www.unicode.org/reports/tr29/tr29-21.html
+// See http://www.unicode.org/reports/tr29/tr29-23.html
 //
 // -----------------------------------------------------------------------------------------------------
@ -112,26 +112,30 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
 namespace WB {
 enum Action {
-    NoBreak = 0,
+    NoBreak,
-    Break = 1,
+    Break,
-    Lookup = 2
+    Lookup,
    LookupW
 };
 static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnicodeTables::WordBreak_ExtendNumLet + 1] = {
-//    Other      CR       LF    Newline   Extend    RI    Katakana ALetter MidNumLet MidLetter MidNum  Numeric  ExtendNumLet
+//    Other      CR       LF    Newline   Extend    RI    Katakana HLetter  ALetter  SQuote   DQuote  MidNumLet MidLetter MidNum  Numeric  ExtendNumLet
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Other
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Other
-    { Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // CR
+    { Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // CR
-    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // LF
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // LF
-    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Newline
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Newline
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Extend
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Extend
-    { Break  , Break  , Break  , Break  , NoBreak, NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // RegionalIndicator
+    { Break  , Break  , Break  , Break  , NoBreak, NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // RegionalIndicator
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , NoBreak }, // Katakana
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , NoBreak }, // Katakana
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , NoBreak, Lookup , Lookup , Break  , NoBreak, NoBreak }, // ALetter
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break  , NoBreak, NoBreak }, // HebrewLetter
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidNumLet
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , NoBreak, NoBreak, LookupW, Break  , LookupW, LookupW, Break  , NoBreak, NoBreak }, // ALetter
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidLetter
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // SingleQuote
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidNum
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // DoubleQuote
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , NoBreak, Lookup , Break  , Lookup , NoBreak, NoBreak }, // Numeric
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidNumLet
-    { Break  , Break  , Break  , Break  , NoBreak, Break  , NoBreak, NoBreak, Break  , Break  , Break  , NoBreak, NoBreak }, // ExtendNumLet
+    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidLetter
    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidNum
    { Break  , Break  , Break  , Break  , NoBreak, Break  , Break  , NoBreak, NoBreak, Lookup , Break  , Lookup , Break  , Lookup , NoBreak, NoBreak }, // Numeric
    { Break  , Break  , Break  , Break  , NoBreak, Break  , NoBreak, NoBreak, NoBreak, Break  , Break  , Break  , Break  , Break  , NoBreak, NoBreak }, // ExtendNumLet
 };
 } // namespace WB
@ -160,8 +164,8 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
        if (qt_initcharattributes_default_algorithm_only) {
            // as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
            // which caused "hi.there" to be treated as if it were just a single word;
-            // by remapping those characters in the Unicode tables generator.
+            // we keep the pre-5.1 behavior by remapping these characters in the Unicode tables generator
-            // this code is needed to pass the coverage tests; remove once the issue is fixed.
+            // and this code is needed to pass the coverage tests; remove once the issue is fixed.
            if (ucs4 == 0x002E) // FULL STOP
                ncls = QUnicodeTables::WordBreak_MidNumLet;
            else if (ucs4 == 0x003A) // COLON
@ -170,8 +174,17 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
 #endif
        uchar action = WB::breakTable[cls][ncls];
-        if (Q_UNLIKELY(action == WB::Lookup)) {
+        switch (action) {
-            action = WB::Break;
+        case WB::Break:
            break;
        case WB::NoBreak:
            if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
                // WB4: X(Extend|Format)* -> X
                continue;
            }
            break;
        case WB::Lookup:
        case WB::LookupW:
            for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
                ucs4 = string[lookahead];
                if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
@ -184,20 +197,28 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
                prop = QUnicodeTables::properties(ucs4);
                QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
-                if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend))
+
                if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend)) {
                    // WB4: X(Extend|Format)* -> X
                    continue;
-                if (Q_LIKELY(tcls == cls)) {
+                }
                if (Q_LIKELY(tcls == cls || (action == WB::LookupW && (tcls == QUnicodeTables::WordBreak_HebrewLetter
                                                                       || tcls == QUnicodeTables::WordBreak_ALetter)))) {
                    i = lookahead;
                    ncls = tcls;
                    action = WB::NoBreak;
                }
                break;
            }
-        } else if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
+            if (action != WB::NoBreak) {
-            // WB4: X(Extend|Format)* -> X
+                action = WB::Break;
-            if (Q_LIKELY(action != WB::Break))
+                if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_SingleQuote && cls == QUnicodeTables::WordBreak_HebrewLetter))
-                continue;
+                    action = WB::NoBreak; // WB7a
            }
            break;
        }
        cls = ncls;
        if (action == WB::Break) {
            attributes[pos].wordBreak = true;
@ -208,6 +229,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
                currentWordType = WordTypeHiraganaKatakana;
                attributes[pos].wordStart = true;
                break;
            case QUnicodeTables::WordBreak_HebrewLetter:
            case QUnicodeTables::WordBreak_ALetter:
            case QUnicodeTables::WordBreak_Numeric:
                currentWordType = WordTypeAlphaNumeric;
@ -327,7 +349,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
 // -----------------------------------------------------------------------------------------------------
 //
 // The line breaking algorithm.
-// See http://www.unicode.org/reports/tr14/tr14-30.html
+// See http://www.unicode.org/reports/tr14/tr14-32.html
 //
 // -----------------------------------------------------------------------------------------------------
--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@ -241,7 +241,8 @@ using namespace std;
 static const char *directions[] = {
    "DirL", "DirR", "DirEN", "DirES", "DirET", "DirAN", "DirCS", "DirB", "DirS", "DirWS", "DirON",
-    "DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN"
+    "DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN",
    "DirLRI", "DirRLI", "DirFSI", "DirPDI"
 };
 #endif
@ -2536,7 +2537,8 @@ static inline bool nextCharJoins(const QString &string, int pos)
        ++pos;
    if (pos == string.length())
        return false;
-    return string.at(pos).joining() != QChar::OtherJoining;
+    // ### U+A872 has joining type L
    return string.at(pos) == QChar(0xA872) || string.at(pos).joining() != QChar::OtherJoining;
 }
 static inline bool prevCharJoins(const QString &string, int pos)
@ -2551,13 +2553,9 @@ static inline bool prevCharJoins(const QString &string, int pos)
 // Reports whether c falls in one of the bidi control ranges tested below.
 static inline bool isRetainableControlCode(QChar c)
 {
-    return (c.unicode() == 0x202a       // LRE
+    return (c.unicode() >= 0x202a && c.unicode() <= 0x202e) // LRE, RLE, PDF, LRO, RLO
-            || c.unicode() == 0x202b    // LRE
+            || (c.unicode() >= 0x200e && c.unicode() <= 0x200f) // LRM, RLM
-            || c.unicode() == 0x202c    // PDF
+            || (c.unicode() >= 0x2066 && c.unicode() <= 0x2069); // LRI, RLI, FSI, PDI
            || c.unicode() == 0x202d    // LRO
            || c.unicode() == 0x202e    // RLO
            || c.unicode() == 0x200e    // LRM
            || c.unicode() == 0x200f);  // RLM
 }
 static QString stringMidRetainingBidiCC(const QString &string,
--- a/tests/auto/corelib/tools/qchar/tst_qchar.cpp
+++ b/tests/auto/corelib/tools/qchar/tst_qchar.cpp
@ -450,6 +450,18 @@ void tst_QChar::category()
 void tst_QChar::direction()
 {
    QVERIFY(QChar::direction(0x200E) == QChar::DirL);
    QVERIFY(QChar::direction(0x200F) == QChar::DirR);
    QVERIFY(QChar::direction(0x202A) == QChar::DirLRE);
    QVERIFY(QChar::direction(0x202B) == QChar::DirRLE);
    QVERIFY(QChar::direction(0x202C) == QChar::DirPDF);
    QVERIFY(QChar::direction(0x202D) == QChar::DirLRO);
    QVERIFY(QChar::direction(0x202E) == QChar::DirRLO);
    QVERIFY(QChar::direction(0x2066) == QChar::DirLRI);
    QVERIFY(QChar::direction(0x2067) == QChar::DirRLI);
    QVERIFY(QChar::direction(0x2068) == QChar::DirFSI);
    QVERIFY(QChar::direction(0x2069) == QChar::DirPDI);
    QVERIFY(QChar('a').direction() == QChar::DirL);
    QVERIFY(QChar('0').direction() == QChar::DirEN);
    QVERIFY(QChar((ushort)0x627).direction() == QChar::DirAL);
@ -492,6 +504,9 @@ void tst_QChar::joining()
    QVERIFY(QChar::joining(0xf0000u) == QChar::OtherJoining);
    QVERIFY(QChar::joining(0xE0030u) == QChar::OtherJoining);
    QVERIFY(QChar::joining(0x2FA17u) == QChar::OtherJoining);
    // ### U+A872 has joining type L
    QVERIFY(QChar::joining((uint)0xA872) == QChar::OtherJoining);
 }
 void tst_QChar::combiningClass()
@ -605,6 +620,11 @@ void tst_QChar::unicodeVersion()
    QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
    QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
    QVERIFY(QChar(0x061c).unicodeVersion() == QChar::Unicode_6_3);
    QVERIFY(QChar::unicodeVersion((ushort)0x061c) == QChar::Unicode_6_3);
    QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
    QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
    QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
    QVERIFY(QChar::unicodeVersion((ushort)0x09ff) == QChar::Unicode_Unassigned);
    QVERIFY(QChar::unicodeVersion((uint)0x09ff) == QChar::Unicode_Unassigned);
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@ -77,6 +77,7 @@ static void initAgeMap()
        { QChar::Unicode_6_0,   "6.0" },
        { QChar::Unicode_6_1,   "6.1" },
        { QChar::Unicode_6_2,   "6.2" },
        { QChar::Unicode_6_3,   "6.3" },
        { QChar::Unicode_Unassigned, 0 }
    };
    AgeMap *d = ageMap;
@ -176,34 +177,66 @@ static void initDecompositionMap()
 }
-static QHash<QByteArray, QChar::Direction> directionMap;
+enum Direction {
    DirL = QChar::DirL,
    DirR = QChar::DirR,
    DirEN = QChar::DirEN,
    DirES = QChar::DirES,
    DirET = QChar::DirET,
    DirAN = QChar::DirAN,
    DirCS = QChar::DirCS,
    DirB = QChar::DirB,
    DirS = QChar::DirS,
    DirWS = QChar::DirWS,
    DirON = QChar::DirON,
    DirLRE = QChar::DirLRE,
    DirLRO = QChar::DirLRO,
    DirAL = QChar::DirAL,
    DirRLE = QChar::DirRLE,
    DirRLO = QChar::DirRLO,
    DirPDF = QChar::DirPDF,
    DirNSM = QChar::DirNSM,
    DirBN = QChar::DirBN,
    DirLRI = QChar::DirLRI,
    DirRLI = QChar::DirRLI,
    DirFSI = QChar::DirFSI,
    DirPDI = QChar::DirPDI
    , Dir_Unassigned
 };
 static QHash<QByteArray, Direction> directionMap;
 static void initDirectionMap()
 {
    struct Dir {
-        QChar::Direction dir;
+        Direction dir;
        const char *name;
    } directions[] = {
-        { QChar::DirL, "L" },
+        { DirL, "L" },
-        { QChar::DirR, "R" },
+        { DirR, "R" },
-        { QChar::DirEN, "EN" },
+        { DirEN, "EN" },
-        { QChar::DirES, "ES" },
+        { DirES, "ES" },
-        { QChar::DirET, "ET" },
+        { DirET, "ET" },
-        { QChar::DirAN, "AN" },
+        { DirAN, "AN" },
-        { QChar::DirCS, "CS" },
+        { DirCS, "CS" },
-        { QChar::DirB, "B" },
+        { DirB, "B" },
-        { QChar::DirS, "S" },
+        { DirS, "S" },
-        { QChar::DirWS, "WS" },
+        { DirWS, "WS" },
-        { QChar::DirON, "ON" },
+        { DirON, "ON" },
-        { QChar::DirLRE, "LRE" },
+        { DirLRE, "LRE" },
-        { QChar::DirLRO, "LRO" },
+        { DirLRO, "LRO" },
-        { QChar::DirAL, "AL" },
+        { DirAL, "AL" },
-        { QChar::DirRLE, "RLE" },
+        { DirRLE, "RLE" },
-        { QChar::DirRLO, "RLO" },
+        { DirRLO, "RLO" },
-        { QChar::DirPDF, "PDF" },
+        { DirPDF, "PDF" },
-        { QChar::DirNSM, "NSM" },
+        { DirNSM, "NSM" },
-        { QChar::DirBN, "BN" },
+        { DirBN, "BN" },
-        { QChar::DirL, 0 }
+        { DirLRI, "LRI" },
        { DirRLI, "RLI" },
        { DirFSI, "FSI" },
        { DirPDI, "PDI" },
        { Dir_Unassigned, 0 }
    };
    Dir *d = directions;
    while (d->name) {
@ -323,7 +356,10 @@ static const char *word_break_class_string =
    "    WordBreak_Extend,\n"
    "    WordBreak_RegionalIndicator,\n"
    "    WordBreak_Katakana,\n"
    "    WordBreak_HebrewLetter,\n"
    "    WordBreak_ALetter,\n"
    "    WordBreak_SingleQuote,\n"
    "    WordBreak_DoubleQuote,\n"
    "    WordBreak_MidNumLet,\n"
    "    WordBreak_MidLetter,\n"
    "    WordBreak_MidNum,\n"
@ -339,7 +375,10 @@ enum WordBreakClass {
    WordBreak_Extend,
    WordBreak_RegionalIndicator,
    WordBreak_Katakana,
    WordBreak_HebrewLetter,
    WordBreak_ALetter,
    WordBreak_SingleQuote,
    WordBreak_DoubleQuote,
    WordBreak_MidNumLet,
    WordBreak_MidLetter,
    WordBreak_MidNum,
@ -365,7 +404,10 @@ static void initWordBreak()
        { WordBreak_Extend, "Format" },
        { WordBreak_RegionalIndicator, "Regional_Indicator" },
        { WordBreak_Katakana, "Katakana" },
        { WordBreak_HebrewLetter, "Hebrew_Letter" },
        { WordBreak_ALetter, "ALetter" },
        { WordBreak_SingleQuote, "Single_Quote" },
        { WordBreak_DoubleQuote, "Double_Quote" },
        { WordBreak_MidNumLet, "MidNumLet" },
        { WordBreak_MidLetter, "MidLetter" },
        { WordBreak_MidNum, "MidNum" },
@ -815,6 +857,31 @@ static int appendToSpecialCaseMap(const QList<int> &map)
    return pos;
 }
 static inline bool isDefaultIgnorable(uint ucs4)
 {
    // Hard-coded membership test for Default_Ignorable_Code_Point.
    // The set is derived as
    //    Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
    //    - White_Space - FFF9..FFFB (Annotation Characters)
    //    - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)

    // Below U+0100 the only member is U+00AD.
    if (ucs4 < 0x100)
        return ucs4 == 0xad;

    // Isolated members.
    switch (ucs4) {
    case 0x034f:
    case 0x3164:
    case 0xfeff:
    case 0xffa0:
        return true;
    default:
        break;
    }

    // Remaining members, as inclusive { first, last } ranges.
    static const uint memberRanges[][2] = {
        { 0x115f, 0x1160 },
        { 0x17b4, 0x17b5 },
        { 0x180b, 0x180d },
        { 0x200b, 0x200f },
        { 0x202a, 0x202e },
        { 0x2060, 0x206f },
        { 0xfe00, 0xfe0f },
        { 0xfff0, 0xfff8 },
        { 0x1d173, 0x1d17a },
        { 0xe0000, 0xe0fff }
    };
    const unsigned int rangeCount = sizeof(memberRanges) / sizeof(memberRanges[0]);
    for (unsigned int idx = 0; idx < rangeCount; ++idx) {
        if (ucs4 >= memberRanges[idx][0] && ucs4 <= memberRanges[idx][1])
            return true;
    }
    return false;
 }
 struct UnicodeData {
    UnicodeData(int codepoint = 0) {
        p.category = QChar::Other_NotAssigned; // Cn
@ -842,6 +909,17 @@ struct UnicodeData {
            || (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
            p.direction = QChar::DirR;
        }
        // The unassigned code points that default to ET are in the range:
        //     [U+20A0..U+20CF]
        else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
            p.direction = QChar::DirET;
        }
        // The unassigned code points that default to BN have one of the following properties:
        //     Default_Ignorable_Code_Point
        //     Noncharacter_Code_Point
        else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
            p.direction = QChar::DirBN;
        }
        p.lineBreakClass = LineBreak_AL; // XX -> AL
        // LineBreak.txt
@ -858,6 +936,11 @@ struct UnicodeData {
            || (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
            p.lineBreakClass = LineBreak_ID;
        }
        // The unassigned code points that default to "PR" comprise a range in the following block:
        //     [U+20A0..U+20CF]
        else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
            p.lineBreakClass = LineBreak_PR;
        }
        mirroredChar = 0;
        decompositionType = QChar::NoDecomposition;
@ -1008,7 +1091,10 @@ static void readUnicodeData()
        else
            ++combiningClassUsage[data.p.combiningClass];
-        data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
+        Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
        if (dir == Dir_Unassigned)
            qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
        data.p.direction = QChar::Direction(dir);
        if (!properties[UD_UpperCase].isEmpty()) {
            int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
@ -1180,8 +1266,8 @@ static void readArabicShaping()
            qFatal("unassigned or unhandled joining value: %s", l[2].constData());
        if (joining == Joining_Left) {
-            // There are currently no characters of joining type Left_Joining defined in Unicode.
+            qWarning("ACHTUNG!!! joining type '%s' has been met for U+%X; the current implementation needs to be revised!",
-            qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", codepoint, l[2].constData());
+                     l[2].trimmed().constData(), codepoint);
        }
        UnicodeData &d = UnicodeData::valueRef(codepoint);