Clean up the Unicode tables generator code and the generated header

This fixes the block and memory consumption reports and the whitespace issues, and makes the code a bit cleaner. Since I'm the only one who changes this code, such a no-op commit should not hurt anyone, or even git blame ;)

Change-Id: Ib069f925a3791c82e16c368c8392bcffbfd68c53
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
2012-06-17 04:20:59 +03:00 · 2012-06-17 04:20:59 +03:00 · c1329fba13
commit c1329fba13
parent 57ca02b1d2
2 changed files with 637 additions and 638 deletions
--- a/src/corelib/tools/qunicodetables_p.h
+++ b/src/corelib/tools/qunicodetables_p.h
@@ -63,221 +63,217 @@ QT_BEGIN_NAMESPACE
 namespace QUnicodeTables {
-    struct Properties {
+struct Properties {
-        ushort category         : 8; /* 5 used */
+    ushort category            : 8; /* 5 used */
-        ushort direction        : 8; /* 5 used */
+    ushort direction           : 8; /* 5 used */
-        ushort combiningClass   : 8;
+    ushort combiningClass      : 8;
-        ushort joining          : 2;
+    ushort joining             : 2;
-        signed short digitValue : 6; /* 5 used */
+    signed short digitValue    : 6; /* 5 used */
-        signed short mirrorDiff    : 16;
+    signed short mirrorDiff    : 16;
-        signed short lowerCaseDiff : 16;
+    signed short lowerCaseDiff : 16;
-        signed short upperCaseDiff : 16;
+    signed short upperCaseDiff : 16;
-        signed short titleCaseDiff : 16;
+    signed short titleCaseDiff : 16;
-        signed short caseFoldDiff  : 16;
+    signed short caseFoldDiff  : 16;
-        ushort lowerCaseSpecial : 1;
+    ushort lowerCaseSpecial    : 1;
-        ushort upperCaseSpecial : 1;
+    ushort upperCaseSpecial    : 1;
-        ushort titleCaseSpecial : 1;
+    ushort titleCaseSpecial    : 1;
-        ushort caseFoldSpecial  : 1;
+    ushort caseFoldSpecial     : 1;
-        ushort unicodeVersion   : 4;
+    ushort unicodeVersion      : 4;
-        ushort graphemeBreak    : 8; /* 4 used */
+    ushort graphemeBreak       : 8; /* 4 used */
-        ushort wordBreak        : 8; /* 4 used */
+    ushort wordBreak           : 8; /* 4 used */
-        ushort sentenceBreak    : 8; /* 4 used */
+    ushort sentenceBreak       : 8; /* 4 used */
-        ushort line_break_class : 8; /* 6 used */
+    ushort line_break_class    : 8; /* 6 used */
-        ushort script           : 8; /* 5 used */
+    ushort script              : 8; /* 5 used */
-    };
+};
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
-    // See http://www.unicode.org/reports/tr24/tr24-5.html
+Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
-    enum Script {
+Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
        Common,
        Greek,
        Cyrillic,
        Armenian,
        Hebrew,
        Arabic,
        Syriac,
        Thaana,
        Devanagari,
        Bengali,
        Gurmukhi,
        Gujarati,
        Oriya,
        Tamil,
        Telugu,
        Kannada,
        Malayalam,
        Sinhala,
        Thai,
        Lao,
        Tibetan,
        Myanmar,
        Georgian,
        Hangul,
        Ogham,
        Runic,
        Khmer,
        Nko,
        Inherited,
        ScriptCount = Inherited,
        Latin = Common,
        Ethiopic = Common,
        Cherokee = Common,
        CanadianAboriginal = Common,
        Mongolian = Common,
        Hiragana = Common,
        Katakana = Common,
        Bopomofo = Common,
        Han = Common,
        Yi = Common,
        OldItalic = Common,
        Gothic = Common,
        Deseret = Common,
        Tagalog = Common,
        Hanunoo = Common,
        Buhid = Common,
        Tagbanwa = Common,
        Limbu = Common,
        TaiLe = Common,
        LinearB = Common,
        Ugaritic = Common,
        Shavian = Common,
        Osmanya = Common,
        Cypriot = Common,
        Braille = Common,
        Buginese = Common,
        Coptic = Common,
        NewTaiLue = Common,
        Glagolitic = Common,
        Tifinagh = Common,
        SylotiNagri = Common,
        OldPersian = Common,
        Kharoshthi = Common,
        Balinese = Common,
        Cuneiform = Common,
        Phoenician = Common,
        PhagsPa = Common,
        Sundanese = Common,
        Lepcha = Common,
        OlChiki = Common,
        Vai = Common,
        Saurashtra = Common,
        KayahLi = Common,
        Rejang = Common,
        Lycian = Common,
        Carian = Common,
        Lydian = Common,
        Cham = Common,
        TaiTham = Common,
        TaiViet = Common,
        Avestan = Common,
        EgyptianHieroglyphs = Common,
        Samaritan = Common,
        Lisu = Common,
        Bamum = Common,
        Javanese = Common,
        MeeteiMayek = Common,
        ImperialAramaic = Common,
        OldSouthArabian = Common,
        InscriptionalParthian = Common,
        InscriptionalPahlavi = Common,
        OldTurkic = Common,
        Kaithi = Common,
        Batak = Common,
        Brahmi = Common,
        Mandaic = Common,
        Chakma = Common,
        MeroiticCursive = Common,
        MeroiticHieroglyphs = Common,
        Miao = Common,
        Sharada = Common,
        SoraSompeng = Common,
        Takri = Common
    };
 // See http://www.unicode.org/reports/tr24/tr24-5.html
 enum Script {
    Common,
    Greek,
    Cyrillic,
    Armenian,
    Hebrew,
    Arabic,
    Syriac,
    Thaana,
    Devanagari,
    Bengali,
    Gurmukhi,
    Gujarati,
    Oriya,
    Tamil,
    Telugu,
    Kannada,
    Malayalam,
    Sinhala,
    Thai,
    Lao,
    Tibetan,
    Myanmar,
    Georgian,
    Hangul,
    Ogham,
    Runic,
    Khmer,
    Nko,
    Inherited,
    ScriptCount = Inherited,
    Latin = Common,
    Ethiopic = Common,
    Cherokee = Common,
    CanadianAboriginal = Common,
    Mongolian = Common,
    Hiragana = Common,
    Katakana = Common,
    Bopomofo = Common,
    Han = Common,
    Yi = Common,
    OldItalic = Common,
    Gothic = Common,
    Deseret = Common,
    Tagalog = Common,
    Hanunoo = Common,
    Buhid = Common,
    Tagbanwa = Common,
    Limbu = Common,
    TaiLe = Common,
    LinearB = Common,
    Ugaritic = Common,
    Shavian = Common,
    Osmanya = Common,
    Cypriot = Common,
    Braille = Common,
    Buginese = Common,
    Coptic = Common,
    NewTaiLue = Common,
    Glagolitic = Common,
    Tifinagh = Common,
    SylotiNagri = Common,
    OldPersian = Common,
    Kharoshthi = Common,
    Balinese = Common,
    Cuneiform = Common,
    Phoenician = Common,
    PhagsPa = Common,
    Sundanese = Common,
    Lepcha = Common,
    OlChiki = Common,
    Vai = Common,
    Saurashtra = Common,
    KayahLi = Common,
    Rejang = Common,
    Lycian = Common,
    Carian = Common,
    Lydian = Common,
    Cham = Common,
    TaiTham = Common,
    TaiViet = Common,
    Avestan = Common,
    EgyptianHieroglyphs = Common,
    Samaritan = Common,
    Lisu = Common,
    Bamum = Common,
    Javanese = Common,
    MeeteiMayek = Common,
    ImperialAramaic = Common,
    OldSouthArabian = Common,
    InscriptionalParthian = Common,
    InscriptionalPahlavi = Common,
    OldTurkic = Common,
    Kaithi = Common,
    Batak = Common,
    Brahmi = Common,
    Mandaic = Common,
    Chakma = Common,
    MeroiticCursive = Common,
    MeroiticHieroglyphs = Common,
    Miao = Common,
    Sharada = Common,
    SoraSompeng = Common,
    Takri = Common
 };
-    enum GraphemeBreak {
+enum GraphemeBreak {
-        GraphemeBreakOther,
+    GraphemeBreakOther,
-        GraphemeBreakCR,
+    GraphemeBreakCR,
-        GraphemeBreakLF,
+    GraphemeBreakLF,
-        GraphemeBreakControl,
+    GraphemeBreakControl,
-        GraphemeBreakExtend,
+    GraphemeBreakExtend,
-        GraphemeBreakPrepend,
+    GraphemeBreakPrepend,
-        GraphemeBreakSpacingMark,
+    GraphemeBreakSpacingMark,
-        GraphemeBreakL,
+    GraphemeBreakL,
-        GraphemeBreakV,
+    GraphemeBreakV,
-        GraphemeBreakT,
+    GraphemeBreakT,
-        GraphemeBreakLV,
+    GraphemeBreakLV,
-        GraphemeBreakLVT
+    GraphemeBreakLVT
-    };
+};
 enum WordBreak {
    WordBreakOther,
    WordBreakCR,
    WordBreakLF,
    WordBreakNewline,
    WordBreakFormat,
    WordBreakKatakana,
    WordBreakALetter,
    WordBreakMidNumLet,
    WordBreakMidLetter,
    WordBreakMidNum,
    WordBreakNumeric,
    WordBreakExtendNumLet
 };
-    enum WordBreak {
+enum SentenceBreak {
-        WordBreakOther,
+    SentenceBreakOther,
-        WordBreakCR,
+    SentenceBreakCR,
-        WordBreakLF,
+    SentenceBreakLF,
-        WordBreakNewline,
+    SentenceBreakSep,
-        WordBreakFormat,
+    SentenceBreakFormat,
-        WordBreakKatakana,
+    SentenceBreakSp,
-        WordBreakALetter,
+    SentenceBreakLower,
-        WordBreakMidNumLet,
+    SentenceBreakUpper,
-        WordBreakMidLetter,
+    SentenceBreakOLetter,
-        WordBreakMidNum,
+    SentenceBreakNumeric,
-        WordBreakNumeric,
+    SentenceBreakATerm,
-        WordBreakExtendNumLet
+    SentenceBreakSContinue,
-    };
+    SentenceBreakSTerm,
    SentenceBreakClose
 };
 // see http://www.unicode.org/reports/tr14/tr14-28.html
 // we don't use the XX and AI classes and map them to AL instead.
 enum LineBreakClass {
    LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
    LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
    LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
    LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
    LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
    LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
    LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
 };
-    enum SentenceBreak {
+Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
-        SentenceBreakOther,
+inline GraphemeBreak graphemeBreakClass(QChar ch)
-        SentenceBreakCR,
+{ return graphemeBreakClass(ch.unicode()); }
        SentenceBreakLF,
        SentenceBreakSep,
        SentenceBreakFormat,
        SentenceBreakSp,
        SentenceBreakLower,
        SentenceBreakUpper,
        SentenceBreakOLetter,
        SentenceBreakNumeric,
        SentenceBreakATerm,
        SentenceBreakSContinue,
        SentenceBreakSTerm,
        SentenceBreakClose
    };
 Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
 inline WordBreak wordBreakClass(QChar ch)
 { return wordBreakClass(ch.unicode()); }
-    // see http://www.unicode.org/reports/tr14/tr14-28.html
+Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
-    // we don't use the XX and AI classes and map them to AL instead.
+inline SentenceBreak sentenceBreakClass(QChar ch)
-    enum LineBreakClass {
+{ return sentenceBreakClass(ch.unicode()); }
        LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
        LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
        LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
        LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
        LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
        LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
        LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
    };
 Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
 inline LineBreakClass lineBreakClass(QChar ch)
 { return lineBreakClass(ch.unicode()); }
-    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
+Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
-    inline GraphemeBreak graphemeBreakClass(QChar ch)
+inline Script script(QChar ch)
-    { return graphemeBreakClass(ch.unicode()); }
+{ return script(ch.unicode()); }
    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
    inline WordBreak wordBreakClass(QChar ch)
    { return wordBreakClass(ch.unicode()); }
    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
    inline SentenceBreak sentenceBreakClass(QChar ch)
    { return sentenceBreakClass(ch.unicode()); }
    Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
    inline LineBreakClass lineBreakClass(QChar ch)
    { return lineBreakClass(ch.unicode()); }
    Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
    inline Script script(QChar ch)
    { return script(ch.unicode()); }
 } // namespace QUnicodeTables
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp