Clean up the Unicode tables generator code and the generated header
This fixes the blocks and memory consumption reports and the whitespace issues, and makes the code a bit cleaner. Since I'm the only one who changes this code, such a no-op commit cannot hurt anyone, or even git blame ;)
Change-Id: Ib069f925a3791c82e16c368c8392bcffbfd68c53
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
parent
57ca02b1d2
commit
c1329fba13
@ -63,221 +63,217 @@ QT_BEGIN_NAMESPACE
|
|||||||
|
|
||||||
namespace QUnicodeTables {
|
namespace QUnicodeTables {
|
||||||
|
|
||||||
struct Properties {
|
struct Properties {
|
||||||
ushort category : 8; /* 5 used */
|
ushort category : 8; /* 5 used */
|
||||||
ushort direction : 8; /* 5 used */
|
ushort direction : 8; /* 5 used */
|
||||||
ushort combiningClass : 8;
|
ushort combiningClass : 8;
|
||||||
ushort joining : 2;
|
ushort joining : 2;
|
||||||
signed short digitValue : 6; /* 5 used */
|
signed short digitValue : 6; /* 5 used */
|
||||||
signed short mirrorDiff : 16;
|
signed short mirrorDiff : 16;
|
||||||
signed short lowerCaseDiff : 16;
|
signed short lowerCaseDiff : 16;
|
||||||
signed short upperCaseDiff : 16;
|
signed short upperCaseDiff : 16;
|
||||||
signed short titleCaseDiff : 16;
|
signed short titleCaseDiff : 16;
|
||||||
signed short caseFoldDiff : 16;
|
signed short caseFoldDiff : 16;
|
||||||
ushort lowerCaseSpecial : 1;
|
ushort lowerCaseSpecial : 1;
|
||||||
ushort upperCaseSpecial : 1;
|
ushort upperCaseSpecial : 1;
|
||||||
ushort titleCaseSpecial : 1;
|
ushort titleCaseSpecial : 1;
|
||||||
ushort caseFoldSpecial : 1;
|
ushort caseFoldSpecial : 1;
|
||||||
ushort unicodeVersion : 4;
|
ushort unicodeVersion : 4;
|
||||||
ushort graphemeBreak : 8; /* 4 used */
|
ushort graphemeBreak : 8; /* 4 used */
|
||||||
ushort wordBreak : 8; /* 4 used */
|
ushort wordBreak : 8; /* 4 used */
|
||||||
ushort sentenceBreak : 8; /* 4 used */
|
ushort sentenceBreak : 8; /* 4 used */
|
||||||
ushort line_break_class : 8; /* 6 used */
|
ushort line_break_class : 8; /* 6 used */
|
||||||
ushort script : 8; /* 5 used */
|
ushort script : 8; /* 5 used */
|
||||||
};
|
};
|
||||||
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
|
|
||||||
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
|
|
||||||
|
|
||||||
// See http://www.unicode.org/reports/tr24/tr24-5.html
|
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
|
||||||
enum Script {
|
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
|
||||||
Common,
|
|
||||||
Greek,
|
|
||||||
Cyrillic,
|
|
||||||
Armenian,
|
|
||||||
Hebrew,
|
|
||||||
Arabic,
|
|
||||||
Syriac,
|
|
||||||
Thaana,
|
|
||||||
Devanagari,
|
|
||||||
Bengali,
|
|
||||||
Gurmukhi,
|
|
||||||
Gujarati,
|
|
||||||
Oriya,
|
|
||||||
Tamil,
|
|
||||||
Telugu,
|
|
||||||
Kannada,
|
|
||||||
Malayalam,
|
|
||||||
Sinhala,
|
|
||||||
Thai,
|
|
||||||
Lao,
|
|
||||||
Tibetan,
|
|
||||||
Myanmar,
|
|
||||||
Georgian,
|
|
||||||
Hangul,
|
|
||||||
Ogham,
|
|
||||||
Runic,
|
|
||||||
Khmer,
|
|
||||||
Nko,
|
|
||||||
Inherited,
|
|
||||||
ScriptCount = Inherited,
|
|
||||||
Latin = Common,
|
|
||||||
Ethiopic = Common,
|
|
||||||
Cherokee = Common,
|
|
||||||
CanadianAboriginal = Common,
|
|
||||||
Mongolian = Common,
|
|
||||||
Hiragana = Common,
|
|
||||||
Katakana = Common,
|
|
||||||
Bopomofo = Common,
|
|
||||||
Han = Common,
|
|
||||||
Yi = Common,
|
|
||||||
OldItalic = Common,
|
|
||||||
Gothic = Common,
|
|
||||||
Deseret = Common,
|
|
||||||
Tagalog = Common,
|
|
||||||
Hanunoo = Common,
|
|
||||||
Buhid = Common,
|
|
||||||
Tagbanwa = Common,
|
|
||||||
Limbu = Common,
|
|
||||||
TaiLe = Common,
|
|
||||||
LinearB = Common,
|
|
||||||
Ugaritic = Common,
|
|
||||||
Shavian = Common,
|
|
||||||
Osmanya = Common,
|
|
||||||
Cypriot = Common,
|
|
||||||
Braille = Common,
|
|
||||||
Buginese = Common,
|
|
||||||
Coptic = Common,
|
|
||||||
NewTaiLue = Common,
|
|
||||||
Glagolitic = Common,
|
|
||||||
Tifinagh = Common,
|
|
||||||
SylotiNagri = Common,
|
|
||||||
OldPersian = Common,
|
|
||||||
Kharoshthi = Common,
|
|
||||||
Balinese = Common,
|
|
||||||
Cuneiform = Common,
|
|
||||||
Phoenician = Common,
|
|
||||||
PhagsPa = Common,
|
|
||||||
Sundanese = Common,
|
|
||||||
Lepcha = Common,
|
|
||||||
OlChiki = Common,
|
|
||||||
Vai = Common,
|
|
||||||
Saurashtra = Common,
|
|
||||||
KayahLi = Common,
|
|
||||||
Rejang = Common,
|
|
||||||
Lycian = Common,
|
|
||||||
Carian = Common,
|
|
||||||
Lydian = Common,
|
|
||||||
Cham = Common,
|
|
||||||
TaiTham = Common,
|
|
||||||
TaiViet = Common,
|
|
||||||
Avestan = Common,
|
|
||||||
EgyptianHieroglyphs = Common,
|
|
||||||
Samaritan = Common,
|
|
||||||
Lisu = Common,
|
|
||||||
Bamum = Common,
|
|
||||||
Javanese = Common,
|
|
||||||
MeeteiMayek = Common,
|
|
||||||
ImperialAramaic = Common,
|
|
||||||
OldSouthArabian = Common,
|
|
||||||
InscriptionalParthian = Common,
|
|
||||||
InscriptionalPahlavi = Common,
|
|
||||||
OldTurkic = Common,
|
|
||||||
Kaithi = Common,
|
|
||||||
Batak = Common,
|
|
||||||
Brahmi = Common,
|
|
||||||
Mandaic = Common,
|
|
||||||
Chakma = Common,
|
|
||||||
MeroiticCursive = Common,
|
|
||||||
MeroiticHieroglyphs = Common,
|
|
||||||
Miao = Common,
|
|
||||||
Sharada = Common,
|
|
||||||
SoraSompeng = Common,
|
|
||||||
Takri = Common
|
|
||||||
};
|
|
||||||
|
|
||||||
|
// See http://www.unicode.org/reports/tr24/tr24-5.html
|
||||||
|
enum Script {
|
||||||
|
Common,
|
||||||
|
Greek,
|
||||||
|
Cyrillic,
|
||||||
|
Armenian,
|
||||||
|
Hebrew,
|
||||||
|
Arabic,
|
||||||
|
Syriac,
|
||||||
|
Thaana,
|
||||||
|
Devanagari,
|
||||||
|
Bengali,
|
||||||
|
Gurmukhi,
|
||||||
|
Gujarati,
|
||||||
|
Oriya,
|
||||||
|
Tamil,
|
||||||
|
Telugu,
|
||||||
|
Kannada,
|
||||||
|
Malayalam,
|
||||||
|
Sinhala,
|
||||||
|
Thai,
|
||||||
|
Lao,
|
||||||
|
Tibetan,
|
||||||
|
Myanmar,
|
||||||
|
Georgian,
|
||||||
|
Hangul,
|
||||||
|
Ogham,
|
||||||
|
Runic,
|
||||||
|
Khmer,
|
||||||
|
Nko,
|
||||||
|
Inherited,
|
||||||
|
ScriptCount = Inherited,
|
||||||
|
Latin = Common,
|
||||||
|
Ethiopic = Common,
|
||||||
|
Cherokee = Common,
|
||||||
|
CanadianAboriginal = Common,
|
||||||
|
Mongolian = Common,
|
||||||
|
Hiragana = Common,
|
||||||
|
Katakana = Common,
|
||||||
|
Bopomofo = Common,
|
||||||
|
Han = Common,
|
||||||
|
Yi = Common,
|
||||||
|
OldItalic = Common,
|
||||||
|
Gothic = Common,
|
||||||
|
Deseret = Common,
|
||||||
|
Tagalog = Common,
|
||||||
|
Hanunoo = Common,
|
||||||
|
Buhid = Common,
|
||||||
|
Tagbanwa = Common,
|
||||||
|
Limbu = Common,
|
||||||
|
TaiLe = Common,
|
||||||
|
LinearB = Common,
|
||||||
|
Ugaritic = Common,
|
||||||
|
Shavian = Common,
|
||||||
|
Osmanya = Common,
|
||||||
|
Cypriot = Common,
|
||||||
|
Braille = Common,
|
||||||
|
Buginese = Common,
|
||||||
|
Coptic = Common,
|
||||||
|
NewTaiLue = Common,
|
||||||
|
Glagolitic = Common,
|
||||||
|
Tifinagh = Common,
|
||||||
|
SylotiNagri = Common,
|
||||||
|
OldPersian = Common,
|
||||||
|
Kharoshthi = Common,
|
||||||
|
Balinese = Common,
|
||||||
|
Cuneiform = Common,
|
||||||
|
Phoenician = Common,
|
||||||
|
PhagsPa = Common,
|
||||||
|
Sundanese = Common,
|
||||||
|
Lepcha = Common,
|
||||||
|
OlChiki = Common,
|
||||||
|
Vai = Common,
|
||||||
|
Saurashtra = Common,
|
||||||
|
KayahLi = Common,
|
||||||
|
Rejang = Common,
|
||||||
|
Lycian = Common,
|
||||||
|
Carian = Common,
|
||||||
|
Lydian = Common,
|
||||||
|
Cham = Common,
|
||||||
|
TaiTham = Common,
|
||||||
|
TaiViet = Common,
|
||||||
|
Avestan = Common,
|
||||||
|
EgyptianHieroglyphs = Common,
|
||||||
|
Samaritan = Common,
|
||||||
|
Lisu = Common,
|
||||||
|
Bamum = Common,
|
||||||
|
Javanese = Common,
|
||||||
|
MeeteiMayek = Common,
|
||||||
|
ImperialAramaic = Common,
|
||||||
|
OldSouthArabian = Common,
|
||||||
|
InscriptionalParthian = Common,
|
||||||
|
InscriptionalPahlavi = Common,
|
||||||
|
OldTurkic = Common,
|
||||||
|
Kaithi = Common,
|
||||||
|
Batak = Common,
|
||||||
|
Brahmi = Common,
|
||||||
|
Mandaic = Common,
|
||||||
|
Chakma = Common,
|
||||||
|
MeroiticCursive = Common,
|
||||||
|
MeroiticHieroglyphs = Common,
|
||||||
|
Miao = Common,
|
||||||
|
Sharada = Common,
|
||||||
|
SoraSompeng = Common,
|
||||||
|
Takri = Common
|
||||||
|
};
|
||||||
|
|
||||||
enum GraphemeBreak {
|
enum GraphemeBreak {
|
||||||
GraphemeBreakOther,
|
GraphemeBreakOther,
|
||||||
GraphemeBreakCR,
|
GraphemeBreakCR,
|
||||||
GraphemeBreakLF,
|
GraphemeBreakLF,
|
||||||
GraphemeBreakControl,
|
GraphemeBreakControl,
|
||||||
GraphemeBreakExtend,
|
GraphemeBreakExtend,
|
||||||
GraphemeBreakPrepend,
|
GraphemeBreakPrepend,
|
||||||
GraphemeBreakSpacingMark,
|
GraphemeBreakSpacingMark,
|
||||||
GraphemeBreakL,
|
GraphemeBreakL,
|
||||||
GraphemeBreakV,
|
GraphemeBreakV,
|
||||||
GraphemeBreakT,
|
GraphemeBreakT,
|
||||||
GraphemeBreakLV,
|
GraphemeBreakLV,
|
||||||
GraphemeBreakLVT
|
GraphemeBreakLVT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum WordBreak {
|
||||||
|
WordBreakOther,
|
||||||
|
WordBreakCR,
|
||||||
|
WordBreakLF,
|
||||||
|
WordBreakNewline,
|
||||||
|
WordBreakFormat,
|
||||||
|
WordBreakKatakana,
|
||||||
|
WordBreakALetter,
|
||||||
|
WordBreakMidNumLet,
|
||||||
|
WordBreakMidLetter,
|
||||||
|
WordBreakMidNum,
|
||||||
|
WordBreakNumeric,
|
||||||
|
WordBreakExtendNumLet
|
||||||
|
};
|
||||||
|
|
||||||
enum WordBreak {
|
enum SentenceBreak {
|
||||||
WordBreakOther,
|
SentenceBreakOther,
|
||||||
WordBreakCR,
|
SentenceBreakCR,
|
||||||
WordBreakLF,
|
SentenceBreakLF,
|
||||||
WordBreakNewline,
|
SentenceBreakSep,
|
||||||
WordBreakFormat,
|
SentenceBreakFormat,
|
||||||
WordBreakKatakana,
|
SentenceBreakSp,
|
||||||
WordBreakALetter,
|
SentenceBreakLower,
|
||||||
WordBreakMidNumLet,
|
SentenceBreakUpper,
|
||||||
WordBreakMidLetter,
|
SentenceBreakOLetter,
|
||||||
WordBreakMidNum,
|
SentenceBreakNumeric,
|
||||||
WordBreakNumeric,
|
SentenceBreakATerm,
|
||||||
WordBreakExtendNumLet
|
SentenceBreakSContinue,
|
||||||
};
|
SentenceBreakSTerm,
|
||||||
|
SentenceBreakClose
|
||||||
|
};
|
||||||
|
|
||||||
|
// see http://www.unicode.org/reports/tr14/tr14-28.html
|
||||||
|
// we don't use the XX and AI classes and map them to AL instead.
|
||||||
|
enum LineBreakClass {
|
||||||
|
LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
|
||||||
|
LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
|
||||||
|
LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
|
||||||
|
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
|
||||||
|
LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
|
||||||
|
LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
|
||||||
|
LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
|
||||||
|
};
|
||||||
|
|
||||||
enum SentenceBreak {
|
Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
|
||||||
SentenceBreakOther,
|
inline GraphemeBreak graphemeBreakClass(QChar ch)
|
||||||
SentenceBreakCR,
|
{ return graphemeBreakClass(ch.unicode()); }
|
||||||
SentenceBreakLF,
|
|
||||||
SentenceBreakSep,
|
|
||||||
SentenceBreakFormat,
|
|
||||||
SentenceBreakSp,
|
|
||||||
SentenceBreakLower,
|
|
||||||
SentenceBreakUpper,
|
|
||||||
SentenceBreakOLetter,
|
|
||||||
SentenceBreakNumeric,
|
|
||||||
SentenceBreakATerm,
|
|
||||||
SentenceBreakSContinue,
|
|
||||||
SentenceBreakSTerm,
|
|
||||||
SentenceBreakClose
|
|
||||||
};
|
|
||||||
|
|
||||||
|
Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
|
||||||
|
inline WordBreak wordBreakClass(QChar ch)
|
||||||
|
{ return wordBreakClass(ch.unicode()); }
|
||||||
|
|
||||||
// see http://www.unicode.org/reports/tr14/tr14-28.html
|
Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
|
||||||
// we don't use the XX and AI classes and map them to AL instead.
|
inline SentenceBreak sentenceBreakClass(QChar ch)
|
||||||
enum LineBreakClass {
|
{ return sentenceBreakClass(ch.unicode()); }
|
||||||
LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
|
|
||||||
LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
|
|
||||||
LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
|
|
||||||
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
|
|
||||||
LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
|
|
||||||
LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
|
|
||||||
LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
|
|
||||||
};
|
|
||||||
|
|
||||||
|
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
|
||||||
|
inline LineBreakClass lineBreakClass(QChar ch)
|
||||||
|
{ return lineBreakClass(ch.unicode()); }
|
||||||
|
|
||||||
Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
|
Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
|
||||||
inline GraphemeBreak graphemeBreakClass(QChar ch)
|
inline Script script(QChar ch)
|
||||||
{ return graphemeBreakClass(ch.unicode()); }
|
{ return script(ch.unicode()); }
|
||||||
|
|
||||||
Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
|
|
||||||
inline WordBreak wordBreakClass(QChar ch)
|
|
||||||
{ return wordBreakClass(ch.unicode()); }
|
|
||||||
|
|
||||||
Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
|
|
||||||
inline SentenceBreak sentenceBreakClass(QChar ch)
|
|
||||||
{ return sentenceBreakClass(ch.unicode()); }
|
|
||||||
|
|
||||||
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
|
|
||||||
inline LineBreakClass lineBreakClass(QChar ch)
|
|
||||||
{ return lineBreakClass(ch.unicode()); }
|
|
||||||
|
|
||||||
Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
|
|
||||||
inline Script script(QChar ch)
|
|
||||||
{ return script(ch.unicode()); }
|
|
||||||
|
|
||||||
} // namespace QUnicodeTables
|
} // namespace QUnicodeTables
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user