Update the qunicodetables generator to deal with UCD 6.1 files

Change-Id: If22018ff83cfc6b9c984f689648da038fce11d84
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
Konstantin Ritt 2012-06-03 04:17:10 +03:00 committed by Qt by Nokia
parent 2176a4f04f
commit 60e1892d83
3 changed files with 102 additions and 36 deletions

View File

@ -176,8 +176,12 @@ QT_BEGIN_NAMESPACE
\value Unicode_4_0 Version 4.0 \value Unicode_4_0 Version 4.0
\value Unicode_4_1 Version 4.1 \value Unicode_4_1 Version 4.1
\value Unicode_5_0 Version 5.0 \value Unicode_5_0 Version 5.0
\value Unicode_5_1 Version 5.1
\value Unicode_5_2 Version 5.2
\value Unicode_6_0 Version 6.0
\value Unicode_6_1 Version 6.1
\value Unicode_Unassigned The value is not assigned to any character \value Unicode_Unassigned The value is not assigned to any character
in version 5.0 of Unicode. in version 6.1 of Unicode.
\sa unicodeVersion(), currentUnicodeVersion() \sa unicodeVersion(), currentUnicodeVersion()
*/ */

View File

@ -204,7 +204,11 @@ public:
Unicode_3_2, Unicode_3_2,
Unicode_4_0, Unicode_4_0,
Unicode_4_1, Unicode_4_1,
Unicode_5_0 Unicode_5_0,
Unicode_5_1,
Unicode_5_2,
Unicode_6_0,
Unicode_6_1
}; };
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO // ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO

View File

@ -51,8 +51,8 @@
#include <private/qunicodetables_p.h> #include <private/qunicodetables_p.h>
#endif #endif
#define DATA_VERSION_S "5.0" #define DATA_VERSION_S "6.1"
#define DATA_VERSION_STR "QChar::Unicode_5_0" #define DATA_VERSION_STR "QChar::Unicode_6_1"
static QHash<QByteArray, QChar::UnicodeVersion> age_map; static QHash<QByteArray, QChar::UnicodeVersion> age_map;
@ -72,6 +72,10 @@ static void initAgeMap()
{ QChar::Unicode_4_0, "4.0" }, { QChar::Unicode_4_0, "4.0" },
{ QChar::Unicode_4_1, "4.1" }, { QChar::Unicode_4_1, "4.1" },
{ QChar::Unicode_5_0, "5.0" }, { QChar::Unicode_5_0, "5.0" },
{ QChar::Unicode_5_1, "5.1" },
{ QChar::Unicode_5_2, "5.2" },
{ QChar::Unicode_6_0, "6.0" },
{ QChar::Unicode_6_1, "6.1" },
{ QChar::Unicode_Unassigned, 0 } { QChar::Unicode_Unassigned, 0 }
}; };
AgeMap *d = ageMap; AgeMap *d = ageMap;
@ -124,6 +128,8 @@ static const char *grapheme_break_string =
" GraphemeBreakLF,\n" " GraphemeBreakLF,\n"
" GraphemeBreakControl,\n" " GraphemeBreakControl,\n"
" GraphemeBreakExtend,\n" " GraphemeBreakExtend,\n"
" GraphemeBreakPrepend,\n"
" GraphemeBreakSpacingMark,\n"
" GraphemeBreakL,\n" " GraphemeBreakL,\n"
" GraphemeBreakV,\n" " GraphemeBreakV,\n"
" GraphemeBreakT,\n" " GraphemeBreakT,\n"
@ -137,6 +143,8 @@ enum GraphemeBreak {
GraphemeBreakLF, GraphemeBreakLF,
GraphemeBreakControl, GraphemeBreakControl,
GraphemeBreakExtend, GraphemeBreakExtend,
GraphemeBreakPrepend,
GraphemeBreakSpacingMark,
GraphemeBreakL, GraphemeBreakL,
GraphemeBreakV, GraphemeBreakV,
GraphemeBreakT, GraphemeBreakT,
@ -159,6 +167,8 @@ static void initGraphemeBreak()
{ GraphemeBreakLF, "LF" }, { GraphemeBreakLF, "LF" },
{ GraphemeBreakControl, "Control" }, { GraphemeBreakControl, "Control" },
{ GraphemeBreakExtend, "Extend" }, { GraphemeBreakExtend, "Extend" },
{ GraphemeBreakPrepend, "Prepend" },
{ GraphemeBreakSpacingMark, "SpacingMark" },
{ GraphemeBreakL, "L" }, { GraphemeBreakL, "L" },
{ GraphemeBreakV, "V" }, { GraphemeBreakV, "V" },
{ GraphemeBreakT, "T" }, { GraphemeBreakT, "T" },
@ -177,9 +187,13 @@ static void initGraphemeBreak()
static const char *word_break_string = static const char *word_break_string =
" enum WordBreak {\n" " enum WordBreak {\n"
" WordBreakOther,\n" " WordBreakOther,\n"
" WordBreakCR,\n"
" WordBreakLF,\n"
" WordBreakNewline,\n"
" WordBreakFormat,\n" " WordBreakFormat,\n"
" WordBreakKatakana,\n" " WordBreakKatakana,\n"
" WordBreakALetter,\n" " WordBreakALetter,\n"
" WordBreakMidNumLet,\n"
" WordBreakMidLetter,\n" " WordBreakMidLetter,\n"
" WordBreakMidNum,\n" " WordBreakMidNum,\n"
" WordBreakNumeric,\n" " WordBreakNumeric,\n"
@ -188,9 +202,13 @@ static const char *word_break_string =
enum WordBreak { enum WordBreak {
WordBreakOther, WordBreakOther,
WordBreakCR,
WordBreakLF,
WordBreakNewline,
WordBreakFormat, WordBreakFormat,
WordBreakKatakana, WordBreakKatakana,
WordBreakALetter, WordBreakALetter,
WordBreakMidNumLet,
WordBreakMidLetter, WordBreakMidLetter,
WordBreakMidNum, WordBreakMidNum,
WordBreakNumeric, WordBreakNumeric,
@ -207,10 +225,15 @@ static void initWordBreak()
WordBreak brk; WordBreak brk;
const char *name; const char *name;
} breaks[] = { } breaks[] = {
{ WordBreakOther, "Other" },
{ WordBreakCR, "CR" },
{ WordBreakLF, "LF" },
{ WordBreakNewline, "Newline" },
{ WordBreakFormat, "Extend" },
{ WordBreakFormat, "Format" }, { WordBreakFormat, "Format" },
{ WordBreakFormat, "Extend" }, // these are copied in from GraphemeBreakProperty.txt
{ WordBreakKatakana, "Katakana" }, { WordBreakKatakana, "Katakana" },
{ WordBreakALetter, "ALetter" }, { WordBreakALetter, "ALetter" },
{ WordBreakMidNumLet, "MidNumLet" },
{ WordBreakMidLetter, "MidLetter" }, { WordBreakMidLetter, "MidLetter" },
{ WordBreakMidNum, "MidNum" }, { WordBreakMidNum, "MidNum" },
{ WordBreakNumeric, "Numeric" }, { WordBreakNumeric, "Numeric" },
@ -228,6 +251,8 @@ static void initWordBreak()
static const char *sentence_break_string = static const char *sentence_break_string =
" enum SentenceBreak {\n" " enum SentenceBreak {\n"
" SentenceBreakOther,\n" " SentenceBreakOther,\n"
" SentenceBreakCR,\n"
" SentenceBreakLF,\n"
" SentenceBreakSep,\n" " SentenceBreakSep,\n"
" SentenceBreakFormat,\n" " SentenceBreakFormat,\n"
" SentenceBreakSp,\n" " SentenceBreakSp,\n"
@ -236,12 +261,15 @@ static const char *sentence_break_string =
" SentenceBreakOLetter,\n" " SentenceBreakOLetter,\n"
" SentenceBreakNumeric,\n" " SentenceBreakNumeric,\n"
" SentenceBreakATerm,\n" " SentenceBreakATerm,\n"
" SentenceBreakSContinue,\n"
" SentenceBreakSTerm,\n" " SentenceBreakSTerm,\n"
" SentenceBreakClose\n" " SentenceBreakClose\n"
" };\n\n"; " };\n\n";
enum SentenceBreak { enum SentenceBreak {
SentenceBreakOther, SentenceBreakOther,
SentenceBreakCR,
SentenceBreakLF,
SentenceBreakSep, SentenceBreakSep,
SentenceBreakFormat, SentenceBreakFormat,
SentenceBreakSp, SentenceBreakSp,
@ -250,6 +278,7 @@ enum SentenceBreak {
SentenceBreakOLetter, SentenceBreakOLetter,
SentenceBreakNumeric, SentenceBreakNumeric,
SentenceBreakATerm, SentenceBreakATerm,
SentenceBreakSContinue,
SentenceBreakSTerm, SentenceBreakSTerm,
SentenceBreakClose SentenceBreakClose
@ -265,7 +294,10 @@ static void initSentenceBreak()
const char *name; const char *name;
} breaks[] = { } breaks[] = {
{ SentenceBreakOther, "Other" }, { SentenceBreakOther, "Other" },
{ SentenceBreakCR, "CR" },
{ SentenceBreakLF, "LF" },
{ SentenceBreakSep, "Sep" }, { SentenceBreakSep, "Sep" },
{ SentenceBreakFormat, "Extend" },
{ SentenceBreakFormat, "Format" }, { SentenceBreakFormat, "Format" },
{ SentenceBreakSp, "Sp" }, { SentenceBreakSp, "Sp" },
{ SentenceBreakLower, "Lower" }, { SentenceBreakLower, "Lower" },
@ -273,6 +305,7 @@ static void initSentenceBreak()
{ SentenceBreakOLetter, "OLetter" }, { SentenceBreakOLetter, "OLetter" },
{ SentenceBreakNumeric, "Numeric" }, { SentenceBreakNumeric, "Numeric" },
{ SentenceBreakATerm, "ATerm" }, { SentenceBreakATerm, "ATerm" },
{ SentenceBreakSContinue, "SContinue" },
{ SentenceBreakSTerm, "STerm" }, { SentenceBreakSTerm, "STerm" },
{ SentenceBreakClose, "Close" }, { SentenceBreakClose, "Close" },
{ SentenceBreak_Unassigned, 0 } { SentenceBreak_Unassigned, 0 }
@ -286,26 +319,25 @@ static void initSentenceBreak()
static const char *line_break_class_string = static const char *line_break_class_string =
" // see http://www.unicode.org/reports/tr14/tr14-19.html\n" " // see http://www.unicode.org/reports/tr14/tr14-28.html\n"
" // we don't use the XX, AI and CB properties and map them to AL instead.\n" " // we don't use the XX, AI, and CB classes and map them to AL instead.\n"
" // as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.\n"
" enum LineBreakClass {\n" " enum LineBreakClass {\n"
" LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS,\n" " LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,\n"
" LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR, LineBreak_PO,\n" " LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,\n"
" LineBreak_NU, LineBreak_AL, LineBreak_ID, LineBreak_IN, LineBreak_HY,\n" " LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,\n"
" LineBreak_BA, LineBreak_BB, LineBreak_B2, LineBreak_ZW, LineBreak_CM,\n" " LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,\n"
" LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV,\n" " LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,\n"
" LineBreak_JT, LineBreak_SA, LineBreak_SG,\n" " LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_SA, LineBreak_SG,\n"
" LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK\n" " LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK\n"
" };\n\n"; " };\n\n";
enum LineBreakClass { enum LineBreakClass {
LineBreak_OP, LineBreak_CL, LineBreak_QU, LineBreak_GL, LineBreak_NS, LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR, LineBreak_PO, LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
LineBreak_NU, LineBreak_AL, LineBreak_ID, LineBreak_IN, LineBreak_HY, LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
LineBreak_BA, LineBreak_BB, LineBreak_B2, LineBreak_ZW, LineBreak_CM, LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
LineBreak_WJ, LineBreak_H2, LineBreak_H3, LineBreak_JL, LineBreak_JV, LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
LineBreak_JT, LineBreak_SA, LineBreak_SG, LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_SA, LineBreak_SG,
LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
, LineBreak_Unassigned , LineBreak_Unassigned
@ -315,8 +347,11 @@ static QHash<QByteArray, LineBreakClass> line_break_map;
static void initLineBreak() static void initLineBreak()
{ {
// ### Classes XX and AI are left out and mapped to AL for now; // ### Classes XX and AI are left out and mapped to AL for now.
// ### Class NL is ignored and mapped to AL as well. // ### Class CB is unsupported for now and mapped to AL as well.
// ### Class NL is mapped to BK.
// ### Treating characters of class CJ as class NS will give CSS strict line breaking;
// treating them as class ID will give CSS normal breaking.
struct LineBreakList { struct LineBreakList {
LineBreakClass brk; LineBreakClass brk;
const char *name; const char *name;
@ -325,7 +360,7 @@ static void initLineBreak()
{ LineBreak_CR, "CR" }, { LineBreak_CR, "CR" },
{ LineBreak_LF, "LF" }, { LineBreak_LF, "LF" },
{ LineBreak_CM, "CM" }, { LineBreak_CM, "CM" },
{ LineBreak_AL, "NL" }, { LineBreak_BK, "NL" },
{ LineBreak_SG, "SG" }, { LineBreak_SG, "SG" },
{ LineBreak_WJ, "WJ" }, { LineBreak_WJ, "WJ" },
{ LineBreak_ZW, "ZW" }, { LineBreak_ZW, "ZW" },
@ -336,7 +371,9 @@ static void initLineBreak()
{ LineBreak_BB, "BB" }, { LineBreak_BB, "BB" },
{ LineBreak_HY, "HY" }, { LineBreak_HY, "HY" },
{ LineBreak_AL, "CB" }, // ### { LineBreak_AL, "CB" }, // ###
{ LineBreak_NS, "CJ" },
{ LineBreak_CL, "CL" }, { LineBreak_CL, "CL" },
{ LineBreak_CP, "CP" },
{ LineBreak_EX, "EX" }, { LineBreak_EX, "EX" },
{ LineBreak_IN, "IN" }, { LineBreak_IN, "IN" },
{ LineBreak_NS, "NS" }, { LineBreak_NS, "NS" },
@ -349,6 +386,7 @@ static void initLineBreak()
{ LineBreak_SY, "SY" }, { LineBreak_SY, "SY" },
{ LineBreak_AL, "AI" }, { LineBreak_AL, "AI" },
{ LineBreak_AL, "AL" }, { LineBreak_AL, "AL" },
{ LineBreak_HL, "HL" },
{ LineBreak_H2, "H2" }, { LineBreak_H2, "H2" },
{ LineBreak_H3, "H3" }, { LineBreak_H3, "H3" },
{ LineBreak_ID, "ID" }, { LineBreak_ID, "ID" },
@ -513,20 +551,41 @@ struct UnicodeData {
p.direction = QChar::DirL; p.direction = QChar::DirL;
// DerivedBidiClass.txt // DerivedBidiClass.txt
// DirR for: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF // The unassigned code points that default to AL are in the ranges:
if ((codepoint >= 0x590 && codepoint <= 0x5ff) // [U+0600..U+07BF, U+08A0..U+08FF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFF, U+1EE00..U+1EEFF]
|| (codepoint >= 0x7c0 && codepoint <= 0x8ff) if ((codepoint >= 0x0600 && codepoint <= 0x07BF)
|| (codepoint >= 0xfb1d && codepoint <= 0xfb4f) || (codepoint >= 0x08A0 && codepoint <= 0x08FF)
|| (codepoint >= 0x10800 && codepoint <= 0x10fff)) { || (codepoint >= 0xFB50 && codepoint <= 0xFDCF)
|| (codepoint >= 0xFDF0 && codepoint <= 0xFDFF)
|| (codepoint >= 0xFE70 && codepoint <= 0xFEFF)
|| (codepoint >= 0x1EE00 && codepoint <= 0x1EEFF)) {
p.direction = QChar::DirAL;
}
// The unassigned code points that default to R are in the ranges:
// [U+0590..U+05FF, U+07C0..U+089F, U+FB1D..U+FB4F, U+10800..U+10FFF, U+1E800..U+1EDFF, U+1EF00..U+1EFFF]
else if ((codepoint >= 0x0590 && codepoint <= 0x05FF)
|| (codepoint >= 0x07C0 && codepoint <= 0x089F)
|| (codepoint >= 0xFB1D && codepoint <= 0xFB4F)
|| (codepoint >= 0x10800 && codepoint <= 0x10FFF)
|| (codepoint >= 0x1E800 && codepoint <= 0x1EDFF)
|| (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
p.direction = QChar::DirR; p.direction = QChar::DirR;
} }
// DirAL for: U+0600..U+07BF, U+FB50..U+FDFF, U+FE70..U+FEFF
// minus noncharacter code points (intersects with U+FDD0..U+FDEF) p.line_break_class = LineBreak_AL; // XX -> AL
if ((codepoint >= 0x600 && codepoint <= 0x7bf) // LineBreak.txt
|| (codepoint >= 0xfb50 && codepoint <= 0xfdcf) // The unassigned code points that default to "ID" include ranges in the following blocks:
|| (codepoint >= 0xfdf0 && codepoint <= 0xfdff) // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2F800..U+2FA1F, U+20000..U+2FFFD, U+30000..U+3FFFD]
|| (codepoint >= 0xfe70 && codepoint <= 0xfeff)) { if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
p.direction = QChar::DirAL; || (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
|| (codepoint >= 0xF900 && codepoint <= 0xFAFF)
|| (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
|| (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
|| (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
|| (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
|| (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
p.line_break_class = LineBreak_ID;
} }
mirroredChar = 0; mirroredChar = 0;
@ -535,7 +594,6 @@ struct UnicodeData {
p.age = QChar::Unicode_Unassigned; p.age = QChar::Unicode_Unassigned;
p.mirrorDiff = 0; p.mirrorDiff = 0;
p.digitValue = -1; p.digitValue = -1;
p.line_break_class = LineBreak_AL; // XX -> AL
p.lowerCaseDiff = 0; p.lowerCaseDiff = 0;
p.upperCaseDiff = 0; p.upperCaseDiff = 0;
p.titleCaseDiff = 0; p.titleCaseDiff = 0;