Update Unicode data & algorithms up to v8.0
* Georgian lari currency symbol * A large collection of CJK unified ideographs * Emoji symbols and symbol modifiers * Letters to support the Ik language in Uganda, Kulango in the Côte d’Ivoire, and other languages of Africa * A set of lowercase Cherokee syllables, forming case pairs with the existing Cherokee characters * The Ahom script for support of the Tai Ahom language in India * Arabic letters to support Arwi—the Tamil language written in the Arabic script For more details, see http://www.unicode.org/versions/Unicode8.0.0/ [ChangeLog][QtCore] Unicode data updated to v.8.0 Change-Id: If255f95c9c45655b721369a116299da3cabbba0a Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
This commit is contained in:
parent
a98b541f26
commit
0e1f3aab11
@ -179,8 +179,9 @@ QT_BEGIN_NAMESPACE
|
||||
\value Unicode_6_2 Version 6.2
|
||||
\value Unicode_6_3 Version 6.3 Since Qt 5.3
|
||||
\value Unicode_7_0 Version 7.0 Since Qt 5.5
|
||||
\value Unicode_8_0 Version 8.0 Since Qt 5.6
|
||||
\value Unicode_Unassigned The value is not assigned to any character
|
||||
in version 6.3 of Unicode.
|
||||
in version 8.0 of Unicode.
|
||||
|
||||
\sa unicodeVersion(), currentUnicodeVersion()
|
||||
*/
|
||||
@ -401,6 +402,12 @@ QT_BEGIN_NAMESPACE
|
||||
\value Script_Khudawadi
|
||||
\value Script_Tirhuta
|
||||
\value Script_WarangCiti
|
||||
\value Script_Ahom
|
||||
\value Script_AnatolianHieroglyphs
|
||||
\value Script_Hatran
|
||||
\value Script_Multani
|
||||
\value Script_OldHungarian
|
||||
\value Script_SignWriting
|
||||
|
||||
\omitvalue ScriptCount
|
||||
|
||||
|
@ -275,6 +275,14 @@ public:
|
||||
Script_Tirhuta,
|
||||
Script_WarangCiti,
|
||||
|
||||
// Unicode 8.0 additions
|
||||
Script_Ahom,
|
||||
Script_AnatolianHieroglyphs,
|
||||
Script_Hatran,
|
||||
Script_Multani,
|
||||
Script_OldHungarian,
|
||||
Script_SignWriting,
|
||||
|
||||
ScriptCount
|
||||
};
|
||||
|
||||
@ -365,7 +373,8 @@ public:
|
||||
Unicode_6_1,
|
||||
Unicode_6_2,
|
||||
Unicode_6_3,
|
||||
Unicode_7_0
|
||||
Unicode_7_0,
|
||||
Unicode_8_0
|
||||
};
|
||||
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO
|
||||
|
||||
|
@ -49,7 +49,7 @@ namespace QUnicodeTools {
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The text boundaries determination algorithm.
|
||||
// See http://www.unicode.org/reports/tr29/tr29-25.html
|
||||
// See http://www.unicode.org/reports/tr29/tr29-27.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
@ -244,8 +244,9 @@ namespace SB {
|
||||
|
||||
enum State {
|
||||
Initial,
|
||||
Lower,
|
||||
Upper,
|
||||
UpATerm,
|
||||
LUATerm,
|
||||
ATerm,
|
||||
ATermC,
|
||||
ACS,
|
||||
@ -260,10 +261,11 @@ enum State {
|
||||
|
||||
static const uchar breakTable[BAfter + 1][QUnicodeTables::SentenceBreak_Close + 1] = {
|
||||
// Other CR LF Sep Extend Sp Lower Upper OLetter Numeric ATerm SContinue STerm Close
|
||||
{ Initial, BAfterC, BAfter , BAfter , Initial, Initial, Initial, Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial
|
||||
{ Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, UpATerm, STerm , STerm , Initial }, // Upper
|
||||
{ Initial, BAfterC, BAfter , BAfter , Initial, Initial, Lower , Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial
|
||||
{ Initial, BAfterC, BAfter , BAfter , Lower , Initial, Initial, Initial, Initial, Initial, LUATerm, Initial, STerm , Initial }, // Lower
|
||||
{ Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, LUATerm, STerm , STerm , Initial }, // Upper
|
||||
|
||||
{ Lookup , BAfterC, BAfter , BAfter , UpATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // UpATerm
|
||||
{ Lookup , BAfterC, BAfter , BAfter , LUATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // LUATerm
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ATerm , ACS , Initial, Break , Break , Initial, ATerm , STerm , STerm , ATermC }, // ATerm
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ATermC , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , ATermC }, // ATermC
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ACS , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , Lookup }, // ACS
|
||||
@ -341,7 +343,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The line breaking algorithm.
|
||||
// See http://www.unicode.org/reports/tr14/tr14-33.html
|
||||
// See http://www.unicode.org/reports/tr14/tr14-35.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
@ -408,26 +410,29 @@ inline Class toClass(QUnicodeTables::LineBreakClass lbc, QChar::Category categor
|
||||
/* In order to support the tailored implementation of LB25 properly
|
||||
the following changes were made in the pair table to allow breaks
|
||||
where the numeric expression doesn't match the template (i.e. [^NU](IS|SY)NU):
|
||||
CL->PO from IB to DB
|
||||
CP->PO from IB to DB
|
||||
CL->PR from IB to DB
|
||||
CP->PR from IB to DB
|
||||
PO->OP from IB to DB
|
||||
PR->OP from IB to DB
|
||||
IS->NU from IB to DB
|
||||
SY->NU from IB to DB
|
||||
(CL)(PO) from IB to DB
|
||||
(CP)(PO) from IB to DB
|
||||
(CL)(PR) from IB to DB
|
||||
(CP)(PR) from IB to DB
|
||||
(PO)(OP) from IB to DB
|
||||
(PR)(OP) from IB to DB
|
||||
(IS)(NU) from IB to DB
|
||||
(SY)(NU) from IB to DB
|
||||
*/
|
||||
|
||||
// The following line break classes are not treated by the pair table
|
||||
// and must be resolved outside:
|
||||
// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
|
||||
/* In order to implement LB21a properly a special rule HH has been introduced and
|
||||
the following changes were made in the pair table to disallow breaks after Hebrew + Hyphen:
|
||||
(HL)(HY|BA) from IB to CI
|
||||
(HY|BA)(!CB) from DB to HH
|
||||
*/
|
||||
|
||||
enum Action {
|
||||
ProhibitedBreak, PB = ProhibitedBreak,
|
||||
DirectBreak, DB = DirectBreak,
|
||||
IndirectBreak, IB = IndirectBreak,
|
||||
CombiningIndirectBreak, CI = CombiningIndirectBreak,
|
||||
CombiningProhibitedBreak, CP = CombiningProhibitedBreak
|
||||
CombiningProhibitedBreak, CP = CombiningProhibitedBreak,
|
||||
ProhibitedBreakAfterHebrewPlusHyphen, HH = ProhibitedBreakAfterHebrewPlusHyphen
|
||||
};
|
||||
|
||||
static const uchar breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables::LineBreak_CB + 1] = {
|
||||
@ -438,18 +443,18 @@ static const uchar breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables::
|
||||
/* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB },
|
||||
/* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB },
|
||||
/* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* PR */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB, DB, DB },
|
||||
/* PO */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, CI, CI, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* ID */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* IN */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, IB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* BA */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, IB, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB },
|
||||
/* BA */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, HH, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB },
|
||||
/* BB */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, DB },
|
||||
/* B2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB },
|
||||
@ -464,6 +469,10 @@ static const uchar breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables::
|
||||
/* CB */ { DB, PB, PB, IB, IB, DB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }
|
||||
};
|
||||
|
||||
// The following line break classes are not treated by the pair table
|
||||
// and must be resolved outside:
|
||||
// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
|
||||
|
||||
} // namespace LB
|
||||
|
||||
static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
|
||||
@ -555,6 +564,10 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
||||
if (lcls != QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
break;
|
||||
case LB::ProhibitedBreakAfterHebrewPlusHyphen:
|
||||
if (lcls != QUnicodeTables::LineBreak_HL)
|
||||
attributes[pos].lineBreak = true;
|
||||
break;
|
||||
case LB::ProhibitedBreak:
|
||||
// nothing to do
|
||||
default:
|
||||
@ -659,7 +672,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// The Unicode script property. See http://www.unicode.org/reports/tr24/tr24-22.html
|
||||
// The Unicode script property. See http://www.unicode.org/reports/tr24/tr24-24.html
|
||||
//
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
|
@ -188,7 +188,15 @@ static const hb_script_t _qtscript_to_hbscript[] = {
|
||||
HB_SCRIPT_SIDDHAM,
|
||||
HB_SCRIPT_KHUDAWADI,
|
||||
HB_SCRIPT_TIRHUTA,
|
||||
HB_SCRIPT_WARANG_CITI
|
||||
HB_SCRIPT_WARANG_CITI,
|
||||
|
||||
// Unicode 8.0 additions
|
||||
HB_SCRIPT_AHOM,
|
||||
HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
|
||||
HB_SCRIPT_HATRAN,
|
||||
HB_SCRIPT_MULTANI,
|
||||
HB_SCRIPT_OLD_HUNGARIAN,
|
||||
HB_SCRIPT_SIGNWRITING
|
||||
};
|
||||
Q_STATIC_ASSERT(QChar::ScriptCount == sizeof(_qtscript_to_hbscript) / sizeof(_qtscript_to_hbscript[0]));
|
||||
|
||||
|
@ -242,7 +242,13 @@ static const char *specialLanguages[] = {
|
||||
"sa", // Siddham
|
||||
"sd", // Khudawadi
|
||||
"mai", // Tirhuta
|
||||
"hoc" // WarangCiti
|
||||
"hoc", // WarangCiti
|
||||
"", // Ahom
|
||||
"", // AnatolianHieroglyphs
|
||||
"", // Hatran
|
||||
"", // Multani
|
||||
"", // OldHungarian
|
||||
"" // SignWriting
|
||||
};
|
||||
Q_STATIC_ASSERT(sizeof(specialLanguages) / sizeof(const char *) == QChar::ScriptCount);
|
||||
|
||||
|
@ -594,6 +594,10 @@ void tst_QChar::unicodeVersion()
|
||||
QVERIFY(QChar::unicodeVersion(0x20bd) == QChar::Unicode_7_0);
|
||||
QVERIFY(QChar::unicodeVersion(0x16b00) == QChar::Unicode_7_0);
|
||||
|
||||
QVERIFY(QChar(0x08b3).unicodeVersion() == QChar::Unicode_8_0);
|
||||
QVERIFY(QChar::unicodeVersion(0x08b3) == QChar::Unicode_8_0);
|
||||
QVERIFY(QChar::unicodeVersion(0x108e0) == QChar::Unicode_8_0);
|
||||
|
||||
QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
|
||||
QVERIFY(QChar::unicodeVersion(0x09ff) == QChar::Unicode_Unassigned);
|
||||
QVERIFY(QChar::unicodeVersion(0x110000) == QChar::Unicode_Unassigned);
|
||||
|
@ -248,6 +248,15 @@ static const EnumLookup scriptEnumLookup[] =
|
||||
{QChar::Script_Tirhuta, "Script_Tirhuta"},
|
||||
{QChar::Script_WarangCiti, "Script_WarangCiti"},
|
||||
#endif // Qt 5.5
|
||||
|
||||
#if QT_VERSION >= 0x050600
|
||||
{QChar::Script_Ahom, "Script_Ahom"},
|
||||
{QChar::Script_AnatolianHieroglyphs, "Script_AnatolianHieroglyphs"},
|
||||
{QChar::Script_Hatran, "Script_Hatran"},
|
||||
{QChar::Script_Multani, "Script_Multani"},
|
||||
{QChar::Script_OldHungarian, "Script_OldHungarian"},
|
||||
{QChar::Script_SignWriting, "Script_SignWriting"},
|
||||
#endif // Qt 5.6
|
||||
};
|
||||
|
||||
#endif // Qt 5.1
|
||||
@ -364,6 +373,9 @@ static const EnumLookup unicodeVersionEnumLookup[] =
|
||||
#if QT_VERSION >= 0x050500
|
||||
{QChar::Unicode_7_0, "Unicode_7_0"},
|
||||
#endif // Qt 5.5
|
||||
#if QT_VERSION >= 0x050600
|
||||
{QChar::Unicode_8_0, "Unicode_8_0"},
|
||||
#endif // Qt 5.6
|
||||
#endif // Qt 5
|
||||
};
|
||||
|
||||
|
@ -43,8 +43,8 @@
|
||||
#include <private/qunicodetables_p.h>
|
||||
#endif
|
||||
|
||||
#define DATA_VERSION_S "7.0"
|
||||
#define DATA_VERSION_STR "QChar::Unicode_7_0"
|
||||
#define DATA_VERSION_S "8.0"
|
||||
#define DATA_VERSION_STR "QChar::Unicode_8_0"
|
||||
|
||||
|
||||
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
|
||||
@ -71,6 +71,7 @@ static void initAgeMap()
|
||||
{ QChar::Unicode_6_2, "6.2" },
|
||||
{ QChar::Unicode_6_3, "6.3" },
|
||||
{ QChar::Unicode_7_0, "7.0" },
|
||||
{ QChar::Unicode_8_0, "8.0" },
|
||||
{ QChar::Unicode_Unassigned, 0 }
|
||||
};
|
||||
AgeMap *d = ageMap;
|
||||
@ -719,6 +720,13 @@ static void initScriptMap()
|
||||
{ QChar::Script_Khudawadi, "Khudawadi" },
|
||||
{ QChar::Script_Tirhuta, "Tirhuta" },
|
||||
{ QChar::Script_WarangCiti, "WarangCiti" },
|
||||
// 8.0
|
||||
{ QChar::Script_Ahom, "Ahom" },
|
||||
{ QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" },
|
||||
{ QChar::Script_Hatran, "Hatran" },
|
||||
{ QChar::Script_Multani, "Multani" },
|
||||
{ QChar::Script_OldHungarian, "OldHungarian" },
|
||||
{ QChar::Script_SignWriting, "SignWriting" },
|
||||
// unhandled
|
||||
{ QChar::Script_Unknown, 0 }
|
||||
};
|
||||
@ -946,13 +954,16 @@ struct UnicodeData {
|
||||
p.lineBreakClass = LineBreak_AL; // XX -> AL
|
||||
// LineBreak.txt
|
||||
// The unassigned code points that default to "ID" include ranges in the following blocks:
|
||||
// [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2F800..U+2FA1F, U+20000..U+2FFFD, U+30000..U+3FFFD]
|
||||
// [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2B820..U+2CEAF, U+2F800..U+2FA1F]
|
||||
// and any other reserved code points on
|
||||
// [U+20000..U+2FFFD, U+30000..U+3FFFD]
|
||||
if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
|
||||
|| (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
|
||||
|| (codepoint >= 0xF900 && codepoint <= 0xFAFF)
|
||||
|| (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
|
||||
|| (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
|
||||
|| (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
|
||||
|| (codepoint >= 0x2B820 && codepoint <= 0x2CEAF)
|
||||
|| (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
|
||||
|| (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
|
||||
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user