Make QUnicodeTables::script() support SMP code points
Instead of expanding the scripts table with script values for the code points >= 0x10000, it has been merged with the properties table in order to increase perfomance of the script itemization code (not affected yet). (Stats: the properties table grew up in 97428-89800 = 7628 bytes; the old scripts table was of size 7680 bytes) The outdated ScriptsInitial.txt and ScriptsCorrections.txt file has been removed (they were just empty, the "corrigendum" script corrections should be applied to Scripts.txt directly, *no customization allowed*!). More script testcases has been added - at least one per supported script. Task-number: QTBUG-6530 Change-Id: I40a9e76f681e2dd552fd4c61af0808d043962e79 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
parent
09bc8e2cb8
commit
d8c04d60db
@ -711,11 +711,39 @@ void tst_QChar::lineBreakClass()
|
|||||||
void tst_QChar::script()
|
void tst_QChar::script()
|
||||||
{
|
{
|
||||||
QVERIFY(QUnicodeTables::script(0x0020u) == QUnicodeTables::Common);
|
QVERIFY(QUnicodeTables::script(0x0020u) == QUnicodeTables::Common);
|
||||||
QVERIFY(QUnicodeTables::script(0x0041u) == QUnicodeTables::Common);
|
QVERIFY(QUnicodeTables::script(0x0041u) == QUnicodeTables::Common); // ### Latin
|
||||||
QVERIFY(QUnicodeTables::script(0x0375u) == QUnicodeTables::Greek);
|
QVERIFY(QUnicodeTables::script(0x0375u) == QUnicodeTables::Greek);
|
||||||
QVERIFY(QUnicodeTables::script(0x0400u) == QUnicodeTables::Cyrillic);
|
QVERIFY(QUnicodeTables::script(0x0400u) == QUnicodeTables::Cyrillic);
|
||||||
QVERIFY(QUnicodeTables::script(0x0E01u) == QUnicodeTables::Thai);
|
QVERIFY(QUnicodeTables::script(0x0531u) == QUnicodeTables::Armenian);
|
||||||
//### QVERIFY(QUnicodeTables::script(0x1018Au) == QUnicodeTables::Greek);
|
QVERIFY(QUnicodeTables::script(0x0591u) == QUnicodeTables::Hebrew);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0600u) == QUnicodeTables::Arabic);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0700u) == QUnicodeTables::Syriac);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0780u) == QUnicodeTables::Thaana);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x07c0u) == QUnicodeTables::Nko);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0900u) == QUnicodeTables::Devanagari);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0981u) == QUnicodeTables::Bengali);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0a01u) == QUnicodeTables::Gurmukhi);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0a81u) == QUnicodeTables::Gujarati);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0b01u) == QUnicodeTables::Oriya);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0b82u) == QUnicodeTables::Tamil);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0c01u) == QUnicodeTables::Telugu);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0c82u) == QUnicodeTables::Kannada);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0d02u) == QUnicodeTables::Malayalam);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0d82u) == QUnicodeTables::Sinhala);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0e01u) == QUnicodeTables::Thai);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0e81u) == QUnicodeTables::Lao);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x0f00u) == QUnicodeTables::Tibetan);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1000u) == QUnicodeTables::Myanmar);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x10a0u) == QUnicodeTables::Georgian);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1100u) == QUnicodeTables::Hangul);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1680u) == QUnicodeTables::Ogham);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x16a0u) == QUnicodeTables::Runic);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1780u) == QUnicodeTables::Khmer);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x200cu) == QUnicodeTables::Inherited);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x200du) == QUnicodeTables::Inherited);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1018au) == QUnicodeTables::Greek);
|
||||||
|
QVERIFY(QUnicodeTables::script(0x1f130u) == QUnicodeTables::Common);
|
||||||
|
QVERIFY(QUnicodeTables::script(0xe0100u) == QUnicodeTables::Inherited);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QChar::normalization_data()
|
void tst_QChar::normalization_data()
|
||||||
|
@ -407,48 +407,49 @@ static void initLineBreak()
|
|||||||
// Keep this one in sync with the code in createPropertyInfo
|
// Keep this one in sync with the code in createPropertyInfo
|
||||||
static const char *property_string =
|
static const char *property_string =
|
||||||
" struct Properties {\n"
|
" struct Properties {\n"
|
||||||
" ushort category : 8; /* 5 needed */\n"
|
" ushort category : 8; /* 5 used */\n"
|
||||||
" ushort line_break_class : 8; /* 6 needed */\n"
|
" ushort direction : 8; /* 5 used */\n"
|
||||||
" ushort direction : 8; /* 5 needed */\n"
|
|
||||||
" ushort combiningClass : 8;\n"
|
" ushort combiningClass : 8;\n"
|
||||||
" ushort joining : 2;\n"
|
" ushort joining : 2;\n"
|
||||||
" signed short digitValue : 6; /* 5 needed */\n"
|
" signed short digitValue : 6; /* 5 used */\n"
|
||||||
" ushort unicodeVersion : 4;\n"
|
|
||||||
" ushort lowerCaseSpecial : 1;\n"
|
|
||||||
" ushort upperCaseSpecial : 1;\n"
|
|
||||||
" ushort titleCaseSpecial : 1;\n"
|
|
||||||
" ushort caseFoldSpecial : 1;\n"
|
|
||||||
" signed short mirrorDiff : 16;\n"
|
" signed short mirrorDiff : 16;\n"
|
||||||
" signed short lowerCaseDiff : 16;\n"
|
" signed short lowerCaseDiff : 16;\n"
|
||||||
" signed short upperCaseDiff : 16;\n"
|
" signed short upperCaseDiff : 16;\n"
|
||||||
" signed short titleCaseDiff : 16;\n"
|
" signed short titleCaseDiff : 16;\n"
|
||||||
" signed short caseFoldDiff : 16;\n"
|
" signed short caseFoldDiff : 16;\n"
|
||||||
" ushort graphemeBreak : 8; /* 4 needed */\n"
|
" ushort lowerCaseSpecial : 1;\n"
|
||||||
" ushort wordBreak : 8; /* 4 needed */\n"
|
" ushort upperCaseSpecial : 1;\n"
|
||||||
" ushort sentenceBreak : 8; /* 4 needed */\n"
|
" ushort titleCaseSpecial : 1;\n"
|
||||||
|
" ushort caseFoldSpecial : 1;\n"
|
||||||
|
" ushort unicodeVersion : 4;\n"
|
||||||
|
" ushort graphemeBreak : 8; /* 4 used */\n"
|
||||||
|
" ushort wordBreak : 8; /* 4 used */\n"
|
||||||
|
" ushort sentenceBreak : 8; /* 4 used */\n"
|
||||||
|
" ushort line_break_class : 8; /* 6 used */\n"
|
||||||
|
" ushort script : 8; /* 5 used */\n"
|
||||||
" };\n"
|
" };\n"
|
||||||
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);\n"
|
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);\n"
|
||||||
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
|
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
|
||||||
|
|
||||||
static const char *methods =
|
static const char *methods =
|
||||||
" Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n"
|
" Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n"
|
||||||
" inline int graphemeBreakClass(QChar ch)\n"
|
" inline GraphemeBreak graphemeBreakClass(QChar ch)\n"
|
||||||
" { return graphemeBreakClass(ch.unicode()); }\n"
|
" { return graphemeBreakClass(ch.unicode()); }\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n"
|
" Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n"
|
||||||
" inline int wordBreakClass(QChar ch)\n"
|
" inline WordBreak wordBreakClass(QChar ch)\n"
|
||||||
" { return wordBreakClass(ch.unicode()); }\n"
|
" { return wordBreakClass(ch.unicode()); }\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n"
|
" Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n"
|
||||||
" inline int sentenceBreakClass(QChar ch)\n"
|
" inline SentenceBreak sentenceBreakClass(QChar ch)\n"
|
||||||
" { return sentenceBreakClass(ch.unicode()); }\n"
|
" { return sentenceBreakClass(ch.unicode()); }\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
|
" Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
|
||||||
" inline int lineBreakClass(QChar ch)\n"
|
" inline LineBreakClass lineBreakClass(QChar ch)\n"
|
||||||
" { return lineBreakClass(ch.unicode()); }\n"
|
" { return lineBreakClass(ch.unicode()); }\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4);\n"
|
" Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);\n"
|
||||||
" inline int script(QChar ch)\n"
|
" inline Script script(QChar ch)\n"
|
||||||
" { return script(ch.unicode()); }\n\n";
|
" { return script(ch.unicode()); }\n\n";
|
||||||
|
|
||||||
static const int SizeOfPropertiesStruct = 20;
|
static const int SizeOfPropertiesStruct = 20;
|
||||||
@ -461,7 +462,6 @@ struct PropertyFlags {
|
|||||||
&& joining == o.joining
|
&& joining == o.joining
|
||||||
&& age == o.age
|
&& age == o.age
|
||||||
&& digitValue == o.digitValue
|
&& digitValue == o.digitValue
|
||||||
&& line_break_class == o.line_break_class
|
|
||||||
&& mirrorDiff == o.mirrorDiff
|
&& mirrorDiff == o.mirrorDiff
|
||||||
&& lowerCaseDiff == o.lowerCaseDiff
|
&& lowerCaseDiff == o.lowerCaseDiff
|
||||||
&& upperCaseDiff == o.upperCaseDiff
|
&& upperCaseDiff == o.upperCaseDiff
|
||||||
@ -474,6 +474,8 @@ struct PropertyFlags {
|
|||||||
&& graphemeBreak == o.graphemeBreak
|
&& graphemeBreak == o.graphemeBreak
|
||||||
&& wordBreak == o.wordBreak
|
&& wordBreak == o.wordBreak
|
||||||
&& sentenceBreak == o.sentenceBreak
|
&& sentenceBreak == o.sentenceBreak
|
||||||
|
&& line_break_class == o.line_break_class
|
||||||
|
&& script == o.script
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// from UnicodeData.txt
|
// from UnicodeData.txt
|
||||||
@ -500,6 +502,7 @@ struct PropertyFlags {
|
|||||||
GraphemeBreak graphemeBreak;
|
GraphemeBreak graphemeBreak;
|
||||||
WordBreak wordBreak;
|
WordBreak wordBreak;
|
||||||
SentenceBreak sentenceBreak;
|
SentenceBreak sentenceBreak;
|
||||||
|
int script;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -604,6 +607,7 @@ struct UnicodeData {
|
|||||||
p.graphemeBreak = GraphemeBreakOther;
|
p.graphemeBreak = GraphemeBreakOther;
|
||||||
p.wordBreak = WordBreakOther;
|
p.wordBreak = WordBreakOther;
|
||||||
p.sentenceBreak = SentenceBreakOther;
|
p.sentenceBreak = SentenceBreakOther;
|
||||||
|
p.script = 0; // Common
|
||||||
propertyIndex = -1;
|
propertyIndex = -1;
|
||||||
excludedComposition = false;
|
excludedComposition = false;
|
||||||
}
|
}
|
||||||
@ -1816,154 +1820,128 @@ static void readBlocks()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static QList<QByteArray> scriptNames;
|
static QList<QByteArray> scriptNames;
|
||||||
static QHash<int, int> scriptAssignment;
|
static QList<int> scriptMap;
|
||||||
static QHash<int, int> scriptHash;
|
|
||||||
|
|
||||||
struct ExtraBlock {
|
static const char *specialScripts[] = {
|
||||||
int block;
|
"Common",
|
||||||
QVector<int> vector;
|
"Greek",
|
||||||
|
"Cyrillic",
|
||||||
|
"Armenian",
|
||||||
|
"Hebrew",
|
||||||
|
"Arabic",
|
||||||
|
"Syriac",
|
||||||
|
"Thaana",
|
||||||
|
"Devanagari",
|
||||||
|
"Bengali",
|
||||||
|
"Gurmukhi",
|
||||||
|
"Gujarati",
|
||||||
|
"Oriya",
|
||||||
|
"Tamil",
|
||||||
|
"Telugu",
|
||||||
|
"Kannada",
|
||||||
|
"Malayalam",
|
||||||
|
"Sinhala",
|
||||||
|
"Thai",
|
||||||
|
"Lao",
|
||||||
|
"Tibetan",
|
||||||
|
"Myanmar",
|
||||||
|
"Georgian",
|
||||||
|
"Hangul",
|
||||||
|
"Ogham",
|
||||||
|
"Runic",
|
||||||
|
"Khmer",
|
||||||
|
"Nko",
|
||||||
|
"Inherited"
|
||||||
};
|
};
|
||||||
|
enum { specialScriptsCount = sizeof(specialScripts) / sizeof(const char *) };
|
||||||
static QList<ExtraBlock> extraBlockList;
|
|
||||||
|
|
||||||
|
|
||||||
static void readScripts()
|
static void readScripts()
|
||||||
{
|
{
|
||||||
scriptNames.append("Common");
|
qDebug("Reading Scripts.txt");
|
||||||
|
QFile f("data/Scripts.txt");
|
||||||
|
if (!f.exists())
|
||||||
|
qFatal("Couldn't find Scripts.txt");
|
||||||
|
|
||||||
static const char *files[] = {
|
f.open(QFile::ReadOnly);
|
||||||
"data/ScriptsInitial.txt",
|
|
||||||
"data/Scripts.txt",
|
|
||||||
"data/ScriptsCorrections.txt"
|
|
||||||
};
|
|
||||||
enum { fileCount = sizeof(files) / sizeof(const char *) };
|
|
||||||
|
|
||||||
for (int i = 0; i < fileCount; ++i) {
|
int scriptsCount = specialScriptsCount;
|
||||||
QFile f(files[i]);
|
// ### preserve the old ordering (temporary)
|
||||||
if (!f.exists())
|
for (int i = 0; i < specialScriptsCount; ++i) {
|
||||||
qFatal("Couldn't find %s", files[i]);
|
scriptNames.append(specialScripts[i]);
|
||||||
|
scriptMap.append(i);
|
||||||
|
}
|
||||||
|
|
||||||
f.open(QFile::ReadOnly);
|
while (!f.atEnd()) {
|
||||||
|
QByteArray line = f.readLine();
|
||||||
|
line.resize(line.size() - 1);
|
||||||
|
|
||||||
while (!f.atEnd()) {
|
int comment = line.indexOf("#");
|
||||||
QByteArray line = f.readLine();
|
if (comment >= 0)
|
||||||
line.resize(line.size() - 1);
|
line = line.left(comment);
|
||||||
|
|
||||||
int comment = line.indexOf("#");
|
line.replace(" ", "");
|
||||||
if (comment >= 0)
|
line.replace("_", "");
|
||||||
line = line.left(comment);
|
|
||||||
|
|
||||||
line.replace(" ", "");
|
if (line.isEmpty())
|
||||||
line.replace("_", "");
|
continue;
|
||||||
|
|
||||||
if (line.isEmpty())
|
int semicolon = line.indexOf(';');
|
||||||
continue;
|
Q_ASSERT(semicolon >= 0);
|
||||||
|
QByteArray codePoints = line.left(semicolon);
|
||||||
|
QByteArray scriptName = line.mid(semicolon + 1);
|
||||||
|
|
||||||
int semicolon = line.indexOf(';');
|
codePoints.replace("..", ".");
|
||||||
Q_ASSERT(semicolon >= 0);
|
QList<QByteArray> cl = codePoints.split('.');
|
||||||
QByteArray codePoints = line.left(semicolon);
|
|
||||||
QByteArray scriptName = line.mid(semicolon + 1);
|
|
||||||
|
|
||||||
int scriptIndex = scriptNames.indexOf(scriptName);
|
bool ok;
|
||||||
if (scriptIndex == -1) {
|
int first = cl[0].toInt(&ok, 16);
|
||||||
scriptIndex = scriptNames.size();
|
Q_ASSERT(ok);
|
||||||
scriptNames.append(scriptName);
|
int last = first;
|
||||||
}
|
if (cl.size() == 2) {
|
||||||
|
last = cl[1].toInt(&ok, 16);
|
||||||
codePoints.replace("..", ".");
|
|
||||||
QList<QByteArray> cl = codePoints.split('.');
|
|
||||||
|
|
||||||
bool ok;
|
|
||||||
int first = cl[0].toInt(&ok, 16);
|
|
||||||
Q_ASSERT(ok);
|
Q_ASSERT(ok);
|
||||||
int last = first;
|
}
|
||||||
if (cl.size() == 2) {
|
|
||||||
last = cl[1].toInt(&ok, 16);
|
|
||||||
Q_ASSERT(ok);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = first; i <= last; ++i)
|
int scriptIndex = scriptNames.indexOf(scriptName);
|
||||||
scriptAssignment[i] = scriptIndex;
|
if (scriptIndex == -1) {
|
||||||
|
scriptIndex = scriptNames.size();
|
||||||
|
scriptNames.append(scriptName);
|
||||||
|
|
||||||
|
// is the script alias for 'Common'?
|
||||||
|
int s = specialScriptsCount;
|
||||||
|
while (--s > 0) {
|
||||||
|
if (scriptName == specialScripts[s])
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
scriptMap.append(s > 0 ? scriptsCount++ : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int codepoint = first; codepoint <= last; ++codepoint) {
|
||||||
|
UnicodeData &ud = UnicodeData::valueRef(codepoint);
|
||||||
|
ud.p.script = scriptMap.at(scriptIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int scriptSentinel = 0;
|
|
||||||
|
|
||||||
QByteArray createScriptEnumDeclaration()
|
QByteArray createScriptEnumDeclaration()
|
||||||
{
|
{
|
||||||
static const char *specialScripts[] = {
|
|
||||||
"Common",
|
|
||||||
"Arabic",
|
|
||||||
"Armenian",
|
|
||||||
"Bengali",
|
|
||||||
"Cyrillic",
|
|
||||||
"Devanagari",
|
|
||||||
"Georgian",
|
|
||||||
"Greek",
|
|
||||||
"Gujarati",
|
|
||||||
"Gurmukhi",
|
|
||||||
"Hangul",
|
|
||||||
"Hebrew",
|
|
||||||
"Kannada",
|
|
||||||
"Khmer",
|
|
||||||
"Lao",
|
|
||||||
"Malayalam",
|
|
||||||
"Myanmar",
|
|
||||||
"Nko",
|
|
||||||
"Ogham",
|
|
||||||
"Oriya",
|
|
||||||
"Runic",
|
|
||||||
"Sinhala",
|
|
||||||
"Syriac",
|
|
||||||
"Tamil",
|
|
||||||
"Telugu",
|
|
||||||
"Thaana",
|
|
||||||
"Thai",
|
|
||||||
"Tibetan",
|
|
||||||
"Inherited"
|
|
||||||
};
|
|
||||||
const int specialScriptsCount = sizeof(specialScripts) / sizeof(const char *);
|
|
||||||
|
|
||||||
// generate script enum
|
|
||||||
QByteArray declaration;
|
QByteArray declaration;
|
||||||
|
|
||||||
declaration += " // See http://www.unicode.org/reports/tr24/tr24-5.html\n";
|
declaration += " // See http://www.unicode.org/reports/tr24/tr24-5.html\n";
|
||||||
declaration += " enum Script {\n Common";
|
declaration += " enum Script {\n Common";
|
||||||
|
|
||||||
int uniqueScripts = 1; // Common
|
|
||||||
|
|
||||||
// output the ones with special processing first
|
// output the ones with special processing first
|
||||||
for (int i = 1; i < scriptNames.size(); ++i) {
|
for (int i = 1; i < scriptNames.size(); ++i) {
|
||||||
const QByteArray &scriptName = scriptNames.at(i);
|
if (scriptMap.at(i) == 0)
|
||||||
// does the script require special processing?
|
|
||||||
bool special = false;
|
|
||||||
for (int s = 0; s < specialScriptsCount; ++s) {
|
|
||||||
if (scriptName == specialScripts[s]) {
|
|
||||||
special = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!special) {
|
|
||||||
scriptHash[i] = 0; // alias for 'Common'
|
|
||||||
continue;
|
continue;
|
||||||
} else {
|
declaration += ",\n ";
|
||||||
++uniqueScripts;
|
declaration += scriptNames.at(i);
|
||||||
scriptHash[i] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (scriptName != "Inherited") {
|
|
||||||
declaration += ",\n ";
|
|
||||||
declaration += scriptName;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
declaration += ",\n Inherited";
|
|
||||||
declaration += ",\n ScriptCount = Inherited";
|
declaration += ",\n ScriptCount = Inherited";
|
||||||
|
|
||||||
// output the ones that are an alias for 'Common'
|
// output the ones that are an alias for 'Common'
|
||||||
for (int i = 1; i < scriptNames.size(); ++i) {
|
for (int i = 1; i < scriptNames.size(); ++i) {
|
||||||
if (scriptHash.value(i) != 0)
|
if (scriptMap.at(i) != 0)
|
||||||
continue;
|
continue;
|
||||||
declaration += ",\n ";
|
declaration += ",\n ";
|
||||||
declaration += scriptNames.at(i);
|
declaration += scriptNames.at(i);
|
||||||
@ -1972,124 +1950,6 @@ QByteArray createScriptEnumDeclaration()
|
|||||||
|
|
||||||
declaration += "\n };\n\n";
|
declaration += "\n };\n\n";
|
||||||
|
|
||||||
scriptSentinel = ((uniqueScripts + 16) / 32) * 32; // a multiple of 32
|
|
||||||
|
|
||||||
return declaration;
|
|
||||||
}
|
|
||||||
|
|
||||||
QByteArray createScriptTableDeclaration()
|
|
||||||
{
|
|
||||||
Q_ASSERT(scriptSentinel > 0);
|
|
||||||
|
|
||||||
QByteArray declaration;
|
|
||||||
|
|
||||||
const int unicodeBlockCount = 512; // number of unicode blocks
|
|
||||||
const int unicodeBlockSize = 128; // size of each block
|
|
||||||
declaration = "enum { UnicodeBlockCount = ";
|
|
||||||
declaration += QByteArray::number(unicodeBlockCount);
|
|
||||||
declaration += " }; // number of unicode blocks\n";
|
|
||||||
declaration += "enum { UnicodeBlockSize = ";
|
|
||||||
declaration += QByteArray::number(unicodeBlockSize);
|
|
||||||
declaration += " }; // size of each block\n\n";
|
|
||||||
|
|
||||||
// script table
|
|
||||||
declaration += "static const unsigned char uc_scripts[] = {\n";
|
|
||||||
for (int i = 0; i < unicodeBlockCount; ++i) {
|
|
||||||
int block = (((i << 7) & 0xff00) | ((i & 1) * 0x80));
|
|
||||||
int blockAssignment[unicodeBlockSize];
|
|
||||||
for (int x = 0; x < unicodeBlockSize; ++x) {
|
|
||||||
int codePoint = (i << 7) | x;
|
|
||||||
blockAssignment[x] = scriptAssignment.value(codePoint, 0);
|
|
||||||
}
|
|
||||||
bool allTheSame = true;
|
|
||||||
const int originalScript = blockAssignment[0];
|
|
||||||
const int script = scriptHash.value(originalScript);
|
|
||||||
for (int x = 1; allTheSame && x < unicodeBlockSize; ++x) {
|
|
||||||
const int s = scriptHash.value(blockAssignment[x]);
|
|
||||||
if (s != script)
|
|
||||||
allTheSame = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allTheSame) {
|
|
||||||
declaration += " ";
|
|
||||||
declaration += scriptNames.value(originalScript);
|
|
||||||
declaration += ", /* U+";
|
|
||||||
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
|
|
||||||
declaration += '-';
|
|
||||||
declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
|
|
||||||
declaration += " */\n";
|
|
||||||
} else {
|
|
||||||
const int value = extraBlockList.size() + scriptSentinel;
|
|
||||||
const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
|
|
||||||
|
|
||||||
declaration += " ";
|
|
||||||
declaration += QByteArray::number(value);
|
|
||||||
declaration += ", /* U+";
|
|
||||||
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
|
|
||||||
declaration += '-';
|
|
||||||
declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
|
|
||||||
declaration += " at offset ";
|
|
||||||
declaration += QByteArray::number(offset);
|
|
||||||
declaration += " */\n";
|
|
||||||
|
|
||||||
ExtraBlock extraBlock;
|
|
||||||
extraBlock.block = block;
|
|
||||||
extraBlock.vector.resize(unicodeBlockSize);
|
|
||||||
for (int x = 0; x < unicodeBlockSize; ++x)
|
|
||||||
extraBlock.vector[x] = blockAssignment[x];
|
|
||||||
|
|
||||||
extraBlockList.append(extraBlock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < extraBlockList.size(); ++i) {
|
|
||||||
const int value = i + scriptSentinel;
|
|
||||||
const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount;
|
|
||||||
const ExtraBlock &extraBlock = extraBlockList.at(i);
|
|
||||||
const int block = extraBlock.block;
|
|
||||||
|
|
||||||
declaration += "\n\n /* U+";
|
|
||||||
declaration += QByteArray::number(block, 16).rightJustified(4, '0');
|
|
||||||
declaration += '-';
|
|
||||||
declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0');
|
|
||||||
declaration += " at offset ";
|
|
||||||
declaration += QByteArray::number(offset);
|
|
||||||
declaration += " */\n ";
|
|
||||||
|
|
||||||
for (int x = 0; x < extraBlock.vector.size(); ++x) {
|
|
||||||
const int o = extraBlock.vector.at(x);
|
|
||||||
|
|
||||||
declaration += scriptNames.value(o);
|
|
||||||
if (x < extraBlock.vector.size() - 1 || i < extraBlockList.size() - 1)
|
|
||||||
declaration += ',';
|
|
||||||
if ((x & 7) == 7 && x < extraBlock.vector.size() - 1)
|
|
||||||
declaration += "\n ";
|
|
||||||
else
|
|
||||||
declaration += ' ';
|
|
||||||
}
|
|
||||||
if (declaration.endsWith(' '))
|
|
||||||
declaration.chop(1);
|
|
||||||
}
|
|
||||||
declaration += "\n};\n\n";
|
|
||||||
|
|
||||||
declaration += "enum { ScriptSentinel = " + QByteArray::number(scriptSentinel) + " };\n\n";
|
|
||||||
|
|
||||||
declaration +=
|
|
||||||
"Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4)\n"
|
|
||||||
"{\n"
|
|
||||||
" if (ucs4 > 0xffff)\n"
|
|
||||||
" return Common;\n"
|
|
||||||
" int script = uc_scripts[ucs4 >> 7];\n"
|
|
||||||
" if (script < ScriptSentinel)\n"
|
|
||||||
" return script;\n"
|
|
||||||
" script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);\n"
|
|
||||||
" script = uc_scripts[script + (ucs4 & 0x7f)];\n"
|
|
||||||
" return script;\n"
|
|
||||||
"}\n\n";
|
|
||||||
|
|
||||||
qDebug("createScriptTableDeclaration:");
|
|
||||||
qDebug(" memory usage: %d bytes", unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));
|
|
||||||
|
|
||||||
return declaration;
|
return declaration;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2264,44 +2124,26 @@ static QByteArray createPropertyInfo()
|
|||||||
for (int i = 0; i < uniqueProperties.size(); ++i) {
|
for (int i = 0; i < uniqueProperties.size(); ++i) {
|
||||||
const PropertyFlags &p = uniqueProperties.at(i);
|
const PropertyFlags &p = uniqueProperties.at(i);
|
||||||
out += "\n { ";
|
out += "\n { ";
|
||||||
// " ushort category : 8;\n"
|
// " ushort category : 8; /* 5 used */\n"
|
||||||
out += QByteArray::number( p.category );
|
out += QByteArray::number( p.category );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
// " ushort line_break_class : 8;\n"
|
// " ushort direction : 8; /* 5 used */\n"
|
||||||
out += QByteArray::number( p.line_break_class );
|
|
||||||
out += ", ";
|
|
||||||
// " ushort direction : 8;\n"
|
|
||||||
out += QByteArray::number( p.direction );
|
out += QByteArray::number( p.direction );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
// " ushort combiningClass :8;\n"
|
// " ushort combiningClass : 8;\n"
|
||||||
out += QByteArray::number( p.combiningClass );
|
out += QByteArray::number( p.combiningClass );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
// " ushort joining : 2;\n"
|
// " ushort joining : 2;\n"
|
||||||
out += QByteArray::number( p.joining );
|
out += QByteArray::number( p.joining );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
// " signed short digitValue : 6;\n /* 5 needed */"
|
// " signed short digitValue : 6; /* 5 used */\n"
|
||||||
out += QByteArray::number( p.digitValue );
|
out += QByteArray::number( p.digitValue );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
// " ushort unicodeVersion : 4;\n"
|
// " signed short mirrorDiff : 16;\n"
|
||||||
out += QByteArray::number( p.age );
|
|
||||||
out += ", ";
|
|
||||||
// " ushort lowerCaseSpecial : 1;\n"
|
|
||||||
// " ushort upperCaseSpecial : 1;\n"
|
|
||||||
// " ushort titleCaseSpecial : 1;\n"
|
|
||||||
// " ushort caseFoldSpecial : 1;\n"
|
|
||||||
out += QByteArray::number( p.lowerCaseSpecial );
|
|
||||||
out += ", ";
|
|
||||||
out += QByteArray::number( p.upperCaseSpecial );
|
|
||||||
out += ", ";
|
|
||||||
out += QByteArray::number( p.titleCaseSpecial );
|
|
||||||
out += ", ";
|
|
||||||
out += QByteArray::number( p.caseFoldSpecial );
|
|
||||||
out += ", ";
|
|
||||||
// " signed short mirrorDiff : 16;\n"
|
|
||||||
// " signed short lowerCaseDiff : 16;\n"
|
// " signed short lowerCaseDiff : 16;\n"
|
||||||
// " signed short upperCaseDiff : 16;\n"
|
// " signed short upperCaseDiff : 16;\n"
|
||||||
// " signed short titleCaseDiff : 16;\n"
|
// " signed short titleCaseDiff : 16;\n"
|
||||||
// " signed short caseFoldDiff : 16;\n"
|
// " signed short caseFoldDiff : 16;\n"
|
||||||
out += QByteArray::number( p.mirrorDiff );
|
out += QByteArray::number( p.mirrorDiff );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
out += QByteArray::number( p.lowerCaseDiff );
|
out += QByteArray::number( p.lowerCaseDiff );
|
||||||
@ -2312,11 +2154,35 @@ static QByteArray createPropertyInfo()
|
|||||||
out += ", ";
|
out += ", ";
|
||||||
out += QByteArray::number( p.caseFoldDiff );
|
out += QByteArray::number( p.caseFoldDiff );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
|
// " ushort lowerCaseSpecial : 1;\n"
|
||||||
|
// " ushort upperCaseSpecial : 1;\n"
|
||||||
|
// " ushort titleCaseSpecial : 1;\n"
|
||||||
|
// " ushort caseFoldSpecial : 1;\n"
|
||||||
|
out += QByteArray::number( p.lowerCaseSpecial );
|
||||||
|
out += ", ";
|
||||||
|
out += QByteArray::number( p.upperCaseSpecial );
|
||||||
|
out += ", ";
|
||||||
|
out += QByteArray::number( p.titleCaseSpecial );
|
||||||
|
out += ", ";
|
||||||
|
out += QByteArray::number( p.caseFoldSpecial );
|
||||||
|
out += ", ";
|
||||||
|
// " ushort unicodeVersion : 4;\n"
|
||||||
|
out += QByteArray::number( p.age );
|
||||||
|
out += ", ";
|
||||||
|
// " ushort graphemeBreak : 8; /* 4 used */\n"
|
||||||
|
// " ushort wordBreak : 8; /* 4 used */\n"
|
||||||
|
// " ushort sentenceBreak : 8; /* 4 used */\n"
|
||||||
|
// " ushort line_break_class : 8; /* 6 used */\n"
|
||||||
out += QByteArray::number( p.graphemeBreak );
|
out += QByteArray::number( p.graphemeBreak );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
out += QByteArray::number( p.wordBreak );
|
out += QByteArray::number( p.wordBreak );
|
||||||
out += ", ";
|
out += ", ";
|
||||||
out += QByteArray::number( p.sentenceBreak );
|
out += QByteArray::number( p.sentenceBreak );
|
||||||
|
out += ", ";
|
||||||
|
out += QByteArray::number( p.line_break_class );
|
||||||
|
out += ", ";
|
||||||
|
// " ushort script : 8; /* 5 used */\n"
|
||||||
|
out += QByteArray::number( p.script );
|
||||||
out += " },";
|
out += " },";
|
||||||
}
|
}
|
||||||
out.chop(1);
|
out.chop(1);
|
||||||
@ -2363,6 +2229,11 @@ static QByteArray createPropertyInfo()
|
|||||||
"Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
|
"Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
|
" return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4)\n"
|
||||||
|
"{\n"
|
||||||
|
" return (Script)qGetProp(ucs4)->script;\n"
|
||||||
"}\n\n";
|
"}\n\n";
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
@ -2848,7 +2719,6 @@ int main(int, char **)
|
|||||||
QByteArray ligatures = createLigatureInfo();
|
QByteArray ligatures = createLigatureInfo();
|
||||||
QByteArray normalizationCorrections = createNormalizationCorrections();
|
QByteArray normalizationCorrections = createNormalizationCorrections();
|
||||||
QByteArray scriptEnumDeclaration = createScriptEnumDeclaration();
|
QByteArray scriptEnumDeclaration = createScriptEnumDeclaration();
|
||||||
QByteArray scriptTableDeclaration = createScriptTableDeclaration();
|
|
||||||
|
|
||||||
QByteArray header =
|
QByteArray header =
|
||||||
"/****************************************************************************\n"
|
"/****************************************************************************\n"
|
||||||
@ -2922,8 +2792,6 @@ int main(int, char **)
|
|||||||
f.write(ligatures);
|
f.write(ligatures);
|
||||||
f.write("\n");
|
f.write("\n");
|
||||||
f.write(normalizationCorrections);
|
f.write(normalizationCorrections);
|
||||||
f.write("\n");
|
|
||||||
f.write(scriptTableDeclaration);
|
|
||||||
f.write("} // namespace QUnicodeTables\n\n");
|
f.write("} // namespace QUnicodeTables\n\n");
|
||||||
f.write("using namespace QUnicodeTables;\n\n");
|
f.write("using namespace QUnicodeTables;\n\n");
|
||||||
f.write("QT_END_NAMESPACE\n");
|
f.write("QT_END_NAMESPACE\n");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user