Optimize QChar::decomposition()

* Construct the QString directly instead of calling QString::fromUtf16(),
  since the decomposition data is known to be raw UCS-2.
* Drop the explicit check for ucs4 > UNICODE_LAST_CODEPOINT; it is safe
  to omit because the GET_DECOMPOSITION_INDEX macro already performs a
  similar check.

Change-Id: Ifb660efc51c664d06733ac8ed46d54278520da06
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
Konstantin Ritt 2012-04-23 07:00:28 +03:00 committed by Qt by Nokia
parent 6fb0110f97
commit 5e66c35a23

View File

@ -892,27 +892,26 @@ ushort QChar::mirroredChar(ushort ucs2)
} }
// constants for Hangul (de)composition, see UAX #15
enum { enum {
Hangul_SBase = 0xac00, Hangul_SBase = 0xac00,
Hangul_LBase = 0x1100, Hangul_LBase = 0x1100,
Hangul_VBase = 0x1161, Hangul_VBase = 0x1161,
Hangul_TBase = 0x11a7, Hangul_TBase = 0x11a7,
Hangul_SCount = 11172,
Hangul_LCount = 19, Hangul_LCount = 19,
Hangul_VCount = 21, Hangul_VCount = 21,
Hangul_TCount = 28, Hangul_TCount = 28,
Hangul_NCount = 21*28 Hangul_NCount = Hangul_VCount * Hangul_TCount,
Hangul_SCount = Hangul_LCount * Hangul_NCount
}; };
// buffer has to have a length of 3. It's needed for Hangul decomposition // buffer has to have a length of 3. It's needed for Hangul decomposition
static const unsigned short * QT_FASTCALL decompositionHelper static const unsigned short * QT_FASTCALL decompositionHelper
(uint ucs4, int *length, int *tag, unsigned short *buffer) (uint ucs4, int *length, int *tag, unsigned short *buffer)
{ {
*length = 0;
if (ucs4 > UNICODE_LAST_CODEPOINT)
return 0;
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) { if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
int SIndex = ucs4 - Hangul_SBase; // compute Hangul syllable decomposition as per UAX #15
const uint SIndex = ucs4 - Hangul_SBase;
buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
@ -922,8 +921,12 @@ static const unsigned short * QT_FASTCALL decompositionHelper
} }
const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
if (index == 0xffff) if (index == 0xffff) {
*length = 0;
*tag = QChar::NoDecomposition;
return 0; return 0;
}
const unsigned short *decomposition = uc_decomposition_map+index; const unsigned short *decomposition = uc_decomposition_map+index;
*tag = (*decomposition) & 0xff; *tag = (*decomposition) & 0xff;
*length = (*decomposition) >> 8; *length = (*decomposition) >> 8;
@ -950,7 +953,7 @@ QString QChar::decomposition(uint ucs4)
int length; int length;
int tag; int tag;
const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
return QString::fromUtf16(d, length); return QString(reinterpret_cast<const QChar *>(d), length);
} }
/*! /*!
@ -969,8 +972,6 @@ QChar::Decomposition QChar::decompositionTag() const
*/ */
QChar::Decomposition QChar::decompositionTag(uint ucs4) QChar::Decomposition QChar::decompositionTag(uint ucs4)
{ {
if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::NoDecomposition;
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount)
return QChar::Canonical; return QChar::Canonical;
const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
@ -1400,6 +1401,8 @@ QDataStream &operator>>(QDataStream &in, QChar &chr)
static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from) static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
{ {
int length;
int tag;
unsigned short buffer[3]; unsigned short buffer[3];
QString &s = *str; QString &s = *str;
@ -1415,18 +1418,18 @@ static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion
ucs4 = QChar::surrogateToUcs4(high, ucs4); ucs4 = QChar::surrogateToUcs4(high, ucs4);
} }
} }
const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4); const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
if (v > version || v == QChar::Unicode_Unassigned) if (v > version || v == QChar::Unicode_Unassigned)
continue; continue;
int length;
int tag;
const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
if (!d || (canonical && tag != QChar::Canonical)) if (!d || (canonical && tag != QChar::Canonical))
continue; continue;
int pos = uc - utf16; int pos = uc - utf16;
s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length); s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
// since the insert invalidates the pointers and we do decomposition recursive // since the replace invalidates the pointers and we do decomposition recursive
utf16 = reinterpret_cast<unsigned short *>(s.data()); utf16 = reinterpret_cast<unsigned short *>(s.data());
uc = utf16 + pos + length; uc = utf16 + pos + length;
} }
@ -1445,20 +1448,22 @@ inline bool operator<(const UCS2Pair &ligature, ushort u1)
static ushort ligatureHelper(ushort u1, ushort u2) static ushort ligatureHelper(ushort u1, ushort u2)
{ {
// hangul L-V pair if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) {
int LIndex = u1 - Hangul_LBase; // compute Hangul syllable composition as per UAX #15
if (0 <= LIndex && LIndex < Hangul_LCount) { // hangul L-V pair
int VIndex = u2 - Hangul_VBase; const uint LIndex = u1 - Hangul_LBase;
if (0 <= VIndex && VIndex < Hangul_VCount) if (LIndex < Hangul_LCount) {
return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount; const uint VIndex = u2 - Hangul_VBase;
} if (VIndex < Hangul_VCount)
return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
// hangul LV-T pair }
int SIndex = u1 - Hangul_SBase; // hangul LV-T pair
if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) { const uint SIndex = u1 - Hangul_SBase;
int TIndex = u2 - Hangul_TBase; if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
if (0 <= TIndex && TIndex <= Hangul_TCount) const uint TIndex = u2 - Hangul_TBase;
return u1 + TIndex; if (TIndex <= Hangul_TCount)
return u1 + TIndex;
}
} }
const unsigned short index = GET_LIGATURE_INDEX(u2); const unsigned short index = GET_LIGATURE_INDEX(u2);