add some useful methods to QUnicodeTables::
in order to reduce code duplication and prepare the ground for upcoming changes Change-Id: I980244149f65384c9484bbec4682de8b7b848b08 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
parent
2d23374a6f
commit
8c0048a377
@ -43,24 +43,12 @@
|
||||
#include "qlist.h"
|
||||
#include "qendian.h"
|
||||
#include "qchar.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
enum { Endian = 0, Data = 1 };
|
||||
|
||||
static inline bool isUnicodeNonCharacter(uint ucs4)
|
||||
{
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
//
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||
// U+FDEF (inclusive)
|
||||
|
||||
return (ucs4 & 0xfffe) == 0xfffe
|
||||
|| (ucs4 - 0xfdd0U) < 32;
|
||||
}
|
||||
|
||||
QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
|
||||
{
|
||||
uchar replacement = '?';
|
||||
@ -120,7 +108,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
|
||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||
} else {
|
||||
// is it one of the Unicode non-characters?
|
||||
if (isUnicodeNonCharacter(u)) {
|
||||
if (QUnicodeTables::isNonCharacter(u)) {
|
||||
*cursor++ = replacement;
|
||||
++ch;
|
||||
++invalid;
|
||||
@ -196,7 +184,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
|
||||
bool nonCharacter;
|
||||
if (!headerdone && uc == 0xfeff) {
|
||||
// don't do anything, just skip the BOM
|
||||
} else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
|
||||
} else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
|
||||
// surrogate pair
|
||||
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||
*qch++ = QChar::highSurrogate(uc);
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <qdebug.h>
|
||||
#include "qjsonparser_p.h"
|
||||
#include "qjson_p.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
//#define PARSER_DEBUG
|
||||
#ifdef PARSER_DEBUG
|
||||
@ -721,19 +722,6 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool isUnicodeNonCharacter(uint ucs4)
|
||||
{
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
//
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||
// U+FDEF (inclusive)
|
||||
|
||||
return (ucs4 & 0xfffe) == 0xfffe
|
||||
|| (ucs4 - 0xfdd0U) < 32;
|
||||
}
|
||||
|
||||
static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
|
||||
{
|
||||
int need;
|
||||
@ -769,7 +757,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result
|
||||
uc = (uc << 6) | (ch & 0x3f);
|
||||
}
|
||||
|
||||
if (uc < min_uc || isUnicodeNonCharacter(uc) ||
|
||||
if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) ||
|
||||
(uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) {
|
||||
return false;
|
||||
}
|
||||
|
@ -41,6 +41,7 @@
|
||||
|
||||
#include "qjsonwriter_p.h"
|
||||
#include "qjson_p.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -49,21 +50,6 @@ using namespace QJsonPrivate;
|
||||
static void objectContentToJson(const QJsonPrivate::Object *o, QByteArray &json, int indent, bool compact);
|
||||
static void arrayContentToJson(const QJsonPrivate::Array *a, QByteArray &json, int indent, bool compact);
|
||||
|
||||
// some code from qutfcodec.cpp, inlined here for performance reasons
|
||||
// to allow fast escaping of strings
|
||||
static inline bool isUnicodeNonCharacter(uint ucs4)
|
||||
{
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
//
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||
// U+FDEF (inclusive)
|
||||
|
||||
return (ucs4 & 0xfffe) == 0xfffe
|
||||
|| (ucs4 - 0xfdd0U) < 32;
|
||||
}
|
||||
|
||||
static inline uchar hexdig(uint u)
|
||||
{
|
||||
return (u < 0xa ? '0' + u : 'a' + u - 0xa);
|
||||
@ -154,7 +140,7 @@ static QByteArray escapedString(const QString &s)
|
||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||
} else {
|
||||
// is it one of the Unicode non-characters?
|
||||
if (isUnicodeNonCharacter(u)) {
|
||||
if (QUnicodeTables::isNonCharacter(u)) {
|
||||
*cursor++ = replacement;
|
||||
++ch;
|
||||
continue;
|
||||
|
@ -4348,6 +4348,21 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)
|
||||
return qGetProp(ucs2);
|
||||
}
|
||||
|
||||
Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)
|
||||
{
|
||||
return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;
|
||||
}
|
||||
|
||||
Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)
|
||||
{
|
||||
return (WordBreak)qGetProp(ucs4)->wordBreak;
|
||||
}
|
||||
|
||||
Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)
|
||||
{
|
||||
return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;
|
||||
}
|
||||
|
||||
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)
|
||||
{
|
||||
return (LineBreakClass)qGetProp(ucs4)->line_break_class;
|
||||
|
@ -217,6 +217,18 @@ namespace QUnicodeTables {
|
||||
};
|
||||
|
||||
|
||||
Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
|
||||
inline int graphemeBreakClass(QChar ch)
|
||||
{ return graphemeBreakClass(ch.unicode()); }
|
||||
|
||||
Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
|
||||
inline int wordBreakClass(QChar ch)
|
||||
{ return wordBreakClass(ch.unicode()); }
|
||||
|
||||
Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
|
||||
inline int sentenceBreakClass(QChar ch)
|
||||
{ return sentenceBreakClass(ch.unicode()); }
|
||||
|
||||
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
|
||||
inline int lineBreakClass(QChar ch)
|
||||
{ return lineBreakClass(ch.unicode()); }
|
||||
@ -225,6 +237,18 @@ namespace QUnicodeTables {
|
||||
inline int script(QChar ch)
|
||||
{ return script(ch.unicode()); }
|
||||
|
||||
|
||||
inline bool isNonCharacter(uint ucs4)
|
||||
{
|
||||
// Noncharacter_Code_Point:
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,
|
||||
// U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF
|
||||
|
||||
return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);
|
||||
}
|
||||
|
||||
} // namespace QUnicodeTables
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -397,6 +397,18 @@ static const char *property_string =
|
||||
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
|
||||
|
||||
static const char *methods =
|
||||
" Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n"
|
||||
" inline int graphemeBreakClass(QChar ch)\n"
|
||||
" { return graphemeBreakClass(ch.unicode()); }\n"
|
||||
"\n"
|
||||
" Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n"
|
||||
" inline int wordBreakClass(QChar ch)\n"
|
||||
" { return wordBreakClass(ch.unicode()); }\n"
|
||||
"\n"
|
||||
" Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n"
|
||||
" inline int sentenceBreakClass(QChar ch)\n"
|
||||
" { return sentenceBreakClass(ch.unicode()); }\n"
|
||||
"\n"
|
||||
" Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
|
||||
" inline int lineBreakClass(QChar ch)\n"
|
||||
" { return lineBreakClass(ch.unicode()); }\n"
|
||||
@ -405,6 +417,18 @@ static const char *methods =
|
||||
" inline int script(QChar ch)\n"
|
||||
" { return script(ch.unicode()); }\n\n";
|
||||
|
||||
static const char *generated_methods =
|
||||
" inline bool isNonCharacter(uint ucs4)\n"
|
||||
" {\n"
|
||||
" // Noncharacter_Code_Point:\n"
|
||||
" // Unicode has a couple of \"non-characters\" that one can use internally,\n"
|
||||
" // but are not allowed to be used for text interchange.\n"
|
||||
" // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,\n"
|
||||
" // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF\n"
|
||||
"\n"
|
||||
" return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);\n"
|
||||
" }\n\n";
|
||||
|
||||
static const int SizeOfPropertiesStruct = 20;
|
||||
|
||||
struct PropertyFlags {
|
||||
@ -2275,7 +2299,22 @@ static QByteArray createPropertyInfo()
|
||||
" return qGetProp(ucs2);\n"
|
||||
"}\n\n";
|
||||
|
||||
out += "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
|
||||
out += "Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)\n"
|
||||
"{\n"
|
||||
" return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)\n"
|
||||
"{\n"
|
||||
" return (WordBreak)qGetProp(ucs4)->wordBreak;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)\n"
|
||||
"{\n"
|
||||
" return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
|
||||
"{\n"
|
||||
" return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
|
||||
"}\n\n";
|
||||
@ -2868,6 +2907,8 @@ int main(int, char **)
|
||||
f.write(line_break_class_string);
|
||||
f.write("\n");
|
||||
f.write(methods);
|
||||
f.write("\n");
|
||||
f.write(generated_methods);
|
||||
f.write("} // namespace QUnicodeTables\n\n"
|
||||
"QT_END_NAMESPACE\n\n"
|
||||
"#endif // QUNICODETABLES_P_H\n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user