Clean up QTextBoundaryFinder and qunicodetools

Make QTextBoundaryFinder (QTBF) ready for Qt 6 by using qsizetype in the
API and QStringView where it makes sense.

Change the exported API of qunicodetools to use QStringView as
well and use char16_t internally.

Change-Id: I853537bcabf40546a8e60fdf2ee7d751bc371761
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Lars Knoll 2020-08-27 09:52:26 +02:00
parent fae4f80ecc
commit 0ae5b8af9c
6 changed files with 187 additions and 200 deletions

View File

@ -43,18 +43,10 @@
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
class QTextBoundaryFinderPrivate static void init(QTextBoundaryFinder::BoundaryType type, QStringView str, QCharAttributes *attributes)
{ {
public:
QCharAttributes attributes[1];
};
static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, QCharAttributes *attributes)
{
const ushort *string = reinterpret_cast<const ushort *>(chars);
QUnicodeTools::ScriptItemArray scriptItems; QUnicodeTools::ScriptItemArray scriptItems;
QUnicodeTools::initScripts(string, length, &scriptItems); QUnicodeTools::initScripts(str, &scriptItems);
QUnicodeTools::CharAttributeOptions options; QUnicodeTools::CharAttributeOptions options;
switch (type) { switch (type) {
@ -64,7 +56,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break; case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break;
default: break; default: break;
} }
QUnicodeTools::initCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options); QUnicodeTools::initCharAttributes(str, scriptItems.data(), scriptItems.count(), attributes, options);
} }
/*! /*!
@ -145,11 +137,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
Constructs an invalid QTextBoundaryFinder object. Constructs an invalid QTextBoundaryFinder object.
*/ */
QTextBoundaryFinder::QTextBoundaryFinder() QTextBoundaryFinder::QTextBoundaryFinder()
: t(Grapheme) : freeBuffer(true)
, chars(nullptr)
, length(0)
, freePrivate(true)
, d(nullptr)
{ {
} }
@ -159,17 +147,15 @@ QTextBoundaryFinder::QTextBoundaryFinder()
QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other) QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
: t(other.t) : t(other.t)
, s(other.s) , s(other.s)
, chars(other.chars) , sv(other.sv)
, length(other.length)
, pos(other.pos) , pos(other.pos)
, freePrivate(true) , freeBuffer(true)
, d(nullptr)
{ {
if (other.d) { if (other.attributes) {
Q_ASSERT(length > 0); Q_ASSERT(sv.size() > 0);
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d); Q_CHECK_PTR(attributes);
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
} }
} }
@ -181,27 +167,26 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
if (&other == this) if (&other == this)
return *this; return *this;
if (other.d) { if (other.attributes) {
Q_ASSERT(other.length > 0); Q_ASSERT(other.sv.size() > 0);
uint newCapacity = (other.length + 1) * sizeof(QCharAttributes); size_t newCapacity = (size_t(other.sv.size()) + 1) * sizeof(QCharAttributes);
QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) realloc(freePrivate ? d : nullptr, newCapacity); QCharAttributes *newD = (QCharAttributes *) realloc(freeBuffer ? attributes : nullptr, newCapacity);
Q_CHECK_PTR(newD); Q_CHECK_PTR(newD);
freePrivate = true; freeBuffer = true;
d = newD; attributes = newD;
} }
t = other.t; t = other.t;
s = other.s; s = other.s;
chars = other.chars; sv = other.sv;
length = other.length;
pos = other.pos; pos = other.pos;
if (other.d) { if (other.attributes) {
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
} else { } else {
if (freePrivate) if (freeBuffer)
free(d); free(attributes);
d = nullptr; attributes = nullptr;
} }
return *this; return *this;
@ -213,8 +198,8 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
QTextBoundaryFinder::~QTextBoundaryFinder() QTextBoundaryFinder::~QTextBoundaryFinder()
{ {
Q_UNUSED(unused); Q_UNUSED(unused);
if (freePrivate) if (freeBuffer)
free(d); free(attributes);
} }
/*! /*!
@ -223,22 +208,28 @@ QTextBoundaryFinder::~QTextBoundaryFinder()
QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string) QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
: t(type) : t(type)
, s(string) , s(string)
, chars(string.unicode()) , sv(s)
, length(string.length())
, pos(0) , pos(0)
, freePrivate(true) , freeBuffer(true)
, d(nullptr) , attributes(nullptr)
{ {
if (length > 0) { if (sv.size() > 0) {
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d); Q_CHECK_PTR(attributes);
init(t, chars, length, d->attributes); init(t, sv, attributes);
} }
} }
/*! /*!
Creates a QTextBoundaryFinder object of \a type operating on \a chars \fn QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer, qsizetype bufferSize)
with \a length. \overload
The same as QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize).
*/
/*!
Creates a QTextBoundaryFinder object of \a type operating on \a string.
\since 6.0
\a buffer is an optional working buffer of size \a bufferSize you can pass to \a buffer is an optional working buffer of size \a bufferSize you can pass to
the QTextBoundaryFinder. If the buffer is large enough to hold the working the QTextBoundaryFinder. If the buffer is large enough to hold the working
@ -250,25 +241,22 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
as long as the QTextBoundaryFinder object stays alive. The same applies to as long as the QTextBoundaryFinder object stays alive. The same applies to
\a buffer. \a buffer.
*/ */
QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize) QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, QStringView string, unsigned char *buffer, qsizetype bufferSize)
: t(type) : t(type)
, chars(chars) , sv(string)
, length(length)
, pos(0) , pos(0)
, freePrivate(true) , freeBuffer(true)
, d(nullptr) , attributes(nullptr)
{ {
if (!chars) { if (!sv.isEmpty()) {
length = 0; if (buffer && (uint)bufferSize >= (sv.size() + 1) * sizeof(QCharAttributes)) {
} else if (length > 0) { attributes = reinterpret_cast<QCharAttributes *>(buffer);
if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) { freeBuffer = false;
d = (QTextBoundaryFinderPrivate *)buffer;
freePrivate = false;
} else { } else {
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d); Q_CHECK_PTR(attributes);
} }
init(t, chars, length, d->attributes); init(t, sv, attributes);
} }
} }
@ -289,7 +277,7 @@ void QTextBoundaryFinder::toStart()
*/ */
void QTextBoundaryFinder::toEnd() void QTextBoundaryFinder::toEnd()
{ {
pos = length; pos = sv.size();
} }
/*! /*!
@ -300,7 +288,7 @@ void QTextBoundaryFinder::toEnd()
\sa setPosition() \sa setPosition()
*/ */
int QTextBoundaryFinder::position() const qsizetype QTextBoundaryFinder::position() const
{ {
return pos; return pos;
} }
@ -314,9 +302,9 @@ int QTextBoundaryFinder::position() const
\sa position() \sa position()
*/ */
void QTextBoundaryFinder::setPosition(int position) void QTextBoundaryFinder::setPosition(qsizetype position)
{ {
pos = qBound(0, position, length); pos = qBound(0, position, sv.size());
} }
/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const /*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
@ -335,9 +323,9 @@ void QTextBoundaryFinder::setPosition(int position)
*/ */
QString QTextBoundaryFinder::string() const QString QTextBoundaryFinder::string() const
{ {
if (chars == s.unicode() && length == s.length()) if (sv.data() == s.unicode() && sv.size() == s.size())
return s; return s;
return QString(chars, length); return sv.toString();
} }
@ -346,9 +334,9 @@ QString QTextBoundaryFinder::string() const
Returns -1 if there is no next boundary. Returns -1 if there is no next boundary.
*/ */
int QTextBoundaryFinder::toNextBoundary() qsizetype QTextBoundaryFinder::toNextBoundary()
{ {
if (!d || pos < 0 || pos >= length) { if (!attributes || pos < 0 || pos >= sv.size()) {
pos = -1; pos = -1;
return pos; return pos;
} }
@ -356,19 +344,19 @@ int QTextBoundaryFinder::toNextBoundary()
++pos; ++pos;
switch(t) { switch(t) {
case Grapheme: case Grapheme:
while (pos < length && !d->attributes[pos].graphemeBoundary) while (pos < sv.size() && !attributes[pos].graphemeBoundary)
++pos; ++pos;
break; break;
case Word: case Word:
while (pos < length && !d->attributes[pos].wordBreak) while (pos < sv.size() && !attributes[pos].wordBreak)
++pos; ++pos;
break; break;
case Sentence: case Sentence:
while (pos < length && !d->attributes[pos].sentenceBoundary) while (pos < sv.size() && !attributes[pos].sentenceBoundary)
++pos; ++pos;
break; break;
case Line: case Line:
while (pos < length && !d->attributes[pos].lineBreak) while (pos < sv.size() && !attributes[pos].lineBreak)
++pos; ++pos;
break; break;
} }
@ -381,9 +369,9 @@ int QTextBoundaryFinder::toNextBoundary()
Returns -1 if there is no previous boundary. Returns -1 if there is no previous boundary.
*/ */
int QTextBoundaryFinder::toPreviousBoundary() qsizetype QTextBoundaryFinder::toPreviousBoundary()
{ {
if (!d || pos <= 0 || pos > length) { if (!attributes || pos <= 0 || pos > sv.size()) {
pos = -1; pos = -1;
return pos; return pos;
} }
@ -391,19 +379,19 @@ int QTextBoundaryFinder::toPreviousBoundary()
--pos; --pos;
switch(t) { switch(t) {
case Grapheme: case Grapheme:
while (pos > 0 && !d->attributes[pos].graphemeBoundary) while (pos > 0 && !attributes[pos].graphemeBoundary)
--pos; --pos;
break; break;
case Word: case Word:
while (pos > 0 && !d->attributes[pos].wordBreak) while (pos > 0 && !attributes[pos].wordBreak)
--pos; --pos;
break; break;
case Sentence: case Sentence:
while (pos > 0 && !d->attributes[pos].sentenceBoundary) while (pos > 0 && !attributes[pos].sentenceBoundary)
--pos; --pos;
break; break;
case Line: case Line:
while (pos > 0 && !d->attributes[pos].lineBreak) while (pos > 0 && !attributes[pos].lineBreak)
--pos; --pos;
break; break;
} }
@ -416,19 +404,19 @@ int QTextBoundaryFinder::toPreviousBoundary()
*/ */
bool QTextBoundaryFinder::isAtBoundary() const bool QTextBoundaryFinder::isAtBoundary() const
{ {
if (!d || pos < 0 || pos > length) if (!attributes || pos < 0 || pos > sv.size())
return false; return false;
switch(t) { switch(t) {
case Grapheme: case Grapheme:
return d->attributes[pos].graphemeBoundary; return attributes[pos].graphemeBoundary;
case Word: case Word:
return d->attributes[pos].wordBreak; return attributes[pos].wordBreak;
case Sentence: case Sentence:
return d->attributes[pos].sentenceBoundary; return attributes[pos].sentenceBoundary;
case Line: case Line:
// ### TR#14 LB2 prohibits break at sot // ### TR#14 LB2 prohibits break at sot
return d->attributes[pos].lineBreak || pos == 0; return attributes[pos].lineBreak || pos == 0;
} }
return false; return false;
} }
@ -439,17 +427,17 @@ bool QTextBoundaryFinder::isAtBoundary() const
QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
{ {
BoundaryReasons reasons = NotAtBoundary; BoundaryReasons reasons = NotAtBoundary;
if (!d || pos < 0 || pos > length) if (!attributes || pos < 0 || pos > sv.size())
return reasons; return reasons;
const QCharAttributes attr = d->attributes[pos]; const QCharAttributes attr = attributes[pos];
switch (t) { switch (t) {
case Grapheme: case Grapheme:
if (attr.graphemeBoundary) { if (attr.graphemeBoundary) {
reasons |= BreakOpportunity | StartOfItem | EndOfItem; reasons |= BreakOpportunity | StartOfItem | EndOfItem;
if (pos == 0) if (pos == 0)
reasons &= (~EndOfItem); reasons &= (~EndOfItem);
else if (pos == length) else if (pos == sv.size())
reasons &= (~StartOfItem); reasons &= (~StartOfItem);
} }
break; break;
@ -467,7 +455,7 @@ QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() cons
reasons |= BreakOpportunity | StartOfItem | EndOfItem; reasons |= BreakOpportunity | StartOfItem | EndOfItem;
if (pos == 0) if (pos == 0)
reasons &= (~EndOfItem); reasons &= (~EndOfItem);
else if (pos == length) else if (pos == sv.size())
reasons &= (~StartOfItem); reasons &= (~StartOfItem);
} }
break; break;
@ -479,9 +467,9 @@ QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() cons
reasons |= MandatoryBreak | StartOfItem | EndOfItem; reasons |= MandatoryBreak | StartOfItem | EndOfItem;
if (pos == 0) if (pos == 0)
reasons &= (~EndOfItem); reasons &= (~EndOfItem);
else if (pos == length) else if (pos == sv.size())
reasons &= (~StartOfItem); reasons &= (~StartOfItem);
} else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) { } else if (pos > 0 && sv[pos - 1].unicode() == QChar::SoftHyphen) {
reasons |= SoftHyphen; reasons |= SoftHyphen;
} }
} }

View File

@ -46,7 +46,7 @@
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
class QTextBoundaryFinderPrivate; struct QCharAttributes;
class Q_CORE_EXPORT QTextBoundaryFinder class Q_CORE_EXPORT QTextBoundaryFinder
{ {
@ -74,33 +74,35 @@ public:
Q_DECLARE_FLAGS( BoundaryReasons, BoundaryReason ) Q_DECLARE_FLAGS( BoundaryReasons, BoundaryReason )
QTextBoundaryFinder(BoundaryType type, const QString &string); QTextBoundaryFinder(BoundaryType type, const QString &string);
QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer = nullptr, int bufferSize = 0); QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer = nullptr, qsizetype bufferSize = 0)
: QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize)
{}
QTextBoundaryFinder(BoundaryType type, QStringView str, unsigned char *buffer = nullptr, qsizetype bufferSize = 0);
inline bool isValid() const { return d; } inline bool isValid() const { return attributes; }
inline BoundaryType type() const { return t; } inline BoundaryType type() const { return t; }
QString string() const; QString string() const;
void toStart(); void toStart();
void toEnd(); void toEnd();
int position() const; qsizetype position() const;
void setPosition(int position); void setPosition(qsizetype position);
int toNextBoundary(); qsizetype toNextBoundary();
int toPreviousBoundary(); qsizetype toPreviousBoundary();
bool isAtBoundary() const; bool isAtBoundary() const;
BoundaryReasons boundaryReasons() const; BoundaryReasons boundaryReasons() const;
private: private:
BoundaryType t; BoundaryType t = Grapheme;
QString s; QString s;
const QChar *chars; QStringView sv;
int length; qsizetype pos;
int pos; uint freeBuffer : 1;
uint freePrivate : 1;
uint unused : 31; uint unused : 31;
QTextBoundaryFinderPrivate *d; QCharAttributes *attributes = nullptr;
}; };
Q_DECLARE_OPERATORS_FOR_FLAGS(QTextBoundaryFinder::BoundaryReasons) Q_DECLARE_OPERATORS_FOR_FLAGS(QTextBoundaryFinder::BoundaryReasons)

View File

@ -97,12 +97,12 @@ static const State breakTable[QUnicodeTables::NumGraphemeBreakClasses][QUnicodeT
} // namespace GB } // namespace GB
static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes *attributes) static void getGraphemeBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{ {
QUnicodeTables::GraphemeBreakClass lcls = QUnicodeTables::GraphemeBreak_LF; // to meet GB1 QUnicodeTables::GraphemeBreakClass lcls = QUnicodeTables::GraphemeBreak_LF; // to meet GB1
GB::State state = GB::Break; // only required to track some of the rules GB::State state = GB::Break; // only required to track some of the rules
for (quint32 i = 0; i != len; ++i) { for (qsizetype i = 0; i != len; ++i) {
quint32 pos = i; qsizetype pos = i;
char32_t ucs4 = string[i]; char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1]; ushort low = string[i + 1];
@ -191,15 +191,15 @@ static const uchar breakTable[QUnicodeTables::NumWordBreakClasses][QUnicodeTable
} // namespace WB } // namespace WB
static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes) static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{ {
enum WordType { enum WordType {
WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
} currentWordType = WordTypeNone; } currentWordType = WordTypeNone;
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1 QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
for (quint32 i = 0; i != len; ++i) { for (qsizetype i = 0; i != len; ++i) {
quint32 pos = i; qsizetype pos = i;
char32_t ucs4 = string[i]; char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1]; ushort low = string[i + 1];
@ -241,7 +241,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
break; break;
case WB::Lookup: case WB::Lookup:
case WB::LookupW: case WB::LookupW:
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) { for (qsizetype lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead]; ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) { if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
ushort low = string[lookahead + 1]; ushort low = string[lookahead + 1];
@ -343,11 +343,11 @@ static const uchar breakTable[BAfter + 1][QUnicodeTables::NumSentenceBreakClasse
} // namespace SB } // namespace SB
static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes *attributes) static void getSentenceBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{ {
uchar state = SB::BAfter; // to meet SB1 uchar state = SB::BAfter; // to meet SB1
for (quint32 i = 0; i != len; ++i) { for (qsizetype i = 0; i != len; ++i) {
quint32 pos = i; qsizetype pos = i;
char32_t ucs4 = string[i]; char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1]; ushort low = string[i + 1];
@ -364,7 +364,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
state = SB::breakTable[state][ncls]; state = SB::breakTable[state][ncls];
if (Q_UNLIKELY(state == SB::Lookup)) { // SB8 if (Q_UNLIKELY(state == SB::Lookup)) { // SB8
state = SB::Break; state = SB::Break;
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) { for (qsizetype lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead]; ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) { if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
ushort low = string[lookahead + 1]; ushort low = string[lookahead + 1];
@ -542,15 +542,15 @@ static const uchar breakTable[QUnicodeTables::LineBreak_SA][QUnicodeTables::Line
} // namespace LB } // namespace LB
static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options) static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options)
{ {
quint32 nestart = 0; qsizetype nestart = 0;
LB::NS::Class nelast = LB::NS::XX; LB::NS::Class nelast = LB::NS::XX;
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10 QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
QUnicodeTables::LineBreakClass cls = lcls; QUnicodeTables::LineBreakClass cls = lcls;
for (quint32 i = 0; i != len; ++i) { for (qsizetype i = 0; i != len; ++i) {
quint32 pos = i; qsizetype pos = i;
char32_t ucs4 = string[i]; char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1]; ushort low = string[i + 1];
@ -632,7 +632,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
switch (LB::NS::actionTable[nelast][necur]) { switch (LB::NS::actionTable[nelast][necur]) {
case LB::NS::Break: case LB::NS::Break:
// do not change breaks before and after the expression // do not change breaks before and after the expression
for (quint32 j = nestart + 1; j < pos; ++j) for (qsizetype j = nestart + 1; j < pos; ++j)
attributes[j].lineBreak = false; attributes[j].lineBreak = false;
Q_FALLTHROUGH(); Q_FALLTHROUGH();
case LB::NS::None: case LB::NS::None:
@ -697,7 +697,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
if (Q_UNLIKELY(LB::NS::actionTable[nelast][LB::NS::XX] == LB::NS::Break)) { if (Q_UNLIKELY(LB::NS::actionTable[nelast][LB::NS::XX] == LB::NS::Break)) {
// LB25: do not break lines inside numbers // LB25: do not break lines inside numbers
for (quint32 j = nestart + 1; j < len; ++j) for (qsizetype j = nestart + 1; j < len; ++j)
attributes[j].lineBreak = false; attributes[j].lineBreak = false;
} }
@ -706,9 +706,9 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
} }
static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *attributes) static void getWhiteSpaces(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{ {
for (quint32 i = 0; i != len; ++i) { for (qsizetype i = 0; i != len; ++i) {
uint ucs4 = string[i]; uint ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1]; ushort low = string[i + 1];
@ -725,7 +725,7 @@ static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *a
namespace Tailored { namespace Tailored {
using CharAttributeFunction = void (*)(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes); using CharAttributeFunction = void (*)(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes);
enum Form { enum Form {
@ -1181,15 +1181,15 @@ static inline Form form(unsigned short uc) {
We return syllable boundaries on invalid combinations aswell We return syllable boundaries on invalid combinations aswell
*/ */
static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int start, int end, bool *invalid) static qsizetype indic_nextSyllableBoundary(QChar::Script script, const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{ {
*invalid = false; *invalid = false;
IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end); IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", int(start), int(end));
const ushort *uc = s+start; const char16_t *uc = s+start;
int pos = 0; qsizetype pos = 0;
Form state = form(uc[pos]); Form state = form(uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]); IDEBUG("state[%d]=%d (uc=%4x)", int(pos), state, uc[pos]);
pos++; pos++;
if (state != Consonant && state != IndependentVowel) { if (state != Consonant && state != IndependentVowel) {
@ -1200,7 +1200,7 @@ static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int
while (pos < end - start) { while (pos < end - start) {
Form newState = form(uc[pos]); Form newState = form(uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]); IDEBUG("state[%d]=%d (uc=%4x)", int(pos), newState, uc[pos]);
switch (newState) { switch (newState) {
case Control: case Control:
newState = state; newState = state;
@ -1285,15 +1285,15 @@ static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int
return pos+start; return pos+start;
} }
static void indicAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes) static void indicAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{ {
int end = from + len; qsizetype end = from + len;
const ushort *uc = text + from; const char16_t *uc = text + from;
attributes += from; attributes += from;
uint i = 0; qsizetype i = 0;
while (i < len) { while (i < len) {
bool invalid; bool invalid;
uint boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from; qsizetype boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true; attributes[i].graphemeBoundary = true;
if (boundary > len-1) boundary = len; if (boundary > len-1) boundary = len;
@ -1339,9 +1339,9 @@ static int init_libthai() {
return 0; return 0;
} }
static void to_tis620(const ushort *string, uint len, char *cstr) static void to_tis620(const char16_t *string, qsizetype len, char *cstr)
{ {
uint i; qsizetype i;
unsigned char *result = (unsigned char *)cstr; unsigned char *result = (unsigned char *)cstr;
for (i = 0; i < len; ++i) { for (i = 0; i < len; ++i) {
@ -1359,14 +1359,14 @@ static void to_tis620(const ushort *string, uint len, char *cstr)
/* /*
* Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI. * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
*/ */
static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes *attributes) static void thaiAssignAttributes(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{ {
char s[128]; char s[128];
char *cstr = s; char *cstr = s;
int *break_positions = nullptr; int *break_positions = nullptr;
int brp[128]; int brp[128];
int brp_size = 0; int brp_size = 0;
uint numbreaks, i, j, cell_length; qsizetype numbreaks, i, j, cell_length;
struct thcell_t tis_cell; struct thcell_t tis_cell;
if (!init_libthai()) if (!init_libthai())
@ -1432,10 +1432,10 @@ static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes
free(cstr); free(cstr);
} }
static void thaiAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes) static void thaiAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{ {
assert(script == QChar::Script_Thai); assert(script == QChar::Script_Thai);
const ushort *uc = text + from; const char16_t *uc = text + from;
attributes += from; attributes += from;
Q_UNUSED(script); Q_UNUSED(script);
thaiAssignAttributes(uc, len, attributes); thaiAssignAttributes(uc, len, attributes);
@ -1505,11 +1505,11 @@ static const unsigned char tibetanForm[0x80] = {
#define tibetan_form(c) \ #define tibetan_form(c) \
((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther) ((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther)
static int tibetan_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid) static qsizetype tibetan_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{ {
const ushort *uc = s + start; const char16_t *uc = s + start;
int pos = 0; qsizetype pos = 0;
TibetanForm state = tibetan_form(*uc); TibetanForm state = tibetan_form(*uc);
/* qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);*/ /* qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);*/
@ -1549,16 +1549,16 @@ finish:
return start+pos; return start+pos;
} }
static void tibetanAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes) static void tibetanAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{ {
int end = from + len; qsizetype end = from + len;
const ushort *uc = text + from; const char16_t *uc = text + from;
uint i = 0; qsizetype i = 0;
Q_UNUSED(script); Q_UNUSED(script);
attributes += from; attributes += from;
while (i < len) { while (i < len) {
bool invalid; bool invalid;
uint boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from; qsizetype boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true; attributes[i].graphemeBoundary = true;
@ -1736,11 +1736,11 @@ static const signed char mymrStateTable[][Mymr_CC_COUNT] =
// calculate, using the state table, which one is the last character of the syllable // calculate, using the state table, which one is the last character of the syllable
// that starts in the starting position. // that starts in the starting position.
*/ */
static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid) static qsizetype myanmar_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{ {
const ushort *uc = s + start; const char16_t *uc = s + start;
int state = 0; int state = 0;
int pos = start; qsizetype pos = start;
*invalid = false; *invalid = false;
while (pos < end) { while (pos < end) {
@ -1749,7 +1749,7 @@ static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, boo
if (pos == start) if (pos == start)
*invalid = (bool)(charClass & Mymr_CF_DOTTED_CIRCLE); *invalid = (bool)(charClass & Mymr_CF_DOTTED_CIRCLE);
MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, charClass, *uc); MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", int(pos - start), state, charClass, *uc);
if (state < 0) { if (state < 0) {
if (state < -1) if (state < -1)
@ -1762,16 +1762,16 @@ static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, boo
return pos; return pos;
} }
static void myanmarAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes) static void myanmarAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{ {
int end = from + len; qsizetype end = from + len;
const ushort *uc = text + from; const char16_t *uc = text + from;
uint i = 0; qsizetype i = 0;
Q_UNUSED(script); Q_UNUSED(script);
attributes += from; attributes += from;
while (i < len) { while (i < len) {
bool invalid; bool invalid;
uint boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from; qsizetype boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true; attributes[i].graphemeBoundary = true;
attributes[i].lineBreak = true; attributes[i].lineBreak = true;
@ -2071,11 +2071,11 @@ static const signed char khmerStateTable[][CC_COUNT] =
// calculate, using the state table, which one is the last character of the syllable // calculate, using the state table, which one is the last character of the syllable
// that starts in the starting position. // that starts in the starting position.
*/ */
static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid) static qsizetype khmer_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{ {
const ushort *uc = s + start; const char16_t *uc = s + start;
int state = 0; int state = 0;
int pos = start; qsizetype pos = start;
*invalid = false; *invalid = false;
while (pos < end) { while (pos < end) {
@ -2085,7 +2085,7 @@ static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool
} }
state = khmerStateTable[state][charClass & CF_CLASS_MASK]; state = khmerStateTable[state][charClass & CF_CLASS_MASK];
KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state, KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", int(pos - start), state,
charClass, *uc ); charClass, *uc );
if (state < 0) { if (state < 0) {
@ -2097,16 +2097,16 @@ static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool
return pos; return pos;
} }
static void khmerAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes) static void khmerAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{ {
int end = from + len; qsizetype end = from + len;
const ushort *uc = text + from; const char16_t *uc = text + from;
uint i = 0; qsizetype i = 0;
Q_UNUSED(script); Q_UNUSED(script);
attributes += from; attributes += from;
while ( i < len ) { while ( i < len ) {
bool invalid; bool invalid;
uint boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from; qsizetype boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
attributes[i].graphemeBoundary = true; attributes[i].graphemeBoundary = true;
@ -2191,52 +2191,52 @@ const CharAttributeFunction charAttributeFunction[] = {
khmerAttributes khmerAttributes
}; };
// Walks the script items and, for each one, calls the per-script attribute
// function from charAttributeFunction[] on that item's range of the string.
// Layout note: every line of this listing shows the text before the change
// followed by the text after it (ushort/uint/int -> char16_t/qsizetype).
static void getCharAttributes(const ushort *string, uint stringLength, static void getCharAttributes(const char16_t *string, qsizetype stringLength,
const QUnicodeTools::ScriptItem *items, uint numItems, const QUnicodeTools::ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes) QCharAttributes *attributes)
{ {
// Empty string: nothing to do.
if (stringLength == 0) if (stringLength == 0)
return; return;
for (uint i = 0; i < numItems; ++i) { for (qsizetype i = 0; i < numItems; ++i) {
QChar::Script script = items[i].script; QChar::Script script = items[i].script;
// Scripts past Script_Khmer are looked up as Script_Common instead
// (presumably the table has no entries beyond Khmer; confirm against the table definition).
if (script > QChar::Script_Khmer) if (script > QChar::Script_Khmer)
script = QChar::Script_Common; script = QChar::Script_Common;
CharAttributeFunction attributeFunction = charAttributeFunction[script]; CharAttributeFunction attributeFunction = charAttributeFunction[script];
// A null table entry means this script gets no extra processing.
if (!attributeFunction) if (!attributeFunction)
continue; continue;
// An item ends where the next item starts; the last item ends at stringLength.
int end = i < numItems - 1 ? items[i + 1].position : stringLength; qsizetype end = i < numItems - 1 ? items[i + 1].position : stringLength;
// The handler receives the item's start position and its length (end - start).
attributeFunction(script, string, items[i].position, end - items[i].position, attributes); attributeFunction(script, string, items[i].position, end - items[i].position, attributes);
} }
} }
} }
// Fills `attributes` for `string` according to `options`, then optionally
// runs the script-specific pass (Tailored::getCharAttributes) over `items`.
// Layout note: every line of this listing shows the text before the change
// followed by the text after it; the change replaces the (pointer, length)
// pair with a single QStringView parameter.
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, Q_CORE_EXPORT void initCharAttributes(QStringView string,
const ScriptItem *items, int numItems, const ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes, CharAttributeOptions options) QCharAttributes *attributes, CharAttributeOptions options)
{ {
// Empty input: leave `attributes` untouched.
if (length <= 0) if (string.size() <= 0)
return; return;
// Unless the caller asked to keep existing values, zero one entry more
// than the number of characters in the string.
if (!(options & DontClearAttributes)) if (!(options & DontClearAttributes))
::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes)); ::memset(attributes, 0, (string.size() + 1) * sizeof(QCharAttributes));
// Each requested attribute class is computed by its own helper, all
// writing into the same `attributes` buffer.
if (options & GraphemeBreaks) if (options & GraphemeBreaks)
getGraphemeBreaks(string, length, attributes); getGraphemeBreaks(string.utf16(), string.size(), attributes);
if (options & WordBreaks) if (options & WordBreaks)
getWordBreaks(string, length, attributes); getWordBreaks(string.utf16(), string.size(), attributes);
if (options & SentenceBreaks) if (options & SentenceBreaks)
getSentenceBreaks(string, length, attributes); getSentenceBreaks(string.utf16(), string.size(), attributes);
// getLineBreaks is the only helper that also receives `options`.
if (options & LineBreaks) if (options & LineBreaks)
getLineBreaks(string, length, attributes, options); getLineBreaks(string.utf16(), string.size(), attributes, options);
if (options & WhiteSpaces) if (options & WhiteSpaces)
getWhiteSpaces(string, length, attributes); getWhiteSpaces(string.utf16(), string.size(), attributes);
// The script-specific pass runs only when the default-algorithm-only flag
// is off and the caller supplied at least one script item.
if (!qt_initcharattributes_default_algorithm_only) { if (!qt_initcharattributes_default_algorithm_only) {
if (!items || numItems <= 0) if (!items || numItems <= 0)
return; return;
Tailored::getCharAttributes(string, length, items, numItems, attributes); Tailored::getCharAttributes(string.utf16(), string.size(), items, numItems, attributes);
} }
} }
@ -2247,16 +2247,16 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
// //
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray *scripts) Q_CORE_EXPORT void initScripts(QStringView string, ScriptItemArray *scripts)
{ {
int sor = 0; qsizetype sor = 0;
int eor = 0; qsizetype eor = 0;
QChar::Script script = QChar::Script_Common; QChar::Script script = QChar::Script_Common;
for (int i = 0; i < length; ++i, eor = i) { for (qsizetype i = 0; i < string.size(); ++i, eor = i) {
char32_t ucs4 = string[i]; char32_t ucs4 = string[i].unicode();
if (QChar::isHighSurrogate(ucs4) && i + 1 < length) { if (QChar::isHighSurrogate(ucs4) && i + 1 < string.size()) {
ushort low = string[i + 1]; ushort low = string[i + 1].unicode();
if (QChar::isLowSurrogate(low)) { if (QChar::isLowSurrogate(low)) {
ucs4 = QChar::surrogateToUcs4(ucs4, low); ucs4 = QChar::surrogateToUcs4(ucs4, low);
++i; ++i;
@ -2294,7 +2294,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
} }
Q_ASSERT(script >= QChar::Script_Common); Q_ASSERT(script >= QChar::Script_Common);
Q_ASSERT(eor == length); Q_ASSERT(eor == string.size());
scripts->append(ScriptItem{sor, script}); scripts->append(ScriptItem{sor, script});
} }

View File

@ -74,7 +74,7 @@ namespace QUnicodeTools {
// Pairs a position in the string with a script.
// (Old and new text of the change are shown on each line: `position`
// changes from int to qsizetype.)
struct ScriptItem struct ScriptItem
{ {
// Offset into the string; presumably where a run of `script` starts (confirm against initScripts).
int position; qsizetype position;
QChar::Script script; QChar::Script script;
}; };
@ -97,12 +97,12 @@ enum CharAttributeOption {
Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption) Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption)
// Exported entry points. Old and new text of the change are shown on each
// line: both functions switch from (const ushort *, int length) to QStringView.
// attributes buffer has to have a length of string length + 1 // attributes buffer has to have a length of string length + 1
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, Q_CORE_EXPORT void initCharAttributes(QStringView str,
const ScriptItem *items, int numItems, const ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes, CharAttributeOptions options); QCharAttributes *attributes, CharAttributeOptions options);
// Writes its result through the `scripts` out-parameter.
Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray *scripts); Q_CORE_EXPORT void initScripts(QStringView str, ScriptItemArray *scripts);
} // namespace QUnicodeTools } // namespace QUnicodeTools

View File

@ -1808,8 +1808,7 @@ const QCharAttributes *QTextEngine::attributes() const
} }
QUnicodeTools::initCharAttributes( QUnicodeTools::initCharAttributes(
reinterpret_cast<const ushort *>(layoutData->string.constData()), layoutData->string,
layoutData->string.length(),
scriptItems.data(), scriptItems.size(), scriptItems.data(), scriptItems.size(),
reinterpret_cast<QCharAttributes *>(layoutData->memory), reinterpret_cast<QCharAttributes *>(layoutData->memory),
QUnicodeTools::CharAttributeOptions(QUnicodeTools::GraphemeBreaks QUnicodeTools::CharAttributeOptions(QUnicodeTools::GraphemeBreaks
@ -1921,7 +1920,7 @@ void QTextEngine::itemize() const
{ {
QUnicodeTools::ScriptItemArray scriptItems; QUnicodeTools::ScriptItemArray scriptItems;
QUnicodeTools::initScripts(string, length, &scriptItems); QUnicodeTools::initScripts(layoutData->string, &scriptItems);
for (int i = 0; i < scriptItems.length(); ++i) { for (int i = 0; i < scriptItems.length(); ++i) {
const auto &item = scriptItems.at(i); const auto &item = scriptItems.at(i);
int end = i < scriptItems.length() - 1 ? scriptItems.at(i + 1).position : length; int end = i < scriptItems.length() - 1 ? scriptItems.at(i + 1).position : length;

View File

@ -664,8 +664,6 @@ void tst_QTextBoundaryFinder::emptyText_data()
QTest::newRow("empty3") << finder; QTest::newRow("empty3") << finder;
QTest::newRow("empty4") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, 0, 0); QTest::newRow("empty4") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, 0, 0);
QTest::newRow("empty5") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, attrs, 11); QTest::newRow("empty5") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, attrs, 11);
QTest::newRow("invalid1") << QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, 0, 0);
QTest::newRow("invalid2") << QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, attrs, 11);
} }
void tst_QTextBoundaryFinder::emptyText() void tst_QTextBoundaryFinder::emptyText()