Clean up QTextBoundaryFinder and qunicodetools

Make QTBF ready for Qt6 by using qsizetype in the API and
QStringView where it makes sense.

Change the exported API of qunicodetools to use QStringView as
well and use char16_t internally.

Change-Id: I853537bcabf40546a8e60fdf2ee7d751bc371761
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Lars Knoll 2020-08-27 09:52:26 +02:00
parent fae4f80ecc
commit 0ae5b8af9c
6 changed files with 187 additions and 200 deletions

View File

@ -43,18 +43,10 @@
QT_BEGIN_NAMESPACE
class QTextBoundaryFinderPrivate
static void init(QTextBoundaryFinder::BoundaryType type, QStringView str, QCharAttributes *attributes)
{
public:
QCharAttributes attributes[1];
};
static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, QCharAttributes *attributes)
{
const ushort *string = reinterpret_cast<const ushort *>(chars);
QUnicodeTools::ScriptItemArray scriptItems;
QUnicodeTools::initScripts(string, length, &scriptItems);
QUnicodeTools::initScripts(str, &scriptItems);
QUnicodeTools::CharAttributeOptions options;
switch (type) {
@ -64,7 +56,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break;
default: break;
}
QUnicodeTools::initCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options);
QUnicodeTools::initCharAttributes(str, scriptItems.data(), scriptItems.count(), attributes, options);
}
/*!
@ -145,11 +137,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
Constructs an invalid QTextBoundaryFinder object.
*/
QTextBoundaryFinder::QTextBoundaryFinder()
: t(Grapheme)
, chars(nullptr)
, length(0)
, freePrivate(true)
, d(nullptr)
: freeBuffer(true)
{
}
@ -159,17 +147,15 @@ QTextBoundaryFinder::QTextBoundaryFinder()
QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
: t(other.t)
, s(other.s)
, chars(other.chars)
, length(other.length)
, sv(other.sv)
, pos(other.pos)
, freePrivate(true)
, d(nullptr)
, freeBuffer(true)
{
if (other.d) {
Q_ASSERT(length > 0);
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
if (other.attributes) {
Q_ASSERT(sv.size() > 0);
attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(attributes);
memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
}
}
@ -181,27 +167,26 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
if (&other == this)
return *this;
if (other.d) {
Q_ASSERT(other.length > 0);
uint newCapacity = (other.length + 1) * sizeof(QCharAttributes);
QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) realloc(freePrivate ? d : nullptr, newCapacity);
if (other.attributes) {
Q_ASSERT(other.sv.size() > 0);
size_t newCapacity = (size_t(other.sv.size()) + 1) * sizeof(QCharAttributes);
QCharAttributes *newD = (QCharAttributes *) realloc(freeBuffer ? attributes : nullptr, newCapacity);
Q_CHECK_PTR(newD);
freePrivate = true;
d = newD;
freeBuffer = true;
attributes = newD;
}
t = other.t;
s = other.s;
chars = other.chars;
length = other.length;
sv = other.sv;
pos = other.pos;
if (other.d) {
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
if (other.attributes) {
memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
} else {
if (freePrivate)
free(d);
d = nullptr;
if (freeBuffer)
free(attributes);
attributes = nullptr;
}
return *this;
@ -213,8 +198,8 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
QTextBoundaryFinder::~QTextBoundaryFinder()
{
Q_UNUSED(unused);
if (freePrivate)
free(d);
if (freeBuffer)
free(attributes);
}
/*!
@ -223,22 +208,28 @@ QTextBoundaryFinder::~QTextBoundaryFinder()
QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
: t(type)
, s(string)
, chars(string.unicode())
, length(string.length())
, sv(s)
, pos(0)
, freePrivate(true)
, d(nullptr)
, freeBuffer(true)
, attributes(nullptr)
{
if (length > 0) {
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
init(t, chars, length, d->attributes);
if (sv.size() > 0) {
attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(attributes);
init(t, sv, attributes);
}
}
/*!
Creates a QTextBoundaryFinder object of \a type operating on \a chars
with \a length.
\fn QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer, qsizetype bufferSize)
\overload
The same as QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize).
*/
/*!
Creates a QTextBoundaryFinder object of \a type operating on \a string.
\since 6.0
\a buffer is an optional working buffer of size \a bufferSize you can pass to
the QTextBoundaryFinder. If the buffer is large enough to hold the working
@ -250,25 +241,22 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
as long as the QTextBoundaryFinder object stays alive. The same applies to
\a buffer.
*/
QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, QStringView string, unsigned char *buffer, qsizetype bufferSize)
: t(type)
, chars(chars)
, length(length)
, sv(string)
, pos(0)
, freePrivate(true)
, d(nullptr)
, freeBuffer(true)
, attributes(nullptr)
{
if (!chars) {
length = 0;
} else if (length > 0) {
if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) {
d = (QTextBoundaryFinderPrivate *)buffer;
freePrivate = false;
if (!sv.isEmpty()) {
if (buffer && (uint)bufferSize >= (sv.size() + 1) * sizeof(QCharAttributes)) {
attributes = reinterpret_cast<QCharAttributes *>(buffer);
freeBuffer = false;
} else {
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(attributes);
}
init(t, chars, length, d->attributes);
init(t, sv, attributes);
}
}
@ -289,7 +277,7 @@ void QTextBoundaryFinder::toStart()
*/
void QTextBoundaryFinder::toEnd()
{
pos = length;
pos = sv.size();
}
/*!
@ -300,7 +288,7 @@ void QTextBoundaryFinder::toEnd()
\sa setPosition()
*/
int QTextBoundaryFinder::position() const
qsizetype QTextBoundaryFinder::position() const
{
return pos;
}
@ -314,9 +302,9 @@ int QTextBoundaryFinder::position() const
\sa position()
*/
void QTextBoundaryFinder::setPosition(int position)
void QTextBoundaryFinder::setPosition(qsizetype position)
{
pos = qBound(0, position, length);
pos = qBound(0, position, sv.size());
}
/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
@ -335,9 +323,9 @@ void QTextBoundaryFinder::setPosition(int position)
*/
QString QTextBoundaryFinder::string() const
{
if (chars == s.unicode() && length == s.length())
if (sv.data() == s.unicode() && sv.size() == s.size())
return s;
return QString(chars, length);
return sv.toString();
}
@ -346,9 +334,9 @@ QString QTextBoundaryFinder::string() const
Returns -1 if there is no next boundary.
*/
int QTextBoundaryFinder::toNextBoundary()
qsizetype QTextBoundaryFinder::toNextBoundary()
{
if (!d || pos < 0 || pos >= length) {
if (!attributes || pos < 0 || pos >= sv.size()) {
pos = -1;
return pos;
}
@ -356,19 +344,19 @@ int QTextBoundaryFinder::toNextBoundary()
++pos;
switch(t) {
case Grapheme:
while (pos < length && !d->attributes[pos].graphemeBoundary)
while (pos < sv.size() && !attributes[pos].graphemeBoundary)
++pos;
break;
case Word:
while (pos < length && !d->attributes[pos].wordBreak)
while (pos < sv.size() && !attributes[pos].wordBreak)
++pos;
break;
case Sentence:
while (pos < length && !d->attributes[pos].sentenceBoundary)
while (pos < sv.size() && !attributes[pos].sentenceBoundary)
++pos;
break;
case Line:
while (pos < length && !d->attributes[pos].lineBreak)
while (pos < sv.size() && !attributes[pos].lineBreak)
++pos;
break;
}
@ -381,9 +369,9 @@ int QTextBoundaryFinder::toNextBoundary()
Returns -1 if there is no previous boundary.
*/
int QTextBoundaryFinder::toPreviousBoundary()
qsizetype QTextBoundaryFinder::toPreviousBoundary()
{
if (!d || pos <= 0 || pos > length) {
if (!attributes || pos <= 0 || pos > sv.size()) {
pos = -1;
return pos;
}
@ -391,19 +379,19 @@ int QTextBoundaryFinder::toPreviousBoundary()
--pos;
switch(t) {
case Grapheme:
while (pos > 0 && !d->attributes[pos].graphemeBoundary)
while (pos > 0 && !attributes[pos].graphemeBoundary)
--pos;
break;
case Word:
while (pos > 0 && !d->attributes[pos].wordBreak)
while (pos > 0 && !attributes[pos].wordBreak)
--pos;
break;
case Sentence:
while (pos > 0 && !d->attributes[pos].sentenceBoundary)
while (pos > 0 && !attributes[pos].sentenceBoundary)
--pos;
break;
case Line:
while (pos > 0 && !d->attributes[pos].lineBreak)
while (pos > 0 && !attributes[pos].lineBreak)
--pos;
break;
}
@ -416,19 +404,19 @@ int QTextBoundaryFinder::toPreviousBoundary()
*/
bool QTextBoundaryFinder::isAtBoundary() const
{
if (!d || pos < 0 || pos > length)
if (!attributes || pos < 0 || pos > sv.size())
return false;
switch(t) {
case Grapheme:
return d->attributes[pos].graphemeBoundary;
return attributes[pos].graphemeBoundary;
case Word:
return d->attributes[pos].wordBreak;
return attributes[pos].wordBreak;
case Sentence:
return d->attributes[pos].sentenceBoundary;
return attributes[pos].sentenceBoundary;
case Line:
// ### TR#14 LB2 prohibits break at sot
return d->attributes[pos].lineBreak || pos == 0;
return attributes[pos].lineBreak || pos == 0;
}
return false;
}
@ -439,17 +427,17 @@ bool QTextBoundaryFinder::isAtBoundary() const
QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
{
BoundaryReasons reasons = NotAtBoundary;
if (!d || pos < 0 || pos > length)
if (!attributes || pos < 0 || pos > sv.size())
return reasons;
const QCharAttributes attr = d->attributes[pos];
const QCharAttributes attr = attributes[pos];
switch (t) {
case Grapheme:
if (attr.graphemeBoundary) {
reasons |= BreakOpportunity | StartOfItem | EndOfItem;
if (pos == 0)
reasons &= (~EndOfItem);
else if (pos == length)
else if (pos == sv.size())
reasons &= (~StartOfItem);
}
break;
@ -467,7 +455,7 @@ QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() cons
reasons |= BreakOpportunity | StartOfItem | EndOfItem;
if (pos == 0)
reasons &= (~EndOfItem);
else if (pos == length)
else if (pos == sv.size())
reasons &= (~StartOfItem);
}
break;
@ -479,9 +467,9 @@ QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() cons
reasons |= MandatoryBreak | StartOfItem | EndOfItem;
if (pos == 0)
reasons &= (~EndOfItem);
else if (pos == length)
else if (pos == sv.size())
reasons &= (~StartOfItem);
} else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) {
} else if (pos > 0 && sv[pos - 1].unicode() == QChar::SoftHyphen) {
reasons |= SoftHyphen;
}
}

View File

@ -46,7 +46,7 @@
QT_BEGIN_NAMESPACE
class QTextBoundaryFinderPrivate;
struct QCharAttributes;
class Q_CORE_EXPORT QTextBoundaryFinder
{
@ -74,33 +74,35 @@ public:
Q_DECLARE_FLAGS( BoundaryReasons, BoundaryReason )
QTextBoundaryFinder(BoundaryType type, const QString &string);
QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer = nullptr, int bufferSize = 0);
QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer = nullptr, qsizetype bufferSize = 0)
: QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize)
{}
QTextBoundaryFinder(BoundaryType type, QStringView str, unsigned char *buffer = nullptr, qsizetype bufferSize = 0);
inline bool isValid() const { return d; }
inline bool isValid() const { return attributes; }
inline BoundaryType type() const { return t; }
QString string() const;
void toStart();
void toEnd();
int position() const;
void setPosition(int position);
qsizetype position() const;
void setPosition(qsizetype position);
int toNextBoundary();
int toPreviousBoundary();
qsizetype toNextBoundary();
qsizetype toPreviousBoundary();
bool isAtBoundary() const;
BoundaryReasons boundaryReasons() const;
private:
BoundaryType t;
BoundaryType t = Grapheme;
QString s;
const QChar *chars;
int length;
int pos;
uint freePrivate : 1;
QStringView sv;
qsizetype pos;
uint freeBuffer : 1;
uint unused : 31;
QTextBoundaryFinderPrivate *d;
QCharAttributes *attributes = nullptr;
};
Q_DECLARE_OPERATORS_FOR_FLAGS(QTextBoundaryFinder::BoundaryReasons)

View File

@ -97,12 +97,12 @@ static const State breakTable[QUnicodeTables::NumGraphemeBreakClasses][QUnicodeT
} // namespace GB
static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
static void getGraphemeBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
QUnicodeTables::GraphemeBreakClass lcls = QUnicodeTables::GraphemeBreak_LF; // to meet GB1
GB::State state = GB::Break; // only required to track some of the rules
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1];
@ -191,15 +191,15 @@ static const uchar breakTable[QUnicodeTables::NumWordBreakClasses][QUnicodeTable
} // namespace WB
static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
enum WordType {
WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
} currentWordType = WordTypeNone;
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1];
@ -241,7 +241,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
break;
case WB::Lookup:
case WB::LookupW:
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
for (qsizetype lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
ushort low = string[lookahead + 1];
@ -343,11 +343,11 @@ static const uchar breakTable[BAfter + 1][QUnicodeTables::NumSentenceBreakClasse
} // namespace SB
static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
static void getSentenceBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
uchar state = SB::BAfter; // to meet SB1
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1];
@ -364,7 +364,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
state = SB::breakTable[state][ncls];
if (Q_UNLIKELY(state == SB::Lookup)) { // SB8
state = SB::Break;
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
for (qsizetype lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
ushort low = string[lookahead + 1];
@ -542,15 +542,15 @@ static const uchar breakTable[QUnicodeTables::LineBreak_SA][QUnicodeTables::Line
} // namespace LB
static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options)
static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options)
{
quint32 nestart = 0;
qsizetype nestart = 0;
LB::NS::Class nelast = LB::NS::XX;
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
QUnicodeTables::LineBreakClass cls = lcls;
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1];
@ -632,7 +632,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
switch (LB::NS::actionTable[nelast][necur]) {
case LB::NS::Break:
// do not change breaks before and after the expression
for (quint32 j = nestart + 1; j < pos; ++j)
for (qsizetype j = nestart + 1; j < pos; ++j)
attributes[j].lineBreak = false;
Q_FALLTHROUGH();
case LB::NS::None:
@ -697,7 +697,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
if (Q_UNLIKELY(LB::NS::actionTable[nelast][LB::NS::XX] == LB::NS::Break)) {
// LB25: do not break lines inside numbers
for (quint32 j = nestart + 1; j < len; ++j)
for (qsizetype j = nestart + 1; j < len; ++j)
attributes[j].lineBreak = false;
}
@ -706,9 +706,9 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
}
static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *attributes)
static void getWhiteSpaces(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
for (quint32 i = 0; i != len; ++i) {
for (qsizetype i = 0; i != len; ++i) {
uint ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
ushort low = string[i + 1];
@ -725,7 +725,7 @@ static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *a
namespace Tailored {
using CharAttributeFunction = void (*)(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes);
using CharAttributeFunction = void (*)(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes);
enum Form {
@ -1181,15 +1181,15 @@ static inline Form form(unsigned short uc) {
We return syllable boundaries on invalid combinations as well
*/
static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int start, int end, bool *invalid)
static qsizetype indic_nextSyllableBoundary(QChar::Script script, const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{
*invalid = false;
IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
const ushort *uc = s+start;
IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", int(start), int(end));
const char16_t *uc = s+start;
int pos = 0;
qsizetype pos = 0;
Form state = form(uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", int(pos), state, uc[pos]);
pos++;
if (state != Consonant && state != IndependentVowel) {
@ -1200,7 +1200,7 @@ static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int
while (pos < end - start) {
Form newState = form(uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
IDEBUG("state[%d]=%d (uc=%4x)", int(pos), newState, uc[pos]);
switch (newState) {
case Control:
newState = state;
@ -1285,15 +1285,15 @@ static int indic_nextSyllableBoundary(QChar::Script script, const ushort *s, int
return pos+start;
}
static void indicAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
static void indicAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{
int end = from + len;
const ushort *uc = text + from;
qsizetype end = from + len;
const char16_t *uc = text + from;
attributes += from;
uint i = 0;
qsizetype i = 0;
while (i < len) {
bool invalid;
uint boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
qsizetype boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true;
if (boundary > len-1) boundary = len;
@ -1339,9 +1339,9 @@ static int init_libthai() {
return 0;
}
static void to_tis620(const ushort *string, uint len, char *cstr)
static void to_tis620(const char16_t *string, qsizetype len, char *cstr)
{
uint i;
qsizetype i;
unsigned char *result = (unsigned char *)cstr;
for (i = 0; i < len; ++i) {
@ -1359,14 +1359,14 @@ static void to_tis620(const ushort *string, uint len, char *cstr)
/*
* Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
*/
static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes *attributes)
static void thaiAssignAttributes(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
char s[128];
char *cstr = s;
int *break_positions = nullptr;
int brp[128];
int brp_size = 0;
uint numbreaks, i, j, cell_length;
qsizetype numbreaks, i, j, cell_length;
struct thcell_t tis_cell;
if (!init_libthai())
@ -1432,10 +1432,10 @@ static void thaiAssignAttributes(const ushort *string, uint len, QCharAttributes
free(cstr);
}
static void thaiAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
static void thaiAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{
assert(script == QChar::Script_Thai);
const ushort *uc = text + from;
const char16_t *uc = text + from;
attributes += from;
Q_UNUSED(script);
thaiAssignAttributes(uc, len, attributes);
@ -1505,11 +1505,11 @@ static const unsigned char tibetanForm[0x80] = {
#define tibetan_form(c) \
((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther)
static int tibetan_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
static qsizetype tibetan_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{
const ushort *uc = s + start;
const char16_t *uc = s + start;
int pos = 0;
qsizetype pos = 0;
TibetanForm state = tibetan_form(*uc);
/* qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);*/
@ -1549,16 +1549,16 @@ finish:
return start+pos;
}
static void tibetanAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
static void tibetanAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{
int end = from + len;
const ushort *uc = text + from;
uint i = 0;
qsizetype end = from + len;
const char16_t *uc = text + from;
qsizetype i = 0;
Q_UNUSED(script);
attributes += from;
while (i < len) {
bool invalid;
uint boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
qsizetype boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true;
@ -1736,11 +1736,11 @@ static const signed char mymrStateTable[][Mymr_CC_COUNT] =
// calculate, using the state table, which one is the last character of the syllable
// that starts in the starting position.
*/
static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
static qsizetype myanmar_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{
const ushort *uc = s + start;
const char16_t *uc = s + start;
int state = 0;
int pos = start;
qsizetype pos = start;
*invalid = false;
while (pos < end) {
@ -1749,7 +1749,7 @@ static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, boo
if (pos == start)
*invalid = (bool)(charClass & Mymr_CF_DOTTED_CIRCLE);
MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, charClass, *uc);
MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", int(pos - start), state, charClass, *uc);
if (state < 0) {
if (state < -1)
@ -1762,16 +1762,16 @@ static int myanmar_nextSyllableBoundary(const ushort *s, int start, int end, boo
return pos;
}
static void myanmarAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
static void myanmarAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{
int end = from + len;
const ushort *uc = text + from;
uint i = 0;
qsizetype end = from + len;
const char16_t *uc = text + from;
qsizetype i = 0;
Q_UNUSED(script);
attributes += from;
while (i < len) {
bool invalid;
uint boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
qsizetype boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
attributes[i].graphemeBoundary = true;
attributes[i].lineBreak = true;
@ -2071,11 +2071,11 @@ static const signed char khmerStateTable[][CC_COUNT] =
// calculate, using the state table, which one is the last character of the syllable
// that starts in the starting position.
*/
static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool *invalid)
static qsizetype khmer_nextSyllableBoundary(const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
{
const ushort *uc = s + start;
const char16_t *uc = s + start;
int state = 0;
int pos = start;
qsizetype pos = start;
*invalid = false;
while (pos < end) {
@ -2085,7 +2085,7 @@ static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool
}
state = khmerStateTable[state][charClass & CF_CLASS_MASK];
KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", int(pos - start), state,
charClass, *uc );
if (state < 0) {
@ -2097,16 +2097,16 @@ static int khmer_nextSyllableBoundary(const ushort *s, int start, int end, bool
return pos;
}
static void khmerAttributes(QChar::Script script, const ushort *text, uint from, uint len, QCharAttributes *attributes)
static void khmerAttributes(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
{
int end = from + len;
const ushort *uc = text + from;
uint i = 0;
qsizetype end = from + len;
const char16_t *uc = text + from;
qsizetype i = 0;
Q_UNUSED(script);
attributes += from;
while ( i < len ) {
bool invalid;
uint boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
qsizetype boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
attributes[i].graphemeBoundary = true;
@ -2191,52 +2191,52 @@ const CharAttributeFunction charAttributeFunction[] = {
khmerAttributes
};
static void getCharAttributes(const ushort *string, uint stringLength,
const QUnicodeTools::ScriptItem *items, uint numItems,
static void getCharAttributes(const char16_t *string, qsizetype stringLength,
const QUnicodeTools::ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes)
{
if (stringLength == 0)
return;
for (uint i = 0; i < numItems; ++i) {
for (qsizetype i = 0; i < numItems; ++i) {
QChar::Script script = items[i].script;
if (script > QChar::Script_Khmer)
script = QChar::Script_Common;
CharAttributeFunction attributeFunction = charAttributeFunction[script];
if (!attributeFunction)
continue;
int end = i < numItems - 1 ? items[i + 1].position : stringLength;
qsizetype end = i < numItems - 1 ? items[i + 1].position : stringLength;
attributeFunction(script, string, items[i].position, end - items[i].position, attributes);
}
}
}
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const ScriptItem *items, int numItems,
Q_CORE_EXPORT void initCharAttributes(QStringView string,
const ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes, CharAttributeOptions options)
{
if (length <= 0)
if (string.size() <= 0)
return;
if (!(options & DontClearAttributes))
::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
::memset(attributes, 0, (string.size() + 1) * sizeof(QCharAttributes));
if (options & GraphemeBreaks)
getGraphemeBreaks(string, length, attributes);
getGraphemeBreaks(string.utf16(), string.size(), attributes);
if (options & WordBreaks)
getWordBreaks(string, length, attributes);
getWordBreaks(string.utf16(), string.size(), attributes);
if (options & SentenceBreaks)
getSentenceBreaks(string, length, attributes);
getSentenceBreaks(string.utf16(), string.size(), attributes);
if (options & LineBreaks)
getLineBreaks(string, length, attributes, options);
getLineBreaks(string.utf16(), string.size(), attributes, options);
if (options & WhiteSpaces)
getWhiteSpaces(string, length, attributes);
getWhiteSpaces(string.utf16(), string.size(), attributes);
if (!qt_initcharattributes_default_algorithm_only) {
if (!items || numItems <= 0)
return;
Tailored::getCharAttributes(string, length, items, numItems, attributes);
Tailored::getCharAttributes(string.utf16(), string.size(), items, numItems, attributes);
}
}
@ -2247,16 +2247,16 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
//
// ----------------------------------------------------------------------------
Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray *scripts)
Q_CORE_EXPORT void initScripts(QStringView string, ScriptItemArray *scripts)
{
int sor = 0;
int eor = 0;
qsizetype sor = 0;
qsizetype eor = 0;
QChar::Script script = QChar::Script_Common;
for (int i = 0; i < length; ++i, eor = i) {
char32_t ucs4 = string[i];
if (QChar::isHighSurrogate(ucs4) && i + 1 < length) {
ushort low = string[i + 1];
for (qsizetype i = 0; i < string.size(); ++i, eor = i) {
char32_t ucs4 = string[i].unicode();
if (QChar::isHighSurrogate(ucs4) && i + 1 < string.size()) {
ushort low = string[i + 1].unicode();
if (QChar::isLowSurrogate(low)) {
ucs4 = QChar::surrogateToUcs4(ucs4, low);
++i;
@ -2294,7 +2294,7 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray
}
Q_ASSERT(script >= QChar::Script_Common);
Q_ASSERT(eor == length);
Q_ASSERT(eor == string.size());
scripts->append(ScriptItem{sor, script});
}

View File

@ -74,7 +74,7 @@ namespace QUnicodeTools {
struct ScriptItem
{
int position;
qsizetype position;
QChar::Script script;
};
@ -97,12 +97,12 @@ enum CharAttributeOption {
Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption)
// attributes buffer has to have a length of string length + 1
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const ScriptItem *items, int numItems,
Q_CORE_EXPORT void initCharAttributes(QStringView str,
const ScriptItem *items, qsizetype numItems,
QCharAttributes *attributes, CharAttributeOptions options);
Q_CORE_EXPORT void initScripts(const ushort *string, int length, ScriptItemArray *scripts);
Q_CORE_EXPORT void initScripts(QStringView str, ScriptItemArray *scripts);
} // namespace QUnicodeTools

View File

@ -1808,8 +1808,7 @@ const QCharAttributes *QTextEngine::attributes() const
}
QUnicodeTools::initCharAttributes(
reinterpret_cast<const ushort *>(layoutData->string.constData()),
layoutData->string.length(),
layoutData->string,
scriptItems.data(), scriptItems.size(),
reinterpret_cast<QCharAttributes *>(layoutData->memory),
QUnicodeTools::CharAttributeOptions(QUnicodeTools::GraphemeBreaks
@ -1921,7 +1920,7 @@ void QTextEngine::itemize() const
{
QUnicodeTools::ScriptItemArray scriptItems;
QUnicodeTools::initScripts(string, length, &scriptItems);
QUnicodeTools::initScripts(layoutData->string, &scriptItems);
for (int i = 0; i < scriptItems.length(); ++i) {
const auto &item = scriptItems.at(i);
int end = i < scriptItems.length() - 1 ? scriptItems.at(i + 1).position : length;

View File

@ -664,8 +664,6 @@ void tst_QTextBoundaryFinder::emptyText_data()
QTest::newRow("empty3") << finder;
QTest::newRow("empty4") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, 0, 0);
QTest::newRow("empty5") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, attrs, 11);
QTest::newRow("invalid1") << QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, 0, 0);
QTest::newRow("invalid2") << QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, attrs, 11);
}
void tst_QTextBoundaryFinder::emptyText()