QCharAttributes: add wordStart/wordEnd flags
A simple heuristic is used to detect the word beginning and ending by looking at the word break property value of surrounding characters. This behaves better than the white-spaces based implementation used before and makes it possible to tailor the default algorithm for complex scripts. BIG FAT WARNING: The QCharAttributes buffer now has to have a length of string length + 1 for the flags at end of text. Task-Id: QTBUG-6498 Change-Id: I5589b191ffde6a50d2af0c14a00430d3852c67b4 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
parent
aeb21c73c5
commit
a798b956b9
4
src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
vendored
4
src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
vendored
@ -133,7 +133,9 @@ typedef struct {
|
||||
hb_bitfield sentenceBoundary : 1;
|
||||
hb_bitfield lineBreak : 1;
|
||||
hb_bitfield whiteSpace : 1; /* A unicode whitespace character */
|
||||
hb_bitfield unused : 3;
|
||||
hb_bitfield wordStart : 1;
|
||||
hb_bitfield wordEnd : 1;
|
||||
hb_bitfield unused : 1;
|
||||
} HB_CharAttributes;
|
||||
|
||||
void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
|
10
src/3rdparty/harfbuzz/src/harfbuzz-thai.c
vendored
10
src/3rdparty/harfbuzz/src/harfbuzz-thai.c
vendored
@ -395,8 +395,10 @@ static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_
|
||||
to_tis620(string, len, cstr);
|
||||
|
||||
for (i = 0; i < len; ++i) {
|
||||
attributes[i].lineBreak = FALSE;
|
||||
attributes[i].wordBreak = FALSE;
|
||||
attributes[i].wordStart = FALSE;
|
||||
attributes[i].wordEnd = FALSE;
|
||||
attributes[i].lineBreak = FALSE;
|
||||
}
|
||||
|
||||
if (len > 128) {
|
||||
@ -411,11 +413,17 @@ static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_
|
||||
|
||||
if (break_positions) {
|
||||
attributes[0].wordBreak = TRUE;
|
||||
attributes[0].wordStart = TRUE;
|
||||
attributes[0].wordEnd = FALSE;
|
||||
numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size);
|
||||
for (i = 0; i < numbreaks; ++i) {
|
||||
attributes[break_positions[i]].wordBreak = TRUE;
|
||||
attributes[break_positions[i]].wordStart = TRUE;
|
||||
attributes[break_positions[i]].wordEnd = TRUE;
|
||||
attributes[break_positions[i]].lineBreak = TRUE;
|
||||
}
|
||||
if (numbreaks > 0)
|
||||
attributes[break_positions[numbreaks - 1]].wordStart = FALSE;
|
||||
|
||||
if (break_positions != brp)
|
||||
free(break_positions);
|
||||
|
@ -89,7 +89,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
|
||||
scriptItems.append(item);
|
||||
}
|
||||
|
||||
QUnicodeTools::CharAttributeOptions options = QUnicodeTools::WhiteSpaces;
|
||||
QUnicodeTools::CharAttributeOptions options = 0;
|
||||
switch (type) {
|
||||
case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break;
|
||||
case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break;
|
||||
@ -189,9 +189,9 @@ QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
|
||||
, pos(other.pos)
|
||||
, freePrivate(true)
|
||||
{
|
||||
d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
|
||||
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
|
||||
Q_CHECK_PTR(d);
|
||||
memcpy(d, other.d, length*sizeof(QCharAttributes));
|
||||
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -209,11 +209,11 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
|
||||
pos = other.pos;
|
||||
|
||||
QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
|
||||
realloc(freePrivate ? d : 0, length*sizeof(QCharAttributes));
|
||||
realloc(freePrivate ? d : 0, (length + 1) * sizeof(QCharAttributes));
|
||||
Q_CHECK_PTR(newD);
|
||||
freePrivate = true;
|
||||
d = newD;
|
||||
memcpy(d, other.d, length*sizeof(QCharAttributes));
|
||||
memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -238,7 +238,7 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
|
||||
, pos(0)
|
||||
, freePrivate(true)
|
||||
{
|
||||
d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
|
||||
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
|
||||
Q_CHECK_PTR(d);
|
||||
init(t, chars, length, d->attributes);
|
||||
}
|
||||
@ -249,7 +249,8 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
|
||||
|
||||
\a buffer is an optional working buffer of size \a bufferSize you can pass to
|
||||
the QTextBoundaryFinder. If the buffer is large enough to hold the working
|
||||
data required, it will use this instead of allocating its own buffer.
|
||||
data required (bufferSize >= length + 1), it will use this
|
||||
instead of allocating its own buffer.
|
||||
|
||||
\warning QTextBoundaryFinder does not create a copy of \a chars. It is the
|
||||
application programmer's responsibility to ensure the array is allocated for
|
||||
@ -262,11 +263,11 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars,
|
||||
, length(length)
|
||||
, pos(0)
|
||||
{
|
||||
if (buffer && (uint)bufferSize >= length*sizeof(QCharAttributes)) {
|
||||
if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) {
|
||||
d = (QTextBoundaryFinderPrivate *)buffer;
|
||||
freePrivate = false;
|
||||
} else {
|
||||
d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
|
||||
d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
|
||||
Q_CHECK_PTR(d);
|
||||
freePrivate = true;
|
||||
}
|
||||
@ -455,38 +456,30 @@ bool QTextBoundaryFinder::isAtBoundary() const
|
||||
*/
|
||||
QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
|
||||
{
|
||||
if (!d)
|
||||
return NotAtBoundary;
|
||||
if (! isAtBoundary())
|
||||
return NotAtBoundary;
|
||||
if (pos == 0) {
|
||||
if (d->attributes[pos].whiteSpace)
|
||||
return NotAtBoundary;
|
||||
return StartWord;
|
||||
}
|
||||
if (pos == length) {
|
||||
if (d->attributes[length-1].whiteSpace)
|
||||
return NotAtBoundary;
|
||||
return EndWord;
|
||||
BoundaryReasons reasons = NotAtBoundary;
|
||||
if (!d || !isAtBoundary())
|
||||
return reasons;
|
||||
|
||||
switch (t) {
|
||||
case Word:
|
||||
if (d->attributes[pos].wordStart)
|
||||
reasons |= StartWord;
|
||||
if (d->attributes[pos].wordEnd)
|
||||
reasons |= EndWord;
|
||||
break;
|
||||
case Line:
|
||||
if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen)
|
||||
reasons |= SoftHyphen;
|
||||
// fall through
|
||||
case Grapheme:
|
||||
case Sentence:
|
||||
reasons |= StartWord | EndWord;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (t == Line && chars[pos - 1].unicode() == QChar::SoftHyphen)
|
||||
return SoftHyphen;
|
||||
|
||||
if (t != Word)
|
||||
return BoundaryReasons(StartWord | EndWord);
|
||||
|
||||
const bool nextIsSpace = d->attributes[pos].whiteSpace;
|
||||
const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
|
||||
|
||||
if (prevIsSpace && !nextIsSpace)
|
||||
return StartWord;
|
||||
else if (!prevIsSpace && nextIsSpace)
|
||||
return EndWord;
|
||||
else if (!prevIsSpace && !nextIsSpace)
|
||||
return BoundaryReasons(StartWord | EndWord);
|
||||
else
|
||||
return NotAtBoundary;
|
||||
return reasons;
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -102,6 +102,8 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
|
||||
|
||||
lcls = cls;
|
||||
}
|
||||
|
||||
attributes[len].graphemeBoundary = true; // GB2
|
||||
}
|
||||
|
||||
|
||||
@ -133,6 +135,10 @@ static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnico
|
||||
|
||||
static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
|
||||
{
|
||||
enum WordType {
|
||||
WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
|
||||
} currentWordType = WordTypeNone;
|
||||
|
||||
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
quint32 pos = i;
|
||||
@ -178,9 +184,30 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
||||
continue;
|
||||
}
|
||||
cls = ncls;
|
||||
if (action == WB::Break)
|
||||
if (action == WB::Break) {
|
||||
attributes[pos].wordBreak = true;
|
||||
if (currentWordType != WordTypeNone)
|
||||
attributes[pos].wordEnd = true;
|
||||
switch (cls) {
|
||||
case QUnicodeTables::WordBreak_Katakana:
|
||||
currentWordType = WordTypeHiraganaKatakana;
|
||||
attributes[pos].wordStart = true;
|
||||
break;
|
||||
case QUnicodeTables::WordBreak_ALetter:
|
||||
case QUnicodeTables::WordBreak_Numeric:
|
||||
currentWordType = WordTypeAlphaNumeric;
|
||||
attributes[pos].wordStart = true;
|
||||
break;
|
||||
default:
|
||||
currentWordType = WordTypeNone;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentWordType != WordTypeNone)
|
||||
attributes[len].wordEnd = true;
|
||||
attributes[len].wordBreak = true; // WB2
|
||||
}
|
||||
|
||||
|
||||
@ -277,6 +304,8 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
|
||||
state = SB::breakTable[SB::Initial][ncls];
|
||||
}
|
||||
}
|
||||
|
||||
attributes[len].sentenceBoundary = true; // SB2
|
||||
}
|
||||
|
||||
|
||||
@ -514,6 +543,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
|
||||
}
|
||||
|
||||
attributes[0].lineBreak = false; // LB2
|
||||
attributes[len].lineBreak = true; // LB3
|
||||
}
|
||||
|
||||
|
||||
@ -543,7 +573,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
|
||||
return;
|
||||
|
||||
if (!(options & DontClearAttributes))
|
||||
::memset(attributes, 0, length * sizeof(QCharAttributes));
|
||||
::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
|
||||
|
||||
if (options & GraphemeBreaks)
|
||||
getGraphemeBreaks(string, length, attributes);
|
||||
|
@ -64,7 +64,9 @@ struct Q_PACKED QCharAttributes
|
||||
uchar sentenceBoundary : 1;
|
||||
uchar lineBreak : 1;
|
||||
uchar whiteSpace : 1;
|
||||
uchar unused : 3;
|
||||
uchar wordStart : 1;
|
||||
uchar wordEnd : 1;
|
||||
uchar unused : 1;
|
||||
};
|
||||
Q_DECLARE_TYPEINFO(QCharAttributes, Q_PRIMITIVE_TYPE);
|
||||
|
||||
@ -89,6 +91,7 @@ enum CharAttributeOption {
|
||||
};
|
||||
Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption)
|
||||
|
||||
// attributes buffer has to have a length of string length + 1
|
||||
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
|
||||
const ScriptItem *items, int numItems,
|
||||
QCharAttributes *attributes, CharAttributeOptions options = DefaultOptionsCompat);
|
||||
|
@ -72,6 +72,7 @@ private slots:
|
||||
void lineBoundaries_manual();
|
||||
|
||||
void fastConstructor();
|
||||
void wordBoundaries_qtbug6498();
|
||||
void isAtSoftHyphen_data();
|
||||
void isAtSoftHyphen();
|
||||
void thaiLineBreak();
|
||||
@ -544,6 +545,67 @@ void tst_QTextBoundaryFinder::fastConstructor()
|
||||
QCOMPARE(finder.boundaryReasons(), QTextBoundaryFinder::NotAtBoundary);
|
||||
}
|
||||
|
||||
void tst_QTextBoundaryFinder::wordBoundaries_qtbug6498()
|
||||
{
|
||||
// text with trailing space
|
||||
QString text("Please test me. Finish ");
|
||||
QTextBoundaryFinder finder(QTextBoundaryFinder::Word, text);
|
||||
|
||||
QCOMPARE(finder.position(), 0);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 6);
|
||||
QCOMPARE(finder.position(), 6);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 7);
|
||||
QCOMPARE(finder.position(), 7);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 11);
|
||||
QCOMPARE(finder.position(), 11);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 12);
|
||||
QCOMPARE(finder.position(), 12);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 14);
|
||||
QCOMPARE(finder.position(), 14);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 15);
|
||||
QCOMPARE(finder.position(), 15);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 16);
|
||||
QCOMPARE(finder.position(), 16);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 22);
|
||||
QCOMPARE(finder.position(), 22);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), 23);
|
||||
QCOMPARE(finder.position(), 23);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary);
|
||||
|
||||
QCOMPARE(finder.toNextBoundary(), -1);
|
||||
QCOMPARE(finder.position(), -1);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
QVERIFY(finder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary);
|
||||
}
|
||||
|
||||
void tst_QTextBoundaryFinder::isAtSoftHyphen_data()
|
||||
{
|
||||
QTest::addColumn<QString>("testString");
|
||||
@ -568,7 +630,7 @@ void tst_QTextBoundaryFinder::isAtSoftHyphen()
|
||||
QVERIFY(expectedBreakPositions.contains(i + 1));
|
||||
boundaryFinder.setPosition(i + 1);
|
||||
QVERIFY(boundaryFinder.isAtBoundary());
|
||||
QVERIFY(boundaryFinder.boundaryReasons() == QTextBoundaryFinder::SoftHyphen);
|
||||
QVERIFY(boundaryFinder.boundaryReasons() & QTextBoundaryFinder::SoftHyphen);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user