diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp index ac6302362da..8f1eb2e5325 100644 --- a/src/corelib/text/qunicodetools.cpp +++ b/src/corelib/text/qunicodetools.cpp @@ -604,6 +604,8 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10 QUnicodeTables::LineBreakClass cls = lcls; + const QUnicodeTables::Properties *lastProp = QUnicodeTables::properties(U'\n'); + for (qsizetype i = 0; i != len; ++i) { qsizetype pos = i; char32_t ucs4 = string[i]; @@ -707,6 +709,14 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes goto next; } + if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_EM + && lastProp->category == QChar::Other_NotAssigned + && lastProp->graphemeBreakClass + == QUnicodeTables::GraphemeBreak_Extended_Pictographic)) { + // LB30b: [\p{Extended_Pictographic}&\p{Cn}] × EM + goto next; + } + // for South East Asian chars that require a complex analysis, the Unicode // standard recommends to treat them as AL. tailoring that do dictionary analysis can override if (Q_UNLIKELY(cls >= QUnicodeTables::LineBreak_SA)) @@ -745,6 +755,7 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes next: cls = ncls; + lastProp = prop; next_no_cls_update: lcls = ncls; } diff --git a/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt b/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt index 1b038cdce9b..32d66183197 100644 --- a/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt +++ b/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt @@ -7678,7 +7678,7 @@ × 1F1F7 × 1F1FA ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) ÷ [30.13] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3] × 1F1F7 × 1F1FA × 200B ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) × [7.02] ZERO WIDTH SPACE (ZW) ÷ [8.0] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.12] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3] × 05D0 × 002D × 05D0 ÷ # × [0.3] HEBREW LETTER ALEF (HL) × [21.02] HYPHEN-MINUS (HY) × [21.1] HEBREW LETTER ALEF (HL) ÷ [0.3] -# × 1F02C × 1F3FF ÷ # × [0.3] (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3] +× 1F02C × 1F3FF ÷ # × [0.3] (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3] × 00A9 ÷ 1F3FF ÷ # × [0.3] COPYRIGHT SIGN (AL) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3] # # Lines: 7654