QUnicodeTools: Fix line breaking for potential emojis
Implement part of LB30b introduced by UAX #14, revision 47 (Unicode 14.0.0):
[\p{Extended_Pictographic}&\p{Cn}] × EM
This fixes one line-breaking test.
Task-number: QTBUG-97537
Pick-to: 6.3
Change-Id: I3fd2372a057b7391d8846e9c146f69a54686ea61
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
08d2ae411f
commit
40b4ad1866
@ -604,6 +604,8 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
|
||||
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
|
||||
QUnicodeTables::LineBreakClass cls = lcls;
|
||||
const QUnicodeTables::Properties *lastProp = QUnicodeTables::properties(U'\n');
|
||||
|
||||
for (qsizetype i = 0; i != len; ++i) {
|
||||
qsizetype pos = i;
|
||||
char32_t ucs4 = string[i];
|
||||
@ -707,6 +709,14 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_EM
|
||||
&& lastProp->category == QChar::Other_NotAssigned
|
||||
&& lastProp->graphemeBreakClass
|
||||
== QUnicodeTables::GraphemeBreak_Extended_Pictographic)) {
|
||||
// LB30b: [\p{Extended_Pictographic}&\p{Cn}] × EM
|
||||
goto next;
|
||||
}
|
||||
|
||||
// for South East Asian chars that require a complex analysis, the Unicode
|
||||
// standard recommends treating them as AL. Tailorings that do dictionary analysis can override this.
|
||||
if (Q_UNLIKELY(cls >= QUnicodeTables::LineBreak_SA))
|
||||
@ -745,6 +755,7 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
|
||||
next:
|
||||
cls = ncls;
|
||||
lastProp = prop;
|
||||
next_no_cls_update:
|
||||
lcls = ncls;
|
||||
}
|
||||
|
@ -7678,7 +7678,7 @@
|
||||
× 1F1F7 × 1F1FA ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) ÷ [30.13] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
|
||||
× 1F1F7 × 1F1FA × 200B ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) × [7.02] ZERO WIDTH SPACE (ZW) ÷ [8.0] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.12] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
|
||||
× 05D0 × 002D × 05D0 ÷ # × [0.3] HEBREW LETTER ALEF (HL) × [21.02] HYPHEN-MINUS (HY) × [21.1] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
# × 1F02C × 1F3FF ÷ # × [0.3] <reserved-1F02C> (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
|
||||
× 1F02C × 1F3FF ÷ # × [0.3] <reserved-1F02C> (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
|
||||
× 00A9 ÷ 1F3FF ÷ # × [0.3] COPYRIGHT SIGN (AL) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
|
||||
#
|
||||
# Lines: 7654
|
||||
|
Loading…
x
Reference in New Issue
Block a user