QUnicodeTools: Handle WB3c word break rule
Adjust handling of WB3c rule to UAX #29, revision 33 (Unicode 11.0.0). The rule reads: ZWJ × \p{Extended_Pictographic} This fixes 9 word break tests. Task-number: QTBUG-97537 Pick-to: 6.2 6.3 Change-Id: I818d4048828e6663d5c090aa372d83f5099fdffe Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
1a26719c54
commit
c63cdbdc43
@ -239,6 +239,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
} currentWordType = WordTypeNone;
|
||||
|
||||
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
|
||||
auto real_cls = cls; // Unaffected by WB4
|
||||
|
||||
for (qsizetype i = 0; i != len; ++i) {
|
||||
qsizetype pos = i;
|
||||
char32_t ucs4 = string[i];
|
||||
@ -268,12 +270,18 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
uchar action = WB::breakTable[cls][ncls];
|
||||
switch (action) {
|
||||
case WB::Break:
|
||||
if (Q_UNLIKELY(real_cls == QUnicodeTables::WordBreak_ZWJ
|
||||
&& prop->graphemeBreakClass
|
||||
== QUnicodeTables::GraphemeBreak_Extended_Pictographic)) {
|
||||
// WB3c: ZWJ × \p{Extended_Pictographic}
|
||||
action = WB::NoBreak;
|
||||
}
|
||||
break;
|
||||
case WB::NoBreak:
|
||||
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend || ncls == QUnicodeTables::WordBreak_ZWJ || ncls == QUnicodeTables::WordBreak_Format)) {
|
||||
// WB4: X(Extend|Format)* -> X
|
||||
if (cls != QUnicodeTables::WordBreak_ZWJ) // WB3c
|
||||
continue;
|
||||
real_cls = ncls;
|
||||
continue;
|
||||
}
|
||||
if (Q_UNLIKELY(cls == QUnicodeTables::WordBreak_RegionalIndicator)) {
|
||||
// WB15/WB16: break between pairs of Regional indicator
|
||||
@ -337,6 +345,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
real_cls = ncls;
|
||||
}
|
||||
|
||||
if (currentWordType != WordTypeNone)
|
||||
|
@ -1154,7 +1154,7 @@
|
||||
÷ 200D × 0308 ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
|
||||
÷ 200D ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
|
||||
÷ 200D × 0308 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
|
||||
# ÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3]
|
||||
÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3]
|
||||
÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
|
||||
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
|
||||
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
|
||||
@ -1725,15 +1725,15 @@
|
||||
÷ 0061 ÷ 1F1E6 × 200D × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
|
||||
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
|
||||
÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
|
||||
# ÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
# ÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
# ÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
|
||||
# ÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
|
||||
# ÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
|
||||
÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
|
||||
÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
|
||||
÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
|
||||
÷ 1F6D1 × 1F3FF ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
|
||||
# ÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
|
||||
# ÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
# ÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
|
||||
÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 1F6D1 ÷ 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
|
||||
÷ 0061 × 0308 × 200D × 0308 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
|
||||
÷ 0061 ÷ 0020 × 0020 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
|
||||
|
Loading…
x
Reference in New Issue
Block a user