Cope as best we can if fractional-part and group separators coincide

The user might misconfigure their system this way. CLDR's mn_Mong_MN
even had this from v43 (when I reported it) to v45 (after which it was
fixed), so some operating system using CLDR might have that problem if
configured for this locale.

This naturally makes number parsing ambiguous in general, when the
last group might be a fractional part or the end of the whole-number
part, but we can at least catch the cases where the pattern of lengths
of groups of digits would violate grouping rules for the locale under
one reading or the other. When it is ambiguous, interpret the last
group as a fractional part. That shall sometimes be wrong, but the
system is misconfigured in this case anyway, so we're allowed to not
care.

Add some test-cases for this scenario, making clear which are
unambiguous (malformed in one reading, well-formed in the other) and
which are ambiguous (so resolved by treating last separator as the
fractional-part one).

Fixes: QTBUG-134913
Change-Id: Ib8df9bcd9ca88ad4e949565b1ecfba3ef64b587d
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
This commit is contained in:
Edward Welbourne 2025-03-18 17:49:53 +01:00
parent 754666dbe8
commit ea3d2b24c4
2 changed files with 147 additions and 27 deletions

View File

@ -4455,6 +4455,13 @@ public:
inline int asBmpDigit(char16_t digit) const;
inline bool isInfNanChar(char ch) const { return matchInfNaN.matches(ch); }
char nextToken();
bool fractionGroupClash() const
{
// If the user's hand-configuration of the system makes group and
// fractional part separators coincide, we have some kludges to apply,
// though we can skip them in integer mode.
return Q_UNLIKELY(m_mode != QLocaleData::IntegerMode && m_guide.group == m_guide.decimal);
}
};
int NumericTokenizer::asBmpDigit(char16_t digit) const
@ -4522,6 +4529,12 @@ char NumericTokenizer::nextToken()
}
if (!m_guide.group.isEmpty() && tail.startsWith(m_guide.group)) {
m_index += m_guide.group.size();
// When group and decimal coincide, and a fractional part is not
// unexpected, treat the last as a fractional part separator (and leave
// the caller to special-case the situations where that causes a
// parse-fail that we can dodge by not reading it that way).
if (fractionGroupClash() && tail.indexOf(m_guide.decimal, m_guide.group.size()) == -1)
return '.';
return ',';
}
if (m_mode != QLocaleData::IntegerMode && tail.startsWith(m_guide.decimal)) {
@ -4658,10 +4671,22 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
if (out == '.') {
if (stage > Grouped) // Too late to start a fractional part.
return false;
// That's the end of the integral part - check size of last group:
if (badLeastGroup())
return false;
stage = Fraction;
if (tokens.fractionGroupClash() && badLeastGroup()
&& digitsInGroup == grouping.higher) {
// Reinterpret '.' as ',' (as they're indistinguishable) to
// interpret the recent digits as a group, with the least to
// follow (hopefully of a suitable length):
out = ',';
stage = Grouped;
needHigherGroup = false;
digitsInGroup = 0;
} else {
// That's the end of the integral part - check size of last group:
if (badLeastGroup())
return false;
stage = Fraction;
}
} else if (out == 'e') {
if (wantDigits || stage == Name || stage > Fraction)
return false;
@ -4678,6 +4703,11 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
stage = Exponent;
wantDigits = true; // We need some in the exponent
} else if (out == ',') {
// (If tokens.fractionGroupClash(), a comma only comes out of
// nextToken() if there's a later separator, since the last is
// always treated as dot. So if we have a comma here, treating it as
// a dot wouldn't save the parse: the later dot-or-comma would make
// the text malformed.)
if (number_options.testFlag(QLocale::RejectGroupSeparator))
return false;

View File

@ -4477,6 +4477,21 @@ public:
return QVariant::fromValue(QLocaleData::GroupSizes{0,2,3});
if (m_name == u"en-NP") // CLDR: 1,3,3
return QVariant::fromValue(QLocaleData::GroupSizes{0,2,0});
// See GroupSeparator:
if (m_name == u"en-MN") // (same as CLDR en)
return QVariant::fromValue(QLocaleData::GroupSizes{1,3,3});
if (m_name == u"es-MN") // (same as CLDR es-ES)
return QVariant::fromValue(QLocaleData::GroupSizes{2,3,3});
if (m_name == u"ccp-MN") // (same as CLDR ccp-IN)
return QVariant::fromValue(QLocaleData::GroupSizes{2,2,3});
break;
case GroupSeparator:
case DecimalPoint:
// CLDR v43 through v45 had the same group and fractional-part
// separator for mn_Mong_MN. A user might also misconfigure their
// system. Use made-up hybrids *-MN for that.
if (m_name.endsWith(u"-MN"))
return QVariant(u"."_s);
break;
default:
break;
@ -4655,25 +4670,62 @@ void tst_QLocale::systemGrouping_data()
QTest::addColumn<QString>("name");
QTest::addColumn<QString>("separator");
QTest::addColumn<QString>("zeroDigit");
QTest::addColumn<int>("number");
QTest::addColumn<QString>("formattedString");
QTest::addColumn<int>("whole");
QTest::addColumn<QString>("formattedWhole");
QTest::addColumn<double>("real");
QTest::addColumn<QString>("formattedReal");
QTest::addColumn<int>("precision");
// Testing locales with non {1, 3, 3} groupe sizes, plus some locales
// that return invalid group sizes to test that we fallbakc to CLDR data.
// Testing locales with non-{1, 3, 3} groupe sizes, plus some locales
// that return invalid group sizes to test that we fallback to CLDR data.
QTest::newRow("en-ES") // {2,3,3}
<< u"en-ES"_s << u","_s << u"0"_s << 1234 << u"1234"_s;
<< u"en-ES"_s << u","_s << u"0"_s
<< 1234 << u"1234"_s << 1234.567 << u"1234.567"_s << 3;
QTest::newRow("en-ES-grouped")
<< u"en-ES"_s << u","_s << u"0"_s << 12345 << u"12,345"_s;
<< u"en-ES"_s << u","_s << u"0"_s
<< 12345 << u"12,345"_s << 12345.678 << u"12,345.678"_s << 3;
QTest::newRow("en-ES-long")
<< u"en-ES"_s << u","_s << u"0"_s << 1234567 << u"1,234,567"_s;
<< u"en-ES"_s << u","_s << u"0"_s << 1234567 << u"1,234,567"_s
<< 1234567.089 << u"1,234,567.089"_s << 3;;
QTest::newRow("en-BD") // {1,2,3}
<< u"en-BD"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s;
<< u"en-BD"_s << u","_s << u"0"_s
<< 123456789 << u"12,34,56,789"_s << 1234567.089 << u"12,34,567.089"_s << 3;
// Filling in the blanks where sys gives a zero:
QTest::newRow("en-BT") // {1,2,3}
<< u"en-BT"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s;
<< u"en-BT"_s << u","_s << u"0"_s
<< 123456789 << u"12,34,56,789"_s << 1.234 << u"1.234"_s << 3;
QTest::newRow("en-NP") // {1,2,3}
<< u"en-NP"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s;
<< u"en-NP"_s << u","_s << u"0"_s
<< 123456789 << u"12,34,56,789"_s << 1.234 << u"1.234"_s << 3;
// Test a locale in which fractional-part and group separators coincide.
// Floating-point handling in this scenario is in general ambiguous.
// When one reading violates grouping rules, use the other:
QTest::newRow("en-MN") // {1,3,3}
<< u"en-MN"_s << u"."_s << u"0"_s << 1234 << u"1.234"_s
<< 0.003 << u"0.003"_s << 3; // QTBUG-134913
QTest::newRow("es-MN") // {2,3,3},
<< u"es-MN"_s << u"."_s << u"0"_s << 123456789 << u"123.456.789"_s
<< 12345.6789 << u"12.345.6789"_s << 4; // long last group => fractional part
QTest::newRow("es-MN-short")
<< u"es-MN"_s << u"."_s << u"0"_s << 1234 << u"1234"_s
<< 1.234 << u"1.234"_s << 3; // short first "group" => not a group
QTest::newRow("es-MN-split")
<< u"es-MN"_s << u"."_s << u"0"_s << 1234567 << u"1.234.567"_s
<< 1234.567 << u"1234.567"_s << 3; // long first "group" => rest is fraction
QTest::newRow("es-MN-whole")
<< u"es-MN"_s << u"."_s << u"0"_s << 1234567 << u"1.234.567"_s
<< 1234567. << u"1.234.567"_s << 0; // short first group => later group separator
// Test the code's best guesses do match our intentions:
QTest::newRow("es-MN-plain")
<< u"es-MN"_s << u"."_s << u"0"_s << 12345 << u"12.345"_s
<< 12.345 << u"12.345"_s << 3; // Ambiguous, best guess
QTest::newRow("es-MN-long")
<< u"es-MN"_s << u"."_s << u"0"_s << 1234567089 << u"1.234.567.089"_s
<< 1234567.089 << u"1.234.567.089"_s << 3; // Ambiguous, best guess
// This last could equally be argued to be whole, based on "The two earlier
// separators were grouping, so read the last one the same way."
// Testing with Chakma locale
// Test handling of surrogates (non-BMP digits) in Chakma (ccp):
const char32_t zeroVal = 0x11136; // Chakma zero
const QChar data[] = {
QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal),
@ -4683,8 +4735,13 @@ void tst_QLocale::systemGrouping_data()
QChar::highSurrogate(zeroVal + 4), QChar::lowSurrogate(zeroVal + 4),
QChar::highSurrogate(zeroVal + 5), QChar::lowSurrogate(zeroVal + 5),
QChar::highSurrogate(zeroVal + 6), QChar::lowSurrogate(zeroVal + 6),
QChar::highSurrogate(zeroVal + 7), QChar::lowSurrogate(zeroVal + 7),
QChar::highSurrogate(zeroVal + 8), QChar::lowSurrogate(zeroVal + 8),
// QChar::highSurrogate(zeroVal + 9), QChar::lowSurrogate(zeroVal + 9),
};
const QChar separator(QLatin1Char(',')); // Separator for the Chakma locale
const QChar separator(QLatin1Char(',')); // Group separator for the Chakma locale
const QChar fractional(QLatin1Char('.')); // Fractional-part (and group for ccp-MN)
const QString
// Copy zero so it persists through QFETCH(), after data falls off the stack.
zero = QString(data, 2),
@ -4693,21 +4750,46 @@ void tst_QLocale::systemGrouping_data()
three = QString::fromRawData(data + 6, 2),
four = QString::fromRawData(data + 8, 2),
five = QString::fromRawData(data + 10, 2),
six = QString::fromRawData(data + 12, 2);
six = QString::fromRawData(data + 12, 2),
seven = QString::fromRawData(data + 14, 2),
eight = QString::fromRawData(data + 16, 2);
QString fourDigit = one + two + three + four;
QString fiveDigit = one + two + separator + three + four + five;
// Leading group can have single digit as long as there's a later separator:
QString sixDigit = one + separator + two + three + separator + four + five + six;
QString fourFloat = one + fractional + two + three + four;
QString fiveFloat = one + two + fractional + three + four + five;
QString sevenFloat = one + two + three + four + fractional + five + six + seven;
QTest::newRow("Chakma-short") // {2,2,3}
<< u"ccp"_s << QString(separator)
<< zero << 1234 << fourDigit;
<< u"ccp"_s << QString(separator) << zero
<< 1234 << fourDigit << 1.234 << fourFloat << 3;
QTest::newRow("Chakma")
<< u"ccp"_s << QString(separator)
<< zero << 12345 << fiveDigit;
<< u"ccp"_s << QString(separator) << zero
<< 12345 << fiveDigit << 12.345 << fiveFloat << 3;
QTest::newRow("Chakma-long")
<< u"ccp"_s << QString(separator) << zero
<< 123456 << sixDigit;
<< 123456 << sixDigit << 1234.567 << sevenFloat << 3;
// Floating-point forms for ccp-MN, whose group separator is the fractional-part separator:
// Leading "group" of four means too short to group, so rest is fractional part:
QTest::newRow("ccp-MN-short")
<< u"ccp-MN"_s << QString(fractional) << zero
<< 1234 << fourDigit << 1234.567 << sevenFloat << 3;
// Penultimate group of three implies final group must be fractional part:
QString groupFloat = one + two + fractional + three + four + five
+ fractional + six + seven + eight;
QTest::newRow("ccp-MN")
<< u"ccp-MN"_s << QString(fractional) << zero
<< 12345 << fiveFloat << 12345.678 << groupFloat << 3;
// Penultimate group of two implies rest must be grouping within the whole part:
QString eightDigit = one + fractional + two + three + fractional + four + five
+ fractional + six + seven + eight;
QTest::newRow("ccp-MN-long")
<< u"ccp-MN"_s << QString(fractional) << zero
<< 12345678 << eightDigit << 12345678. << eightDigit << 0;
}
void tst_QLocale::systemGrouping()
@ -4715,8 +4797,11 @@ void tst_QLocale::systemGrouping()
QFETCH(QString, name);
QFETCH(QString, separator);
QFETCH(QString, zeroDigit);
QFETCH(int, number);
QFETCH(QString, formattedString);
QFETCH(int, whole);
QFETCH(QString, formattedWhole);
QFETCH(double, real);
QFETCH(QString, formattedReal);
QFETCH(int, precision);
{
MySystemLocale sLocale(name);
@ -4724,11 +4809,16 @@ void tst_QLocale::systemGrouping()
QCOMPARE(sys.groupSeparator(), separator);
QCOMPARE(sys.zeroDigit(), zeroDigit);
QCOMPARE(sys.toString(number), formattedString);
QCOMPARE(sys.toString(whole), formattedWhole);
bool ok;
int count = sys.toInt(formattedString, &ok);
int count = sys.toInt(formattedWhole, &ok);
QVERIFY2(ok, "Integer didn't round-trip");
QCOMPARE(count, number);
QCOMPARE(count, whole);
QCOMPARE(sys.toString(real, 'f', precision), formattedReal);
double apparent = sys.toDouble(formattedReal, &ok);
QVERIFY2(ok, "Floating-precision didn't round-trip");
QCOMPARE(apparent, real);
}
}