diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index 8051b4713f6..da151839dc5 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -4455,6 +4455,13 @@ public: inline int asBmpDigit(char16_t digit) const; inline bool isInfNanChar(char ch) const { return matchInfNaN.matches(ch); } char nextToken(); + bool fractionGroupClash() const + { + // If the user's hand-configuration of the system makes group and + // fractional part separators coincide, we have some kludges to apply, + // though we can skip them in integer mode. + return Q_UNLIKELY(m_mode != QLocaleData::IntegerMode && m_guide.group == m_guide.decimal); + } }; int NumericTokenizer::asBmpDigit(char16_t digit) const @@ -4522,6 +4529,12 @@ char NumericTokenizer::nextToken() } if (!m_guide.group.isEmpty() && tail.startsWith(m_guide.group)) { m_index += m_guide.group.size(); + // When group and decimal coincide, and a fractional part is not + // unexpected, treat the last as a fractional part separator (and leave + // the caller to special-case the situations where that causes a + // parse-fail that we can dodge by not reading it that way). + if (fractionGroupClash() && tail.indexOf(m_guide.decimal, m_guide.group.size()) == -1) + return '.'; return ','; } if (m_mode != QLocaleData::IntegerMode && tail.startsWith(m_guide.decimal)) { @@ -4658,10 +4671,22 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o if (out == '.') { if (stage > Grouped) // Too late to start a fractional part. return false; - // That's the end of the integral part - check size of last group: - if (badLeastGroup()) - return false; - stage = Fraction; + + if (tokens.fractionGroupClash() && badLeastGroup() + && digitsInGroup == grouping.higher) { + // Reinterpret '.' as ',' (as they're indistinguishable) to + // interpret the recent digits as a group, with the least to + // follow (hopefully of a suitable length): + out = ','; + stage = Grouped; + needHigherGroup = false; + digitsInGroup = 0; + } else { + // That's the end of the integral part - check size of last group: + if (badLeastGroup()) + return false; + stage = Fraction; + } } else if (out == 'e') { if (wantDigits || stage == Name || stage > Fraction) return false; @@ -4678,6 +4703,11 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o stage = Exponent; wantDigits = true; // We need some in the exponent } else if (out == ',') { + // (If tokens.fractionGroupClash(), a comma only comes out of + // nextToken() if there's a later separator, since the last is + // always treated as dot. So if we have a comma here, treating it as + // a dot wouldn't save the parse: the later dot-or-comma would make + // the text malformed.) if (number_options.testFlag(QLocale::RejectGroupSeparator)) return false; diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index ec499471933..16528e76e9e 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -4477,6 +4477,21 @@ public: return QVariant::fromValue(QLocaleData::GroupSizes{0,2,3}); if (m_name == u"en-NP") // CLDR: 1,3,3 return QVariant::fromValue(QLocaleData::GroupSizes{0,2,0}); + // See GroupSeparator: + if (m_name == u"en-MN") // (same as CLDR en) + return QVariant::fromValue(QLocaleData::GroupSizes{1,3,3}); + if (m_name == u"es-MN") // (same as CLDR es-ES) + return QVariant::fromValue(QLocaleData::GroupSizes{2,3,3}); + if (m_name == u"ccp-MN") // (same as CLDR ccp-IN) + return QVariant::fromValue(QLocaleData::GroupSizes{2,2,3}); + break; + case GroupSeparator: + case DecimalPoint: + // CLDR v43 through v45 had the same group and fractional-part + // separator for mn_Mong_MN. A user might also misconfigure their + // system. Use made-up hybrids *-MN for that. + if (m_name.endsWith(u"-MN")) + return QVariant(u"."_s); break; default: break; @@ -4655,25 +4670,62 @@ void tst_QLocale::systemGrouping_data() QTest::addColumn("name"); QTest::addColumn("separator"); QTest::addColumn("zeroDigit"); - QTest::addColumn("number"); - QTest::addColumn("formattedString"); + QTest::addColumn("whole"); + QTest::addColumn("formattedWhole"); + QTest::addColumn("real"); + QTest::addColumn("formattedReal"); + QTest::addColumn("precision"); - // Testing locales with non {1, 3, 3} groupe sizes, plus some locales - // that return invalid group sizes to test that we fallbakc to CLDR data. + // Testing locales with non-{1, 3, 3} groupe sizes, plus some locales + // that return invalid group sizes to test that we fallback to CLDR data. QTest::newRow("en-ES") // {2,3,3} - << u"en-ES"_s << u","_s << u"0"_s << 1234 << u"1234"_s; + << u"en-ES"_s << u","_s << u"0"_s + << 1234 << u"1234"_s << 1234.567 << u"1234.567"_s << 3; QTest::newRow("en-ES-grouped") - << u"en-ES"_s << u","_s << u"0"_s << 12345 << u"12,345"_s; + << u"en-ES"_s << u","_s << u"0"_s + << 12345 << u"12,345"_s << 12345.678 << u"12,345.678"_s << 3; QTest::newRow("en-ES-long") - << u"en-ES"_s << u","_s << u"0"_s << 1234567 << u"1,234,567"_s; + << u"en-ES"_s << u","_s << u"0"_s << 1234567 << u"1,234,567"_s + << 1234567.089 << u"1,234,567.089"_s << 3;; QTest::newRow("en-BD") // {1,2,3} - << u"en-BD"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s; + << u"en-BD"_s << u","_s << u"0"_s + << 123456789 << u"12,34,56,789"_s << 1234567.089 << u"12,34,567.089"_s << 3; + // Filling in the blanks where sys gives a zero: QTest::newRow("en-BT") // {1,2,3} - << u"en-BT"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s; + << u"en-BT"_s << u","_s << u"0"_s + << 123456789 << u"12,34,56,789"_s << 1.234 << u"1.234"_s << 3; QTest::newRow("en-NP") // {1,2,3} - << u"en-NP"_s << u","_s << u"0"_s << 123456789 << u"12,34,56,789"_s; + << u"en-NP"_s << u","_s << u"0"_s + << 123456789 << u"12,34,56,789"_s << 1.234 << u"1.234"_s << 3; + // Test a locale in which fractional-part and group separators coincide. + // Floating-point handling in this scenario is in general ambiguous. + // When one reading violates grouping rules, use the other: + QTest::newRow("en-MN") // {1,3,3} + << u"en-MN"_s << u"."_s << u"0"_s << 1234 << u"1.234"_s + << 0.003 << u"0.003"_s << 3; // QTBUG-134913 + QTest::newRow("es-MN") // {2,3,3}, + << u"es-MN"_s << u"."_s << u"0"_s << 123456789 << u"123.456.789"_s + << 12345.6789 << u"12.345.6789"_s << 4; // long last group => fractional part + QTest::newRow("es-MN-short") + << u"es-MN"_s << u"."_s << u"0"_s << 1234 << u"1234"_s + << 1.234 << u"1.234"_s << 3; // short first "group" => not a group + QTest::newRow("es-MN-split") + << u"es-MN"_s << u"."_s << u"0"_s << 1234567 << u"1.234.567"_s + << 1234.567 << u"1234.567"_s << 3; // long first "group" => rest is fraction + QTest::newRow("es-MN-whole") + << u"es-MN"_s << u"."_s << u"0"_s << 1234567 << u"1.234.567"_s + << 1234567. << u"1.234.567"_s << 0; // short first group => later group separator + // Test the code's best guesses do match our intentions: + QTest::newRow("es-MN-plain") + << u"es-MN"_s << u"."_s << u"0"_s << 12345 << u"12.345"_s + << 12.345 << u"12.345"_s << 3; // Ambiguous, best guess + QTest::newRow("es-MN-long") + << u"es-MN"_s << u"."_s << u"0"_s << 1234567089 << u"1.234.567.089"_s + << 1234567.089 << u"1.234.567.089"_s << 3; // Ambiguous, best guess + // This last could equally be argued to be whole, based on "The two earlier + // separators were grouping, so read the last one the same way." - // Testing with Chakma locale + // Test handling of surrogates (non-BMP digits) in Chakma (ccp): const char32_t zeroVal = 0x11136; // Chakma zero const QChar data[] = { QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal), @@ -4683,8 +4735,13 @@ void tst_QLocale::systemGrouping_data() QChar::highSurrogate(zeroVal + 4), QChar::lowSurrogate(zeroVal + 4), QChar::highSurrogate(zeroVal + 5), QChar::lowSurrogate(zeroVal + 5), QChar::highSurrogate(zeroVal + 6), QChar::lowSurrogate(zeroVal + 6), + QChar::highSurrogate(zeroVal + 7), QChar::lowSurrogate(zeroVal + 7), + QChar::highSurrogate(zeroVal + 8), QChar::lowSurrogate(zeroVal + 8), + // QChar::highSurrogate(zeroVal + 9), QChar::lowSurrogate(zeroVal + 9), }; - const QChar separator(QLatin1Char(',')); // Separator for the Chakma locale + const QChar separator(QLatin1Char(',')); // Group separator for the Chakma locale + const QChar fractional(QLatin1Char('.')); // Fractional-part (and group for ccp-MN) + const QString // Copy zero so it persists through QFETCH(), after data falls off the stack. zero = QString(data, 2), @@ -4693,21 +4750,46 @@ void tst_QLocale::systemGrouping_data() three = QString::fromRawData(data + 6, 2), four = QString::fromRawData(data + 8, 2), five = QString::fromRawData(data + 10, 2), - six = QString::fromRawData(data + 12, 2); + six = QString::fromRawData(data + 12, 2), + seven = QString::fromRawData(data + 14, 2), + eight = QString::fromRawData(data + 16, 2); QString fourDigit = one + two + three + four; QString fiveDigit = one + two + separator + three + four + five; // Leading group can have single digit as long as there's a later separator: QString sixDigit = one + separator + two + three + separator + four + five + six; + QString fourFloat = one + fractional + two + three + four; + QString fiveFloat = one + two + fractional + three + four + five; + QString sevenFloat = one + two + three + four + fractional + five + six + seven; + QTest::newRow("Chakma-short") // {2,2,3} - << u"ccp"_s << QString(separator) - << zero << 1234 << fourDigit; + << u"ccp"_s << QString(separator) << zero + << 1234 << fourDigit << 1.234 << fourFloat << 3; QTest::newRow("Chakma") - << u"ccp"_s << QString(separator) - << zero << 12345 << fiveDigit; + << u"ccp"_s << QString(separator) << zero + << 12345 << fiveDigit << 12.345 << fiveFloat << 3; QTest::newRow("Chakma-long") << u"ccp"_s << QString(separator) << zero - << 123456 << sixDigit; + << 123456 << sixDigit << 1234.567 << sevenFloat << 3; + + // Floating-point forms for ccp-MN, whose group separator is the fractional-part separator: + // Leading "group" of four means too short to group, so rest is fractional part: + QTest::newRow("ccp-MN-short") + << u"ccp-MN"_s << QString(fractional) << zero + << 1234 << fourDigit << 1234.567 << sevenFloat << 3; + // Penultimate group of three implies final group must be fractional part: + QString groupFloat = one + two + fractional + three + four + five + + fractional + six + seven + eight; + QTest::newRow("ccp-MN") + << u"ccp-MN"_s << QString(fractional) << zero + << 12345 << fiveFloat << 12345.678 << groupFloat << 3; + + // Penultimate group of two implies rest must be grouping within the whole part: + QString eightDigit = one + fractional + two + three + fractional + four + five + + fractional + six + seven + eight; + QTest::newRow("ccp-MN-long") + << u"ccp-MN"_s << QString(fractional) << zero + << 12345678 << eightDigit << 12345678. << eightDigit << 0; } void tst_QLocale::systemGrouping() @@ -4715,8 +4797,11 @@ void tst_QLocale::systemGrouping() QFETCH(QString, name); QFETCH(QString, separator); QFETCH(QString, zeroDigit); - QFETCH(int, number); - QFETCH(QString, formattedString); + QFETCH(int, whole); + QFETCH(QString, formattedWhole); + QFETCH(double, real); + QFETCH(QString, formattedReal); + QFETCH(int, precision); { MySystemLocale sLocale(name); @@ -4724,11 +4809,16 @@ void tst_QLocale::systemGrouping() QCOMPARE(sys.groupSeparator(), separator); QCOMPARE(sys.zeroDigit(), zeroDigit); - QCOMPARE(sys.toString(number), formattedString); + QCOMPARE(sys.toString(whole), formattedWhole); bool ok; - int count = sys.toInt(formattedString, &ok); + int count = sys.toInt(formattedWhole, &ok); QVERIFY2(ok, "Integer didn't round-trip"); - QCOMPARE(count, number); + QCOMPARE(count, whole); + + QCOMPARE(sys.toString(real, 'f', precision), formattedReal); + double apparent = sys.toDouble(formattedReal, &ok); + QVERIFY2(ok, "Floating-precision didn't round-trip"); + QCOMPARE(apparent, real); } }