Replace several *_idx variables with an enum and a bool

Thanks to Mate Barany for pointing out that the *_idx variables are
now all reduced to serving (via comparison with -1) as booleans.
Replace them with a "where in the parse are we?" enum variable and a
"do we need some digits?" bool.

Restructure to make that work. This requires recognizing (the letters
that make up) Inf and NaN, so as to know when not to object to a lack
of digits. Since NumericTokenizer already has a matcher for those,
expose it through a public method and use that method to simplify the
tokenizer's own uses of the matcher. Add some test-cases suggested by
this; they already pass in the parent commit, since qt_asciiToDouble()
could see the result was malformed in any case, even where bad
digit-group sizes did not catch it first (as they did for "inf,000",
"1,inf", "NaN,000" and "1,NaN").

Pick-to: 6.8 6.5
Task-number: QTBUG-134913
Change-Id: Ie1730530a4e34a76ba1836a25212e4ec27dad15f
Reviewed-by: Mate Barany <mate.barany@qt.io>
(cherry picked from commit d1879c3a399091693e690f970a113510e705d4d3)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
Edward Welbourne 2025-03-24 18:02:20 +01:00 committed by Qt Cherry-pick Bot
parent 8f955142ed
commit d94d5bcd61
2 changed files with 59 additions and 36 deletions

View File

@ -4334,6 +4334,7 @@ public:
bool done() const { return !(m_index < m_text.size()); }
qsizetype index() const { return m_index; }
inline int asBmpDigit(char16_t digit) const;
inline bool isInfNanChar(char ch) const { return matchInfNaN.matches(ch); }
char nextToken();
};
@ -4373,10 +4374,8 @@ char NumericTokenizer::nextToken()
if (Q_LIKELY(isAsciiDigit(ascii) || ('+' <= ascii && ascii <= lastMark)
// No caller presently (6.5) passes DoubleStandardMode,
// so !IntegerMode implies scientific, for now.
|| (m_mode != QLocaleData::IntegerMode
&& matchInfNaN.matches(ascii))
|| (m_mode == QLocaleData::DoubleScientificMode
&& ascii == 'e'))) {
|| (m_mode != QLocaleData::IntegerMode && isInfNanChar(ascii))
|| (m_mode == QLocaleData::DoubleScientificMode && ascii == 'e'))) {
return ascii;
}
}
@ -4387,7 +4386,7 @@ char NumericTokenizer::nextToken()
char ascii = asciiLower(ch.toLatin1());
if (isAsciiDigit(ascii) || ascii == '-' || ascii == '+'
// Also its Inf and NaN letters:
|| (m_mode != QLocaleData::IntegerMode && matchInfNaN.matches(ascii))) {
|| (m_mode != QLocaleData::IntegerMode && isInfNanChar(ascii))) {
++m_index;
return ascii;
}
@ -4497,10 +4496,17 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
return false;
NumericTokenizer tokens(s, numericData(mode), mode);
// Reflects order constraints on possible parts of a number:
enum { Whole, Grouped, Fraction, Exponent, Name } stage = Whole;
// Grouped is just Whole with some digit-grouping separators in it.
// Name is Inf or NaN; excludes all others (so none can be after it).
// Fractional part *or* whole-number part can be empty, but not both, unless
// we have Name. Exponent must have some digits in it.
bool wantDigits = true;
// Digit-grouping details (all modes):
qsizetype digitsInGroup = 0;
qsizetype last_separator_idx = -1;
qsizetype start_of_digits_idx = -1;
const QLocaleData::GroupSizes grouping = groupSizes();
const auto badLeastGroup = [&]() {
// In principle we could object to a complete absence of grouping, when
@ -4508,7 +4514,7 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
// locale itself would omit them. However, when merely not rejecting
// grouping separators, we have historically accepted ungrouped digits,
// so objecting now would break existing code.
if (last_separator_idx != -1) {
if (stage == Grouped) {
Q_ASSERT(!number_options.testFlag(QLocale::RejectGroupSeparator));
// Were there enough digits since the last group separator?
if (digitsInGroup != grouping.least)
@ -4517,75 +4523,79 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
return false;
};
// Floating-point details (non-integer modes):
qsizetype decpt_idx = -1;
qsizetype exponent_idx = -1;
char last = '\0';
while (!tokens.done()) {
qsizetype idx = tokens.index(); // before nextToken() advances
char out = tokens.nextToken();
if (out == 0)
return false;
Q_ASSERT(tokens.index() > idx); // it always *should* advance (except on zero return)
// Note that out can only be '.', 'e' or an inf/NaN character if the
// mode allows it (else nextToken() would return 0 instead), so we don't
// need to check mode.
if (out == '.') {
// Fail if more than one decimal point or point after e
if (decpt_idx != -1 || exponent_idx != -1)
if (stage > Grouped) // Too late to start a fractional part.
return false;
decpt_idx = idx;
// That's the end of the integral part - check size of last group:
if (badLeastGroup())
return false;
last_separator_idx = -1; // Process no separators after this
stage = Fraction;
} else if (out == 'e') {
exponent_idx = idx;
if (decpt_idx == -1) {
if (stage == Name)
return false;
if (stage < Fraction) {
// The 'e' ends the whole-number part, so check its last group:
if (badLeastGroup())
return false;
last_separator_idx = -1; // Process no separators after this
} else if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot)) {
// In a fractional part, a 0 just before the exponent is trailing:
if (last == '0')
return false;
}
stage = Exponent;
wantDigits = true; // We need some in the exponent
} else if (out == ',') {
if (number_options.testFlag(QLocale::RejectGroupSeparator))
return false;
// Don't allow group chars after the decimal point or exponent
if (decpt_idx != -1 || exponent_idx != -1)
return false;
if (last_separator_idx == -1) {
switch (stage) {
case Whole:
// Check size of most significant group
if (start_of_digits_idx == -1 || grouping.first > digitsInGroup
if (grouping.first > digitsInGroup
|| digitsInGroup >= grouping.least + grouping.first) {
return false;
}
} else {
stage = Grouped;
break;
case Grouped:
// Check size of group between two separators:
if (digitsInGroup != grouping.higher)
return false;
break;
// Only allow group chars within the whole-number part:
case Fraction:
case Exponent:
case Name:
return false;
}
last_separator_idx = idx;
digitsInGroup = 0;
} else if (isAsciiDigit(out)) {
if (stage == Name)
return false;
if (out == '0' && number_options.testFlag(QLocale::RejectLeadingZeroInExponent)
&& exponent_idx != -1 && !tokens.done() && !isAsciiDigit(last)) {
&& stage > Fraction && !tokens.done() && !isAsciiDigit(last)) {
// After the exponent there can only be '+', '-' or digits. If
// we find a '0' directly after some non-digit, then that is a
// leading zero, acceptable only if it is the whole exponent.
return false;
}
if (start_of_digits_idx == -1)
start_of_digits_idx = idx;
wantDigits = false;
++digitsInGroup;
} else if (stage == Whole && tokens.isInfNanChar(out)) {
if (!wantDigits) // Mixed digits with Inf/NaN
return false;
wantDigits = false;
stage = Name;
}
// else: nothing special to do.
@ -4593,15 +4603,16 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
if (out != ',') // Leave group separators out of the result.
result->append(out);
}
if (wantDigits)
return false;
if (!number_options.testFlag(QLocale::RejectGroupSeparator)) {
// If this is the end of the whole-part, check least significant group:
if (decpt_idx == -1 && exponent_idx == -1 && badLeastGroup())
if (stage < Fraction && badLeastGroup())
return false;
}
if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot)
&& decpt_idx != -1 && exponent_idx == -1) {
if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot) && stage == Fraction) {
// In the fractional part, a final zero is trailing:
if (last == '0')
return false;

View File

@ -1026,6 +1026,18 @@ void tst_QLocale::stringToDouble_data()
if (std::numeric_limits<double>::has_quiet_NaN)
QTest::newRow("C qnan") << QString("C") << QString("NaN") << true << std::numeric_limits<double>::quiet_NaN();
// Malformed
QTest::newRow("infe10") << QString("C") << QString("infe10") << false << 0.;
QTest::newRow("inf.10") << QString("C") << QString("inf.10") << false << 0.;
QTest::newRow("i1n0f") << QString("C") << QString("i1n0f") << false << 0.;
QTest::newRow("inf,000") << QString("en_US") << QString("inf,000") << false << 0.;
QTest::newRow("1,inf") << QString("en_US") << QString("1,inf") << false << 0.;
QTest::newRow("NaNe10") << QString("C") << QString("NaNe10") << false << 0.;
QTest::newRow("NaN.10") << QString("C") << QString("NaN.10") << false << 0.;
QTest::newRow("N1a0N") << QString("C") << QString("N1a0N") << false << 0.;
QTest::newRow("NaN,000") << QString("en_US") << QString("NaN,000") << false << 0.;
QTest::newRow("1,NaN") << QString("en_US") << QString("1,NaN") << false << 0.;
// In range (but outside float's range):
QTest::newRow("C big") << QString("C") << QString("3.5e38") << true << 3.5e38;
QTest::newRow("C -big") << QString("C") << QString("-3.5e38") << true << -3.5e38;