Fix TZ backend to include CLDR-known zones by territory

As previously noted, this backend uses the legacy zone.tab file, which
only gives one territory for each entry, where zone1970.tab gives
several. Until we switch to using the latter, we thus don't know all
the territories in which an IANA ID is used. However, we do have such
data from CLDR and deploy it in the default backend to filter the list
of all available IDs. As a result, the TZ backend lacks some results
of filtering by territory that the base-class would get for it,
because the CLDR data associates more territories with some IDs.

It implements the overload because its lookups in a hash are
potentially more efficient than generating its full list in order to
iterate it while intersecting with a CLDR-derived list of candidates.
Leverage that hash also to do efficient intersecting with the
CLDR-derived list, which lets us avoid duplicates almost for free, to
make the subsequent union with the directly-derived entries from
zone.tab efficient. This incidentally resolves a TODO about handling
of QLocale::AnyTerritory. In the process, add more notes to the
zone{1970,}.tab details.

Restructure the base-class implementations of filtered available IDs
to separate their generation of a CLDR-derived list of candidates from
the intersection of that with the available IDs. For the territory
overload, this lets the TZ backend share the CLDR-derived list; for
symmetry, do the same to the offset-filter, too. (This also makes each
function responsible for fewer things.) Similar treatment may also be
needed for the ICU and chrono::tzdb backends, since the former also
overloads the territory filter and both overload the offset filter.
However, leave those for now (with comments added on the possibility)
until we have evidence they actually do need this.

Pick-to: 6.8
Task-number: QTBUG-130877
Task-number: QTBUG-64941
Change-Id: Ibb347df88dd14b55f8f580bb4c9e37e5c56a533a
Reviewed-by: Mate Barany <mate.barany@qt.io>
This commit is contained in:
Edward Welbourne 2024-11-29 17:01:15 +01:00
parent d9ad2251d9
commit 54c2d6ded7
6 changed files with 59 additions and 6 deletions

View File

@ -622,7 +622,7 @@ static QList<QByteArray> selectAvailable(QList<QByteArrayView> &&desired,
return result;
}
QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const
QList<QByteArrayView> QTimeZonePrivate::matchingTimeZoneIds(QLocale::Territory territory) const
{
// Default fall-back mode: use the CLDR data to find zones for this territory.
QList<QByteArrayView> regions;
@ -643,10 +643,15 @@ QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory terr
}
}
}
return selectAvailable(std::move(regions), availableTimeZoneIds());
return regions;
}
QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) const
QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const
{
return selectAvailable(matchingTimeZoneIds(territory), availableTimeZoneIds());
}
QList<QByteArrayView> QTimeZonePrivate::matchingTimeZoneIds(int offsetFromUtc) const
{
// Default fall-back mode: use the zoneTable to find offsets of know zones.
QList<QByteArrayView> offsets;
@ -661,7 +666,12 @@ QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) cons
}
}
}
return selectAvailable(std::move(offsets), availableTimeZoneIds());
return offsets;
}
QList<QByteArray> QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) const
{
return selectAvailable(matchingTimeZoneIds(offsetFromUtc), availableTimeZoneIds());
}
#ifndef QT_NO_DATASTREAM

View File

@ -61,6 +61,9 @@ static const std::vector<std::string_view> getAllZoneIds(std::optional<int> offs
// Each is sorted, so we can unite them; and the result shall be sorted.
all.reserve(raw.size() + links.size());
std::set_union(raw.begin(), raw.end(), links.begin(), links.end(), std::back_inserter(all));
// We could in principle merge in also such of matchingTimeZoneIds(offset)
// as are known zones - our CLDR data says those also have this offset - but
// hopefully that's redundant.
return all;
}

View File

@ -426,6 +426,8 @@ QList<QByteArray> QIcuTimeZonePrivate::availableTimeZoneIds(QLocale::Territory t
if (U_SUCCESS(status))
result = uenumToIdList(uenum);
uenum_close(uenum);
// We could merge in what matchingTimeZoneIds(territory) gives us, but
// hopefully that's redundant, as ICU packages CLDR.
return result;
}
@ -440,6 +442,8 @@ QList<QByteArray> QIcuTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) c
if (U_SUCCESS(status))
result = uenumToIdList(uenum);
uenum_close(uenum);
// We could merge in what matchingTimeZoneIds(offsetFromUtc) gives us, but
// hopefully that's redundant, as ICU packages CLDR.
return result;
#else
return QTimeZonePrivate::availableTimeZoneIds(offsetFromUtc);

View File

@ -162,6 +162,11 @@ public:
}
protected:
// Zones CLDR data says match a condition.
// Use to filter what the backend has available.
QList<QByteArrayView> matchingTimeZoneIds(QLocale::Territory territory) const;
QList<QByteArrayView> matchingTimeZoneIds(int utcOffset) const;
#if QT_CONFIG(timezone_locale)
// Defined in qtimezonelocale.cpp
QString localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc,

View File

@ -41,6 +41,7 @@ using namespace Qt::StringLiterals;
*/
struct QTzTimeZone {
// TODO: for zone1970.tab we'll need a set of territories:
QLocale::Territory territory = QLocale::AnyTerritory;
QByteArray comment;
};
@ -80,7 +81,7 @@ static bool openZoneInfo(const QString &name, QFile *file)
// Parse zone.tab table for territory information, read directories to ensure we
// find all installed zones (many are omitted from zone.tab; even more from
// zone1970.tab).
// zone1970.tab; see also QTBUG-64941).
static QTzTimeZoneHash loadTzTimeZones()
{
QFile tzif;
@ -1250,13 +1251,38 @@ QList<QByteArray> QTzTimeZonePrivate::availableTimeZoneIds() const
QList<QByteArray> QTzTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const
{
// TODO AnyTerritory
QList<QByteArray> result;
for (auto it = tzZones->cbegin(), end = tzZones->cend(); it != end; ++it) {
if (it.value().territory == territory)
result << it.key();
}
std::sort(result.begin(), result.end());
// Since zone.tab only knows about one territory per zone, and is somewhat
// incomplete, we may well miss some zones that CLDR associates with the
// territory. So merge with those from CLDR that we do support.
const auto unWantedZone = [territory](QByteArrayView id) {
// We only want to add zones if they are known and we don't already have them:
auto it = tzZones->constFind(id);
return it == tzZones->end() || it->territory == territory;
};
QList<QByteArrayView> cldrViews = matchingTimeZoneIds(territory);
std::sort(cldrViews.begin(), cldrViews.end());
const auto uniqueEnd = std::unique(cldrViews.begin(), cldrViews.end());
const auto prunedEnd = std::remove_if(cldrViews.begin(), uniqueEnd, unWantedZone);
const auto cldrSize = std::distance(cldrViews.begin(), prunedEnd);
if (cldrSize) {
QList<QByteArray> cldrList;
cldrList.reserve(cldrSize);
for (auto it = cldrViews.begin(); it != prunedEnd; ++it)
cldrList.emplace_back(it->toByteArray());
QList<QByteArray> joined;
joined.reserve(result.size() + cldrSize);
std::set_union(result.begin(), result.end(), cldrList.begin(), cldrList.end(),
std::back_inserter(joined));
result = joined;
}
return result;
}

View File

@ -983,6 +983,11 @@ void tst_QTimeZone::availableTimeZoneIds()
QCOMPARE_LT(list001.size(), listAll.size());
QCOMPARE_LT(listUsa.size(), listAll.size());
QCOMPARE_LT(listGmt.size(), listAll.size());
// And we do know CLDR data supplies some entries to each:
QCOMPARE_GT(listAll.size(), 0);
QCOMPARE_GT(list001.size(), 0);
QCOMPARE_GT(listUsa.size(), 0);
QCOMPARE_GT(listGmt.size(), 0);
}
}