From 54c2d6ded779c3b9843fa18535b79ababa0d1a74 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Fri, 29 Nov 2024 17:01:15 +0100 Subject: [PATCH] Fix TZ backend to include CLDR-known zones by territory As previously noted, this backend uses the legacy zone.tab file, which only gives one territory for each entry, where zone1970.tab gives several. Until we switch to using the latter, we thus don't know all the territories in which an IANA ID is used. However, we do have such data from CLDR and deploy it in the default backend to filter the list of all available IDs. As a result, the TZ backend lacks some results of filtering by territory that the base-class would get for it, because the CLDR data associates more territories with some IDs. It implements the overload because its lookups in a hash are potentially more efficient than generating its full list in order to iterate it while intersecting with a CLDR-derived list of candidates. Leverage that hash also to do efficient intersecting with the CLDR-derived list, which lets us avoid duplicates almost for free, to make the subsequent union with the directly-derived entries from zone.tab efficient. This incidentally resolves a TODO about handling of QLocale::AnyTerritory. In the process, add more notes to the zone{1970,}.tab details. Restructure the base-class implementations of filtered available IDs to separate their generation of a CLDR-derived list of candidates from the intersection of that with the available IDs. For the territory overload, this lets the TZ backend share the CLDR-derived list; for symmetry, do the same to the offset-filter, too. (This also makes each function responsible for fewer things.) Similar treatment may also be needed for the ICU and chrono::tzdb backends, since the former also overloads the territory filter and both overload the offset filter. However, leave those for now (with comments added on the possibility) until we have evidence they actually do need this. Pick-to: 6.8 Task-number: QTBUG-130877 Task-number: QTBUG-64941 Change-Id: Ibb347df88dd14b55f8f580bb4c9e37e5c56a533a Reviewed-by: Mate Barany --- src/corelib/time/qtimezoneprivate.cpp | 18 ++++++++--- src/corelib/time/qtimezoneprivate_chrono.cpp | 3 ++ src/corelib/time/qtimezoneprivate_icu.cpp | 4 +++ src/corelib/time/qtimezoneprivate_p.h | 5 ++++ src/corelib/time/qtimezoneprivate_tz.cpp | 30 +++++++++++++++++-- .../corelib/time/qtimezone/tst_qtimezone.cpp | 5 ++++ 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/corelib/time/qtimezoneprivate.cpp b/src/corelib/time/qtimezoneprivate.cpp index a258b847d9a..7caabd609ff 100644 --- a/src/corelib/time/qtimezoneprivate.cpp +++ b/src/corelib/time/qtimezoneprivate.cpp @@ -622,7 +622,7 @@ static QList selectAvailable(QList &&desired, return result; } -QList QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const +QList QTimeZonePrivate::matchingTimeZoneIds(QLocale::Territory territory) const { // Default fall-back mode: use the CLDR data to find zones for this territory. QList regions; @@ -643,10 +643,15 @@ QList QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory terr } } } - return selectAvailable(std::move(regions), availableTimeZoneIds()); + return regions; } -QList QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) const +QList QTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const +{ + return selectAvailable(matchingTimeZoneIds(territory), availableTimeZoneIds()); +} + +QList QTimeZonePrivate::matchingTimeZoneIds(int offsetFromUtc) const { // Default fall-back mode: use the zoneTable to find offsets of know zones. QList offsets; @@ -661,7 +666,12 @@ QList QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) cons } } } - return selectAvailable(std::move(offsets), availableTimeZoneIds()); + return offsets; +} + +QList QTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) const +{ + return selectAvailable(matchingTimeZoneIds(offsetFromUtc), availableTimeZoneIds()); } #ifndef QT_NO_DATASTREAM diff --git a/src/corelib/time/qtimezoneprivate_chrono.cpp b/src/corelib/time/qtimezoneprivate_chrono.cpp index 2e0b0ffb9e5..80212339530 100644 --- a/src/corelib/time/qtimezoneprivate_chrono.cpp +++ b/src/corelib/time/qtimezoneprivate_chrono.cpp @@ -61,6 +61,9 @@ static const std::vector getAllZoneIds(std::optional offs // Each is sorted, so we can unite them; and the result shall be sorted. all.reserve(raw.size() + links.size()); std::set_union(raw.begin(), raw.end(), links.begin(), links.end(), std::back_inserter(all)); + // We could in principle merge in also such of matchingTimeZoneIds(offset) + // as are known zones - our CLDR data says those also have this offset - but + // hopefully that's redundant. return all; } diff --git a/src/corelib/time/qtimezoneprivate_icu.cpp b/src/corelib/time/qtimezoneprivate_icu.cpp index 30e6b17a7cb..ca456ee117e 100644 --- a/src/corelib/time/qtimezoneprivate_icu.cpp +++ b/src/corelib/time/qtimezoneprivate_icu.cpp @@ -426,6 +426,8 @@ QList QIcuTimeZonePrivate::availableTimeZoneIds(QLocale::Territory t if (U_SUCCESS(status)) result = uenumToIdList(uenum); uenum_close(uenum); + // We could merge in what matchingTimeZoneIds(territory) gives us, but + // hopefully that's redundant, as ICU packages CLDR. return result; } @@ -440,6 +442,8 @@ QList QIcuTimeZonePrivate::availableTimeZoneIds(int offsetFromUtc) c if (U_SUCCESS(status)) result = uenumToIdList(uenum); uenum_close(uenum); + // We could merge in what matchingTimeZoneIds(offsetFromUtc) gives us, but + // hopefully that's redundant, as ICU packages CLDR. return result; #else return QTimeZonePrivate::availableTimeZoneIds(offsetFromUtc); diff --git a/src/corelib/time/qtimezoneprivate_p.h b/src/corelib/time/qtimezoneprivate_p.h index 1e0c7f28e2e..d240beb2efa 100644 --- a/src/corelib/time/qtimezoneprivate_p.h +++ b/src/corelib/time/qtimezoneprivate_p.h @@ -162,6 +162,11 @@ public: } protected: + // Zones CLDR data says match a condition. + // Use to filter what the backend has available. + QList matchingTimeZoneIds(QLocale::Territory territory) const; + QList matchingTimeZoneIds(int utcOffset) const; + #if QT_CONFIG(timezone_locale) // Defined in qtimezonelocale.cpp QString localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc, diff --git a/src/corelib/time/qtimezoneprivate_tz.cpp b/src/corelib/time/qtimezoneprivate_tz.cpp index 4a238110e9d..ead40f923e7 100644 --- a/src/corelib/time/qtimezoneprivate_tz.cpp +++ b/src/corelib/time/qtimezoneprivate_tz.cpp @@ -41,6 +41,7 @@ using namespace Qt::StringLiterals; */ struct QTzTimeZone { + // TODO: for zone1970.tab we'll need a set of territories: QLocale::Territory territory = QLocale::AnyTerritory; QByteArray comment; }; @@ -80,7 +81,7 @@ static bool openZoneInfo(const QString &name, QFile *file) // Parse zone.tab table for territory information, read directories to ensure we // find all installed zones (many are omitted from zone.tab; even more from -// zone1970.tab). +// zone1970.tab; see also QTBUG-64941). static QTzTimeZoneHash loadTzTimeZones() { QFile tzif; @@ -1250,13 +1251,38 @@ QList QTzTimeZonePrivate::availableTimeZoneIds() const QList QTzTimeZonePrivate::availableTimeZoneIds(QLocale::Territory territory) const { - // TODO AnyTerritory QList result; for (auto it = tzZones->cbegin(), end = tzZones->cend(); it != end; ++it) { if (it.value().territory == territory) result << it.key(); } std::sort(result.begin(), result.end()); + + // Since zone.tab only knows about one territory per zone, and is somewhat + // incomplete, we may well miss some zones that CLDR associates with the + // territory. So merge with those from CLDR that we do support. + const auto unWantedZone = [territory](QByteArrayView id) { + // We only want to add zones if they are known and we don't already have them: + auto it = tzZones->constFind(id); + return it == tzZones->end() || it->territory == territory; + }; + QList cldrViews = matchingTimeZoneIds(territory); + std::sort(cldrViews.begin(), cldrViews.end()); + const auto uniqueEnd = std::unique(cldrViews.begin(), cldrViews.end()); + const auto prunedEnd = std::remove_if(cldrViews.begin(), uniqueEnd, unWantedZone); + const auto cldrSize = std::distance(cldrViews.begin(), prunedEnd); + if (cldrSize) { + QList cldrList; + cldrList.reserve(cldrSize); + for (auto it = cldrViews.begin(); it != prunedEnd; ++it) + cldrList.emplace_back(it->toByteArray()); + QList joined; + joined.reserve(result.size() + cldrSize); + std::set_union(result.begin(), result.end(), cldrList.begin(), cldrList.end(), + std::back_inserter(joined)); + result = joined; + } + return result; } diff --git a/tests/auto/corelib/time/qtimezone/tst_qtimezone.cpp b/tests/auto/corelib/time/qtimezone/tst_qtimezone.cpp index 18433a391ac..bcb8e20b5f9 100644 --- a/tests/auto/corelib/time/qtimezone/tst_qtimezone.cpp +++ b/tests/auto/corelib/time/qtimezone/tst_qtimezone.cpp @@ -983,6 +983,11 @@ void tst_QTimeZone::availableTimeZoneIds() QCOMPARE_LT(list001.size(), listAll.size()); QCOMPARE_LT(listUsa.size(), listAll.size()); QCOMPARE_LT(listGmt.size(), listAll.size()); + // And we do know CLDR data supplies some entries to each: + QCOMPARE_GT(listAll.size(), 0); + QCOMPARE_GT(list001.size(), 0); + QCOMPARE_GT(listUsa.size(), 0); + QCOMPARE_GT(listGmt.size(), 0); } }