Long live QStringTokenizer!
This class is designed as C++20-style generator / lazy sequence, and the new return value of QString{,View}::tokenize(). It thus is more similar to a hand-coded loop around indexOf() than QString::split(), which returns a container (the filling of which allocates memory). The template arguments of QStringTokenizer intricately depend on the arguments with which it is constructed, so QStringTokenizer cannot be used directly without C++17 CTAD. To work around this issue, add a factory function, qTokenize(). LATER: - ~Optimize QLatin1String needles (avoid repeated L1->UTF16 conversion)~ (out of scope for QStringTokenizer, should be solved in the respective indexOf()) - Keep per-instantiation state: * Boyer-Moore table [ChangeLog][QtCore][QStringTokenizer] New class. [ChangeLog][QtCore][qTokenize] New function. Change-Id: I7a7a02e9175cdd3887778f29f2f91933329be759 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
1b33ee95e5
commit
6a3c6f939f
357
src/corelib/text/qstringtokenizer.cpp
Normal file
357
src/corelib/text/qstringtokenizer.cpp
Normal file
@ -0,0 +1,357 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at https://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 3 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 3 requirements
|
||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 2.0 or (at your option) the GNU General
|
||||
** Public license version 3 or any later version approved by the KDE Free
|
||||
** Qt Foundation. The licenses are as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
||||
** included in the packaging of this file. Please review the following
|
||||
** information to ensure the GNU General Public License requirements will
|
||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qstringtokenizer.h"
|
||||
#include "qstringalgorithms.h"
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
/*!
|
||||
\class QStringTokenizer
|
||||
\inmodule QtCore
|
||||
\since 6.0
|
||||
\brief The QStringTokenizer class splits strings into tokens along given separators
|
||||
\reentrant
|
||||
\ingroup tools
|
||||
\ingroup string-processing
|
||||
|
||||
Splits a string into substrings wherever a given separator occurs,
|
||||
and returns a (lazy) list of those strings. If the separator does
|
||||
not match anywhere in the string, produces a single element
|
||||
containing this string. If the separator is empty,
|
||||
QStringTokenizer produces an empty string, followed by each of the
|
||||
string's characters, followed by another empty string. The two
|
||||
enumerations Qt::SplitBehavior and Qt::CaseSensitivity further
|
||||
control the output.
|
||||
|
||||
QStringTokenizer drives QStringView::tokenize(), but, at least with a
|
||||
recent compiler, you can use it directly, too:
|
||||
|
||||
\code
|
||||
for (auto token : QStringTokenizer{string, separator})
|
||||
use(token);
|
||||
\endcode
|
||||
|
||||
\note You should never, ever, name the template arguments of a
|
||||
QStringTokenizer explicitly. If you can use C++17 Class Template
|
||||
Argument Deduction (CTAD), you may write
|
||||
\c{QStringTokenizer{string, separator}} (without template
|
||||
arguments). If you can't use C++17 CTAD, you must use the
|
||||
QStringView::split() or QLatin1String::split() member functions
|
||||
and store the return value only in \c{auto} variables:
|
||||
|
||||
\code
|
||||
auto result = string.split(sep);
|
||||
\endcode
|
||||
|
||||
This is because the template arguments of QStringTokenizer have a
|
||||
very subtle dependency on the specific string and separator types
|
||||
with which they are constructed, and they don't usually
|
||||
correspond to the actual types passed.
|
||||
|
||||
\section1 Lazy Sequences
|
||||
|
||||
QStringTokenizer acts as a so-called lazy sequence, that is, each
|
||||
next element is only computed once you ask for it. Lazy sequences
|
||||
have the advantage that they only require O(1) memory. They have
|
||||
the disadvantage that, at least for QStringTokenizer, they only
|
||||
allow forward, not random-access, iteration.
|
||||
|
||||
The intended use-case is that you just plug it into a ranged for loop:
|
||||
|
||||
\code
|
||||
for (auto token : QStringTokenizer{string, separator})
|
||||
use(token);
|
||||
\endcode
|
||||
|
||||
or a C++20 ranged algorithm:
|
||||
|
||||
\code
|
||||
std::ranges::for_each(QStringTokenizer{string, separator},
|
||||
[] (auto token) { use(token); });
|
||||
\endcode
|
||||
|
||||
\section1 End Sentinel
|
||||
|
||||
The QStringTokenizer iterators cannot be used with classical STL
|
||||
algorithms, because those require iterator/iterator pairs, while
|
||||
QStringTokenizer uses sentinels, that is, it uses a different
|
||||
type, QStringTokenizer::sentinel, to mark the end of the
|
||||
range. This improves performance, because the sentinel is an empty
|
||||
type. Sentinels are supported from C++17 (for ranged for)
|
||||
and C++20 (for algorithms using the new ranges library).
|
||||
|
||||
\section1 Temporaries
|
||||
|
||||
QStringTokenizer is very carefully designed to avoid dangling
|
||||
references. If you construct a tokenizer from a temporary string
|
||||
(an rvalue), that argument is stored internally, so the referenced
|
||||
data isn't deleted before it is tokenized:
|
||||
|
||||
\code
|
||||
auto tok = QStringTokenizer{widget.text(), u','};
|
||||
// return value of `widget.text()` is destroyed, but content was moved into `tok`
|
||||
for (auto e : tok)
|
||||
use(e);
|
||||
\endcode
|
||||
|
||||
If you pass named objects (lvalues), then QStringTokenizer does
|
||||
not store a copy. You are responsible for keeping the named object's
|
||||
data around for longer than the tokenizer operates on it:
|
||||
|
||||
\code
|
||||
auto text = widget.text();
|
||||
auto tok = QStringTokenizer{text, u','};
|
||||
text.clear(); // destroy content of `text`
|
||||
for (auto e : tok) // ERROR: `tok` references deleted data!
|
||||
use(e);
|
||||
\endcode
|
||||
|
||||
\sa QStringView::split(), QLatin1String::split(), Qt::SplitBehavior, Qt::CaseSensitivity
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::value_type
|
||||
|
||||
Alias for \c{const QStringView} or \c{const QLatin1String},
|
||||
depending on the tokenizer's \c Haystack template argument.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::difference_type
|
||||
|
||||
Alias for qsizetype.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::size_type
|
||||
|
||||
Alias for \c{std::size_t}.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::reference
|
||||
|
||||
Alias for \c{value_type &}.
|
||||
|
||||
QStringTokenizer does not support mutable references, so this is
|
||||
the same as const_reference.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::const_reference
|
||||
|
||||
Alias for \c{value_type &}.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::pointer
|
||||
|
||||
Alias for \c{value_type *}.
|
||||
|
||||
QStringTokenizer does not support mutable iterators, so this is
|
||||
the same as const_pointer.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::const_pointer
|
||||
|
||||
Alias for \c{value_type *}.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::iterator
|
||||
|
||||
This typedef provides an STL-style const iterator for
|
||||
QStringTokenizer.
|
||||
|
||||
QStringTokenizer does not support mutable iterators, so this is
|
||||
the same as const_iterator.
|
||||
|
||||
\sa const_iterator
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::const_iterator
|
||||
|
||||
This typedef provides an STL-style const iterator for
|
||||
QStringTokenizer.
|
||||
|
||||
\sa iterator
|
||||
*/
|
||||
|
||||
/*!
|
||||
\typedef QStringTokenizer::sentinel
|
||||
|
||||
This typedef provides an STL-style sentinel for
|
||||
QStringTokenizer::iterator and QStringTokenizer::const_iterator.
|
||||
|
||||
\sa const_iterator
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer(Haystack haystack, String needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb)
|
||||
\fn QStringTokenizer(Haystack haystack, String needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs)
|
||||
|
||||
Constructs a string tokenizer that splits the string \a haystack
|
||||
into substrings wherever \a needle occurs, and allows iteration
|
||||
over those strings as they are found. If \a needle does not match
|
||||
anywhere in \a haystack, a single element containing \a haystack
|
||||
is produced.
|
||||
|
||||
\a cs specifies whether \a needle should be matched case
|
||||
sensitively or case insensitively.
|
||||
|
||||
If \a sb is Qt::SkipEmptyParts, empty entries don't
|
||||
appear in the result. By default, empty entries are included.
|
||||
|
||||
\sa QStringView::split(), QLatin1String::split(), Qt::CaseSensitivity, Qt::SplitBehavior
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::const_iterator QStringTokenizer::begin() const
|
||||
|
||||
Returns a const \l{STL-style iterators}{STL-style iterator}
|
||||
pointing to the first token in the list.
|
||||
|
||||
\sa end(), cbegin()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::const_iterator QStringTokenizer::cbegin() const
|
||||
|
||||
Same as begin().
|
||||
|
||||
\sa cend(), begin()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::sentinel QStringTokenizer::end() const
|
||||
|
||||
Returns a const \l{STL-style iterators}{STL-style sentinel}
|
||||
pointing to the imaginary token after the last token in the list.
|
||||
|
||||
\sa begin(), cend()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::sentinel QStringTokenizer::cend() const
|
||||
|
||||
Same as end().
|
||||
|
||||
\sa cbegin(), end()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::toContainer(Container &&c) const &
|
||||
|
||||
Convenience method to convert the lazy sequence into a
|
||||
(typically) random-access container.
|
||||
|
||||
This function is only available if \c Container has a \c value_type
|
||||
matching this tokenizer's value_type.
|
||||
|
||||
If you pass in a named container (an lvalue), then that container
|
||||
is filled, and a reference to it is returned.
|
||||
|
||||
If you pass in a temporary container (an rvalue, incl. the default
|
||||
argument), then that container is filled, and returned by value.
|
||||
|
||||
\code
|
||||
// assuming tok's value_type is QStringView, then...
|
||||
auto tok = QStringTokenizer{~~~};
|
||||
// ... rac1 is a QVector:
|
||||
auto rac1 = tok.toContainer();
|
||||
// ... rac2 is std::pmr::vector<QStringView>:
|
||||
auto rac2 = tok.toContainer<std::pmr::vector<QStringView>>();
|
||||
auto rac3 = QVarLengthArray<QStringView, 12>{};
|
||||
// appends the token sequence produced by tok to rac3
|
||||
// and returns a reference to rac3 (which we ignore here):
|
||||
tok.toContainer(rac3);
|
||||
\endcode
|
||||
|
||||
This gives you maximum flexibility in how you want the sequence to
|
||||
be stored.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringTokenizer::toContainer(Container &&c) const &&
|
||||
\overload
|
||||
|
||||
In addition to the constraints on the lvalue-this overload, this
|
||||
rvalue-this overload is only available when this QStringTokenizer
|
||||
does not store the haystack internally, as this could create a
|
||||
container full of dangling references:
|
||||
|
||||
\code
|
||||
auto tokens = QStringTokenizer{widget.text(), u','}.toContainer();
|
||||
// ERROR: cannot call toContainer() on rvalue
|
||||
// 'tokens' references the data of the copy of widget.text()
|
||||
// stored inside the QStringTokenizer, which has since been deleted
|
||||
\endcode
|
||||
|
||||
To fix, store the QStringTokenizer in a named variable:
|
||||
|
||||
\code
|
||||
auto tokenizer = QStringTokenizer{widget.text(), u','};
|
||||
auto tokens = tokenizer.toContainer();
|
||||
// OK: the copy of widget.text() stored in 'tokenizer' keeps the data
|
||||
// referenced by 'tokens' alive.
|
||||
\endcode
|
||||
|
||||
You can force this function into existence by passing a view instead:
|
||||
|
||||
\code
|
||||
func(QStringTokenizer{QStringView{widget.text()}, u','}.toContainer());
|
||||
// OK: compiler keeps widget.text() around until after func() has executed
|
||||
\endcode
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn qTokenize(Haystack &&haystack, Needle &&needle, Flags...flags)
|
||||
\relates QStringTokenizer
|
||||
\since 6.0
|
||||
|
||||
Factory function for QStringTokenizer. You can use this function
|
||||
if your compiler doesn't, yet, support C++17 Class Template
|
||||
Argument Deduction (CTAD), but we recommend direct use of
|
||||
QStringTokenizer with CTAD instead.
|
||||
*/
|
||||
|
||||
QT_END_NAMESPACE
|
438
src/corelib/text/qstringtokenizer.h
Normal file
438
src/corelib/text/qstringtokenizer.h
Normal file
@ -0,0 +1,438 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at https://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 3 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 3 requirements
|
||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 2.0 or (at your option) the GNU General
|
||||
** Public license version 3 or any later version approved by the KDE Free
|
||||
** Qt Foundation. The licenses are as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
||||
** included in the packaging of this file. Please review the following
|
||||
** information to ensure the GNU General Public License requirements will
|
||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
#ifndef QSTRINGTOKENIZER_H
|
||||
#define QSTRINGTOKENIZER_H
|
||||
|
||||
#include <QtCore/qnamespace.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
template <typename, typename> class QStringBuilder;
|
||||
template <typename> class QVector;
|
||||
|
||||
#if defined(Q_QDOC) || 1 || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603)
|
||||
# define Q_STRINGTOKENIZER_USE_SENTINEL
|
||||
#endif
|
||||
|
||||
class QStringTokenizerBaseBase
|
||||
{
|
||||
protected:
|
||||
~QStringTokenizerBaseBase() = default;
|
||||
constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
|
||||
: m_sb{sb}, m_cs{cs} {}
|
||||
|
||||
struct tokenizer_state {
|
||||
qsizetype start, end, extra;
|
||||
friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
|
||||
{ return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; }
|
||||
friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
|
||||
{ return !operator==(lhs, rhs); }
|
||||
};
|
||||
|
||||
Qt::SplitBehavior m_sb;
|
||||
Qt::CaseSensitivity m_cs;
|
||||
};
|
||||
|
||||
template <typename Haystack, typename Needle>
|
||||
class QStringTokenizerBase : protected QStringTokenizerBaseBase
|
||||
{
|
||||
struct next_result {
|
||||
Haystack value;
|
||||
bool ok;
|
||||
tokenizer_state state;
|
||||
};
|
||||
inline next_result next(tokenizer_state state) const noexcept;
|
||||
inline next_result toFront() const noexcept { return next({}); }
|
||||
public:
|
||||
constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
|
||||
: QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {}
|
||||
|
||||
class iterator;
|
||||
friend class iterator;
|
||||
#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
|
||||
class sentinel {
|
||||
friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; }
|
||||
friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; }
|
||||
};
|
||||
#else
|
||||
using sentinel = iterator;
|
||||
#endif
|
||||
class iterator {
|
||||
const QStringTokenizerBase *tokenizer;
|
||||
next_result current;
|
||||
friend class QStringTokenizerBase;
|
||||
explicit iterator(const QStringTokenizerBase &t) noexcept
|
||||
: tokenizer{&t}, current{t.toFront()} {}
|
||||
public:
|
||||
using difference_type = qsizetype;
|
||||
using value_type = Haystack;
|
||||
using pointer = const value_type*;
|
||||
using reference = const value_type&;
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
|
||||
iterator() noexcept = default;
|
||||
|
||||
// violates std::forward_iterator (returns a reference into the iterator)
|
||||
Q_REQUIRED_RESULT constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), ¤t.value; }
|
||||
Q_REQUIRED_RESULT constexpr const Haystack& operator*() const { return *operator->(); }
|
||||
|
||||
iterator& operator++() { advance(); return *this; }
|
||||
iterator operator++(int) { auto tmp = *this; advance(); return tmp; }
|
||||
|
||||
friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
|
||||
{ return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); }
|
||||
friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
|
||||
{ return !operator==(lhs, rhs); }
|
||||
#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
|
||||
friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
|
||||
{ return !lhs.current.ok; }
|
||||
friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
|
||||
{ return !operator==(lhs, sentinel{}); }
|
||||
friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
|
||||
{ return !rhs.current.ok; }
|
||||
friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
|
||||
{ return !operator==(sentinel{}, rhs); }
|
||||
#endif
|
||||
private:
|
||||
void advance() {
|
||||
Q_ASSERT(current.ok);
|
||||
current = tokenizer->next(current.state);
|
||||
}
|
||||
};
|
||||
using const_iterator = iterator;
|
||||
|
||||
using size_type = std::size_t;
|
||||
using difference_type = typename iterator::difference_type;
|
||||
using value_type = typename iterator::value_type;
|
||||
using pointer = typename iterator::pointer;
|
||||
using const_pointer = pointer;
|
||||
using reference = typename iterator::reference;
|
||||
using const_reference = reference;
|
||||
|
||||
Q_REQUIRED_RESULT iterator begin() const noexcept { return iterator{*this}; }
|
||||
Q_REQUIRED_RESULT iterator cbegin() const noexcept { return begin(); }
|
||||
template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
|
||||
Q_REQUIRED_RESULT constexpr sentinel end() const noexcept { return {}; }
|
||||
template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
|
||||
Q_REQUIRED_RESULT constexpr sentinel cend() const noexcept { return {}; }
|
||||
|
||||
private:
|
||||
Haystack m_haystack;
|
||||
Needle m_needle;
|
||||
};
|
||||
|
||||
QT_BEGIN_INCLUDE_NAMESPACE
|
||||
#include <QtCore/qstringview.h>
|
||||
QT_END_INCLUDE_NAMESPACE
|
||||
|
||||
namespace QtPrivate {
|
||||
namespace Tok {
|
||||
|
||||
constexpr qsizetype size(QChar) noexcept { return 1; }
|
||||
template <typename String>
|
||||
constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); }
|
||||
|
||||
template <typename String> struct ViewForImpl {};
|
||||
template <> struct ViewForImpl<QStringView> { using type = QStringView; };
|
||||
template <> struct ViewForImpl<QLatin1String> { using type = QLatin1String; };
|
||||
template <> struct ViewForImpl<QChar> { using type = QChar; };
|
||||
template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {};
|
||||
template <> struct ViewForImpl<QStringRef> : ViewForImpl<QStringView> {};
|
||||
template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {};
|
||||
template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {};
|
||||
template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {};
|
||||
template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {};
|
||||
template <typename LHS, typename RHS>
|
||||
struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {};
|
||||
template <typename Char, typename...Args>
|
||||
struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {};
|
||||
#ifdef __cpp_lib_string_view
|
||||
template <typename Char, typename...Args>
|
||||
struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {};
|
||||
#endif
|
||||
|
||||
// This metafunction maps a StringLike to a View (currently, QChar,
|
||||
// QStringView, QLatin1String). This is what QStringTokenizerBase
|
||||
// operates on. QStringTokenizer adds pinning to keep rvalues alive
|
||||
// for the duration of the algorithm.
|
||||
template <typename String>
|
||||
using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
|
||||
|
||||
// Pinning:
|
||||
// rvalues of owning string types need to be moved into QStringTokenizer
|
||||
// to keep them alive for the lifetime of the tokenizer. For lvalues, we
|
||||
// assume the user takes care of that.
|
||||
|
||||
// default: don't pin anything (characters are pinned implicitly)
|
||||
template <typename String>
|
||||
struct PinForImpl { using type = ViewFor<String>; };
|
||||
|
||||
// rvalue QString -> QString
|
||||
template <>
|
||||
struct PinForImpl<QString> { using type = QString; };
|
||||
|
||||
// rvalue std::basic_string -> basic_string
|
||||
template <typename Char, typename...Args>
|
||||
struct PinForImpl<std::basic_string<Char, Args...>>
|
||||
{ using type = std::basic_string<Char, Args...>; };
|
||||
|
||||
// rvalue QStringBuilder -> pin as the nested ConvertTo type
|
||||
template <typename LHS, typename RHS>
|
||||
struct PinForImpl<QStringBuilder<LHS, RHS>>
|
||||
: PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {};
|
||||
|
||||
template <typename StringLike>
|
||||
using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type;
|
||||
|
||||
template <typename T> struct is_owning_string_type : std::false_type {};
|
||||
template <> struct is_owning_string_type<QString> : std::true_type {};
|
||||
template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {};
|
||||
|
||||
// unpinned
|
||||
template <typename T, bool pinned = is_owning_string_type<T>::value>
|
||||
struct Pinning
|
||||
{
|
||||
// this is the storage for non-pinned types - no storage
|
||||
constexpr Pinning(const T&) noexcept {}
|
||||
// Since we don't store something, the view() method needs to be
|
||||
// given something it can return.
|
||||
constexpr T view(T t) const noexcept { return t; }
|
||||
};
|
||||
|
||||
// pinned
|
||||
template <typename T>
|
||||
struct Pinning<T, true>
|
||||
{
|
||||
T m_string;
|
||||
// specialisation for owning string types (QString, std::u16string):
|
||||
// stores the string:
|
||||
constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {}
|
||||
// ... and thus view() uses that instead of the argument passed in:
|
||||
constexpr QStringView view(const T&) const noexcept { return m_string; }
|
||||
};
|
||||
|
||||
// NeedlePinning and HaystackPinning are there to distinguish them as
|
||||
// base classes of QStringTokenizer. We use inheritance to reap the
|
||||
// empty base class optimization.
|
||||
template <typename T>
|
||||
struct NeedlePinning : Pinning<T>
|
||||
{
|
||||
using Pinning<T>::Pinning;
|
||||
template <typename Arg>
|
||||
constexpr auto needleView(Arg &&a) noexcept
|
||||
-> decltype(this->view(std::forward<Arg>(a)))
|
||||
{ return this->view(std::forward<Arg>(a)); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct HaystackPinning : Pinning<T>
|
||||
{
|
||||
using Pinning<T>::Pinning;
|
||||
template <typename Arg>
|
||||
constexpr auto haystackView(Arg &&a) noexcept
|
||||
-> decltype(this->view(std::forward<Arg>(a)))
|
||||
{ return this->view(std::forward<Arg>(a)); }
|
||||
};
|
||||
|
||||
// The Base of a QStringTokenizer is QStringTokenizerBase for the views
|
||||
// corresponding to the Haystack and Needle template arguments
|
||||
//
|
||||
// ie. QStringTokenizer<QString, QString>
|
||||
// : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
|
||||
template <typename Haystack, typename Needle>
|
||||
using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
|
||||
} // namespace Tok
|
||||
} // namespace QtPrivate
|
||||
|
||||
template <typename Haystack, typename Needle>
|
||||
class QStringTokenizer
|
||||
: private QtPrivate::Tok::HaystackPinning<Haystack>,
|
||||
private QtPrivate::Tok::NeedlePinning<Needle>,
|
||||
public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
|
||||
{
|
||||
using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
|
||||
using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
|
||||
using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
|
||||
template <typename Container, typename HPin>
|
||||
struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {};
|
||||
template <typename Container>
|
||||
using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
|
||||
template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
|
||||
using if_compatible_container = typename std::enable_if<
|
||||
std::is_same<
|
||||
typename Base::value_type,
|
||||
typename std::iterator_traits<Iterator>::value_type
|
||||
>::value,
|
||||
bool
|
||||
>::type;
|
||||
public:
|
||||
using value_type = typename Base::value_type;
|
||||
|
||||
constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
|
||||
Qt::CaseSensitivity cs,
|
||||
Qt::SplitBehavior sb = Qt::KeepEmptyParts)
|
||||
noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
|
||||
// here, we present the haystack to Pinning<>, for optional storing.
|
||||
// If it did store, haystack is moved-from and mustn't be touched
|
||||
// any longer, which is why view() for these Pinning<>s ignores the
|
||||
// argument.
|
||||
: HPin{std::forward<Haystack>(haystack)},
|
||||
NPin{std::forward<Needle>(needle)},
|
||||
// If Pinning<> didn't store, we pass the haystack (ditto needle)
|
||||
// to view() again, so it can be copied from there.
|
||||
Base{this->haystackView(haystack),
|
||||
this->needleView(needle), sb, cs}
|
||||
{}
|
||||
constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
|
||||
Qt::SplitBehavior sb = Qt::KeepEmptyParts,
|
||||
Qt::CaseSensitivity cs = Qt::CaseSensitive)
|
||||
noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
|
||||
: HPin{std::forward<Haystack>(haystack)},
|
||||
NPin{std::forward<Needle>(needle)},
|
||||
Base{this->haystackView(haystack),
|
||||
this->needleView(needle), sb, cs}
|
||||
{}
|
||||
|
||||
template <typename Container = QVector<value_type>,
|
||||
if_compatible_container<Container> = true>
|
||||
Container toContainer(Container &&c = {}) const &
|
||||
{
|
||||
for (auto e : *this)
|
||||
c.emplace_back(e);
|
||||
return c;
|
||||
}
|
||||
|
||||
template <typename Container = QVector<value_type>,
|
||||
if_compatible_container<Container> = true,
|
||||
if_haystack_not_pinned<Container> = true>
|
||||
Container toContainer(Container &&c = {}) const &&
|
||||
{
|
||||
for (auto e : *this)
|
||||
c.emplace_back(e);
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
namespace QtPrivate {
|
||||
namespace Tok {
|
||||
// This meta function just calculated the template arguments for the
|
||||
// QStringTokenizer (not -Base), based on the actual arguments passed
|
||||
// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
|
||||
// QString and std::basic_string and otherwise decays the arguments to
|
||||
// the respective view type.
|
||||
//
|
||||
// #define works around a C++ restriction: [temp.deduct.guide]/3 seems
|
||||
// to ask for the simple-template-id following the `->` of a deduction
|
||||
// guide to be identical to the class name for which we guide deduction.
|
||||
// In particular, Clang rejects a template alias there, while GCC accepts
|
||||
// it.
|
||||
#define Q_TOK_RESULT \
|
||||
QStringTokenizer< \
|
||||
QtPrivate::Tok::PinFor<Haystack>, \
|
||||
QtPrivate::Tok::PinFor<Needle> \
|
||||
> \
|
||||
/*end*/
|
||||
template <typename Haystack, typename Needle>
|
||||
using TokenizerResult = Q_TOK_RESULT;
|
||||
template <typename Haystack, typename Needle>
|
||||
using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cpp_deduction_guides
|
||||
// these tell the compiler how to determine the QStringTokenizer
|
||||
// template arguments based on the constructor arguments (CTAD):
|
||||
template <typename Haystack, typename Needle>
|
||||
QStringTokenizer(Haystack&&, Needle&&)
|
||||
-> Q_TOK_RESULT;
|
||||
template <typename Haystack, typename Needle>
|
||||
QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior)
|
||||
-> Q_TOK_RESULT;
|
||||
template <typename Haystack, typename Needle>
|
||||
QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity)
|
||||
-> Q_TOK_RESULT;
|
||||
template <typename Haystack, typename Needle>
|
||||
QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity)
|
||||
-> Q_TOK_RESULT;
|
||||
template <typename Haystack, typename Needle>
|
||||
QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior)
|
||||
-> Q_TOK_RESULT;
|
||||
#endif
|
||||
|
||||
#undef Q_TOK_RESULT
|
||||
|
||||
template <typename Haystack, typename Needle, typename...Flags>
|
||||
Q_REQUIRED_RESULT constexpr auto
|
||||
qTokenize(Haystack &&h, Needle &&n, Flags...flags)
|
||||
noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value)
|
||||
-> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
|
||||
std::forward<Needle>(n), flags...})
|
||||
{ return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
|
||||
std::forward<Needle>(n),
|
||||
flags...}; }
|
||||
|
||||
template <typename Haystack, typename Needle>
|
||||
auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
|
||||
{
|
||||
while (true) {
|
||||
if (state.end < 0) {
|
||||
// already at end:
|
||||
return {{}, false, state};
|
||||
}
|
||||
state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs);
|
||||
Haystack result;
|
||||
if (state.end >= 0) {
|
||||
// token separator found => return intermediate element:
|
||||
result = m_haystack.mid(state.start, state.end - state.start);
|
||||
const auto ns = QtPrivate::Tok::size(m_needle);
|
||||
state.start = state.end + ns;
|
||||
state.extra = (ns == 0 ? 1 : 0);
|
||||
} else {
|
||||
// token separator not found => return final element:
|
||||
result = m_haystack.mid(state.start);
|
||||
}
|
||||
if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty())
|
||||
continue;
|
||||
return {result, true, state};
|
||||
}
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif /* QSTRINGTOKENIZER_H */
|
@ -26,6 +26,7 @@ HEADERS += \
|
||||
text/qstringliteral.h \
|
||||
text/qstringmatcher.h \
|
||||
text/qstringview.h \
|
||||
text/qstringtokenizer.h \
|
||||
text/qtextboundaryfinder.h \
|
||||
text/qunicodetables_p.h \
|
||||
text/qunicodetools_p.h
|
||||
@ -44,6 +45,7 @@ SOURCES += \
|
||||
text/qstringconverter.cpp \
|
||||
text/qstringlist.cpp \
|
||||
text/qstringview.cpp \
|
||||
text/qstringtokenizer.cpp \
|
||||
text/qtextboundaryfinder.cpp \
|
||||
text/qunicodetools.cpp \
|
||||
text/qvsnprintf.cpp
|
||||
|
@ -19,5 +19,6 @@ add_subdirectory(qstringiterator)
|
||||
add_subdirectory(qstringlist)
|
||||
add_subdirectory(qstringmatcher)
|
||||
add_subdirectory(qstringref)
|
||||
add_subdirectory(qstringtokenizer)
|
||||
add_subdirectory(qstringview)
|
||||
add_subdirectory(qtextboundaryfinder)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
|
||||
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
|
||||
** Copyright (C) 2019 Mail.ru Group.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
#include <QString>
|
||||
#include <QStringView>
|
||||
#include <QStringTokenizer>
|
||||
#include <QChar>
|
||||
#include <QScopedArrayPointer>
|
||||
#include <QStringRef>
|
||||
@ -511,6 +512,116 @@ private Q_SLOTS:
|
||||
void split_QStringRef_char16_t_data() { split_data(false); }
|
||||
void split_QStringRef_char16_t() { split_impl<QStringRef, char16_t>(); }
|
||||
|
||||
private:
|
||||
void tok_data(bool rhsHasVariableLength = true);
|
||||
template <typename Haystack, typename Needle> void tok_impl() const;
|
||||
|
||||
private Q_SLOTS:
|
||||
// let Splittable = {QString, QStringRef, QStringView, QLatin1String, const char16_t*, std::u16string}
|
||||
// let Separators = Splittable ∪ {QChar, char16_t}
|
||||
// test Splittable × Separators:
|
||||
void tok_QString_QString_data() { tok_data(); }
|
||||
void tok_QString_QString() { tok_impl<QString, QString>(); }
|
||||
void tok_QString_QStringRef_data() { tok_data(); }
|
||||
void tok_QString_QStringRef() { tok_impl<QString, QStringRef>(); }
|
||||
void tok_QString_QStringView_data() { tok_data(); }
|
||||
void tok_QString_QStringView() { tok_impl<QString, QStringView>(); }
|
||||
void tok_QString_QLatin1String_data() { tok_data(); }
|
||||
void tok_QString_QLatin1String() { tok_impl<QString, QLatin1String>(); }
|
||||
void tok_QString_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_QString_const_char16_t_star() { tok_impl<QString, const char16_t*>(); }
|
||||
void tok_QString_stdu16string_data() { tok_data(); }
|
||||
void tok_QString_stdu16string() { tok_impl<QString, std::u16string>(); }
|
||||
void tok_QString_QChar_data() { tok_data(false); }
|
||||
void tok_QString_QChar() { tok_impl<QString, QChar>(); }
|
||||
void tok_QString_char16_t_data() { tok_data(false); }
|
||||
void tok_QString_char16_t() { tok_impl<QString, char16_t>(); }
|
||||
|
||||
void tok_QStringRef_QString_data() { tok_data(); }
|
||||
void tok_QStringRef_QString() { tok_impl<QStringRef, QString>(); }
|
||||
void tok_QStringRef_QStringRef_data() { tok_data(); }
|
||||
void tok_QStringRef_QStringRef() { tok_impl<QStringRef, QStringRef>(); }
|
||||
void tok_QStringRef_QStringView_data() { tok_data(); }
|
||||
void tok_QStringRef_QStringView() { tok_impl<QStringRef, QStringView>(); }
|
||||
void tok_QStringRef_QLatin1String_data() { tok_data(); }
|
||||
void tok_QStringRef_QLatin1String() { tok_impl<QStringRef, QLatin1String>(); }
|
||||
void tok_QStringRef_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_QStringRef_const_char16_t_star() { tok_impl<QStringRef, const char16_t*>(); }
|
||||
void tok_QStringRef_stdu16string_data() { tok_data(); }
|
||||
void tok_QStringRef_stdu16string() { tok_impl<QStringRef, std::u16string>(); }
|
||||
void tok_QStringRef_QChar_data() { tok_data(false); }
|
||||
void tok_QStringRef_QChar() { tok_impl<QStringRef, QChar>(); }
|
||||
void tok_QStringRef_char16_t_data() { tok_data(false); }
|
||||
void tok_QStringRef_char16_t() { tok_impl<QStringRef, char16_t>(); }
|
||||
|
||||
void tok_QStringView_QString_data() { tok_data(); }
|
||||
void tok_QStringView_QString() { tok_impl<QStringView, QString>(); }
|
||||
void tok_QStringView_QStringRef_data() { tok_data(); }
|
||||
void tok_QStringView_QStringRef() { tok_impl<QStringView, QStringRef>(); }
|
||||
void tok_QStringView_QStringView_data() { tok_data(); }
|
||||
void tok_QStringView_QStringView() { tok_impl<QStringView, QStringView>(); }
|
||||
void tok_QStringView_QLatin1String_data() { tok_data(); }
|
||||
void tok_QStringView_QLatin1String() { tok_impl<QStringView, QLatin1String>(); }
|
||||
void tok_QStringView_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_QStringView_const_char16_t_star() { tok_impl<QStringView, const char16_t*>(); }
|
||||
void tok_QStringView_stdu16string_data() { tok_data(); }
|
||||
void tok_QStringView_stdu16string() { tok_impl<QStringView, std::u16string>(); }
|
||||
void tok_QStringView_QChar_data() { tok_data(false); }
|
||||
void tok_QStringView_QChar() { tok_impl<QStringView, QChar>(); }
|
||||
void tok_QStringView_char16_t_data() { tok_data(false); }
|
||||
void tok_QStringView_char16_t() { tok_impl<QStringView, char16_t>(); }
|
||||
|
||||
void tok_QLatin1String_QString_data() { tok_data(); }
|
||||
void tok_QLatin1String_QString() { tok_impl<QLatin1String, QString>(); }
|
||||
void tok_QLatin1String_QStringRef_data() { tok_data(); }
|
||||
void tok_QLatin1String_QStringRef() { tok_impl<QLatin1String, QStringRef>(); }
|
||||
void tok_QLatin1String_QStringView_data() { tok_data(); }
|
||||
void tok_QLatin1String_QStringView() { tok_impl<QLatin1String, QStringView>(); }
|
||||
void tok_QLatin1String_QLatin1String_data() { tok_data(); }
|
||||
void tok_QLatin1String_QLatin1String() { tok_impl<QLatin1String, QLatin1String>(); }
|
||||
void tok_QLatin1String_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_QLatin1String_const_char16_t_star() { tok_impl<QLatin1String, const char16_t*>(); }
|
||||
void tok_QLatin1String_stdu16string_data() { tok_data(); }
|
||||
void tok_QLatin1String_stdu16string() { tok_impl<QLatin1String, std::u16string>(); }
|
||||
void tok_QLatin1String_QChar_data() { tok_data(false); }
|
||||
void tok_QLatin1String_QChar() { tok_impl<QLatin1String, QChar>(); }
|
||||
void tok_QLatin1String_char16_t_data() { tok_data(false); }
|
||||
void tok_QLatin1String_char16_t() { tok_impl<QLatin1String, char16_t>(); }
|
||||
|
||||
void tok_const_char16_t_star_QString_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_QString() { tok_impl<const char16_t*, QString>(); }
|
||||
void tok_const_char16_t_star_QStringRef_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_QStringRef() { tok_impl<const char16_t*, QStringRef>(); }
|
||||
void tok_const_char16_t_star_QStringView_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_QStringView() { tok_impl<const char16_t*, QStringView>(); }
|
||||
void tok_const_char16_t_star_QLatin1String_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_QLatin1String() { tok_impl<const char16_t*, QLatin1String>(); }
|
||||
void tok_const_char16_t_star_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_const_char16_t_star() { tok_impl<const char16_t*, const char16_t*>(); }
|
||||
void tok_const_char16_t_star_stdu16string_data() { tok_data(); }
|
||||
void tok_const_char16_t_star_stdu16string() { tok_impl<const char16_t*, std::u16string>(); }
|
||||
void tok_const_char16_t_star_QChar_data() { tok_data(false); }
|
||||
void tok_const_char16_t_star_QChar() { tok_impl<const char16_t*, QChar>(); }
|
||||
void tok_const_char16_t_star_char16_t_data() { tok_data(false); }
|
||||
void tok_const_char16_t_star_char16_t() { tok_impl<const char16_t*, char16_t>(); }
|
||||
|
||||
void tok_stdu16string_QString_data() { tok_data(); }
|
||||
void tok_stdu16string_QString() { tok_impl<std::u16string, QString>(); }
|
||||
void tok_stdu16string_QStringRef_data() { tok_data(); }
|
||||
void tok_stdu16string_QStringRef() { tok_impl<std::u16string, QStringRef>(); }
|
||||
void tok_stdu16string_QStringView_data() { tok_data(); }
|
||||
void tok_stdu16string_QStringView() { tok_impl<std::u16string, QStringView>(); }
|
||||
void tok_stdu16string_QLatin1String_data() { tok_data(); }
|
||||
void tok_stdu16string_QLatin1String() { tok_impl<std::u16string, QLatin1String>(); }
|
||||
void tok_stdu16string_const_char16_t_star_data() { tok_data(); }
|
||||
void tok_stdu16string_const_char16_t_star() { tok_impl<std::u16string, const char16_t*>(); }
|
||||
void tok_stdu16string_stdu16string_data() { tok_data(); }
|
||||
void tok_stdu16string_stdu16string() { tok_impl<std::u16string, std::u16string>(); }
|
||||
void tok_stdu16string_QChar_data() { tok_data(false); }
|
||||
void tok_stdu16string_QChar() { tok_impl<std::u16string, QChar>(); }
|
||||
void tok_stdu16string_char16_t_data() { tok_data(false); }
|
||||
void tok_stdu16string_char16_t() { tok_impl<std::u16string, char16_t>(); }
|
||||
|
||||
private:
|
||||
void mid_data();
|
||||
template <typename String> void mid_impl();
|
||||
@ -901,6 +1012,8 @@ template <> QStringView make(const QStringRef &sf, QLatin1String, const QBy
|
||||
template <> QLatin1String make(const QStringRef &, QLatin1String l1, const QByteArray &) { return l1; }
|
||||
template <> QByteArray make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8; }
|
||||
template <> const char * make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8.data(); }
|
||||
template <> const char16_t* make(const QStringRef &sf, QLatin1String, const QByteArray &) { return QStringView{sf}.utf16(); } // assumes `sf` doesn't represent a substring
|
||||
template <> std::u16string make(const QStringRef &sf, QLatin1String, const QByteArray &) { return sf.toString().toStdU16String(); }
|
||||
|
||||
template <typename> struct is_utf8_encoded : std::false_type {};
|
||||
template <> struct is_utf8_encoded<const char*> : std::true_type {};
|
||||
@ -1278,6 +1391,10 @@ static QStringList skipped(const QStringList &sl)
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T> T deepCopied(T s) { return s; }
|
||||
template <> QString deepCopied(QString s) { return detached(s); }
|
||||
template <> QByteArray deepCopied(QByteArray s) { return detached(s); }
|
||||
|
||||
template <typename Haystack, typename Needle>
|
||||
void tst_QStringApiSymmetry::split_impl() const
|
||||
{
|
||||
@ -1304,6 +1421,59 @@ void tst_QStringApiSymmetry::split_impl() const
|
||||
QCOMPARE(toQStringList(haystack.split(needle, Qt::SkipEmptyParts, Qt::CaseInsensitive)), skippedResultCIS);
|
||||
}
|
||||
|
||||
void tst_QStringApiSymmetry::tok_data(bool rhsHasVariableLength)
|
||||
{
|
||||
split_data(rhsHasVariableLength);
|
||||
}
|
||||
|
||||
template <typename Haystack, typename Needle>
|
||||
void tst_QStringApiSymmetry::tok_impl() const
|
||||
{
|
||||
QFETCH(const QStringRef, haystackU16);
|
||||
QFETCH(const QLatin1String, haystackL1);
|
||||
QFETCH(const QStringRef, needleU16);
|
||||
QFETCH(const QLatin1String, needleL1);
|
||||
QFETCH(const QStringList, resultCS);
|
||||
QFETCH(const QStringList, resultCIS);
|
||||
|
||||
const QStringList skippedResultCS = skipped(resultCS);
|
||||
const QStringList skippedResultCIS = skipped(resultCIS);
|
||||
|
||||
const auto haystackU8 = haystackU16.toUtf8();
|
||||
const auto needleU8 = needleU16.toUtf8();
|
||||
|
||||
const auto haystack = make<Haystack>(haystackU16, haystackL1, haystackU8);
|
||||
const auto needle = make<Needle>(needleU16, needleL1, needleU8);
|
||||
|
||||
QCOMPARE(toQStringList(qTokenize(haystack, needle)), resultCS);
|
||||
QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive)), resultCS);
|
||||
QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts)), resultCIS);
|
||||
QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive)), skippedResultCS);
|
||||
QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts)), skippedResultCIS);
|
||||
|
||||
{
|
||||
const auto tok = qTokenize(deepCopied(haystack), deepCopied(needle));
|
||||
// here, the temporaries returned from deepCopied() have already been destroyed,
|
||||
// yet `tok` should have kept a copy alive as needed:
|
||||
QCOMPARE(toQStringList(tok), resultCS);
|
||||
}
|
||||
|
||||
#ifdef __cpp_deduction_guides
|
||||
QCOMPARE(toQStringList(QStringTokenizer{haystack, needle}), resultCS);
|
||||
QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive}), resultCS);
|
||||
QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts}), resultCIS);
|
||||
QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive}), skippedResultCS);
|
||||
QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts}), skippedResultCIS);
|
||||
|
||||
{
|
||||
const auto tok = QStringTokenizer{deepCopied(haystack), deepCopied(needle)};
|
||||
// here, the temporaries returned from deepCopied() have already been destroyed,
|
||||
// yet `tok` should have kept a copy alive as needed:
|
||||
QCOMPARE(toQStringList(tok), resultCS);
|
||||
}
|
||||
#endif // __cpp_deduction_guides
|
||||
}
|
||||
|
||||
void tst_QStringApiSymmetry::mid_data()
|
||||
{
|
||||
QTest::addColumn<QStringRef>("unicode");
|
||||
|
1
tests/auto/corelib/text/qstringtokenizer/.gitignore
vendored
Normal file
1
tests/auto/corelib/text/qstringtokenizer/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
tst_qstringtokenizer
|
13
tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
Normal file
13
tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
Normal file
@ -0,0 +1,13 @@
|
||||
# Generated from qstringtokenizer.pro.
|
||||
|
||||
#####################################################################
|
||||
## tst_qstringtokenizer Test:
|
||||
#####################################################################
|
||||
|
||||
qt_add_test(tst_qstringtokenizer
|
||||
SOURCES
|
||||
tst_qstringtokenizer.cpp
|
||||
)
|
||||
|
||||
## Scopes:
|
||||
#####################################################################
|
@ -0,0 +1,7 @@
|
||||
CONFIG += testcase
|
||||
TARGET = tst_qstringtokenizer
|
||||
QT = core testlib
|
||||
contains(QT_CONFIG, c++14):CONFIG *= c++14
|
||||
contains(QT_CONFIG, c++1z):CONFIG *= c++1z
|
||||
contains(QT_CONFIG, c++2a):CONFIG *= c++2a
|
||||
SOURCES += tst_qstringtokenizer.cpp
|
@ -0,0 +1,151 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at https://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3 as published by the Free Software
|
||||
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
||||
** included in the packaging of this file. Please review the following
|
||||
** information to ensure the GNU General Public License requirements will
|
||||
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <QStringTokenizer>
|
||||
#include <QStringBuilder>
|
||||
|
||||
#include <QTest>
|
||||
|
||||
#include <string>
|
||||
|
||||
Q_DECLARE_METATYPE(Qt::SplitBehavior)
|
||||
|
||||
class tst_QStringTokenizer : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
private Q_SLOTS:
|
||||
void constExpr() const;
|
||||
void basics_data() const;
|
||||
void basics() const;
|
||||
void toContainer() const;
|
||||
};
|
||||
|
||||
// Returns the entries of \a sl in their original order, with every empty
// string left out. Used to derive the expected result for
// Qt::SkipEmptyParts from the Qt::KeepEmptyParts expectation.
static QStringList skipped(const QStringList &sl)
{
    QStringList nonEmpty;
    nonEmpty.reserve(sl.size()); // upper bound: nothing gets filtered out
    for (const QString &entry : sl) {
        if (entry.isEmpty())
            continue;
        nonEmpty.push_back(entry);
    }
    return nonEmpty;
}
|
||||
|
||||
QString toQString(QStringView str)
|
||||
{
|
||||
return str.toString();
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
QStringList toQStringList(const Container &c)
|
||||
{
|
||||
QStringList r;
|
||||
for (auto &&e : c)
|
||||
r.push_back(toQString(e));
|
||||
return r;
|
||||
}
|
||||
|
||||
void tst_QStringTokenizer::constExpr() const
|
||||
{
|
||||
// compile-time checks
|
||||
{
|
||||
constexpr auto tok = qTokenize(u"a,b,c", u",");
|
||||
Q_UNUSED(tok);
|
||||
}
|
||||
{
|
||||
constexpr auto tok = qTokenize(u"a,b,c", u',');
|
||||
Q_UNUSED(tok);
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QStringTokenizer::basics_data() const
|
||||
{
|
||||
QTest::addColumn<Qt::SplitBehavior>("sb");
|
||||
QTest::addColumn<Qt::CaseSensitivity>("cs");
|
||||
|
||||
#define ROW(sb, cs) \
|
||||
do { QTest::addRow("%s/%s", #sb, #cs) << Qt::SplitBehavior{Qt::sb} << Qt::cs; } while (0)
|
||||
|
||||
ROW(KeepEmptyParts, CaseSensitive);
|
||||
ROW(KeepEmptyParts, CaseInsensitive);
|
||||
ROW(SkipEmptyParts, CaseSensitive);
|
||||
ROW(SkipEmptyParts, CaseInsensitive);
|
||||
|
||||
#undef ROW
|
||||
}
|
||||
|
||||
void tst_QStringTokenizer::basics() const
|
||||
{
|
||||
QFETCH(const Qt::SplitBehavior, sb);
|
||||
QFETCH(const Qt::CaseSensitivity, cs);
|
||||
|
||||
auto expected = QStringList{"", "a", "b", "c", "d", "e", ""};
|
||||
if (sb & Qt::SkipEmptyParts)
|
||||
expected = skipped(expected);
|
||||
QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', sb, cs)), expected);
|
||||
QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', cs, sb)), expected);
|
||||
|
||||
{
|
||||
auto tok = qTokenize(expected.join(u'x'), u"X" % QString(), Qt::CaseInsensitive);
|
||||
// the temporary QStrings returned from join() and the QStringBuilder expression
|
||||
// are now destroyed, but 'tok' should keep both alive
|
||||
QCOMPARE(toQStringList(tok), expected);
|
||||
}
|
||||
|
||||
using namespace std::string_literals;
|
||||
|
||||
{
|
||||
auto tok = qTokenize(expected.join(u'x'), u"X"s, Qt::CaseInsensitive);
|
||||
QCOMPARE(toQStringList(tok), expected);
|
||||
}
|
||||
|
||||
{
|
||||
auto tok = qTokenize(expected.join(u'x'), QLatin1Char('x'), cs, sb);
|
||||
QCOMPARE(toQStringList(tok), expected);
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QStringTokenizer::toContainer() const
|
||||
{
|
||||
// QStringView value_type:
|
||||
{
|
||||
auto tok = qTokenize(u"a,b,c", u',');
|
||||
auto v = tok.toContainer();
|
||||
QVERIFY((std::is_same_v<decltype(v), QVector<QStringView>>));
|
||||
}
|
||||
// QLatin1String value_type
|
||||
{
|
||||
auto tok = qTokenize(QLatin1String{"a,b,c"}, u',');
|
||||
auto v = tok.toContainer();
|
||||
QVERIFY((std::is_same_v<decltype(v), QVector<QLatin1String>>));
|
||||
}
|
||||
}
|
||||
|
||||
QTEST_APPLESS_MAIN(tst_QStringTokenizer)
|
||||
#include "tst_qstringtokenizer.moc"
|
@ -20,5 +20,6 @@ SUBDIRS = \
|
||||
qstringlist \
|
||||
qstringmatcher \
|
||||
qstringref \
|
||||
qstringtokenizer \
|
||||
qstringview \
|
||||
qtextboundaryfinder
|
||||
|
Loading…
x
Reference in New Issue
Block a user