Optimize QMimeDatabase::mimeTypeForFile(f, MatchDefault)

Open the file only if matching on content is needed.

Use QFileInfo::filePath() instead of QFileInfo::absoluteFilePath() in
QMimeDatabase::mimeTypeForFile(). filePath() does much less work, and so
is faster. Thiago Macieira helpfully explained in a review comment why
the absolute path is not useful for correctness here: "Nothing needs
absolute paths within the same application that would resolve the
relative path to absolute. You only need an absolute path if you're
communicating with another application that may be in a different
directory."

QMimeDatabase::mimeTypeForFile() checks fileInfo.isDir(), so the
fileName.endsWith(QLatin1Char('/')) check in
QMimeDatabasePrivate::mimeTypeForFileNameAndData() was redundant when
called from this function. The other two callers of that function now
check this condition before opening IO devices. This improves
performance of the two QMimeDatabase::mimeTypeForFileNameAndData()
overloads in the corner case.

Refactor and optimize QMimeDatabasePrivate::findByFileName() and its
usages. Previously each caller constructed a QFileInfo object and passed
QFileInfo::fileName() into this function. Now the callers simply pass an
absolute or relative path to a file into this function, which then uses
QFileSystemEntry::fileName() to exclude the path. Constructing QFileInfo
is relatively expensive, so this change slightly improves performance.

Optimize QMimeDatabasePrivate::loadProviders() by calling static
QFileInfo::exists() instead of constructing a QFileInfo object and
calling the non-static QFileInfo::exists() overload. Note that the
QFileInfo object was always created, even if QFileInfo::exists() under
an `if` and an `#if` was never called.

The following table contains the average results of the added benchmark
tst_QMimeDatabase::benchMimeTypeForFile() on my GNU/Linux system before
and at this commit. The numbers denote milliseconds per iteration.

        data row tag                        before  at
MatchDefault:
        archive                             0.029   0.016
        OpenDocument Text                   0.029   0.015
        existent archive with extension     0.039   0.025
        existent C with extension           0.033   0.020
        existent text file with extension   0.033   0.020
        existent C w/o extension            0.076   0.074
        existent patch w/o extension        0.11    0.105
        existent archive w/o extension      0.069   0.066
MatchExtension:
        archive                             0.012   0.0115
        OpenDocument Text                   0.0115  0.011
        existent archive with extension     0.017   0.016
        existent C with extension           0.011   0.011
        existent text file with extension   0.011   0.011
        existent C w/o extension            0.016   0.0155
        existent patch w/o extension        0.013   0.012
        existent archive w/o extension      0.013   0.012
MatchContent:
        archive                             0.019   0.012
        OpenDocument Text                   0.019   0.012
        existent archive with extension     0.053   0.051
        existent C with extension           0.056   0.0545
        existent text file with extension   0.058   0.056
        existent C w/o extension            0.0605  0.059
        existent patch w/o extension        0.10    0.099
        existent archive w/o extension      0.057   0.054

Change-Id: Idb541656e073a2c4822ace3f4da412f29f2351f8
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: David Faure <david.faure@kdab.com>
This commit is contained in:
Igor Kushnir 2021-11-13 14:21:40 +02:00
parent 6d6fb7846c
commit d797e3c88e
8 changed files with 148 additions and 50 deletions

View File

@ -46,6 +46,8 @@
#include "qmimeprovider_p.h" #include "qmimeprovider_p.h"
#include "qmimetype_p.h" #include "qmimetype_p.h"
#include <private/qfilesystementry_p.h>
#include <QtCore/QFile> #include <QtCore/QFile>
#include <QtCore/QFileInfo> #include <QtCore/QFileInfo>
#include <QtCore/QStandardPaths> #include <QtCore/QStandardPaths>
@ -111,7 +113,6 @@ void QMimeDatabasePrivate::loadProviders()
for (const QString &mimeDir : mimeDirs) { for (const QString &mimeDir : mimeDirs) {
const QString cacheFile = mimeDir + QStringLiteral("/mime.cache"); const QString cacheFile = mimeDir + QStringLiteral("/mime.cache");
QFileInfo fileInfo(cacheFile);
// Check if we already have a provider for this dir // Check if we already have a provider for this dir
const auto predicate = [mimeDir](const std::unique_ptr<QMimeProviderBase> &prov) const auto predicate = [mimeDir](const std::unique_ptr<QMimeProviderBase> &prov)
{ {
@ -121,7 +122,7 @@ void QMimeDatabasePrivate::loadProviders()
if (it == currentProviders.end()) { if (it == currentProviders.end()) {
std::unique_ptr<QMimeProviderBase> provider; std::unique_ptr<QMimeProviderBase> provider;
#if defined(QT_USE_MMAP) #if defined(QT_USE_MMAP)
if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && fileInfo.exists()) { if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && QFileInfo::exists(cacheFile)) {
provider.reset(new QMimeBinaryProvider(this, mimeDir)); provider.reset(new QMimeBinaryProvider(this, mimeDir));
//qDebug() << "Created binary provider for" << mimeDir; //qDebug() << "Created binary provider for" << mimeDir;
if (!provider->isValid()) { if (!provider->isValid()) {
@ -206,8 +207,7 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName)
if (fileName.endsWith(QLatin1Char('/'))) if (fileName.endsWith(QLatin1Char('/')))
return QStringList() << QLatin1String("inode/directory"); return QStringList() << QLatin1String("inode/directory");
const QString shortName = QFileInfo(fileName).fileName(); const QMimeGlobMatchResult result = findByFileName(fileName);
const QMimeGlobMatchResult result = findByFileName(shortName);
QStringList matchingMimeTypes = result.m_matchingMimeTypes; QStringList matchingMimeTypes = result.m_matchingMimeTypes;
matchingMimeTypes.sort(); // make it deterministic matchingMimeTypes.sort(); // make it deterministic
return matchingMimeTypes; return matchingMimeTypes;
@ -216,8 +216,9 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName)
QMimeGlobMatchResult QMimeDatabasePrivate::findByFileName(const QString &fileName) QMimeGlobMatchResult QMimeDatabasePrivate::findByFileName(const QString &fileName)
{ {
QMimeGlobMatchResult result; QMimeGlobMatchResult result;
const QString fileNameExcludingPath = QFileSystemEntry(fileName).fileName();
for (const auto &provider : providers()) for (const auto &provider : providers())
provider->addFileNameMatches(fileName, result); provider->addFileNameMatches(fileNameExcludingPath, result);
return result; return result;
} }
@ -374,11 +375,7 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa
*accuracyPtr = 0; *accuracyPtr = 0;
// Pass 1) Try to match on the file name // Pass 1) Try to match on the file name
QMimeGlobMatchResult candidatesByName; QMimeGlobMatchResult candidatesByName = findByFileName(fileName);
if (fileName.endsWith(QLatin1Char('/')))
candidatesByName.addMatch(QLatin1String("inode/directory"), 100, QString());
else
candidatesByName = findByFileName(QFileInfo(fileName).fileName());
if (candidatesByName.m_allMatchingMimeTypes.count() == 1) { if (candidatesByName.m_allMatchingMimeTypes.count() == 1) {
*accuracyPtr = 100; *accuracyPtr = 100;
const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0)); const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0));
@ -389,8 +386,8 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa
// Extension is unknown, or matches multiple mimetypes. // Extension is unknown, or matches multiple mimetypes.
// Pass 2) Match on content, if we can read the data // Pass 2) Match on content, if we can read the data
const auto matchOnContent = [this, accuracyPtr, &candidatesByName](QIODevice *device) {
if (device->isOpen()) { if (device->isOpen()) {
// Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h). // Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h).
// This is much faster than seeking back and forth into QIODevice. // This is much faster than seeking back and forth into QIODevice.
const QByteArray data = device->peek(16384); const QByteArray data = device->peek(16384);
@ -430,6 +427,14 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa
} }
return mimeTypeForName(defaultMimeType()); return mimeTypeForName(defaultMimeType());
};
if (device)
return matchOnContent(device);
QFile fallbackFile(fileName);
fallbackFile.open(QIODevice::ReadOnly); // error handling: matchOnContent() will check isOpen()
return matchOnContent(&fallbackFile);
} }
QList<QMimeType> QMimeDatabasePrivate::allMimeTypes() QList<QMimeType> QMimeDatabasePrivate::allMimeTypes()
@ -566,12 +571,12 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo
if (fileInfo.isDir()) if (fileInfo.isDir())
return d->mimeTypeForName(QLatin1String("inode/directory")); return d->mimeTypeForName(QLatin1String("inode/directory"));
QFile file(fileInfo.absoluteFilePath()); const QString filePath = fileInfo.filePath();
#ifdef Q_OS_UNIX #ifdef Q_OS_UNIX
// Cannot access statBuf.st_mode from the filesystem engine, so we have to stat again. // Cannot access statBuf.st_mode from the filesystem engine, so we have to stat again.
// In addition we want to follow symlinks. // In addition we want to follow symlinks.
const QByteArray nativeFilePath = QFile::encodeName(file.fileName()); const QByteArray nativeFilePath = QFile::encodeName(filePath);
QT_STATBUF statBuffer; QT_STATBUF statBuffer;
if (QT_STAT(nativeFilePath.constData(), &statBuffer) == 0) { if (QT_STAT(nativeFilePath.constData(), &statBuffer) == 0) {
if (S_ISCHR(statBuffer.st_mode)) if (S_ISCHR(statBuffer.st_mode))
@ -588,18 +593,19 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo
int priority = 0; int priority = 0;
switch (mode) { switch (mode) {
case MatchDefault: case MatchDefault:
file.open(QIODevice::ReadOnly); // isOpen() will be tested by method below return d->mimeTypeForFileNameAndData(filePath, nullptr, &priority);
return d->mimeTypeForFileNameAndData(fileInfo.absoluteFilePath(), &file, &priority);
case MatchExtension: case MatchExtension:
locker.unlock(); locker.unlock();
return mimeTypeForFile(fileInfo.absoluteFilePath(), mode); return mimeTypeForFile(filePath, mode);
case MatchContent: case MatchContent: {
QFile file(filePath);
if (file.open(QIODevice::ReadOnly)) { if (file.open(QIODevice::ReadOnly)) {
locker.unlock(); locker.unlock();
return mimeTypeForData(&file); return mimeTypeForData(&file);
} else { } else {
return d->mimeTypeForName(d->defaultMimeType()); return d->mimeTypeForName(d->defaultMimeType());
} }
}
default: default:
Q_ASSERT(false); Q_ASSERT(false);
} }
@ -664,7 +670,7 @@ QList<QMimeType> QMimeDatabase::mimeTypesForFileName(const QString &fileName) co
QString QMimeDatabase::suffixForFileName(const QString &fileName) const QString QMimeDatabase::suffixForFileName(const QString &fileName) const
{ {
QMutexLocker locker(&d->mutex); QMutexLocker locker(&d->mutex);
const int suffixLength = d->findByFileName(QFileInfo(fileName).fileName()).m_knownSuffixLength; const int suffixLength = d->findByFileName(fileName).m_knownSuffixLength;
return fileName.right(suffixLength); return fileName.right(suffixLength);
} }
@ -756,6 +762,10 @@ QMimeType QMimeDatabase::mimeTypeForUrl(const QUrl &url) const
QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIODevice *device) const QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIODevice *device) const
{ {
QMutexLocker locker(&d->mutex); QMutexLocker locker(&d->mutex);
if (fileName.endsWith(QLatin1Char('/')))
return d->mimeTypeForName(QLatin1String("inode/directory"));
int accuracy = 0; int accuracy = 0;
const bool openedByUs = !device->isOpen() && device->open(QIODevice::ReadOnly); const bool openedByUs = !device->isOpen() && device->open(QIODevice::ReadOnly);
const QMimeType result = d->mimeTypeForFileNameAndData(fileName, device, &accuracy); const QMimeType result = d->mimeTypeForFileNameAndData(fileName, device, &accuracy);
@ -783,6 +793,10 @@ QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIO
QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, const QByteArray &data) const QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, const QByteArray &data) const
{ {
QMutexLocker locker(&d->mutex); QMutexLocker locker(&d->mutex);
if (fileName.endsWith(QLatin1Char('/')))
return d->mimeTypeForName(QLatin1String("inode/directory"));
QBuffer buffer(const_cast<QByteArray *>(&data)); QBuffer buffer(const_cast<QByteArray *>(&data));
buffer.open(QIODevice::ReadOnly); buffer.open(QIODevice::ReadOnly);
int accuracy = 0; int accuracy = 0;

View File

@ -0,0 +1 @@
#include <math.h>

View File

@ -0,0 +1 @@
void x();

View File

@ -0,0 +1,4 @@
foo
bar
-
_

View File

@ -0,0 +1 @@
diff --git a/plugins/patchreview/kdevpatchreview.json b/plugins/patchreview/kdevpatchreview.json

View File

@ -1,6 +1,7 @@
/**************************************************************************** /****************************************************************************
** **
** Copyright (C) 2016 The Qt Company Ltd. ** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2021 Igor Kushnir <igorkuo@gmail.com>
** Contact: https://www.qt.io/licensing/ ** Contact: https://www.qt.io/licensing/
** **
** This file is part of the test suite of the Qt Toolkit. ** This file is part of the test suite of the Qt Toolkit.
@ -29,6 +30,36 @@
#include <QTest> #include <QTest>
#include <QMimeDatabase> #include <QMimeDatabase>
namespace {
struct MatchModeInfo
{
QMimeDatabase::MatchMode mode;
const char *name;
};
constexpr MatchModeInfo matchModes[] = { { QMimeDatabase::MatchDefault, "Default" },
{ QMimeDatabase::MatchExtension, "Extension" },
{ QMimeDatabase::MatchContent, "Content" } };
void addFileRows(const char *tag, const QString &fileName, const QStringList &expectedMimeNames)
{
QCOMPARE(static_cast<std::size_t>(expectedMimeNames.size()), std::size(matchModes));
for (int i = 0; i < expectedMimeNames.size(); ++i) {
QTest::addRow(qPrintable(tag + QStringLiteral(" - %s")), matchModes[i].name)
<< fileName << matchModes[i].mode << expectedMimeNames[i];
}
}
void addExistentFileRows(const char *tag, const QString &fileName,
const QStringList &expectedMimeNames)
{
const QString filePath = QFINDTESTDATA("files/" + fileName);
QVERIFY2(!filePath.isEmpty(),
qPrintable(QStringLiteral("Cannot find test file %1 in files/").arg(fileName)));
addFileRows(tag, filePath, expectedMimeNames);
}
}
class tst_QMimeDatabase: public QObject class tst_QMimeDatabase: public QObject
{ {
@ -37,6 +68,8 @@ class tst_QMimeDatabase: public QObject
private slots: private slots:
void inheritsPerformance(); void inheritsPerformance();
void benchMimeTypeForName(); void benchMimeTypeForName();
void benchMimeTypeForFile_data();
void benchMimeTypeForFile();
}; };
void tst_QMimeDatabase::inheritsPerformance() void tst_QMimeDatabase::inheritsPerformance()
@ -82,6 +115,50 @@ void tst_QMimeDatabase::benchMimeTypeForName()
} }
} }
void tst_QMimeDatabase::benchMimeTypeForFile_data()
{
QTest::addColumn<QString>("fileName");
QTest::addColumn<QMimeDatabase::MatchMode>("mode");
QTest::addColumn<QString>("expectedMimeName");
addFileRows("archive", "a.tar.gz",
{ "application/x-compressed-tar",
"application/x-compressed-tar",
"application/octet-stream" });
addFileRows("OpenDocument Text", "b.odt",
{ "application/vnd.oasis.opendocument.text",
"application/vnd.oasis.opendocument.text",
"application/octet-stream" });
addExistentFileRows(
"existent archive with extension", "N.tar.gz",
{ "application/x-compressed-tar", "application/x-compressed-tar", "application/gzip" });
addExistentFileRows("existent C with extension", "t.c",
{ "text/x-csrc", "text/x-csrc", "text/plain" });
addExistentFileRows("existent text file with extension", "u.txt",
{ "text/plain", "text/plain", "text/plain" });
addExistentFileRows("existent C w/o extension", "X",
{ "text/x-csrc", "application/octet-stream", "text/x-csrc" });
addExistentFileRows("existent patch w/o extension", "y",
{ "text/x-patch", "application/octet-stream", "text/x-patch" });
addExistentFileRows("existent archive w/o extension", "z",
{ "application/gzip", "application/octet-stream", "application/gzip" });
}
void tst_QMimeDatabase::benchMimeTypeForFile()
{
QFETCH(const QString, fileName);
QFETCH(const QMimeDatabase::MatchMode, mode);
QFETCH(const QString, expectedMimeName);
QMimeDatabase db;
QBENCHMARK {
const auto mimeType = db.mimeTypeForFile(fileName, mode);
QCOMPARE(mimeType.name(), expectedMimeName);
}
}
QTEST_MAIN(tst_QMimeDatabase) QTEST_MAIN(tst_QMimeDatabase)
#include "tst_bench_qmimedatabase.moc" #include "tst_bench_qmimedatabase.moc"