rcc: de-duplicate data in resources
Content-based de-duplication: file data is hashed with SHA-256, and when candidates with the same hash value are found, a full data comparison is performed to confirm the match.

Task-number: QTBUG-126168
Change-Id: Ifebc8ca322e354d8ea1f701f27f3f65916f7555c
Reviewed-by: hjk <hjk@qt.io>
This commit is contained in:
parent
a8b7da59cb
commit
607b3b2feb
@ -1,10 +1,12 @@
|
||||
// Copyright (C) 2018 The Qt Company Ltd.
|
||||
// Copyright (C) 2018 Intel Corporation.
|
||||
// Copyright (C) 2024 Christoph Cullmann <christoph@cullmann.io>
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
#include "rcc.h"
|
||||
|
||||
#include <qbytearray.h>
|
||||
#include <qcryptographichash.h>
|
||||
#include <qdatetime.h>
|
||||
#include <qdebug.h>
|
||||
#include <qdir.h>
|
||||
@ -90,8 +92,28 @@ public:
|
||||
|
||||
QString resourceName() const;
|
||||
|
||||
struct DeduplicationKey {
|
||||
RCCResourceLibrary::CompressionAlgorithm compressAlgo;
|
||||
int compressLevel;
|
||||
int compressThreshold;
|
||||
QByteArray hash;
|
||||
|
||||
bool operator==(const DeduplicationKey &other) const
|
||||
{
|
||||
return compressAlgo == other.compressAlgo &&
|
||||
compressLevel == other.compressLevel &&
|
||||
compressThreshold == other.compressThreshold &&
|
||||
hash == other.hash;
|
||||
}
|
||||
};
|
||||
|
||||
typedef QMultiHash<DeduplicationKey, RCCFileInfo*> DeduplicationMultiHash;
|
||||
|
||||
public:
|
||||
qint64 writeDataBlob(RCCResourceLibrary &lib, qint64 offset, QString *errorMessage);
|
||||
qint64 writeDataBlob(RCCResourceLibrary &lib,
|
||||
qint64 offset,
|
||||
DeduplicationMultiHash &dedupByContent,
|
||||
QString *errorMessage);
|
||||
qint64 writeDataName(RCCResourceLibrary &, qint64 offset);
|
||||
void writeDataInfo(RCCResourceLibrary &lib);
|
||||
|
||||
@ -114,6 +136,11 @@ public:
|
||||
qint64 m_childOffset = 0;
|
||||
};
|
||||
|
||||
// Hash overload for RCCFileInfo::DeduplicationKey so it can serve as a
// QMultiHash key. Folds the same four fields that operator== compares
// into the caller-supplied seed.
static size_t qHash(const RCCFileInfo::DeduplicationKey &key, size_t seed) noexcept
{
    const size_t combined = qHashMulti(seed,
                                       key.compressAlgo,
                                       key.compressLevel,
                                       key.compressThreshold,
                                       key.hash);
    return combined;
}
|
||||
|
||||
RCCFileInfo::RCCFileInfo(const QString &name, const QFileInfo &fileInfo, QLocale::Language language,
|
||||
QLocale::Territory territory, uint flags,
|
||||
RCCResourceLibrary::CompressionAlgorithm compressAlgo, int compressLevel,
|
||||
@ -217,8 +244,10 @@ void RCCFileInfo::writeDataInfo(RCCResourceLibrary &lib)
|
||||
}
|
||||
}
|
||||
|
||||
qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib, qint64 offset,
|
||||
QString *errorMessage)
|
||||
qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib,
|
||||
qint64 offset,
|
||||
DeduplicationMultiHash &dedupByContent,
|
||||
QString *errorMessage)
|
||||
{
|
||||
const bool text = lib.m_format == RCCResourceLibrary::C_Code;
|
||||
const bool pass1 = lib.m_format == RCCResourceLibrary::Pass1;
|
||||
@ -231,14 +260,38 @@ qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib, qint64 offset,
|
||||
QByteArray data;
|
||||
|
||||
if (!m_isEmpty) {
|
||||
//find the data to be written
|
||||
QFile file(m_fileInfo.absoluteFilePath());
|
||||
// find the data to be written
|
||||
const QString absoluteFilePath = m_fileInfo.absoluteFilePath();
|
||||
QFile file(absoluteFilePath);
|
||||
if (!file.open(QFile::ReadOnly)) {
|
||||
*errorMessage = msgOpenReadFailed(m_fileInfo.absoluteFilePath(), file.errorString());
|
||||
*errorMessage = msgOpenReadFailed(absoluteFilePath, file.errorString());
|
||||
return 0;
|
||||
}
|
||||
|
||||
data = file.readAll();
|
||||
|
||||
// de-duplicate the same file content, we can re-use already written data
|
||||
// we only do that if we have the same compression settings
|
||||
const QByteArray hash = QCryptographicHash::hash(data, QCryptographicHash::Sha256);
|
||||
const DeduplicationKey key{m_compressAlgo, m_compressLevel, m_compressThreshold, hash};
|
||||
const QList<RCCFileInfo *> potentialCandidates = dedupByContent.values(key);
|
||||
for (const RCCFileInfo *candidate : potentialCandidates) {
|
||||
// check real content, we can have collisions
|
||||
QFile candidateFile(candidate->m_fileInfo.absoluteFilePath());
|
||||
if (!candidateFile.open(QFile::ReadOnly)) {
|
||||
*errorMessage = msgOpenReadFailed(candidate->m_fileInfo.absoluteFilePath(),
|
||||
candidateFile.errorString());
|
||||
return 0;
|
||||
}
|
||||
if (data != candidateFile.readAll()) {
|
||||
continue;
|
||||
}
|
||||
// just remember the offset & flags with final compression state
|
||||
// of the already written data and be done
|
||||
m_dataOffset = candidate->m_dataOffset;
|
||||
m_flags = candidate->m_flags;
|
||||
return offset;
|
||||
}
|
||||
dedupByContent.insert(key, this);
|
||||
}
|
||||
|
||||
// Check if compression is useful for this file
|
||||
@ -1168,6 +1221,7 @@ bool RCCResourceLibrary::writeDataBlobs()
|
||||
QStack<RCCFileInfo*> pending;
|
||||
pending.push(m_root);
|
||||
qint64 offset = 0;
|
||||
RCCFileInfo::DeduplicationMultiHash dedupByContent;
|
||||
QString errorMessage;
|
||||
while (!pending.isEmpty()) {
|
||||
RCCFileInfo *file = pending.pop();
|
||||
@ -1176,7 +1230,8 @@ bool RCCResourceLibrary::writeDataBlobs()
|
||||
if (child->m_flags & RCCFileInfo::Directory)
|
||||
pending.push(child);
|
||||
else {
|
||||
offset = child->writeDataBlob(*this, offset, &errorMessage);
|
||||
offset = child->writeDataBlob(*this, offset,
|
||||
dedupByContent, &errorMessage);
|
||||
if (offset == 0) {
|
||||
m_errorDevice->write(errorMessage.toUtf8());
|
||||
return false;
|
||||
|
157
tests/auto/tools/rcc/data/deduplication/deduplication.expected
Normal file
157
tests/auto/tools/rcc/data/deduplication/deduplication.expected
Normal file
@ -0,0 +1,157 @@
|
||||
/****************************************************************************
|
||||
** Resource object code
|
||||
**
|
||||
IGNORE:** Created by: The Resource Compiler for Qt version 6.9.0
|
||||
**
|
||||
** WARNING! All changes made in this file will be lost!
|
||||
*****************************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// disable informational message "function ... selected for automatic inline expansion"
|
||||
#pragma warning (disable: 4711)
|
||||
#endif
|
||||
|
||||
static const unsigned char qt_resource_data[] = {
|
||||
// b.txt
|
||||
0x0,0x0,0x0,0xb,
|
||||
0x62,
|
||||
0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
|
||||
// c_with_a_content.txt
|
||||
0x0,0x0,0x0,0xb,
|
||||
0x61,
|
||||
0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
|
||||
// b.txt
|
||||
0x0,0x0,0x0,0xb,
|
||||
0x62,
|
||||
0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
|
||||
|
||||
};
|
||||
|
||||
static const unsigned char qt_resource_name[] = {
|
||||
// files
|
||||
0x0,0x5,
|
||||
0x0,0x6d,0x2,0xc3,
|
||||
0x0,0x66,
|
||||
0x0,0x69,0x0,0x6c,0x0,0x65,0x0,0x73,
|
||||
// b.txt
|
||||
0x0,0x5,
|
||||
0x0,0x65,0x5b,0xf4,
|
||||
0x0,0x62,
|
||||
0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
// c_with_a_content.txt
|
||||
0x0,0x14,
|
||||
0x1,0x61,0x1d,0x34,
|
||||
0x0,0x63,
|
||||
0x0,0x5f,0x0,0x77,0x0,0x69,0x0,0x74,0x0,0x68,0x0,0x5f,0x0,0x61,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6e,0x0,0x74,0x0,0x65,0x0,0x6e,0x0,0x74,0x0,0x2e,
|
||||
0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
// a.txt
|
||||
0x0,0x5,
|
||||
0x0,0x64,0x5b,0xf4,
|
||||
0x0,0x61,
|
||||
0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
// alias_of_b_compress9.txt
|
||||
0x0,0x18,
|
||||
0xb,0x26,0xf,0xb4,
|
||||
0x0,0x61,
|
||||
0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6d,0x0,0x70,0x0,0x72,0x0,0x65,
|
||||
0x0,0x73,0x0,0x73,0x0,0x39,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
// alias_of_b.txt
|
||||
0x0,0xe,
|
||||
0x1,0xa4,0x6d,0x34,
|
||||
0x0,0x61,
|
||||
0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
// alias_of_b_compress9_dupe.txt
|
||||
0x0,0x1d,
|
||||
0x9,0x4,0x7a,0x14,
|
||||
0x0,0x61,
|
||||
0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6d,0x0,0x70,0x0,0x72,0x0,0x65,
|
||||
0x0,0x73,0x0,0x73,0x0,0x39,0x0,0x5f,0x0,0x64,0x0,0x75,0x0,0x70,0x0,0x65,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
|
||||
|
||||
};
|
||||
|
||||
static const unsigned char qt_resource_struct[] = {
|
||||
// :
|
||||
0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1,
|
||||
0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,
|
||||
// :/files
|
||||
0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x6,0x0,0x0,0x0,0x2,
|
||||
0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,
|
||||
// :/files/a.txt
|
||||
0x0,0x0,0x0,0x4e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xf,
|
||||
TIMESTAMP:files/a.txt
|
||||
// :/files/b.txt
|
||||
0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
|
||||
TIMESTAMP:files/b.txt
|
||||
// :/files/c_with_a_content.txt
|
||||
0x0,0x0,0x0,0x20,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xf,
|
||||
TIMESTAMP:files/c_with_a_content.txt
|
||||
// :/files/alias_of_b.txt
|
||||
0x0,0x0,0x0,0x94,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
|
||||
TIMESTAMP:files/b.txt
|
||||
// :/files/alias_of_b_compress9_dupe.txt
|
||||
0x0,0x0,0x0,0xb6,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1e,
|
||||
TIMESTAMP:files/b.txt
|
||||
// :/files/alias_of_b_compress9.txt
|
||||
0x0,0x0,0x0,0x5e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1e,
|
||||
TIMESTAMP:files/b.txt
|
||||
|
||||
};
|
||||
|
||||
#ifdef QT_NAMESPACE
|
||||
# define QT_RCC_PREPEND_NAMESPACE(name) ::QT_NAMESPACE::name
|
||||
# define QT_RCC_MANGLE_NAMESPACE0(x) x
|
||||
# define QT_RCC_MANGLE_NAMESPACE1(a, b) a##_##b
|
||||
# define QT_RCC_MANGLE_NAMESPACE2(a, b) QT_RCC_MANGLE_NAMESPACE1(a,b)
|
||||
# define QT_RCC_MANGLE_NAMESPACE(name) QT_RCC_MANGLE_NAMESPACE2( \
|
||||
QT_RCC_MANGLE_NAMESPACE0(name), QT_RCC_MANGLE_NAMESPACE0(QT_NAMESPACE))
|
||||
#else
|
||||
# define QT_RCC_PREPEND_NAMESPACE(name) name
|
||||
# define QT_RCC_MANGLE_NAMESPACE(name) name
|
||||
#endif
|
||||
|
||||
#if defined(QT_INLINE_NAMESPACE)
|
||||
inline namespace QT_NAMESPACE {
|
||||
#elif defined(QT_NAMESPACE)
|
||||
namespace QT_NAMESPACE {
|
||||
#endif
|
||||
|
||||
bool qRegisterResourceData(int, const unsigned char *, const unsigned char *, const unsigned char *);
|
||||
bool qUnregisterResourceData(int, const unsigned char *, const unsigned char *, const unsigned char *);
|
||||
|
||||
#ifdef QT_NAMESPACE
|
||||
}
|
||||
#endif
|
||||
|
||||
int QT_RCC_MANGLE_NAMESPACE(qInitResources)();
|
||||
int QT_RCC_MANGLE_NAMESPACE(qInitResources)()
|
||||
{
|
||||
int version = 3;
|
||||
QT_RCC_PREPEND_NAMESPACE(qRegisterResourceData)
|
||||
(version, qt_resource_struct, qt_resource_name, qt_resource_data);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int QT_RCC_MANGLE_NAMESPACE(qCleanupResources)();
|
||||
int QT_RCC_MANGLE_NAMESPACE(qCleanupResources)()
|
||||
{
|
||||
int version = 3;
|
||||
QT_RCC_PREPEND_NAMESPACE(qUnregisterResourceData)
|
||||
(version, qt_resource_struct, qt_resource_name, qt_resource_data);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wexit-time-destructors"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
struct initializer {
|
||||
initializer() { QT_RCC_MANGLE_NAMESPACE(qInitResources)(); }
|
||||
~initializer() { QT_RCC_MANGLE_NAMESPACE(qCleanupResources)(); }
|
||||
} dummy;
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
10
tests/auto/tools/rcc/data/deduplication/deduplication.qrc
Normal file
10
tests/auto/tools/rcc/data/deduplication/deduplication.qrc
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE RCC><RCC version="1.0">
|
||||
<qresource>
|
||||
<file>files/a.txt</file>
|
||||
<file>files/b.txt</file>
|
||||
<file alias="files/alias_of_b.txt">files/b.txt</file>
|
||||
<file>files/c_with_a_content.txt</file>
|
||||
<file alias="files/alias_of_b_compress9.txt" compress="9">files/b.txt</file>
|
||||
<file alias="files/alias_of_b_compress9_dupe.txt" compress="9">files/b.txt</file>
|
||||
</qresource>
|
||||
</RCC>
|
1
tests/auto/tools/rcc/data/deduplication/files/a.txt
Normal file
1
tests/auto/tools/rcc/data/deduplication/files/a.txt
Normal file
@ -0,0 +1 @@
|
||||
a test file
|
1
tests/auto/tools/rcc/data/deduplication/files/b.txt
Normal file
1
tests/auto/tools/rcc/data/deduplication/files/b.txt
Normal file
@ -0,0 +1 @@
|
||||
b test file
|
@ -0,0 +1 @@
|
||||
a test file
|
@ -152,6 +152,11 @@ void tst_rcc::rcc_data()
|
||||
|
||||
QTest::newRow("legal") << m_dataPath + QLatin1StringView("/legal")
|
||||
<< "legal.qrc" << "rcc_legal.cpp";
|
||||
|
||||
if (sizeof(size_t) == 8) {
|
||||
const QString deduplicationPath = m_dataPath + QLatin1String("/deduplication");
|
||||
QTest::newRow("deduplication") << deduplicationPath << "deduplication.qrc" << "deduplication.expected";
|
||||
}
|
||||
}
|
||||
|
||||
static QStringList readLinesFromFile(const QString &fileName,
|
||||
|
Loading…
x
Reference in New Issue
Block a user