QUrl effective TLDs: update table and split into chunks of 64K

The table is used to determine which domains are allowed to set cookies
and which are not. More than 2000 new entries have been added since the
list was last generated.
The table data is split into chunks of at most 64K because that is the
hard limit for string literals in Visual Studio.

Change-Id: I511aec062af673555e9a69442c055f75bdcd1606
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Peter Hartmann 2013-02-08 17:48:05 +01:00 committed by The Qt Project
parent 2e9caa8942
commit 916f0ff663
5 changed files with 9763 additions and 6254 deletions

View File

@ -51,12 +51,22 @@ QT_BEGIN_NAMESPACE
// Returns true if \a entry is present in the generated effective-TLD table.
//
// The table is a hash table of NUL-terminated UTF-8 strings. Bucket i covers
// the global byte range [tldIndices[i], tldIndices[i + 1]). The string data is
// split into tldChunkCount chunks (tldData[0 .. tldChunkCount - 1]), and
// tldChunks[c] is the cumulative end offset of chunk c: the generator appends
// the running total size each time it closes a chunk.
static bool containsTLDEntry(const QString &entry)
{
    int index = qt_hash(entry) % tldCount;

    // select the right chunk from the big table
    short chunk = 0;
    uint chunkIndex = tldIndices[index], offset = 0;
    // Check the chunk bound first so tldChunks[] is never read past its end.
    while (chunk < tldChunkCount && tldIndices[index] >= tldChunks[chunk]) {
        // tldChunks[] holds cumulative offsets, so the position inside the
        // next chunk is relative to this boundary alone; summing the values
        // would over-subtract as soon as there are more than two chunks.
        chunkIndex = tldIndices[index] - tldChunks[chunk];
        offset = tldChunks[chunk];
        chunk++;
    }

    // check all the entries from the given index
    // (tldIndices[index+1] is global, hence the conversion with offset; for an
    // index at the very end of the data the range is empty and nothing is read)
    while (chunkIndex < tldIndices[index+1] - offset) {
        QString currentEntry = QString::fromUtf8(tldData[chunk] + chunkIndex);
        if (currentEntry == entry)
            return true;
        chunkIndex += qstrlen(tldData[chunk] + chunkIndex) + 1; // +1 for the ending \0
    }
    return false;
}

File diff suppressed because it is too large Load Diff

View File

@ -2809,6 +2809,12 @@ void tst_QUrl::effectiveTLDs_data()
QTest::newRow("yes6") << QUrl::fromEncoded("http://www.com.com.cn") << ".com.cn";
QTest::newRow("yes7") << QUrl::fromEncoded("http://www.test.org.ws") << ".org.ws";
QTest::newRow("yes9") << QUrl::fromEncoded("http://www.com.co.uk.wallonie.museum") << ".wallonie.museum";
QTest::newRow("yes10") << QUrl::fromEncoded("http://www.com.evje-og-hornnes.no") << ".evje-og-hornnes.no";
QTest::newRow("yes11") << QUrl::fromEncoded("http://www.bla.kamijima.ehime.jp") << ".kamijima.ehime.jp";
QTest::newRow("yes12") << QUrl::fromEncoded("http://www.bla.kakuda.miyagi.jp") << ".kakuda.miyagi.jp";
QTest::newRow("yes13") << QUrl::fromEncoded("http://mypage.betainabox.com") << ".betainabox.com";
QTest::newRow("yes14") << QUrl::fromEncoded("http://mypage.rhcloud.com") << ".rhcloud.com";
QTest::newRow("yes15") << QUrl::fromEncoded("http://mypage.int.az") << ".int.az";
}
void tst_QUrl::effectiveTLDs()

View File

@ -451,7 +451,7 @@ void tst_QNetworkCookieJar::effectiveTLDs_data()
QTest::newRow("yes-wildcard1.5") << "anything.jm" << true;
QTest::newRow("yes-wildcard2") << "something.kh" << true;
QTest::newRow("yes-wildcard3") << "whatever.uk" << true;
QTest::newRow("yes-wildcard4") << "anything.shizuoka.jp" << true;
QTest::newRow("yes-wildcard4") << "anything.sendai.jp" << true;
QTest::newRow("yes-wildcard5") << "foo.sch.uk" << true;
}

View File

@ -121,12 +121,14 @@ int main(int argc, char **argv) {
outIndicesBuffer.write("static const quint16 tldCount = ");
outIndicesBuffer.write(QByteArray::number(lineCount));
outIndicesBuffer.write(";\n");
outIndicesBuffer.write("static const quint16 tldIndices[");
outIndicesBuffer.write("static const quint32 tldIndices[");
// outIndicesBuffer.write(QByteArray::number(lineCount+1)); // not needed
outIndicesBuffer.write("] = {\n");
int utf8Size = 0;
// int charSize = 0;
int totalUtf8Size = 0;
int chunkSize = 0;
int stringUtf8Size = 0;
QStringList chunks;
for (int a = 0; a < lineCount; a++) {
bool lineIsEmpty = strings.at(a).isEmpty();
if (!lineIsEmpty) {
@ -136,25 +138,41 @@ int main(int argc, char **argv) {
int zeroCount = strings.at(a).count(QLatin1String("\\0"));
int utf8CharsCount = strings.at(a).count(QLatin1String("\\x"));
int quoteCount = strings.at(a).count('"');
stringUtf8Size = strings.at(a).count() - (zeroCount + quoteCount + utf8CharsCount * 3);
chunkSize += stringUtf8Size;
if (chunkSize > 65535) {
static int chunkCount = 0;
qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size;
outDataBuffer.write(",\n\n");
chunks.append(QByteArray::number(totalUtf8Size));
chunkSize = 0;
}
outDataBuffer.write(strings.at(a).toUtf8());
if (!lineIsEmpty)
outDataBuffer.write("\n");
outIndicesBuffer.write(QByteArray::number(utf8Size));
outIndicesBuffer.write(QByteArray::number(totalUtf8Size));
outIndicesBuffer.write(",\n");
utf8Size += strings.at(a).count() - (zeroCount + quoteCount + utf8CharsCount * 3);
// charSize += strings.at(a).count();
totalUtf8Size += stringUtf8Size;
}
outIndicesBuffer.write(QByteArray::number(utf8Size));
chunks.append(QByteArray::number(totalUtf8Size));
outIndicesBuffer.write(QByteArray::number(totalUtf8Size));
outIndicesBuffer.write("};\n");
outIndicesBuffer.close();
outFile.write(outIndicesBufferBA);
outDataBuffer.close();
outFile.write("\nstatic const char tldData[");
outFile.write("\nstatic const char *tldData[");
// outFile.write(QByteArray::number(charSize)); // not needed
outFile.write("] = {\n");
outFile.write(outDataBufferBA);
outFile.write("};\n");
// write chunk information
outFile.write("\nstatic const quint16 tldChunkCount = ");
outFile.write(QByteArray::number(chunks.count()));
outFile.write(";\nstatic const quint32 tldChunks[] = {");
outFile.write(chunks.join(", ").toLatin1());
outFile.write("};\n");
outFile.close();
printf("data generated to %s . Now copy the data from this file to src/corelib/io/qurltlds_p.h in your Qt repo\n", argv[2]);
exit(0);