Rewrote qmake's #include-detection to be more faithful to CPP.

The C preprocessor allows backslash-newline anywhere and allows
comments anywhere it allows space.  Testing wilfully perverse
applications of that revealed qmake's parsing of #include directives
wasn't very robust.  So rework to actually follow the rules and add
those tests.

Change-Id: If5cc7bfb65f9994e9ab9ed216dd1ee7285c63934
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
This commit is contained in:
Edward Welbourne 2015-11-24 14:45:52 +01:00
parent d47baa7236
commit 214e271b93
6 changed files with 364 additions and 115 deletions

View File

@ -388,6 +388,40 @@ QFileInfo QMakeSourceFileInfo::findFileInfo(const QMakeLocalFileName &dep)
return QFileInfo(dep.real());
}
static int skipEscapedLineEnds(const char *buffer, int buffer_len, int offset, int *lines)
{
// Join physical lines to make logical lines, as in the C preprocessor
while (offset + 1 < buffer_len
&& buffer[offset] == '\\'
&& qmake_endOfLine(buffer[offset + 1])) {
offset += 2;
++*lines;
if (offset < buffer_len
&& buffer[offset - 1] == '\r'
&& buffer[offset] == '\n') // CRLF
offset++;
}
return offset;
}
static bool matchWhileUnsplitting(const char *buffer, int buffer_len, int start,
const char *needle, int needle_len,
int *matchlen, int *lines)
{
int x = start;
for (int n = 0; n < needle_len && x < buffer_len;
n++, x = skipEscapedLineEnds(buffer, buffer_len, x + 1, lines)) {
if (buffer[x] != needle[n])
return false;
}
// That also skipped any remaining BSNLs immediately after the match.
// Tell caller how long the match was:
*matchlen = x - start;
return true;
}
bool QMakeSourceFileInfo::findDeps(SourceFile *file)
{
if(file->dep_checked || file->type == TYPE_UNKNOWN)
@ -426,6 +460,18 @@ bool QMakeSourceFileInfo::findDeps(SourceFile *file)
file->deps = new SourceDependChildren;
int line_count = 1;
enum {
/*
States of C preprocessing (for TYPE_C only), after backslash-newline
elimination and skipping comments and spaces (i.e. in ANSI X3.159-1989
section 2.1.1.2's phase 4). We're about to study buffer[x] to decide
on which transition to do.
*/
AtStart, // start of logical line; a # may start a preprocessor directive
HadHash, // saw a # at start, looking for preprocessor keyword
WantName, // saw #include or #import, waiting for name
InCode // after directive, parsing non-#include directive or in actual code
} cpp_state = AtStart;
for(int x = 0; x < buffer_len; ++x) {
bool try_local = true;
@ -505,144 +551,232 @@ bool QMakeSourceFileInfo::findDeps(SourceFile *file)
++line_count;
} else if(file->type == QMakeSourceFileInfo::TYPE_QRC) {
} else if(file->type == QMakeSourceFileInfo::TYPE_C) {
for(int beginning=1; x < buffer_len; ++x) {
// We've studied all buffer[i] for i < x
for (; x < buffer_len; ++x) {
// How to handle backslash-newline (BSNL) pairs:
#define SKIP_BSNL(pos) skipEscapedLineEnds(buffer, buffer_len, (pos), &line_count)
// Seek code or directive, skipping comments and space:
for(; x < buffer_len; ++x) {
x = SKIP_BSNL(x);
if (buffer[x] == ' ' || buffer[x] == '\t') {
// keep going
} else if (buffer[x] == '/' && x + 1 < buffer_len &&
(buffer[x + 1] == '/' || buffer[x + 1] == '*')) {
++x;
if (buffer[x] == '/') { // C++-style comment
for (; x < buffer_len && !qmake_endOfLine(buffer[x]); ++x) {} // skip
beginning = 1;
} else { // C-style comment
} else if (buffer[x] == '/') {
int extralines = 0;
int y = skipEscapedLineEnds(buffer, buffer_len, x + 1, &extralines);
if (buffer[y] == '/') { // C++-style comment
line_count += extralines;
x = SKIP_BSNL(y + 1);
while (x < buffer_len && !qmake_endOfLine(buffer[x]))
x = SKIP_BSNL(x + 1); // skip
cpp_state = AtStart;
++line_count;
} else if (buffer[y] == '*') { // C-style comment
line_count += extralines;
x = y;
while (++x < buffer_len) {
x = SKIP_BSNL(x);
if (buffer[x] == '*') {
if (x + 1 < buffer_len && buffer[x + 1] == '/') {
++x; // skip '*'; for loop skips '/'.
extralines = 0;
y = skipEscapedLineEnds(buffer, buffer_len,
x + 1, &extralines);
if (y < buffer_len && buffer[y] == '/') {
line_count += extralines;
x = y; // for loop shall step past this
break;
}
} else if (qmake_endOfLine(buffer[x])) {
++line_count;
}
}
} else {
// buffer[x] is the division operator
break;
}
} else if (qmake_endOfLine(buffer[x])) {
++line_count;
beginning = 1;
cpp_state = AtStart;
} else {
/* Drop out of phases 1, 2, 3, into phase 4 */
break;
}
}
// Phase 4 study of buffer[x]:
if(x >= buffer_len)
break;
// preprocessor directive
if (beginning && buffer[x] == '#') {
// Advance to start of preprocessing directive
while (++x < buffer_len
&& (buffer[x] == ' ' || buffer[x] == '\t')) {} // skip
switch (cpp_state) {
case HadHash:
{
// Read keyword; buffer[x] starts first preprocessing token after #
const char *const keyword = buffer + x;
int clean = x;
while (x < buffer_len && buffer[x] >= 'a' && buffer[x] <= 'z') {
// skip over keyword, consolidating it if it contains BSNLs
// (see WantName's similar code consolidating inc, below)
if (clean < x)
buffer[clean++] = buffer[x];
else
clean++;
if (qmake_endOfLine(buffer[x])) {
++line_count;
beginning = 1;
continue;
x = SKIP_BSNL(x + 1);
}
const int keyword_len = buffer + clean - keyword;
x--; // Still need to study buffer[x] next time round for loop.
cpp_state =
((keyword_len == 7 && !strncmp(keyword, "include", 7)) // C & Obj-C
|| (keyword_len == 6 && !strncmp(keyword, "import", 6))) // Obj-C
? WantName : InCode;
break;
}
// quoted strings
if (buffer[x] == '\'' || buffer[x] == '"') {
// It might be a C++11 raw string.
bool israw = false;
if (buffer[x] == '"' && x > 0) {
int y = x;
while (--y > 0 && (buffer[y] == '8' || buffer[y] == 'u' || buffer[y] == 'U')) {} // skip
israw = (buffer[y] == 'R');
}
if (israw) {
x++;
const char *const delim = buffer + x;
while (x < buffer_len && buffer[x] != '(')
x++;
case WantName:
{
char term = buffer[x];
if (term == '<') {
try_local = false;
term = '>';
} else if (term != '"') {
/*
Not checking correctness (trust real compiler to do that):
- no controls, spaces, '(', ')', '\\' or (presumably) '"' in delim;
- at most 16 bytes in delim
*/
const int delimlen = buffer + x - delim;
while (++x < buffer_len
&& (buffer[x] != ')'
|| (delimlen > 0 &&
strncmp(buffer + x + 1, delim, delimlen))
|| buffer[x + 1 + delimlen] != '"')) {} // skip
// buffer[x] is ')'
x += 1 + delimlen; // 1 for ')', then delim
// buffer[x] is '"'
} else {
const char term = buffer[x];
while (++x < buffer_len && buffer[x] != term) {
if (buffer[x] == '\\')
++x;
else if (qmake_endOfLine(buffer[x]))
++line_count;
}
Possibly malformed, but this may be something like:
#include IDENTIFIER
which does work, if #define IDENTIFIER "filename" is
in effect. This is beyond this noddy preprocessor's
powers of tracking. So give up and resume searching
for a directive. We haven't made sense of buffer[x],
so back up to ensure we do study it (now as code) next
time round the loop.
*/
x--;
cpp_state = InCode;
continue;
}
// for loop's ++x shall step over the closing quote.
x = SKIP_BSNL(x + 1);
inc = buffer + x;
int clean = x; // offset if we need to clear \-newlines
for (; x < buffer_len && buffer[x] != term; x = SKIP_BSNL(x + 1)) {
if (qmake_endOfLine(buffer[x])) { // malformed
cpp_state = AtStart;
++line_count;
break;
}
/*
If we do skip any BSNLs, we need to consolidate the
surviving text by copying to lower indices. For that
to be possible, we also have to keep 'clean' advanced
in step with x even when we've yet to see any BSNLs.
*/
if (clean < x)
buffer[clean++] = buffer[x];
else
clean++;
}
if (cpp_state == WantName)
buffer[clean] = '\0';
else // i.e. malformed
inc = 0;
cpp_state = InCode; // hereafter
break;
}
beginning = 0;
case AtStart:
// Preprocessor directive?
if (buffer[x] == '#') {
cpp_state = HadHash;
break;
}
cpp_state = InCode;
// ... and fall through to handle buffer[x] as such.
case InCode:
// matching quotes (string literals and character literals)
if (buffer[x] == '\'' || buffer[x] == '"') {
// It might be a C++11 raw string.
bool israw = false;
if (buffer[x] == '"' && x > 0) {
int y = x - 1;
while (y > 0 && buffer[y] != 'R') {
if (buffer[y] == '8' || buffer[y] == 'u' || buffer[y] == 'U')
y--;
else if (y > 1 && qmake_endOfLine(buffer[y])
&& buffer[y - 1] == '\\')
y -= 2;
else if (y > 2 && buffer[y] == '\n'
&& buffer[y - 1] == '\r'
&& buffer[y - 2] == '\\')
y -= 3;
else
break;
}
israw = (buffer[y] == 'R');
}
if (israw) {
x = SKIP_BSNL(x + 1);
const char *const delim = buffer + x;
int clean = x;
while (x < buffer_len && buffer[x] != '(') {
if (clean < x)
buffer[clean++] = buffer[x];
else
clean++;
x = SKIP_BSNL(x + 1);
}
/*
Not checking correctness (trust real compiler to do that):
- no controls, spaces, '(', ')', '\\' or (presumably) '"' in delim;
- at most 16 bytes in delim
Raw strings are surely defined after phase 2, when
BSNLs are resolved; so the delimiter's exclusion
of '\\' and space (including newlines) applies too
late to save us the need to cope with BSNLs in it.
*/
const int delimlen = buffer + clean - delim;
int matchlen = delimlen, extralines = 0;
while ((x = SKIP_BSNL(x + 1)) < buffer_len
&& (buffer[x] != ')'
|| (delimlen > 0 &&
!matchWhileUnsplitting(buffer, buffer_len,
x + 1, delim, delimlen,
&matchlen, &extralines))
|| buffer[x + 1 + matchlen] != '"')) {
// skip, but keep track of lines
if (qmake_endOfLine(buffer[x]))
++line_count;
extralines = 0;
}
line_count += extralines; // from the match
// buffer[x] is ')'
x += 1 + matchlen; // 1 for ')', then delim
// buffer[x] is '"'
} else {
const char term = buffer[x];
while (++x < buffer_len && buffer[x] != term) {
if (buffer[x] == '\\')
++x;
else if (qmake_endOfLine(buffer[x]))
++line_count;
}
}
// for loop's ++x shall step over the closing quote.
}
// else: buffer[x] is just some code; move on.
break;
}
if (inc) // We were in WantName and found a name.
break;
#undef SKIP_BSNL
}
if(x >= buffer_len)
break;
// Got a preprocessor directive
const char *const keyword = buffer + x;
for (;
x < buffer_len && buffer[x] >= 'a' && buffer[x] <= 'z';
x++) {} // skip over identifier
int keyword_len = buffer + x - keyword;
for (;
x < buffer_len && (buffer[x] == ' ' || buffer[x] == '\t');
x++) {} // skip spaces after keyword
/* Keyword with nothing after it, e.g. #endif: not interesting. */
if (qmake_endOfLine(buffer[x]))
keyword_len = 0;
if((keyword_len == 7 && !strncmp(keyword, "include", 7)) // C & Obj-C
|| (keyword_len == 6 && !strncmp(keyword, "import", 6))) { // Obj-C
char term = buffer[x];
if(term == '<') {
try_local = false;
term = '>';
} else if(term != '"') { //wtf?
continue;
}
x++;
inc = buffer + x;
for (;
buffer[x] != term && !qmake_endOfLine(buffer[x]);
++x) {} // skip until end of include name
buffer[x] = '\0';
} else if (buffer[x] == '\'' || buffer[x] == '"') {
const char term = buffer[x++];
while(x < buffer_len) {
if (buffer[x] == term)
break;
if (buffer[x] == '\\') {
x+=2;
} else {
if (qmake_endOfLine(buffer[x]))
++line_count;
++x;
}
}
} else {
--x;
}
}
if(inc) {

View File

@ -1,4 +1,5 @@
DESTDIR = ./
gcc: QMAKE_CXXFLAGS += -Wno-comment
HEADERS += object1.h \
object2.h \
@ -8,5 +9,7 @@ HEADERS += object1.h \
object6.h \
object7.h \
object8.h \
object9.h
object9.h \
objecta.h \
objectf.h
SOURCES += main.cpp needed.cpp

View File

@ -39,23 +39,52 @@
static const char text[] = "lorem ""ipsum /*";
#include <moc_object1.cpp>
/**/ #include <moc_object2.cpp>
/**//**/ #include <moc_object3.cpp>
/*'*/ #include <moc_object4.cpp>
/*
*/ #include <moc_object5.cpp>
//
/**/ #include "\
moc_object2.cpp\
"
/**//**/ #include <moc_\
o\
b\
j\
e\
c\
t\
3\
.cpp>
/*'"*/ #include <moc_object4.cpp>
/*"'
*/ #include <moc_object5.cpp> /*
#include "missing.cpp"
*/// a backslash newline does make the next line part of this comment \
/* so this text is in last line's C++-style comment, not a C-comment !
#include <moc_object6.cpp>
#if 0
#pragma "ignore me" '&' L"me"
#line 4321 "main.cpp" more /* preprocessing */ tokens
#endif
static void function1();
#include <moc_object7.cpp>
#include/* every comment
gets replaced (in phase 3) by a single
space */<moc_object7.cpp>
static void function2(); /**/
#include <moc_object8.cpp>
#include \
<moc_object8.cpp>
static void function3(); //
#include <moc_object9.cpp>
/* backslash-newline elimination happens in phase 2 *\
/ # /* and that's valid here, too. *\
/ include/* and, of course, here *\
/<moc_objecta.cpp>// while we're here, ... \
#include "needed.cpp"
int main () {
extern int needed(void);
return needed();
}
/*
Deliberately end file in a #include, with nothing after it but the mandatory
(unescaped) newline at the end of every source file.
*/
#include "moc_objectf.cpp"

View File

@ -0,0 +1,41 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL21$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <QObject>
class ObjectA : public QObject
{
Q_OBJECT
};

View File

@ -0,0 +1,41 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL21$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <QObject>
class ObjectF : public QObject
{
Q_OBJECT
};

View File

@ -31,7 +31,8 @@
**
****************************************************************************/
static const char raw[] = R"blah(lorem " ipsum /*)blah";
static const char raw[] = R"blah(lorem " ipsum /*)blah"\
;
#include <moc_object1.cpp>
int main () { return 0; }