Skip to content

Commit 3cc9090

Browse files
committed
added command-line option --cpp-header-probe to probe headers and extension-less files for Emacs C++ marker [skip ci]
1 parent e5659cc commit 3cc9090

13 files changed

+298
-51
lines changed

cli/cmdlineparser.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
544544
}
545545
}
546546

547+
else if (std::strcmp(argv[i], "--cpp-header-probe") == 0) {
548+
mSettings.cppHeaderProbe = true;
549+
}
550+
547551
// Show --debug output after the first simplifications
548552
else if (std::strcmp(argv[i], "--debug") == 0 ||
549553
std::strcmp(argv[i], "--debug-normal") == 0)
@@ -887,6 +891,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
887891
return Result::Fail;
888892
}
889893

894+
else if (std::strcmp(argv[i], "--no-cpp-header-probe") == 0) {
895+
mSettings.cppHeaderProbe = false;
896+
}
897+
890898
// Write results in file
891899
else if (std::strncmp(argv[i], "--output-file=", 14) == 0)
892900
mSettings.outputFile = Path::simplifyPath(Path::fromNativeSeparators(argv[i] + 14));

lib/cppcheck.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ static void createDumpFile(const Settings& settings,
182182
case Standards::Language::None:
183183
{
184184
// TODO: error out on unknown language?
185-
const Standards::Language lang = Path::identify(filename);
185+
const Standards::Language lang = Path::identify(filename, settings.cppHeaderProbe);
186186
if (lang == Standards::Language::CPP)
187187
language = " language=\"cpp\"";
188188
else if (lang == Standards::Language::C)
@@ -420,7 +420,7 @@ unsigned int CppCheck::checkClang(const std::string &path)
420420
mErrorLogger.reportOut(std::string("Checking ") + path + " ...", Color::FgGreen);
421421

422422
// TODO: this ignores the configured language
423-
const bool isCpp = Path::identify(path) == Standards::Language::CPP;
423+
const bool isCpp = Path::identify(path, mSettings.cppHeaderProbe) == Standards::Language::CPP;
424424
const std::string langOpt = isCpp ? "-x c++" : "-x c";
425425
const std::string analyzerInfo = mSettings.buildDir.empty() ? std::string() : AnalyzerInformation::getAnalyzerInfoFile(mSettings.buildDir, path, emptyString);
426426
const std::string clangcmd = analyzerInfo + ".clang-cmd";
@@ -783,7 +783,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
783783
TokenList tokenlist(&mSettings);
784784
std::istringstream istr2(code);
785785
// TODO: asserts when file has unknown extension
786-
tokenlist.createTokens(istr2, Path::identify(*files.begin())); // TODO: check result?
786+
tokenlist.createTokens(istr2, Path::identify(*files.begin(), false)); // TODO: check result?
787787
executeRules("define", tokenlist);
788788
}
789789
#endif

lib/path.cpp

Lines changed: 98 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,17 @@
2020
#undef __STRICT_ANSI__
2121
#endif
2222

23+
// Debugging aid: when defined, hasEmacsCppMarker() traces every probed path and
// the outcome of the marker detection to std::cout. It must stay disabled in
// regular builds because the trace is mixed into the normal program output.
//#define LOG_EMACS_MARKER
24+
2325
#include "path.h"
2426
#include "utils.h"
2527

2628
#include <algorithm>
29+
#include <cstdio>
2730
#include <cstdlib>
31+
#ifdef LOG_EMACS_MARKER
32+
#include <iostream>
33+
#endif
2834
#include <sys/stat.h>
2935
#include <unordered_set>
3036
#include <utility>
@@ -235,7 +241,7 @@ bool Path::isCPP(const std::string &path)
235241
bool Path::acceptFile(const std::string &path, const std::set<std::string> &extra)
236242
{
237243
bool header = false;
238-
return (identify(path, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
244+
return (identify(path, false, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
239245
}
240246

241247
// cppcheck-suppress unusedFunction
@@ -245,13 +251,99 @@ bool Path::isHeader(const std::string &path)
245251
return startsWith(extension, ".h");
246252
}
247253

248-
Standards::Language Path::identify(const std::string &path, bool *header)
254+
static bool hasEmacsCppMarker(const char* path)
255+
{
256+
// TODO: identify is called three times for each file
257+
// Preprocessor::loadFiles() -> createDUI()
258+
// Preprocessor::preprocess() -> createDUI()
259+
// TokenList::createTokens() -> TokenList::determineCppC()
260+
#ifdef LOG_EMACS_MARKER
261+
std::cout << path << '\n';
262+
#endif
263+
264+
FILE *fp = fopen(path, "rt");
265+
if (!fp)
266+
return false;
267+
std::string buf(128, '\0');
268+
{
269+
// TODO: read the whole first line only
270+
const char * const res = fgets(const_cast<char*>(buf.data()), buf.size(), fp);
271+
fclose(fp);
272+
fp = nullptr;
273+
if (!res)
274+
return false; // failed to read file
275+
}
276+
// TODO: replace with regular expression
277+
const auto pos1 = buf.find("-*-");
278+
if (pos1 == std::string::npos)
279+
return false; // no start marker
280+
const auto pos_nl = buf.find_first_of("\r\n");
281+
if (pos_nl != std::string::npos && (pos_nl < pos1)) {
282+
#ifdef LOG_EMACS_MARKER
283+
std::cout << path << " - Emacs marker not on the first line" << '\n';
284+
#endif
285+
return false; // not on first line
286+
}
287+
const auto pos2 = buf.find("-*-", pos1 + 3);
288+
// TODO: make sure we have read the whole line before bailing out
289+
if (pos2 == std::string::npos) {
290+
#ifdef LOG_EMACS_MARKER
291+
std::cout << path << " - Emacs marker not terminated" << '\n';
292+
#endif
293+
return false; // no end marker
294+
}
295+
#ifdef LOG_EMACS_MARKER
296+
std::cout << "Emacs marker: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
297+
#endif
298+
// TODO: support /* */ comments
299+
const std::string buf_trim = trim(buf); // trim whitespaces
300+
if (buf_trim[0] != '/' || buf_trim[1] != '/') {
301+
#ifdef LOG_EMACS_MARKER
302+
std::cout << path << " - Emacs marker not in a comment: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
303+
#endif
304+
return false; // not a comment
305+
}
306+
307+
// there are more variations with lowercase and no whitespaces
308+
// -*- C++ -*-
309+
// -*- Mode: C++; -*-
310+
// -*- Mode: C++; c-basic-offset: 8 -*-
311+
std::string marker = trim(buf.substr(pos1 + 3, pos2 - pos1 - 3), " ;");
312+
// cut off additional attributes
313+
const auto pos_semi = marker.find(';');
314+
if (pos_semi != std::string::npos)
315+
marker.resize(pos_semi);
316+
findAndReplace(marker, "mode:", "");
317+
findAndReplace(marker, "Mode:", "");
318+
marker = trim(marker);
319+
if (marker == "C++" || marker == "c++")
320+
return true; // C++ marker found
321+
322+
//if (marker == "C" || marker == "c")
323+
// return false;
324+
#ifdef LOG_EMACS_MARKER
325+
std::cout << path << " - unmatched Emacs marker: '" << marker << "'" << '\n';
326+
#endif
327+
328+
return false; // marker is not a C++ one
329+
}
330+
331+
Standards::Language Path::identify(const std::string &path, bool cppHeaderProbe, bool *header)
249332
{
250333
// cppcheck-suppress uninitvar - TODO: FP
251334
if (header)
252335
*header = false;
253336

254337
std::string ext = getFilenameExtension(path);
338+
// standard library headers have no extension
339+
if (cppHeaderProbe && ext.empty()) {
340+
if (hasEmacsCppMarker(path.c_str())) {
341+
if (header)
342+
*header = true;
343+
return Standards::Language::CPP;
344+
}
345+
return Standards::Language::None;
346+
}
255347
if (ext == ".C")
256348
return Standards::Language::CPP;
257349
if (c_src_exts.find(ext) != c_src_exts.end())
@@ -262,7 +354,9 @@ Standards::Language Path::identify(const std::string &path, bool *header)
262354
if (ext == ".h") {
263355
if (header)
264356
*header = true;
265-
return Standards::Language::C; // treat as C for now
357+
if (cppHeaderProbe && hasEmacsCppMarker(path.c_str()))
358+
return Standards::Language::CPP;
359+
return Standards::Language::C;
266360
}
267361
if (cpp_src_exts.find(ext) != cpp_src_exts.end())
268362
return Standards::Language::CPP;
@@ -277,7 +371,7 @@ Standards::Language Path::identify(const std::string &path, bool *header)
277371
bool Path::isHeader2(const std::string &path)
278372
{
279373
bool header;
280-
(void)Path::identify(path, &header);
374+
(void)identify(path, false, &header);
281375
return header;
282376
}
283377

lib/path.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,11 @@ class CPPCHECKLIB Path {
187187
/**
188188
* @brief Identify the language based on the file extension and, if requested, an Emacs C++ mode marker inside the file
189189
* @param path filename to check. path info is optional
190+
* @param cppHeaderProbe if true, probe extension-less and *.h files for an Emacs C++ mode marker and identify them as C++ when one is found
190191
* @param header if provided indicates if the file is a header
191192
* @return the language type
192193
*/
193-
static Standards::Language identify(const std::string &path, bool *header = nullptr);
194+
static Standards::Language identify(const std::string &path, bool cppHeaderProbe, bool *header = nullptr);
194195

195196
/**
196197
* @brief Get filename without a directory path part.

lib/preprocessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ static simplecpp::DUI createDUI(const Settings &mSettings, const std::string &cf
684684
dui.includes = mSettings.userIncludes; // --include
685685
// TODO: use mSettings.standards.stdValue instead
686686
// TODO: error out on unknown language?
687-
const Standards::Language lang = Path::identify(filename);
687+
const Standards::Language lang = Path::identify(filename, mSettings.cppHeaderProbe);
688688
if (lang == Standards::Language::CPP) {
689689
dui.std = mSettings.standards.getCPP();
690690
splitcfg(mSettings.platform.getLimitsDefines(Standards::getCPP(dui.std)), dui.defines, "");

lib/settings.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ class CPPCHECKLIB WARN_UNUSED Settings {
167167
/** cppcheck.cfg: About text */
168168
std::string cppcheckCfgAbout;
169169

170+
/** @brief check Emacs marker to detect extension-less and *.h files as C++ */
171+
bool cppHeaderProbe{};
172+
170173
/** @brief Are we running from DACA script? */
171174
bool daca{};
172175

lib/tokenize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8052,7 +8052,8 @@ void Tokenizer::unmatchedToken(const Token *tok) const
80528052
void Tokenizer::syntaxErrorC(const Token *tok, const std::string &what) const
80538053
{
80548054
printDebugOutput(0);
8055-
throw InternalError(tok, "Code '"+what+"' is invalid C code. Use --std or --language to configure the language.", InternalError::SYNTAX);
8055+
// TODO
8056+
throw InternalError(tok, "Code '"+what+"' is invalid C code.\nUse --std or --language to configure the language or --cpp-header-probe to detect C++ headers via the Emacs marker.", InternalError::SYNTAX);
80568057
}
80578058

80588059
void Tokenizer::unknownMacroError(const Token *tok1) const

lib/tokenlist.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ void TokenList::determineCppC()
9696
// only try to determine if it wasn't enforced
9797
if (mLang == Standards::Language::None) {
9898
ASSERT_LANG(!getSourceFilePath().empty());
99-
mLang = Path::identify(getSourceFilePath());
99+
mLang = Path::identify(getSourceFilePath(), mSettings ? mSettings->cppHeaderProbe : false);
100100
// TODO: cannot enable assert as this might occur for unknown extensions
101101
//ASSERT_LANG(mLang != Standards::Language::None);
102102
if (mLang == Standards::Language::None) {

releasenotes.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ Deprecations:
1616
-
1717

1818
Other:
19+
- added command-line options `--cpp-header-probe` and `--no-cpp-header-probe` to enable/disable probing of headers and extension-less files for the Emacs C++ mode marker (see https://trac.cppcheck.net/ticket/10692 for more details)
1920
-

test/cli/other_test.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,3 +1428,31 @@ def test_filelist(tmpdir):
14281428
for i in range(1, len(expected)+1):
14291429
lines.remove('{}/11 files checked 0% done'.format(i, len(expected)))
14301430
assert lines == expected
1431+
1432+
1433+
def test_cpp_probe(tmpdir):
    """A *.h file without an Emacs marker is expected to be reported as invalid C code even with --cpp-header-probe."""
    test_file = os.path.join(tmpdir, 'test.h')
    with open(test_file, 'wt') as f:
        f.write('class A {};')

    # NOTE(review): the text thrown by Tokenizer::syntaxErrorC() now contains a line break
    # and mentions --cpp-header-probe - confirm this expected line still matches the output.
    expected_err = "{}:1:1: error: Code 'classA{{' is invalid C code. Use --std or --language to configure the language. [syntaxError]".format(test_file)

    cppcheck_args = ['-q', '--template=simple', '--cpp-header-probe', test_file]
    assert_cppcheck(cppcheck_args, ec_exp=0, err_exp=[expected_err], out_exp=[])
1446+
1447+
1448+
def test_cpp_probe_2(tmpdir):
    """A *.h file whose first line is an Emacs C++ marker is expected to be checked without any error output."""
    test_file = os.path.join(tmpdir, 'test.h')
    with open(test_file, 'wt') as f:
        # writelines() does not add line separators - without the explicit '\n'
        # the class definition would end up inside the marker comment line
        f.writelines([
            '// -*- C++ -*-\n',
            'class A {};\n'
        ])

    args = ['-q', '--template=simple', '--cpp-header-probe', test_file]

    assert_cppcheck(args, ec_exp=0, err_exp=[], out_exp=[])

0 commit comments

Comments
 (0)