diff --git a/Makefile b/Makefile index 7489ec83..7597bc58 100644 --- a/Makefile +++ b/Makefile @@ -11,21 +11,32 @@ TEST_CPPFLAGS = -DSIMPLECPP_TEST_SOURCE_DIR=\"$(CURDIR)\" test.o: CPPFLAGS += $(TEST_CPPFLAGS) %.o: %.cpp simplecpp.h - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< $(LIB_FUZZING_ENGINE) + +fuzz_no.o: fuzz.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DNO_FUZZ -c -o $@ fuzz.cpp testrunner: test.o simplecpp.o - $(CXX) $(LDFLAGS) simplecpp.o test.o -o testrunner + $(CXX) $(LDFLAGS) -o $@ $^ test: testrunner simplecpp ./testrunner python3 run-tests.py python3 -m pytest integration_test.py -vv +fuzz: fuzz.o simplecpp.o + # TODO: use -stdlib=libc++ -lc++ + # make fuzz CXX=clang++ CXXOPTS="-O2 -fno-omit-frame-pointer -g -gline-tables-only -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address,undefined -fsanitize-address-use-after-scope -fno-sanitize=integer -fno-sanitize-recover=undefined" LIB_FUZZING_ENGINE="-fsanitize=fuzzer" + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $^ $(LIB_FUZZING_ENGINE) + +no-fuzz: fuzz_no.o simplecpp.o + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $^ + selfcheck: simplecpp ./selfcheck.sh simplecpp: main.o simplecpp.o - $(CXX) $(LDFLAGS) main.o simplecpp.o -o simplecpp + $(CXX) $(LDFLAGS) -o $@ $^ clean: - rm -f testrunner simplecpp *.o + rm -f testrunner fuzz no-fuzz simplecpp *.o diff --git a/fuzz.cpp b/fuzz.cpp new file mode 100644 index 00000000..79e71186 --- /dev/null +++ b/fuzz.cpp @@ -0,0 +1,65 @@ +/* + * simplecpp - A simple and high-fidelity C/C++ preprocessor library + * Copyright (C) 2016-2024 simplecpp team + */ + +#include "simplecpp.h" + +#include <cstdint> + +#ifdef NO_FUZZ +#include <cstdlib> +#include <fstream> +#include <sstream> +#include <string> +#endif + +static void doProcess(const uint8_t *data, size_t dataSize) +{ + simplecpp::OutputList outputList; + std::vector<std::string> files; + simplecpp::TokenList rawtokens(data, dataSize, files, "test.cpp", &outputList); + + simplecpp::TokenList outputTokens(files); + simplecpp::FileDataCache filedata; + const
simplecpp::DUI dui; + std::list<simplecpp::MacroUsage> macroUsage; + std::list<simplecpp::IfCond> ifCond; + simplecpp::preprocess(outputTokens, rawtokens, files, filedata, dui, &outputList, &macroUsage, &ifCond); + + simplecpp::cleanup(filedata); +} + +#ifndef NO_FUZZ +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize); + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize) +{ + doProcess(data, dataSize); + return 0; +} +#else +int main(int argc, char * argv[]) +{ + if (argc < 2 || argc > 3) + return EXIT_FAILURE; + + std::ifstream f(argv[1]); + if (!f.is_open()) + return EXIT_FAILURE; + + std::ostringstream oss; + oss << f.rdbuf(); + + if (!f.good()) + return EXIT_FAILURE; + + const int cnt = (argc == 3) ? std::stoi(argv[2]) : 1; + + const std::string code = oss.str(); + for (int i = 0; i < cnt; ++i) + doProcess(reinterpret_cast<const uint8_t*>(code.data()), code.size()); + + return EXIT_SUCCESS; +} +#endif diff --git a/main.cpp b/main.cpp index a6d14386..2cb71827 100644 --- a/main.cpp +++ b/main.cpp @@ -121,6 +121,10 @@ int main(int argc, char **argv) std::cout << "error: could not open file '" << filename << "'" << std::endl; std::exit(1); } + if (!simplecpp::isFile(filename)) { + std::cout << "error: could not open file '" << filename << "' - not a regular file" << std::endl; + std::exit(1); + } rawtokens = new simplecpp::TokenList(f, files,filename,&outputList); } else { rawtokens = new simplecpp::TokenList(filename,files,&outputList); diff --git a/simplecpp.cpp b/simplecpp.cpp index 84e4b54b..820a0cd7 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -42,8 +42,10 @@ #ifdef _WIN32 # include +using mode_t = unsigned short; #else # include +# include #endif static bool isHex(const std::string &s) @@ -678,33 +680,55 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, if (oldLastToken != cback()) { oldLastToken = cback(); - if (!isLastLinePreprocessor()) + const Token * const llTok = isLastLinePreprocessor(); + if (!llTok) continue; - const std::string
lastline(lastLine()); - if (lastline == "# file %str%") { - const Token *strtok = cback(); - while (strtok->comment) - strtok = strtok->previous; - loc.push(location); - location.fileIndex = fileIndex(strtok->str().substr(1U, strtok->str().size() - 2U)); - location.line = 1U; - } else if (lastline == "# line %num%") { - const Token *numtok = cback(); - while (numtok->comment) - numtok = numtok->previous; - lineDirective(location.fileIndex, std::atol(numtok->str().c_str()), &location); - } else if (lastline == "# %num% %str%" || lastline == "# line %num% %str%") { - const Token *strtok = cback(); - while (strtok->comment) - strtok = strtok->previous; - const Token *numtok = strtok->previous; - while (numtok->comment) - numtok = numtok->previous; - lineDirective(fileIndex(replaceAll(strtok->str().substr(1U, strtok->str().size() - 2U),"\\\\","\\")), - std::atol(numtok->str().c_str()), &location); + const Token * const llNextToken = llTok->next; + if (!llTok->next) + continue; + if (llNextToken->next) { + // #file "file.c" + if (llNextToken->str() == "file" && + llNextToken->next->str()[0] == '\"') + { + const Token *strtok = cback(); + while (strtok->comment) + strtok = strtok->previous; + loc.push(location); + location.fileIndex = fileIndex(strtok->str().substr(1U, strtok->str().size() - 2U)); + location.line = 1U; + } + // #3 "file.c" + // #line 3 "file.c" + else if ((llNextToken->number && + llNextToken->next->str()[0] == '\"') || + (llNextToken->str() == "line" && + llNextToken->next->number && + llNextToken->next->next && + llNextToken->next->next->str()[0] == '\"')) + { + const Token *strtok = cback(); + while (strtok->comment) + strtok = strtok->previous; + const Token *numtok = strtok->previous; + while (numtok->comment) + numtok = numtok->previous; + lineDirective(fileIndex(replaceAll(strtok->str().substr(1U, strtok->str().size() - 2U),"\\\\","\\")), + std::atol(numtok->str().c_str()), &location); + } + // #line 3 + else if (llNextToken->str() == "line" && + 
llNextToken->next->number) + { + const Token *numtok = cback(); + while (numtok->comment) + numtok = numtok->previous; + lineDirective(location.fileIndex, std::atol(numtok->str().c_str()), &location); + } } // #endfile - else if (lastline == "# endfile" && !loc.empty()) { + else if (llNextToken->str() == "endfile" && !loc.empty()) + { location = loc.top(); loc.pop(); } @@ -1398,34 +1422,6 @@ std::string simplecpp::TokenList::readUntil(Stream &stream, const Location &loca return ret; } -std::string simplecpp::TokenList::lastLine(int maxsize) const -{ - std::string ret; - int count = 0; - for (const Token *tok = cback(); ; tok = tok->previous) { - if (!sameline(tok, cback())) { - break; - } - if (tok->comment) - continue; - if (++count > maxsize) - return ""; - if (!ret.empty()) - ret += ' '; - // add tokens in reverse for performance reasons - if (tok->str()[0] == '\"') - ret += "%rts%"; // %str% - else if (tok->number) - ret += "%mun%"; // %num% - else { - ret += tok->str(); - std::reverse(ret.end() - tok->str().length(), ret.end()); - } - } - std::reverse(ret.begin(), ret.end()); - return ret; -} - const simplecpp::Token* simplecpp::TokenList::lastLineTok(int maxsize) const { const Token* prevTok = nullptr; @@ -1442,10 +1438,12 @@ const simplecpp::Token* simplecpp::TokenList::lastLineTok(int maxsize) const return prevTok; } -bool simplecpp::TokenList::isLastLinePreprocessor(int maxsize) const +const simplecpp::Token* simplecpp::TokenList::isLastLinePreprocessor(int maxsize) const { const Token * const prevTok = lastLineTok(maxsize); - return prevTok && prevTok->op == '#'; + if (prevTok && prevTok->op == '#') + return prevTok; + return nullptr; } unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) @@ -1693,7 +1691,9 @@ namespace simplecpp { nameTokDef = nametoken; variadic = false; variadicOpt = false; + delete optExpandValue; optExpandValue = nullptr; + delete optNoExpandValue; optNoExpandValue = nullptr; if (!nameTokDef) { valueToken = 
endToken = nullptr; @@ -2367,8 +2367,8 @@ namespace simplecpp { bool variadicOpt; /** Expansion value for varadic macros with __VA_OPT__ expanded and discarded respectively */ - const TokenList *optExpandValue; - const TokenList *optNoExpandValue; + const TokenList *optExpandValue = nullptr; + const TokenList *optNoExpandValue = nullptr; /** was the value of this macro actually defined in the code? */ bool valueDefinedInCode_; @@ -2977,9 +2977,11 @@ static std::string openHeaderDirect(std::ifstream &f, const std::string &path) if (nonExistingFilesCache.contains(path)) return ""; // file is known not to exist, skip expensive file open call #endif - f.open(path.c_str()); - if (f.is_open()) - return path; + if (simplecpp::isFile(path)) { + f.open(path.c_str()); + if (f.is_open()) + return path; + } #ifdef SIMPLECPP_WINDOWS nonExistingFilesCache.add(path); #endif @@ -3099,6 +3101,9 @@ bool simplecpp::FileDataCache::getFileId(const std::string &path, FileID &id) if (stat(path.c_str(), &statbuf) != 0) return false; + if ((statbuf.st_mode & S_IFMT) != S_IFREG) + return false; + id.dev = statbuf.st_dev; id.ino = statbuf.st_ino; @@ -3835,3 +3840,21 @@ std::string simplecpp::getCppStdString(const std::string &std) { return getCppStdString(getCppStd(std)); } + +static mode_t file_type(const std::string &path) +{ + struct stat file_stat; + if (stat(path.c_str(), &file_stat) == -1) + return 0; + return file_stat.st_mode & S_IFMT; +} + +bool simplecpp::isFile(const std::string &path) +{ + return file_type(path) == S_IFREG; +} + +bool simplecpp::isDirectory(const std::string &path) +{ + return file_type(path) == S_IFDIR; +} diff --git a/simplecpp.h b/simplecpp.h index ac367154..14aab376 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -365,9 +365,8 @@ namespace simplecpp { std::string readUntil(Stream &stream, const Location &location, char start, char end, OutputList *outputList); void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); - std::string 
lastLine(int maxsize=1000) const; const Token* lastLineTok(int maxsize=1000) const; - bool isLastLinePreprocessor(int maxsize=1000) const; + const Token* isLastLinePreprocessor(int maxsize=1000) const; unsigned int fileIndex(const std::string &filename); @@ -408,6 +407,20 @@ namespace simplecpp { bool removeComments; /** remove comment tokens from included files */ }; + /** + * @brief Checks if given path is a file + * @param path Path to be checked + * @return true if given path is a file + */ + SIMPLECPP_LIB bool isFile(const std::string &path); + + /** + * @brief Checks if a given path is a directory + * @param path Path to be checked + * @return true if given path is a directory + */ + SIMPLECPP_LIB bool isDirectory(const std::string &path); + struct SIMPLECPP_LIB FileData { /** The canonical filename associated with this data */ std::string filename; diff --git a/test.cpp b/test.cpp index 0ecaa3b1..268f7499 100644 --- a/test.cpp +++ b/test.cpp @@ -2063,6 +2063,44 @@ static void missingHeader4() ASSERT_EQUALS("file0,1,syntax_error,No header in #include\n", toString(outputList)); } +#ifndef _WIN32 +static void missingHeader5() +{ + // this is a directory + const char code[] = "#include \"/\"\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \"/\"\n", toString(outputList)); +} + +static void missingHeader6() +{ + // this is a directory + const char code[] = "#include \"/usr\"\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \"/usr\"\n", toString(outputList)); +} + +static void missingHeader7() +{ + // this is a directory + const char code[] = "#include \n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n", toString(outputList)); +} + +static void missingHeader8() +{ 
+ // this is a directory + const char code[] = "#include \n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n", toString(outputList)); +} +#endif + static void nestedInclude() { const char code[] = "#include \"test.h\"\n"; @@ -3231,6 +3269,16 @@ static void fuzz_crash() } } +static void leak() +{ + const char code[] = "#include\n" + "#include\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,missing_header,Header not found: \n" + "file0,2,missing_header,Header not found: \n", toString(outputList)); +} + int main(int argc, char **argv) { TEST_CASE(backslash); @@ -3404,6 +3452,12 @@ int main(int argc, char **argv) TEST_CASE(missingHeader2); TEST_CASE(missingHeader3); TEST_CASE(missingHeader4); +#ifndef _WIN32 + TEST_CASE(missingHeader5); + TEST_CASE(missingHeader6); + TEST_CASE(missingHeader7); + TEST_CASE(missingHeader8); +#endif TEST_CASE(nestedInclude); TEST_CASE(systemInclude); TEST_CASE(circularInclude); @@ -3487,5 +3541,7 @@ int main(int argc, char **argv) TEST_CASE(fuzz_crash); + TEST_CASE(leak); + return numberOfFailedAssertions > 0 ? EXIT_FAILURE : EXIT_SUCCESS; }