diff --git a/llvm/tools/llvm-advisor/CMakeLists.txt b/llvm/tools/llvm-advisor/CMakeLists.txt
new file mode 100644
index 0000000000000..d2389bdd1e0fa
--- /dev/null
+++ b/llvm/tools/llvm-advisor/CMakeLists.txt
@@ -0,0 +1,13 @@
+# llvm-advisor is configured as part of the LLVM tree: the minimum CMake
+# version and the policy settings come from the top-level CMakeLists.txt.
+# Calling cmake_minimum_required() here again would re-set the policies for
+# this directory.
+
+set(LLVM_TOOL_LLVM_ADVISOR_BUILD_DEFAULT ON)
+set(LLVM_REQUIRE_EXE_NAMES llvm-advisor)
+
+# src/CMakeLists.txt creates the llvm-advisor target with add_llvm_tool(),
+# which also names the executable after the target and registers its install
+# rule. A separate install(TARGETS ...) here would duplicate that rule, and it
+# would name a target that is excluded from `all` when LLVM_BUILD_TOOLS is OFF.
+add_subdirectory(src)
diff --git a/llvm/tools/llvm-advisor/config/config.json b/llvm/tools/llvm-advisor/config/config.json
new file mode 100644
index 0000000000000..9e94a41ff46c4
--- /dev/null
+++ b/llvm/tools/llvm-advisor/config/config.json
@@ -0,0 +1,7 @@
+{
+  "outputDir": ".llvm-advisor",
+  "verbose": false,
+  "keepTemps": false,
+  "runProfiler": true,
+  "timeout": 60
+}
diff --git a/llvm/tools/llvm-advisor/src/CMakeLists.txt b/llvm/tools/llvm-advisor/src/CMakeLists.txt
new file mode 100644
index 0000000000000..81088f8231625
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Gather all .cpp sources in this directory tree
+file(GLOB_RECURSE LLVM_ADVISOR_SOURCES CONFIGURE_DEPENDS
+  ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+)
+
+# LLVM libraries the tool links against. add_llvm_tool() reads
+# LLVM_LINK_COMPONENTS; unlike naming the LLVM* library targets in
+# target_link_libraries(), this also works when tools are linked against the
+# libLLVM shared library (LLVM_LINK_LLVM_DYLIB).
+set(LLVM_LINK_COMPONENTS
+  BitWriter
+  Core
+  IRReader
+  ProfileData
+  Remarks
+  Support
+)
+
+# Define the executable target. add_llvm_tool() also registers the install
+# rule for the binary.
+add_llvm_tool(llvm-advisor
+  ${LLVM_ADVISOR_SOURCES}
+)
+
+# Add this directory to the include path of the tool's sources
+target_include_directories(llvm-advisor PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# Install the Python view module alongside the binary
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../view/
+  DESTINATION ${CMAKE_INSTALL_BINDIR}/view
+  FILES_MATCHING
+  PATTERN "*.py"
+  PATTERN "*.html"
+  PATTERN "*.css"
+  PATTERN "*.js"
+  PATTERN "*.md"
+)
diff --git 
a/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp
new file mode 100644
index 0000000000000..69f1e3d52702e
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp
@@ -0,0 +1,88 @@
+#include "AdvisorConfig.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <limits>
+
+namespace llvm {
+namespace advisor {
+
+AdvisorConfig::AdvisorConfig() {
+  // Use relative path as default, will be resolved by CompilationManager
+  OutputDir_ = ".llvm-advisor";
+}
+
+/// Loads settings from the JSON file at \p path.
+///
+/// The file must contain a JSON object. The keys read are "outputDir"
+/// (string), "verbose", "keepTemps" and "runProfiler" (booleans) and "timeout"
+/// (integer, stored as the timeout in seconds). A key for which the typed
+/// getter yields no value leaves the corresponding setting unchanged.
+///
+/// The timeout is validated before anything is stored, so a rejected file does
+/// not leave the configuration partially updated.
+///
+/// \returns true on success, or an Error if the file cannot be read, cannot be
+/// parsed, is not a JSON object, or holds a timeout that does not fit in an
+/// int.
+Expected<bool> AdvisorConfig::loadFromFile(const std::string &path) {
+  auto BufferOrError = MemoryBuffer::getFile(path);
+  if (!BufferOrError) {
+    return createStringError(BufferOrError.getError(),
+                             "Cannot read config file");
+  }
+
+  auto Buffer = std::move(*BufferOrError);
+  Expected<json::Value> JsonOrError = json::parse(Buffer->getBuffer());
+  if (!JsonOrError) {
+    return JsonOrError.takeError();
+  }
+
+  auto &Json = *JsonOrError;
+  auto *Obj = Json.getAsObject();
+  if (!Obj) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Config file must contain JSON object");
+  }
+
+  // getInteger() yields a 64-bit value. Reject anything TimeoutSeconds_ (an
+  // int) cannot hold instead of letting the narrowing cast change the value.
+  auto timeoutOpt = Obj->getInteger("timeout");
+  if (timeoutOpt && (*timeoutOpt < std::numeric_limits<int>::min() ||
+                     *timeoutOpt > std::numeric_limits<int>::max())) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Config 'timeout' is out of range");
+  }
+
+  if (auto outputDirOpt = Obj->getString("outputDir"); outputDirOpt) {
+    OutputDir_ = outputDirOpt->str();
+  }
+
+  if (auto verboseOpt = Obj->getBoolean("verbose"); verboseOpt) {
+    Verbose_ = *verboseOpt;
+  }
+
+  if (auto keepTempsOpt = Obj->getBoolean("keepTemps"); keepTempsOpt) {
+    KeepTemps_ = *keepTempsOpt;
+  }
+
+  if (auto runProfileOpt = Obj->getBoolean("runProfiler"); runProfileOpt) {
+    RunProfiler_ = *runProfileOpt;
+  }
+
+  if (timeoutOpt) {
+    TimeoutSeconds_ = static_cast<int>(*timeoutOpt);
+  }
+
+  return true;
+}
+
+/// Returns the command used to invoke \p tool.
+std::string AdvisorConfig::getToolPath(const std::string &tool) const {
+  // For now, just return the tool name and rely on PATH
+  return tool;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git 
a/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h
new file mode 100644
index 0000000000000..b7f553fddbb23
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h
@@ -0,0 +1,47 @@
+#ifndef LLVM_ADVISOR_CONFIG_H
+#define LLVM_ADVISOR_CONFIG_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+
+namespace llvm {
+namespace advisor {
+
+/// Settings for an llvm-advisor run: output directory, verbosity, whether
+/// temporary files are kept, whether profiling is enabled, and a timeout in
+/// seconds.
+class AdvisorConfig {
+public:
+  AdvisorConfig();
+
+  /// Overrides the current settings with the values found in the JSON file at
+  /// \p path. Yields true on success.
+  Expected<bool> loadFromFile(const std::string &path);
+
+  void setOutputDir(const std::string &dir) { OutputDir_ = dir; }
+  void setVerbose(bool verbose) { Verbose_ = verbose; }
+  void setKeepTemps(bool keep) { KeepTemps_ = keep; }
+  void setRunProfiler(bool run) { RunProfiler_ = run; }
+  void setTimeout(int seconds) { TimeoutSeconds_ = seconds; }
+
+  const std::string &getOutputDir() const { return OutputDir_; }
+  bool getVerbose() const { return Verbose_; }
+  bool getKeepTemps() const { return KeepTemps_; }
+  bool getRunProfiler() const { return RunProfiler_; }
+  int getTimeout() const { return TimeoutSeconds_; }
+
+  /// Returns the command used to invoke \p tool.
+  std::string getToolPath(const std::string &tool) const;
+
+private:
+  std::string OutputDir_;
+  bool Verbose_ = false;
+  bool KeepTemps_ = false;
+  bool RunProfiler_ = true;
+  int TimeoutSeconds_ = 60;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildContext.h b/llvm/tools/llvm-advisor/src/Core/BuildContext.h
new file mode 100644
index 0000000000000..4f40c37ca8706
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildContext.h
@@ -0,0 +1,57 @@
+#ifndef LLVM_ADVISOR_BUILD_CONTEXT_H
+#define LLVM_ADVISOR_BUILD_CONTEXT_H
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Stage of a build that a command line corresponds to.
+enum class BuildPhase {
+  Unknown,
+  Preprocessing,
+  Compilation,
+  Assembly,
+  Linking,
+  Archiving,
+  CMakeConfigure,
+  CMakeBuild,
+  MakefileBuild
+};
+
+/// Kind of program a command line invokes.
+enum class BuildTool {
+  Unknown,
+  Clang,
+  GCC,
+  LLVM_Tools,
+  CMake,
+  Make,
+  Ninja,
+  Linker,
+  Archiver
+};
+
+/// Facts gathered about a single build command.
+struct BuildContext {
+  // Default to Unknown so that a freshly constructed context never exposes
+  // indeterminate enum values.
+  BuildPhase phase = BuildPhase::Unknown;
+  BuildTool tool = BuildTool::Unknown;
+  std::string workingDirectory;
+  std::string outputDirectory;
+  std::vector<std::string> inputFiles;
+  std::vector<std::string> outputFiles;
+  std::vector<std::string> expectedGeneratedFiles;
+  std::map<std::string, std::string> metadata;
+  bool hasOffloading = false;
+  bool hasDebugInfo = false;
+  bool hasOptimization = false;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp
new file mode 100644
index 0000000000000..a4af5a660c80e
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp
@@ -0,0 +1,167 @@
+#include "BuildExecutor.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+namespace llvm {
+namespace advisor {
+
+namespace {
+
+/// Flag prefix that names the optimization-record output file.
+constexpr char RecordFileFlag[] = "-foptimization-record-file=";
+
+/// File name used for optimization records when the user did not choose one.
+constexpr char DefaultRemarksFile[] = "remarks.opt.yaml";
+
+/// Returns true for tools that are not compiler drivers. The flags added by
+/// instrumentCompilerArgs() are compiler options, so commands for these tools
+/// are executed with their arguments untouched.
+/// NOTE(review): GCC is still instrumented as before, although the
+/// -foptimization-record-file= and -fprofile-instr-generate= spellings look
+/// Clang-specific -- confirm whether GCC builds are meant to be supported.
+bool isNonCompilerTool(BuildTool tool) {
+  switch (tool) {
+  case BuildTool::CMake:
+  case BuildTool::Make:
+  case BuildTool::Ninja:
+  case BuildTool::Linker:
+  case BuildTool::Archiver:
+  case BuildTool::LLVM_Tools:
+    return true;
+  case BuildTool::Unknown:
+  case BuildTool::Clang:
+  case BuildTool::GCC:
+    return false;
+  }
+  return false;
+}
+
+} // namespace
+
+BuildExecutor::BuildExecutor(const AdvisorConfig &config) : config_(config) {}
+
+/// Runs \p compiler with \p args and waits for it to finish.
+///
+/// Unless the detected tool is a build system, linker, archiver or llvm-*
+/// utility, the arguments are first extended by instrumentCompilerArgs(),
+/// which also records the files it expects in \p buildContext.
+///
+/// \returns the value reported by sys::ExecuteAndWait, or an Error if
+/// sys::findProgramByName cannot locate \p compiler.
+Expected<int> BuildExecutor::execute(const std::string &compiler,
+                                     const std::vector<std::string> &args,
+                                     BuildContext &buildContext,
+                                     const std::string &tempDir) {
+  // Resolve the program before instrumenting so that a missing tool does not
+  // leave expected-file entries behind in buildContext.
+  auto compilerPath = sys::findProgramByName(compiler);
+  if (!compilerPath) {
+    return createStringError(
+        std::make_error_code(std::errc::no_such_file_or_directory),
+        "Compiler not found: " + compiler);
+  }
+
+  const std::vector<std::string> finalArgs =
+      isNonCompilerTool(buildContext.tool)
+          ? args
+          : instrumentCompilerArgs(args, buildContext, tempDir);
+
+  std::vector<StringRef> execArgs;
+  execArgs.reserve(finalArgs.size() + 1);
+  execArgs.push_back(compiler);
+  for (const auto &arg : finalArgs) {
+    execArgs.push_back(arg);
+  }
+
+  if (config_.getVerbose()) {
+    outs() << "Executing: " << compiler;
+    for (const auto &arg : finalArgs) {
+      outs() << " " << arg;
+    }
+    outs() << "\n";
+  }
+
+  return sys::ExecuteAndWait(*compilerPath, execArgs);
+}
+
+/// Returns a copy of \p args extended with the flags llvm-advisor relies on:
+/// -g, an optimization-record file inside \p tempDir and, when profiling is
+/// enabled in the configuration, profile instrumentation writing into
+/// \p tempDir. Flags the user already passed are not added again. Every file
+/// the added or redirected flags name is appended to
+/// buildContext.expectedGeneratedFiles.
+std::vector<std::string>
+BuildExecutor::instrumentCompilerArgs(const std::vector<std::string> &args,
+                                      BuildContext &buildContext,
+                                      const std::string &tempDir) {
+
+  std::vector<std::string> result = args;
+  std::set<std::string> existingFlags;
+
+  // Scan existing flags to avoid duplication
+  for (const auto &arg : args) {
+    if (arg.find("-g") == 0)
+      existingFlags.insert("debug");
+    if (arg.find("-fsave-optimization-record") != std::string::npos)
+      existingFlags.insert("remarks");
+    if (arg.find("-fprofile-instr-generate") != std::string::npos)
+      existingFlags.insert("profile");
+  }
+
+  // Add debug info if not present
+  if (existingFlags.find("debug") == existingFlags.end()) {
+    result.push_back("-g");
+  }
+
+  const std::string defaultRemarks = tempDir + "/" + DefaultRemarksFile;
+
+  // Add optimization remarks with proper redirection
+  if (existingFlags.find("remarks") == existingFlags.end()) {
+    result.push_back("-fsave-optimization-record");
+    result.push_back(RecordFileFlag + defaultRemarks);
+    buildContext.expectedGeneratedFiles.push_back(defaultRemarks);
+  } else {
+    // If user already specified remarks, find and redirect the file
+    bool foundFileFlag = false;
+    for (auto &arg : result) {
+      StringRef existingPath(arg);
+      // consume_front() strips exactly the flag prefix, so what remains is
+      // the path the user supplied (a fixed offset is easy to get wrong).
+      if (!existingPath.consume_front(RecordFileFlag))
+        continue;
+
+      // Keep the user's file name but place the file in the temp directory.
+      std::string filename = sys::path::filename(existingPath).str();
+      if (filename.empty())
+        filename = DefaultRemarksFile;
+      const std::string redirected = tempDir + "/" + filename;
+      arg = RecordFileFlag + redirected;
+      buildContext.expectedGeneratedFiles.push_back(redirected);
+      foundFileFlag = true;
+      break;
+    }
+    // If no explicit file specified, add our own
+    if (!foundFileFlag) {
+      result.push_back(RecordFileFlag + defaultRemarks);
+      buildContext.expectedGeneratedFiles.push_back(defaultRemarks);
+    }
+  }
+
+  // Add profiling if enabled and not present, redirect to temp directory
+  if (config_.getRunProfiler() &&
+      existingFlags.find("profile") == existingFlags.end()) {
+    const std::string profilePath = tempDir + "/profile.profraw";
+    result.push_back("-fprofile-instr-generate=" + profilePath);
+    result.push_back("-fcoverage-mapping");
+    buildContext.expectedGeneratedFiles.push_back(profilePath);
+  }
+
+  return result;
+}
+
+} // 
namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h
new file mode 100644
index 0000000000000..a77ffd70c9b57
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h
@@ -0,0 +1,41 @@
+#ifndef LLVM_ADVISOR_BUILD_EXECUTOR_H
+#define LLVM_ADVISOR_BUILD_EXECUTOR_H
+
+#include "../Config/AdvisorConfig.h"
+#include "BuildContext.h"
+#include "llvm/Support/Error.h"
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Runs a build command, adding llvm-advisor's instrumentation flags to the
+/// argument list before execution.
+class BuildExecutor {
+public:
+  BuildExecutor(const AdvisorConfig &config);
+
+  /// Executes \p compiler with \p args (plus instrumentation) and yields the
+  /// result of the run, or an Error if the program cannot be found.
+  Expected<int> execute(const std::string &compiler,
+                        const std::vector<std::string> &args,
+                        BuildContext &buildContext, const std::string &tempDir);
+
+private:
+  /// Returns \p args extended with debug, remarks and (optionally) profiling
+  /// flags; generated-file paths are recorded in \p buildContext.
+  std::vector<std::string>
+  instrumentCompilerArgs(const std::vector<std::string> &args,
+                         BuildContext &buildContext,
+                         const std::string &tempDir);
+
+  // Held by reference: the AdvisorConfig must outlive this executor.
+  const AdvisorConfig &config_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp
new file mode 100644
index 0000000000000..3192c42669e65
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp
@@ -0,0 +1,207 @@
+#include "CommandAnalyzer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include <initializer_list>
+
+namespace llvm {
+namespace advisor {
+
+namespace {
+
+/// Returns true if any element of \p args ends with one of \p suffixes.
+bool anyArgEndsWith(const std::vector<std::string> &args,
+                    std::initializer_list<StringRef> suffixes) {
+  for (const auto &arg : args) {
+    StringRef argRef(arg);
+    for (StringRef suffix : suffixes) {
+      if (argRef.ends_with(suffix))
+        return true;
+    }
+  }
+  return false;
+}
+
+/// Returns true for options whose value is passed as the following argument,
+/// so that the value is not mistaken for an input file.
+bool takesSeparateOperand(StringRef arg) {
+  for (StringRef option : {"-o", "-I", "-L", "-D", "-U", "-x", "-include",
+                           "-isystem", "-MF", "-MT", "-MQ"}) {
+    if (arg == option)
+      return true;
+  }
+  return false;
+}
+
+/// Returns true for -O<level> options that turn optimization on.
+bool enablesOptimization(StringRef arg) {
+  if (!arg.consume_front("-O"))
+    return false;
+  // "-O0" disables optimization, and "-ObjC"/"-ObjC++" select a language
+  // rather than an optimization level. A bare "-O" does enable optimization.
+  return arg != "0" && !arg.starts_with("bjC");
+}
+
+} // namespace
+
+CommandAnalyzer::CommandAnalyzer(const std::string &command,
+                                 const std::vector<std::string> &args)
+    : command_(command), args_(args) {}
+
+/// Describes the stored command line: working directory, tool, build phase,
+/// input and output files, and feature flags.
+BuildContext CommandAnalyzer::analyze() const {
+  BuildContext context;
+  SmallString<256> cwd;
+  // current_path() reports failure through its return value; the buffer is
+  // only used when the query succeeded.
+  if (!sys::fs::current_path(cwd))
+    context.workingDirectory = cwd.str().str();
+
+  context.tool = detectBuildTool();
+  context.phase = detectBuildPhase(context.tool);
+  context.inputFiles = extractInputFiles();
+  context.outputFiles = extractOutputFiles();
+  detectBuildFeatures(context);
+
+  return context;
+}
+
+/// Classifies the tool from the file name of the command. The cases are tried
+/// in the order written, so the exact "llvm-ar" match is checked before the
+/// generic "llvm-" prefix.
+BuildTool CommandAnalyzer::detectBuildTool() const {
+  return StringSwitch<BuildTool>(sys::path::filename(command_))
+      .StartsWith("clang", BuildTool::Clang)
+      .StartsWith("gcc", BuildTool::GCC)
+      .StartsWith("g++", BuildTool::GCC)
+      .Case("cmake", BuildTool::CMake)
+      .Case("make", BuildTool::Make)
+      .Case("ninja", BuildTool::Ninja)
+      .EndsWith("-ld", BuildTool::Linker)
+      .Case("ld", BuildTool::Linker)
+      .Case("ar", BuildTool::Archiver)
+      .Case("llvm-ar", BuildTool::Archiver)
+      .StartsWith("llvm-", BuildTool::LLVM_Tools)
+      .Default(BuildTool::Unknown);
+}
+
+/// Derives the build phase from \p tool and the arguments.
+///
+/// For Clang/GCC the first of -E, -S or -c on the command line decides; without
+/// any of them an object-file argument means linking and a C/C++ source
+/// argument means compilation.
+BuildPhase CommandAnalyzer::detectBuildPhase(BuildTool tool) const {
+  if (tool == BuildTool::CMake) {
+    for (const auto &arg : args_) {
+      if (arg == "--build")
+        return BuildPhase::CMakeBuild;
+    }
+    return BuildPhase::CMakeConfigure;
+  }
+
+  if (tool == BuildTool::Make || tool == BuildTool::Ninja) {
+    return BuildPhase::MakefileBuild;
+  }
+
+  if (tool == BuildTool::Linker) {
+    return BuildPhase::Linking;
+  }
+
+  if (tool == BuildTool::Archiver) {
+    return BuildPhase::Archiving;
+  }
+
+  if (tool == BuildTool::Clang || tool == BuildTool::GCC) {
+    for (const auto &arg : args_) {
+      if (arg == "-E")
+        return BuildPhase::Preprocessing;
+      if (arg == "-S")
+        return BuildPhase::Assembly;
+      if (arg == "-c")
+        return BuildPhase::Compilation;
+    }
+
+    if (anyArgEndsWith(args_, {".o", ".O", ".obj", ".OBJ"})) {
+      return BuildPhase::Linking;
+    }
+
+    if (anyArgEndsWith(args_, {".c", ".C", ".cpp", ".CPP", ".cc", ".CC",
+                               ".cxx", ".CXX"})) {
+      return BuildPhase::Compilation; // Default for source files
+    }
+  }
+
+  return BuildPhase::Unknown;
+}
+
+/// Sets the feature flags and metadata entries of \p context from the
+/// arguments: debug info, optimization, offloading, and the values of
+/// -march=, -mtune= and --offload-arch=.
+void CommandAnalyzer::detectBuildFeatures(BuildContext &context) const {
+  for (const auto &arg : args_) {
+    StringRef argRef(arg);
+
+    // "-g0" requests no debug info, so it must not set the flag.
+    if (argRef.starts_with("-g") && argRef != "-g0") {
+      context.hasDebugInfo = true;
+    }
+
+    if (enablesOptimization(argRef)) {
+      context.hasOptimization = true;
+    }
+
+    if (arg.find("openmp") != std::string::npos ||
+        arg.find("openacc") != std::string::npos ||
+        arg.find("cuda") != std::string::npos ||
+        arg.find("offload") != std::string::npos) {
+      context.hasOffloading = true;
+    }
+
+    StringRef value = argRef;
+    if (value.consume_front("-march=")) {
+      context.metadata["target_arch"] = value.str();
+    } else if (value.consume_front("-mtune=")) {
+      context.metadata["tune"] = value.str();
+    } else if (value.consume_front("--offload-arch=")) {
+      context.metadata["offload_arch"] = value.str();
+    }
+  }
+}
+
+/// Returns the arguments that are neither options nor option operands and
+/// that name an existing file-system entry.
+std::vector<std::string> CommandAnalyzer::extractInputFiles() const {
+  std::vector<std::string> inputs;
+  for (size_t i = 0; i < args_.size(); ++i) {
+    const auto &arg = args_[i];
+    if (StringRef(arg).starts_with("-")) {
+      if (takesSeparateOperand(arg)) {
+        i++;
+      }
+      continue;
+    }
+    if (sys::fs::exists(arg)) {
+      inputs.push_back(arg);
+    }
+  }
+  return inputs;
+}
+
+/// Returns the operand of every "-o" option.
+std::vector<std::string> CommandAnalyzer::extractOutputFiles() const {
+  std::vector<std::string> outputs;
+  for (size_t i = 0; i < args_.size(); ++i) {
+    const auto &arg = args_[i];
+    if (arg == "-o" && i + 1 < args_.size()) {
+      outputs.push_back(args_[i + 1]);
+      i++;
+    }
+  }
+  return outputs;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h
new file mode 100644
index 0000000000000..c3efdff147e5f
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h
@@ -0,0 +1,32 @@
+#ifndef LLVM_ADVISOR_COMMAND_ANALYZER_H
+#define 
LLVM_ADVISOR_COMMAND_ANALYZER_H
+
+#include "BuildContext.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+class CommandAnalyzer {
+public:
+  CommandAnalyzer(const std::string &command,
+                  const std::vector<std::string> &args);
+
+  BuildContext analyze() const;
+
+private:
+  BuildTool detectBuildTool() const;
+  BuildPhase detectBuildPhase(BuildTool tool) const;
+  void detectBuildFeatures(BuildContext &context) const;
+  std::vector<std::string> extractInputFiles() const;
+  std::vector<std::string> extractOutputFiles() const;
+
+  std::string command_;
+  std::vector<std::string> args_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp b/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp
new file mode 100644
index 0000000000000..e07db9d365009
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp
@@ -0,0 +1,305 @@
+#include "CompilationManager.h"
+#include "../Detection/UnitDetector.h"
+#include "../Utils/FileManager.h"
+#include "CommandAnalyzer.h"
+#include "DataExtractor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <chrono>
+#include <memory>
+#include <set>
+#include <string>
+
+namespace llvm {
+namespace advisor {
+
+/// Records the current working directory and sets up the temporary directory
+/// that receives the files generated during a run.
+CompilationManager::CompilationManager(const AdvisorConfig &config)
+    : config_(config), buildExecutor_(config) {
+
+  // Get current working directory first
+  SmallString<256> currentDir;
+  sys::fs::current_path(currentDir);
+  initialWorkingDir_ = currentDir.str().str();
+
+  // Create temp directory with proper error handling
+  SmallString<128> tempDirPath;
+  if (auto EC = sys::fs::createUniqueDirectory("llvm-advisor", tempDirPath)) {
+    // Fall back to a timestamp-named folder inside the platform's temporary
+    // directory instead of assuming that /tmp exists.
+    auto now = std::chrono::system_clock::now();
+    auto timestamp = std::chrono::duration_cast<std::chrono::milliseconds>(
+                         now.time_since_epoch())
+                         .count();
+    SmallString<128> fallbackDir;
+    sys::path::system_temp_directory(/*ErasedOnReboot=*/true, fallbackDir);
+    sys::path::append(fallbackDir,
+                      "llvm-advisor-" + std::to_string(timestamp));
+    tempDir_ = fallbackDir.str().str();
+  } else {
+    tempDir_ = tempDirPath.str().str();
+  }
+
+  // Ensure the directory actually exists
+  if (!sys::fs::exists(tempDir_)) {
+    if (auto EC = sys::fs::create_directories(tempDir_)) {
+      errs() << "Warning: could not create temporary directory " << tempDir_
+             << ": " << EC.message() << "\n";
+    }
+  }
+
+  if (config_.getVerbose()) {
+    outs() << "Using temporary directory: " << tempDir_ << "\n";
+  }
+}
+
+/// Deletes the temporary directory unless the configuration asks to keep it.
+CompilationManager::~CompilationManager() {
+  if (!config_.getKeepTemps() && sys::fs::exists(tempDir_)) {
+    sys::fs::remove_directories(tempDir_);
+  }
+}
+
+/// Runs \p compiler with \p args and gathers the data produced along the way.
+///
+/// Linking and archiving commands are handed straight to the BuildExecutor.
+/// For every other phase the compilation units are detected, the command is
+/// executed, generated files are collected, extra data is extracted, the
+/// results are copied to the output directory and leaked files are removed
+/// from the working directory. Failures of the steps that follow the command
+/// are reported in verbose mode only.
+///
+/// \returns the result reported for the executed command, or an Error if unit
+/// detection or the execution itself fails.
+Expected<int> CompilationManager::executeWithDataCollection(
+    const std::string &compiler, const std::vector<std::string> &args) {
+
+  // Analyze the build command
+  BuildContext buildContext = CommandAnalyzer(compiler, args).analyze();
+
+  if (config_.getVerbose()) {
+    outs() << "Build phase: " << static_cast<int>(buildContext.phase) << "\n";
+  }
+
+  // Skip data collection for linking/archiving phases
+  if (buildContext.phase == BuildPhase::Linking ||
+      buildContext.phase == BuildPhase::Archiving) {
+    return buildExecutor_.execute(compiler, args, buildContext, tempDir_);
+  }
+
+  // Detect compilation units
+  UnitDetector detector(config_);
+  auto detectedUnits = detector.detectUnits(compiler, args);
+  if (!detectedUnits) {
+    return detectedUnits.takeError();
+  }
+
+  std::vector<std::unique_ptr<CompilationUnit>> units;
+  for (auto &unitInfo : *detectedUnits) {
+    units.push_back(std::make_unique<CompilationUnit>(unitInfo, tempDir_));
+  }
+
+  // Scan existing files before compilation
+  auto existingFiles = scanDirectory(initialWorkingDir_);
+
+  // Execute compilation with instrumentation
+  auto execResult =
+      buildExecutor_.execute(compiler, args, buildContext, tempDir_);
+  if (!execResult) {
+    return execResult.takeError();
+  }
+  int exitCode = *execResult;
+
+  // Collect generated files (even if compilation failed for analysis)
+  collectGeneratedFiles(existingFiles, units);
+
+  // Extract additional data
+  DataExtractor extractor(config_);
+  for (auto &unit : units) {
+    if (auto Err = extractor.extractAllData(*unit, tempDir_)) {
+      // toString() consumes the Error, which has to happen even when the
+      // message is not printed.
+      std::string message = toString(std::move(Err));
+      if (config_.getVerbose()) {
+        errs() << "Data extraction failed: " << message << "\n";
+      }
+    }
+  }
+
+  // Organize output
+  if (auto Err = organizeOutput(units)) {
+    std::string message = toString(std::move(Err));
+    if (config_.getVerbose()) {
+      errs() << "Output organization failed: " << message << "\n";
+    }
+  }
+
+  // Clean up leaked files from source directory
+  cleanupLeakedFiles(existingFiles);
+
+  return exitCode;
+}
+
+/// Returns the paths of all non-directory entries directly inside \p dir.
+std::set<std::string>
+CompilationManager::scanDirectory(const std::string &dir) const {
+  std::set<std::string> files;
+  std::error_code EC;
+  for (sys::fs::directory_iterator DI(dir, EC), DE; DI != DE && !EC;
+       DI.increment(EC)) {
+    if (DI->type() != sys::fs::file_type::directory_file) {
+      files.insert(DI->path());
+    }
+  }
+  return files;
+}
+
+/// Registers generated files with the first compilation unit.
+///
+/// Everything in the temporary directory that the FileClassifier wants
+/// collected is registered in place. Files that appeared in the working
+/// directory since \p existingFiles was captured are first moved into the
+/// temporary directory.
+void CompilationManager::collectGeneratedFiles(
+    const std::set<std::string> &existingFiles,
+    std::vector<std::unique_ptr<CompilationUnit>> &units) {
+  FileClassifier classifier;
+
+  // Collect files from temp directory
+  std::error_code EC;
+  for (sys::fs::recursive_directory_iterator DI(tempDir_, EC), DE;
+       DI != DE && !EC; DI.increment(EC)) {
+    if (DI->type() != sys::fs::file_type::directory_file) {
+      std::string filePath = DI->path();
+      if (classifier.shouldCollect(filePath)) {
+        auto classification = classifier.classifyFile(filePath);
+
+        // Add to appropriate unit
+        if (!units.empty()) {
+          units[0]->addGeneratedFile(classification.category, filePath);
+        }
+      }
+    }
+  }
+
+  // Also check for files that leaked into source directory
+  auto currentFiles = scanDirectory(initialWorkingDir_);
+  for (const auto &file : currentFiles) {
+    if (existingFiles.count(file) || !classifier.shouldCollect(file)) {
+      continue;
+    }
+    auto classification = classifier.classifyFile(file);
+
+    // Move leaked file to temp directory
+    std::string destPath = tempDir_ + "/" + sys::path::filename(file).str();
+    // NOTE(review): moveFile is declared outside this file; it is treated as
+    // returning llvm::Error, which must be consumed on failure -- confirm.
+    if (auto Err = FileManager::moveFile(file, destPath)) {
+      consumeError(std::move(Err));
+      continue;
+    }
+    if (!units.empty()) {
+      units[0]->addGeneratedFile(classification.category, destPath);
+    }
+  }
+}
+
+/// Copies every unit's generated files into
+/// <output dir>/<unit name>/<category>/, replacing any previous directory for
+/// that unit. A relative output directory is resolved against the working
+/// directory recorded at construction. Individual failures are skipped, so
+/// the function always reports success.
+Error CompilationManager::organizeOutput(
+    const std::vector<std::unique_ptr<CompilationUnit>> &units) {
+  // Resolve output directory as absolute path from initial working directory
+  SmallString<256> outputDirPath;
+  if (sys::path::is_absolute(config_.getOutputDir())) {
+    outputDirPath = config_.getOutputDir();
+  } else {
+    outputDirPath = initialWorkingDir_;
+    sys::path::append(outputDirPath, config_.getOutputDir());
+  }
+
+  std::string outputDir = outputDirPath.str().str();
+
+  if (config_.getVerbose()) {
+    outs() << "Output directory: " << outputDir << "\n";
+  }
+
+  // Copy collected files to organized structure
+  for (const auto &unit : units) {
+    std::string unitDir = outputDir + "/" + unit->getName();
+
+    // Remove existing unit directory if it exists
+    if (sys::fs::exists(unitDir)) {
+      if (auto EC = sys::fs::remove_directories(unitDir)) {
+        if (config_.getVerbose()) {
+          errs() << "Warning: Could not remove existing unit directory: "
+                 << unitDir << "\n";
+        }
+      }
+    }
+
+    // Create fresh unit directory
+    if (auto EC = sys::fs::create_directories(unitDir)) {
+      continue; // Skip if we can't create the directory
+    }
+
+    const auto &generatedFiles = unit->getAllGeneratedFiles();
+    for (const auto &category : generatedFiles) {
+      std::string categoryDir = unitDir + "/" + category.first;
+      sys::fs::create_directories(categoryDir);
+
+      for (const auto &file : category.second) {
+        std::string destFile =
+            categoryDir + "/" + sys::path::filename(file).str();
+        // NOTE(review): copyFile is declared outside this file; it is treated
+        // as returning llvm::Error, which must be consumed on failure.
+        if (auto Err = FileManager::copyFile(file, destFile)) {
+          consumeError(std::move(Err));
+          if (config_.getVerbose()) {
+            errs() << "Failed to copy " << file << " to " << destFile
+                   << "\n";
+          }
+        }
+      }
+    }
+  }
+
+  return Error::success();
+}
+
+/// Deletes optimization-remark and profile files that the build left in the
+/// working directory. Files listed in \p existingFiles were there before the
+/// build started and are never removed.
+void CompilationManager::cleanupLeakedFiles(
+    const std::set<std::string> &existingFiles) {
+  // Clean up any remaining leaked files in source directory
+  auto currentFiles = scanDirectory(initialWorkingDir_);
+  for (const auto &file : currentFiles) {
+    if (existingFiles.count(file)) {
+      continue;
+    }
+
+    StringRef filename = sys::path::filename(file);
+    // Only optimization remarks files and profile files are removed
+    const bool isRemarks =
+        filename.ends_with(".opt.yaml") || filename.ends_with(".opt.yml");
+    const bool isProfile =
+        filename.ends_with(".profraw") || filename.ends_with(".profdata");
+    if (!isRemarks && !isProfile) {
+      continue;
+    }
+
+    sys::fs::remove(file);
+    if (config_.getVerbose()) {
+      outs() << "Cleaned up leaked file: " << file << "\n";
+    }
+  }
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationManager.h b/llvm/tools/llvm-advisor/src/Core/CompilationManager.h
new file mode 100644
index 0000000000000..5256042a8c464
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationManager.h
@@ -0,0 +1,52 @@
+#ifndef LLVM_ADVISOR_COMPILATION_MANAGER_H
+#define LLVM_ADVISOR_COMPILATION_MANAGER_H
+
+#include "../Config/AdvisorConfig.h"
+#include "../Utils/FileClassifier.h"
+#include "BuildExecutor.h"
+#include "CompilationUnit.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Drives one instrumented build command: executes it, collects the files it
+/// generates and organizes them in the configured output directory.
+/// The destructor removes the temporary directory (unless temps are kept), so
+/// an instance should not be copied.
+class CompilationManager {
+public:
+  explicit CompilationManager(const AdvisorConfig &config);
+  ~CompilationManager();
+
+  /// Runs \p compiler with \p args and collects the generated data.
+  Expected<int> executeWithDataCollection(const std::string &compiler,
+                                          const std::vector<std::string> &args);
+
+private:
+  std::set<std::string> scanDirectory(const std::string &dir) const;
+
+  void
+  collectGeneratedFiles(const std::set<std::string> &existingFiles,
+                        std::vector<std::unique_ptr<CompilationUnit>> &units);
+
+  Error
+  organizeOutput(const std::vector<std::unique_ptr<CompilationUnit>> &units);
+
+  /// Removes leaked remark/profile files that are not in \p existingFiles.
+  void cleanupLeakedFiles(const std::set<std::string> &existingFiles);
+
+  const AdvisorConfig &config_;
+  BuildExecutor buildExecutor_;
+  std::string tempDir_;
+  std::string initialWorkingDir_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp
new file mode 100644
index 0000000000000..8b6a478cfaf63
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp
@@ -0,0 +1,70 @@
+#include "CompilationUnit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace advisor {
+
+/// Stores \p info and \p workDir and creates the unit's data directory.
+CompilationUnit::CompilationUnit(const CompilationUnitInfo &info,
+                                 const std::string &workDir)
+    : info_(info), workDir_(workDir) {
+  // Create unit-specific data directory
+  sys::fs::create_directories(getDataDir());
+}
+
+/// Returns the path of the first source, or "" if the unit has none.
+std::string CompilationUnit::getPrimarySource() const {
+  return info_.sources.empty() ? std::string() : info_.sources[0].path;
+}
+
+/// Returns the path <work dir>/units/<unit name>.
+std::string CompilationUnit::getDataDir() const {
+  SmallString<128> dataDir;
+  sys::path::append(dataDir, workDir_, "units", info_.name);
+  return dataDir.str().str();
+}
+
+std::string CompilationUnit::getExecutablePath() const {
+  return info_.outputExecutable;
+}
+
+/// Records \p path as a generated file of category \p type.
+void CompilationUnit::addGeneratedFile(const std::string &type,
+                                       const std::string &path) {
+  generatedFiles_[type].push_back(path);
+}
+
+/// With an empty \p type, tells whether any category has been recorded;
+/// otherwise tells whether \p type has at least one file.
+bool CompilationUnit::hasGeneratedFiles(const std::string &type) const {
+  if (type.empty()) {
+    return !generatedFiles_.empty();
+  }
+  auto it = generatedFiles_.find(type);
+  return it != generatedFiles_.end() && !it->second.empty();
+}
+
+/// Returns the files of category \p type, or the files of all categories
+/// when \p type is empty.
+std::vector<std::string>
+CompilationUnit::getGeneratedFiles(const std::string &type) const {
+  if (type.empty()) {
+    std::vector<std::string> allFiles;
+    for (const auto &pair : generatedFiles_) {
+      allFiles.insert(allFiles.end(), pair.second.begin(), pair.second.end());
+    }
+    return allFiles;
+  }
+  auto it = generatedFiles_.find(type);
+  return it != generatedFiles_.end() ? it->second : std::vector<std::string>();
+}
+
+const std::unordered_map<std::string, std::vector<std::string>> &
+CompilationUnit::getAllGeneratedFiles() const {
+  return generatedFiles_;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h
new file mode 100644
index 0000000000000..18dbc35ab5aec
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h
@@ -0,0 +1,62 @@
+#ifndef LLVM_ADVISOR_COMPILATION_UNIT_H
+#define LLVM_ADVISOR_COMPILATION_UNIT_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// One source file that belongs to a compilation unit.
+struct SourceFile {
+  std::string path;
+  std::string language;
+  bool isHeader = false;
+  std::vector<std::string> dependencies;
+};
+
+/// Static description of a compilation unit.
+struct CompilationUnitInfo {
+  std::string name;
+  std::vector<SourceFile> sources;
+  std::vector<std::string> compileFlags;
+  std::string targetArch;
+  bool hasOffloading = false;
+  std::string outputObject;
+  std::string outputExecutable;
+};
+
+/// A compilation unit together with the files generated for it, grouped by
+/// category name.
+class CompilationUnit {
+public:
+  CompilationUnit(const CompilationUnitInfo &info, const std::string &workDir);
+
+  const std::string &getName() const { return info_.name; }
+  const CompilationUnitInfo &getInfo() const { return info_; }
+  const std::string &getWorkDir() const { return workDir_; }
+  std::string getPrimarySource() const;
+
+  std::string getDataDir() const;
+  std::string getExecutablePath() const;
+
+  void addGeneratedFile(const std::string &type, const std::string &path);
+
+  bool hasGeneratedFiles(const std::string &type) const;
+  std::vector<std::string>
+  getGeneratedFiles(const std::string &type = "") const;
+  const std::unordered_map<std::string, std::vector<std::string>> &
+  getAllGeneratedFiles() const;
+
+private:
+  CompilationUnitInfo info_;
+  std::string workDir_;
+  std::unordered_map<std::string, std::vector<std::string>> generatedFiles_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp 
b/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp new file mode 100644 index 0000000000000..4d709e4a6d51c --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp @@ -0,0 +1,367 @@ +#include "DataExtractor.h" +#include "../Utils/ProcessRunner.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +namespace advisor { + +DataExtractor::DataExtractor(const AdvisorConfig &config) : config_(config) {} + +Error DataExtractor::extractAllData(CompilationUnit &unit, + const std::string &tempDir) { + if (config_.getVerbose()) { + outs() << "Extracting data for unit: " << unit.getName() << "\n"; + } + + // Create extraction subdirectories + sys::fs::create_directories(tempDir + "/ir"); + sys::fs::create_directories(tempDir + "/assembly"); + sys::fs::create_directories(tempDir + "/ast"); + sys::fs::create_directories(tempDir + "/preprocessed"); + sys::fs::create_directories(tempDir + "/include-tree"); + sys::fs::create_directories(tempDir + "/debug"); + sys::fs::create_directories(tempDir + "/static-analyzer"); + + if (auto Err = extractIR(unit, tempDir)) + return Err; + if (auto Err = extractAssembly(unit, tempDir)) + return Err; + if (auto Err = extractAST(unit, tempDir)) + return Err; + if (auto Err = extractPreprocessed(unit, tempDir)) + return Err; + if (auto Err = extractIncludeTree(unit, tempDir)) + return Err; + if (auto Err = extractDebugInfo(unit, tempDir)) + return Err; + if (auto Err = extractStaticAnalysis(unit, tempDir)) + return Err; + if (auto Err = extractMacroExpansion(unit, tempDir)) + return Err; + if (auto Err = extractCompilationPhases(unit, tempDir)) + return Err; + + return Error::success(); +} + +std::vector +DataExtractor::getBaseCompilerArgs(const CompilationUnitInfo &unitInfo) const { + std::vector baseArgs; + + // Copy include paths and defines + for (const auto &arg : unitInfo.compileFlags) { + if (StringRef(arg).starts_with("-I") || 
StringRef(arg).starts_with("-D") || + StringRef(arg).starts_with("-U") || + StringRef(arg).starts_with("-std=") || + StringRef(arg).starts_with("-m") || StringRef(arg).starts_with("-f") || + StringRef(arg).starts_with("-W") || StringRef(arg).starts_with("-O")) { + // Skip problematic flags for extraction + if (StringRef(arg).starts_with("-fsave-optimization-record") || + StringRef(arg).starts_with("-fprofile-instr-generate") || + StringRef(arg).starts_with("-fcoverage-mapping") || + StringRef(arg).starts_with("-foptimization-record-file")) { + continue; + } + baseArgs.push_back(arg); + } + } + + return baseArgs; +} + +Error DataExtractor::extractIR(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = + tempDir + "/ir/" + sys::path::stem(source.path).str() + ".ll"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-emit-llvm"); + baseArgs.push_back("-S"); + baseArgs.push_back("-o"); + baseArgs.push_back(outputFile); + baseArgs.push_back(source.path); + + if (auto Err = runCompilerWithFlags(baseArgs)) { + if (config_.getVerbose()) { + errs() << "Failed to extract IR for " << source.path << "\n"; + } + continue; + } + + if (sys::fs::exists(outputFile)) { + unit.addGeneratedFile("ir", outputFile); + } + } + return Error::success(); +} + +Error DataExtractor::extractAssembly(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = + tempDir + "/assembly/" + sys::path::stem(source.path).str() + ".s"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-S"); + baseArgs.push_back("-o"); + baseArgs.push_back(outputFile); + baseArgs.push_back(source.path); + + if (auto Err = runCompilerWithFlags(baseArgs)) { + if (config_.getVerbose()) { + errs() << "Failed to extract assembly for " << 
source.path << "\n"; + } + continue; + } + + if (sys::fs::exists(outputFile)) { + unit.addGeneratedFile("assembly", outputFile); + } + } + return Error::success(); +} + +Error DataExtractor::extractAST(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = + tempDir + "/ast/" + sys::path::stem(source.path).str() + ".ast"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-ast-dump"); + baseArgs.push_back("-fsyntax-only"); + baseArgs.push_back(source.path); + + auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs, + config_.getTimeout()); + if (result && result->exitCode == 0) { + std::error_code EC; + raw_fd_ostream OS(outputFile, EC); + if (!EC) { + OS << result->stdout; + unit.addGeneratedFile("ast", outputFile); + } + } + } + return Error::success(); +} + +Error DataExtractor::extractPreprocessed(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string ext = (source.language == "C++") ? 
".ii" : ".i"; + std::string outputFile = + tempDir + "/preprocessed/" + sys::path::stem(source.path).str() + ext; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-E"); + baseArgs.push_back("-o"); + baseArgs.push_back(outputFile); + baseArgs.push_back(source.path); + + if (auto Err = runCompilerWithFlags(baseArgs)) { + if (config_.getVerbose()) { + errs() << "Failed to extract preprocessed for " << source.path << "\n"; + } + continue; + } + + if (sys::fs::exists(outputFile)) { + unit.addGeneratedFile("preprocessed", outputFile); + } + } + return Error::success(); +} + +Error DataExtractor::extractIncludeTree(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = tempDir + "/include-tree/" + + sys::path::stem(source.path).str() + + ".include.txt"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-H"); + baseArgs.push_back("-fsyntax-only"); + baseArgs.push_back(source.path); + + auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs, + config_.getTimeout()); + if (result && !result->stderr.empty()) { + std::error_code EC; + raw_fd_ostream OS(outputFile, EC); + if (!EC) { + OS << result->stderr; // Include tree goes to stderr + unit.addGeneratedFile("include-tree", outputFile); + } + } + } + return Error::success(); +} + +Error DataExtractor::extractDebugInfo(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = + tempDir + "/debug/" + sys::path::stem(source.path).str() + ".debug.txt"; + std::string objectFile = + tempDir + "/debug/" + sys::path::stem(source.path).str() + ".o"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-g"); + baseArgs.push_back("-c"); + baseArgs.push_back("-o"); + baseArgs.push_back(objectFile); + 
baseArgs.push_back(source.path); + + if (auto Err = runCompilerWithFlags(baseArgs)) { + if (config_.getVerbose()) { + errs() << "Failed to extract debug info for " << source.path << "\n"; + } + continue; + } + + // Extract DWARF info using llvm-dwarfdump + if (sys::fs::exists(objectFile)) { + std::vector dwarfArgs = {objectFile}; + auto result = + ProcessRunner::run("llvm-dwarfdump", dwarfArgs, config_.getTimeout()); + if (result && result->exitCode == 0) { + std::error_code EC; + raw_fd_ostream OS(outputFile, EC); + if (!EC) { + OS << result->stdout; + unit.addGeneratedFile("debug", outputFile); + } + } + } + } + return Error::success(); +} + +Error DataExtractor::extractStaticAnalysis(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = tempDir + "/static-analyzer/" + + sys::path::stem(source.path).str() + + ".analysis.txt"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("--analyze"); + baseArgs.push_back("-Xanalyzer"); + baseArgs.push_back("-analyzer-output=text"); + baseArgs.push_back(source.path); + + auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs, + config_.getTimeout()); + if (result) { + std::error_code EC; + raw_fd_ostream OS(outputFile, EC); + if (!EC) { + OS << "STDOUT:\n" << result->stdout << "\nSTDERR:\n" << result->stderr; + unit.addGeneratedFile("static-analyzer", outputFile); + } + } + } + return Error::success(); +} + +Error DataExtractor::extractMacroExpansion(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = + tempDir + "/preprocessed/" + sys::path::stem(source.path).str() + + ".macro-expanded" + ((source.language == "C++") ? 
".ii" : ".i"); + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-E"); + baseArgs.push_back("-dM"); // Show macro definitions + baseArgs.push_back("-o"); + baseArgs.push_back(outputFile); + baseArgs.push_back(source.path); + + if (auto Err = runCompilerWithFlags(baseArgs)) { + if (config_.getVerbose()) { + errs() << "Failed to extract macro expansion for " << source.path + << "\n"; + } + continue; + } + + if (sys::fs::exists(outputFile)) { + unit.addGeneratedFile("macro-expansion", outputFile); + } + } + return Error::success(); +} + +Error DataExtractor::extractCompilationPhases(CompilationUnit &unit, + const std::string &tempDir) { + for (const auto &source : unit.getInfo().sources) { + if (source.isHeader) + continue; + + std::string outputFile = tempDir + "/debug/" + + sys::path::stem(source.path).str() + ".phases.txt"; + + auto baseArgs = getBaseCompilerArgs(unit.getInfo()); + baseArgs.push_back("-v"); // Verbose compilation phases + baseArgs.push_back("-fsyntax-only"); + baseArgs.push_back(source.path); + + auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs, + config_.getTimeout()); + if (result) { + std::error_code EC; + raw_fd_ostream OS(outputFile, EC); + if (!EC) { + OS << "COMPILATION PHASES:\n" + << result->stderr; // Verbose output goes to stderr + unit.addGeneratedFile("compilation-phases", outputFile); + } + } + } + return Error::success(); +} + +Error DataExtractor::runCompilerWithFlags( + const std::vector &args) { + auto result = ProcessRunner::run(config_.getToolPath("clang"), args, + config_.getTimeout()); + if (!result || result->exitCode != 0) { + return createStringError(std::make_error_code(std::errc::io_error), + "Compiler failed"); + } + return Error::success(); +} + +} // namespace advisor +} // namespace llvm diff --git a/llvm/tools/llvm-advisor/src/Core/DataExtractor.h b/llvm/tools/llvm-advisor/src/Core/DataExtractor.h new file mode 100644 index 0000000000000..7564660ed05b9 --- /dev/null 
+++ b/llvm/tools/llvm-advisor/src/Core/DataExtractor.h @@ -0,0 +1,44 @@ +#ifndef LLVM_ADVISOR_DATA_EXTRACTOR_H +#define LLVM_ADVISOR_DATA_EXTRACTOR_H + +#include "../Config/AdvisorConfig.h" +#include "CompilationUnit.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace advisor { + +class DataExtractor { +public: + DataExtractor(const AdvisorConfig &config); + + Error extractAllData(CompilationUnit &unit, const std::string &tempDir); + +private: + std::vector + getBaseCompilerArgs(const CompilationUnitInfo &unitInfo) const; + + Error extractIR(CompilationUnit &unit, const std::string &tempDir); + Error extractAssembly(CompilationUnit &unit, const std::string &tempDir); + Error extractAST(CompilationUnit &unit, const std::string &tempDir); + Error extractPreprocessed(CompilationUnit &unit, const std::string &tempDir); + Error extractIncludeTree(CompilationUnit &unit, const std::string &tempDir); + Error extractDebugInfo(CompilationUnit &unit, const std::string &tempDir); + Error extractStaticAnalysis(CompilationUnit &unit, + const std::string &tempDir); + Error extractMacroExpansion(CompilationUnit &unit, + const std::string &tempDir); + Error extractCompilationPhases(CompilationUnit &unit, + const std::string &tempDir); + + Error runCompilerWithFlags(const std::vector &args); + + const AdvisorConfig &config_; +}; + +} // namespace advisor +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp new file mode 100644 index 0000000000000..16d24f7a61d8f --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp @@ -0,0 +1,114 @@ +#include "UnitDetector.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Path.h" + +namespace llvm { +namespace advisor { + +UnitDetector::UnitDetector(const AdvisorConfig &config) : config_(config) {} + +Expected> +UnitDetector::detectUnits(const std::string &compiler, + const std::vector 
&args) { + + auto sources = findSourceFiles(args); + if (sources.empty()) { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "No source files found"); + } + + CompilationUnitInfo unit; + unit.name = generateUnitName(sources); + unit.sources = sources; + + // Store original args but filter out source files for the compile flags + for (const auto &arg : args) { + // Skip source files when adding to compile flags + StringRef extension = sys::path::extension(arg); + if (!arg.empty() && arg[0] != '-' && + (extension == ".c" || extension == ".cpp" || extension == ".cc" || + extension == ".cxx" || extension == ".C")) { + continue; + } + unit.compileFlags.push_back(arg); + } + + // Extract output files and features + extractBuildInfo(args, unit); + + return std::vector{unit}; +} + +std::vector +UnitDetector::findSourceFiles(const std::vector &args) const { + std::vector sources; + + for (const auto &arg : args) { + if (arg.empty() || arg[0] == '-') + continue; + + StringRef extension = sys::path::extension(arg); + if (extension == ".c" || extension == ".cpp" || extension == ".cc" || + extension == ".cxx" || extension == ".C") { + + SourceFile source; + source.path = arg; + source.language = classifier_.getLanguage(arg); + source.isHeader = false; + sources.push_back(source); + } + } + + return sources; +} + +void UnitDetector::extractBuildInfo(const std::vector &args, + CompilationUnitInfo &unit) { + for (size_t i = 0; i < args.size(); ++i) { + const auto &arg = args[i]; + + if (arg == "-o" && i + 1 < args.size()) { + StringRef output = args[i + 1]; + StringRef ext = sys::path::extension(output); + if (ext == ".o") { + unit.outputObject = args[i + 1]; + } else { + unit.outputExecutable = args[i + 1]; + } + } + + if (arg.find("openmp") != std::string::npos || + arg.find("offload") != std::string::npos || + arg.find("cuda") != std::string::npos) { + unit.hasOffloading = true; + } + + if (StringRef(arg).starts_with("-march=")) { + 
unit.targetArch = arg.substr(7); + } + } +} + +std::string +UnitDetector::generateUnitName(const std::vector &sources) const { + if (sources.empty()) + return "unknown"; + + // Use first source file name as base + std::string baseName = sys::path::stem(sources[0].path).str(); + + // Add hash for uniqueness when multiple sources + if (sources.size() > 1) { + std::string combined; + for (const auto &source : sources) { + combined += source.path; + } + auto hash = hash_value(combined); + baseName += "_" + std::to_string(static_cast(hash) % 10000); + } + + return baseName; +} + +} // namespace advisor +} // namespace llvm diff --git a/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h new file mode 100644 index 0000000000000..8ad998d3c4e7a --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h @@ -0,0 +1,35 @@ +#ifndef LLVM_ADVISOR_UNIT_DETECTOR_H +#define LLVM_ADVISOR_UNIT_DETECTOR_H + +#include "../Config/AdvisorConfig.h" +#include "../Core/CompilationUnit.h" +#include "../Utils/FileClassifier.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace advisor { + +class UnitDetector { +public: + explicit UnitDetector(const AdvisorConfig &config); + + Expected> + detectUnits(const std::string &compiler, + const std::vector &args); + +private: + std::vector + findSourceFiles(const std::vector &args) const; + void extractBuildInfo(const std::vector &args, + CompilationUnitInfo &unit); + std::string generateUnitName(const std::vector &sources) const; + + const AdvisorConfig &config_; + FileClassifier classifier_; +}; + +} // namespace advisor +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp new file mode 100644 index 0000000000000..e9b39f984c771 --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp @@ -0,0 +1,136 @@ +#include "FileClassifier.h" +#include 
"llvm/Support/Path.h" + +namespace llvm { +namespace advisor { + +FileClassification +FileClassifier::classifyFile(const std::string &filePath) const { + StringRef filename = sys::path::filename(filePath); + StringRef extension = sys::path::extension(filePath); + + FileClassification classification; + classification.isGenerated = true; + classification.isTemporary = false; + + // LLVM IR files + if (extension == ".ll") { + classification.category = "ir"; + classification.description = "LLVM IR text"; + return classification; + } + + // Assembly files + if (extension == ".s" || extension == ".S") { + classification.category = "assembly"; + classification.description = "Assembly"; + return classification; + } + + // Optimization remarks + if (filename.ends_with(".opt.yaml") || filename.ends_with(".opt.yml")) { + classification.category = "remarks"; + classification.description = "Optimization remarks"; + return classification; + } + + // Preprocessed files + if (extension == ".i" || extension == ".ii") { + classification.category = "preprocessed"; + classification.description = "Preprocessed source"; + return classification; + } + + // AST dumps + if (extension == ".ast" || filename.contains("ast-dump")) { + classification.category = "ast"; + classification.description = "AST dump"; + return classification; + } + + // Profile data + if (extension == ".profraw" || extension == ".profdata") { + classification.category = "profile"; + classification.description = "Profile data"; + return classification; + } + + // Include trees + if (filename.contains(".include.") || filename.contains("include-tree")) { + classification.category = "include-tree"; + classification.description = "Include tree"; + return classification; + } + + // Debug info + if (filename.contains("debug") || filename.contains("dwarf")) { + classification.category = "debug"; + classification.description = "Debug information"; + return classification; + } + + // Static analyzer output + if 
(filename.contains("analysis") || filename.contains("analyzer")) { + classification.category = "static-analyzer"; + classification.description = "Static analyzer output"; + return classification; + } + + // Macro expansion + if (filename.contains("macro-expanded")) { + classification.category = "macro-expansion"; + classification.description = "Macro expansion"; + return classification; + } + + // Compilation phases + if (filename.contains("phases")) { + classification.category = "compilation-phases"; + classification.description = "Compilation phases"; + return classification; + } + + // Control flow graph + if (extension == ".dot" || filename.contains("cfg")) { + classification.category = "cfg"; + classification.description = "Control flow graph"; + return classification; + } + + // Template instantiation + if (filename.contains("template") || filename.contains("instantiation")) { + classification.category = "template-instantiation"; + classification.description = "Template instantiation"; + return classification; + } + + // Default for unknown files + classification.category = "unknown"; + classification.description = "Unknown file type"; + classification.isGenerated = false; + return classification; +} + +bool FileClassifier::shouldCollect(const std::string &filePath) const { + auto classification = classifyFile(filePath); + return classification.category != "unknown" && classification.isGenerated && + !classification.isTemporary; +} + +std::string FileClassifier::getLanguage(const std::string &filePath) const { + StringRef extension = sys::path::extension(filePath); + + if (extension == ".c") + return "C"; + if (extension == ".cpp" || extension == ".cc" || extension == ".cxx" || + extension == ".C") + return "C++"; + if (extension == ".h" || extension == ".hpp" || extension == ".hh" || + extension == ".hxx") + return "Header"; + + return "Unknown"; +} + +} // namespace advisor +} // namespace llvm diff --git a/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h 
b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h new file mode 100644 index 0000000000000..6bf7c43ba4ffc --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h @@ -0,0 +1,26 @@ +#ifndef LLVM_ADVISOR_FILE_CLASSIFIER_H +#define LLVM_ADVISOR_FILE_CLASSIFIER_H + +#include + +namespace llvm { +namespace advisor { + +struct FileClassification { + std::string category; + std::string description; + bool isTemporary = false; + bool isGenerated = true; +}; + +class FileClassifier { +public: + FileClassification classifyFile(const std::string &filePath) const; + bool shouldCollect(const std::string &filePath) const; + std::string getLanguage(const std::string &filePath) const; +}; + +} // namespace advisor +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp b/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp new file mode 100644 index 0000000000000..7083d7edb7f3d --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp @@ -0,0 +1,205 @@ +#include "FileManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +#include + +namespace llvm { +namespace advisor { + +Expected FileManager::createTempDir(const std::string &prefix) { + SmallString<128> tempDirPath; + if (std::error_code ec = + sys::fs::createUniqueDirectory(prefix, tempDirPath)) { + return createStringError(ec, "Failed to create unique temporary directory"); + } + return std::string(tempDirPath.str()); +} + +Error FileManager::copyDirectory(const std::string &source, + const std::string &dest) { + std::error_code EC; + + SmallString<128> sourcePathNorm(source); + // Remove trailing slash manually if present + if (sourcePathNorm.ends_with("/") && sourcePathNorm.size() > 1) { + sourcePathNorm.pop_back(); + } + + for (sys::fs::recursive_directory_iterator I(source, EC), E; I != E && !EC; + I.increment(EC)) { + StringRef currentPath = I->path(); + SmallString<128> destPath(dest); + + 
StringRef relativePath = currentPath; + if (!relativePath.consume_front(sourcePathNorm)) { + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Path '" + currentPath.str() + "' not in source dir '" + source + + "'"); + } + // Remove leading slash manually if present + if (relativePath.starts_with("/")) { + relativePath = relativePath.drop_front(1); + } + + sys::path::append(destPath, relativePath); + + if (sys::fs::is_directory(currentPath)) { + if (sys::fs::create_directories(destPath)) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to create directory: " + + destPath.str().str()); + } + } else { + if (sys::fs::create_directories(sys::path::parent_path(destPath))) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to create parent directory for: " + + destPath.str().str()); + } + if (sys::fs::copy_file(currentPath, destPath)) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to copy file: " + currentPath.str()); + } + } + } + + if (EC) { + return createStringError(EC, "Failed to iterate directory: " + source); + } + + return Error::success(); +} + +Error FileManager::removeDirectory(const std::string &path) { + if (!sys::fs::exists(path)) { + return Error::success(); + } + + std::error_code EC; + std::vector Dirs; + for (sys::fs::recursive_directory_iterator I(path, EC), E; I != E && !EC; + I.increment(EC)) { + if (I->type() == sys::fs::file_type::directory_file) { + Dirs.push_back(I->path()); + } else { + if (auto E = sys::fs::remove(I->path())) { + return createStringError(E, "Failed to remove file: " + I->path()); + } + } + } + + if (EC) { + return createStringError(EC, "Error iterating directory " + path); + } + + for (const auto &Dir : llvm::reverse(Dirs)) { + if (auto E = sys::fs::remove(Dir)) { + return createStringError(E, "Failed to remove directory: " + Dir); + } + } + + if (auto E = sys::fs::remove(path)) { + return 
createStringError(E, + "Failed to remove top-level directory: " + path); + } + + return Error::success(); +} + +std::vector FileManager::findFiles(const std::string &directory, + const std::string &pattern) { + std::vector files; + std::error_code EC; + for (sys::fs::recursive_directory_iterator I(directory, EC), E; I != E && !EC; + I.increment(EC)) { + if (I->type() != sys::fs::file_type::directory_file) { + StringRef filename = sys::path::filename(I->path()); + if (filename.find(pattern) != StringRef::npos) { + files.push_back(I->path()); + } + } + } + return files; +} + +std::vector +FileManager::findFilesByExtension(const std::string &directory, + const std::vector &extensions) { + std::vector files; + std::error_code EC; + for (sys::fs::recursive_directory_iterator I(directory, EC), E; I != E && !EC; + I.increment(EC)) { + if (I->type() != sys::fs::file_type::directory_file) { + StringRef filepath = I->path(); + for (const auto &ext : extensions) { + if (filepath.ends_with(ext)) { + files.push_back(filepath.str()); + break; + } + } + } + } + return files; +} + +Error FileManager::moveFile(const std::string &source, + const std::string &dest) { + if (source == dest) { + return Error::success(); + } + + if (sys::fs::create_directories(sys::path::parent_path(dest))) { + return createStringError( + std::make_error_code(std::errc::io_error), + "Failed to create parent directory for destination: " + dest); + } + + if (sys::fs::rename(source, dest)) { + // If rename fails, try copy and remove + if (sys::fs::copy_file(source, dest)) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to move file (copy failed): " + source); + } + if (sys::fs::remove(source)) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to move file (source removal failed): " + + source); + } + } + + return Error::success(); +} + +Error FileManager::copyFile(const std::string &source, + const std::string &dest) { + if (source == dest) { 
+ return Error::success(); + } + + if (sys::fs::create_directories(sys::path::parent_path(dest))) { + return createStringError( + std::make_error_code(std::errc::io_error), + "Failed to create parent directory for destination: " + dest); + } + + if (sys::fs::copy_file(source, dest)) { + return createStringError(std::make_error_code(std::errc::io_error), + "Failed to copy file: " + source); + } + + return Error::success(); +} + +Expected FileManager::getFileSize(const std::string &path) { + sys::fs::file_status status; + if (auto EC = sys::fs::status(path, status)) { + return createStringError(EC, "File not found: " + path); + } + + return status.getSize(); +} + +} // namespace advisor +} // namespace llvm \ No newline at end of file diff --git a/llvm/tools/llvm-advisor/src/Utils/FileManager.h b/llvm/tools/llvm-advisor/src/Utils/FileManager.h new file mode 100644 index 0000000000000..07b49e647f542 --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/FileManager.h @@ -0,0 +1,46 @@ +#ifndef LLVM_ADVISOR_FILE_MANAGER_H +#define LLVM_ADVISOR_FILE_MANAGER_H + +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace advisor { + +class FileManager { +public: + /// Create unique temporary directory with pattern llvm-advisor-xxxxx + static Expected + createTempDir(const std::string &prefix = "llvm-advisor"); + + /// Recursively copy directory + static Error copyDirectory(const std::string &source, + const std::string &dest); + + /// Remove directory and contents + static Error removeDirectory(const std::string &path); + + /// Find files matching pattern + static std::vector findFiles(const std::string &directory, + const std::string &pattern); + + /// Find files by extension + static std::vector + findFilesByExtension(const std::string &directory, + const std::vector &extensions); + + /// Move file from source to destination + static Error moveFile(const std::string &source, const std::string &dest); + + /// Copy file from source to destination + 
static Error copyFile(const std::string &source, const std::string &dest); + + /// Get file size + static Expected getFileSize(const std::string &path); +}; + +} // namespace advisor +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp new file mode 100644 index 0000000000000..b08b3cc88a434 --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp @@ -0,0 +1,69 @@ +#include "ProcessRunner.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" + +namespace llvm { +namespace advisor { + +Expected +ProcessRunner::run(const std::string &program, + const std::vector &args, int timeoutSeconds) { + + auto programPath = sys::findProgramByName(program); + if (!programPath) { + return createStringError(programPath.getError(), + "Tool not found: " + program); + } + + std::vector execArgs; + execArgs.push_back(program); + for (const auto &arg : args) { + execArgs.push_back(arg); + } + + SmallString<128> stdoutPath, stderrPath; + sys::fs::createTemporaryFile("stdout", "tmp", stdoutPath); + sys::fs::createTemporaryFile("stderr", "tmp", stderrPath); + + std::optional redirects[] = { + std::nullopt, // stdin + StringRef(stdoutPath), // stdout + StringRef(stderrPath) // stderr + }; + + int exitCode = sys::ExecuteAndWait(*programPath, execArgs, std::nullopt, + redirects, timeoutSeconds); + + ProcessResult result; + result.exitCode = exitCode; + // TODO: Collect information about compilation time + result.executionTime = 0; // not tracking time + + auto stdoutBuffer = MemoryBuffer::getFile(stdoutPath); + if (stdoutBuffer) { + result.stdout = (*stdoutBuffer)->getBuffer().str(); + } + + auto stderrBuffer = MemoryBuffer::getFile(stderrPath); + if (stderrBuffer) { + result.stderr = (*stderrBuffer)->getBuffer().str(); + } + + sys::fs::remove(stdoutPath); + sys::fs::remove(stderrPath); + + return result; +} + +Expected 
ProcessRunner::runWithEnv( + const std::string &program, const std::vector &args, + const std::vector &env, int timeoutSeconds) { + + // For simplicity, just use the regular run method + // Environment variables can be added later if needed + return run(program, args, timeoutSeconds); +} + +} // namespace advisor +} // namespace llvm diff --git a/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h new file mode 100644 index 0000000000000..ffd0ef353ba16 --- /dev/null +++ b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h @@ -0,0 +1,32 @@ +#ifndef LLVM_ADVISOR_PROCESS_RUNNER_H +#define LLVM_ADVISOR_PROCESS_RUNNER_H + +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace advisor { + +class ProcessRunner { +public: + struct ProcessResult { + int exitCode; + std::string stdout; + std::string stderr; + double executionTime; + }; + + static Expected run(const std::string &program, + const std::vector &args, + int timeoutSeconds = 60); + + static Expected + runWithEnv(const std::string &program, const std::vector &args, + const std::vector &env, int timeoutSeconds = 60); +}; + +} // namespace advisor +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-advisor/src/llvm-advisor.cpp b/llvm/tools/llvm-advisor/src/llvm-advisor.cpp new file mode 100644 index 0000000000000..01c28ba53b95b --- /dev/null +++ b/llvm/tools/llvm-advisor/src/llvm-advisor.cpp @@ -0,0 +1,111 @@ +#include "Config/AdvisorConfig.h" +#include "Core/CompilationManager.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::advisor; + +static cl::opt ConfigFile("config", cl::desc("Configuration file"), + cl::value_desc("filename")); +static cl::opt OutputDir("output-dir", + cl::desc("Output directory"), + cl::value_desc("directory")); +static 
cl::opt Verbose("verbose", cl::desc("Verbose output")); +static cl::opt KeepTemps("keep-temps", cl::desc("Keep temporary files")); +static cl::opt NoProfiler("no-profiler", cl::desc("Disable profiler")); + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + // Parse llvm-advisor options until we find the compiler + std::vector advisorArgs; + advisorArgs.push_back(argv[0]); + + int compilerArgStart = 1; + bool foundCompiler = false; + + for (int i = 1; i < argc; ++i) { + StringRef arg(argv[i]); + if (arg.starts_with("--") || + (arg.starts_with("-") && arg.size() > 1 && arg != "-")) { + advisorArgs.push_back(argv[i]); + if (arg == "--config" || arg == "--output-dir") { + if (i + 1 < argc && !StringRef(argv[i + 1]).starts_with("-")) { + advisorArgs.push_back(argv[++i]); + } + } + } else { + compilerArgStart = i; + foundCompiler = true; + break; + } + } + + if (!foundCompiler) { + errs() << "Error: No compiler command provided.\n"; + errs() << "Usage: llvm-advisor [options] [compiler-args...]\n"; + return 1; + } + + // Parse llvm-advisor options + int advisorArgc = advisorArgs.size(); + cl::ParseCommandLineOptions(advisorArgc, + const_cast(advisorArgs.data()), + "LLVM Compilation Advisor"); + + // Extract compiler and arguments + std::string compiler = argv[compilerArgStart]; + std::vector compilerArgs; + for (int i = compilerArgStart + 1; i < argc; ++i) { + compilerArgs.push_back(argv[i]); + } + + // Configure advisor + AdvisorConfig config; + if (!ConfigFile.empty()) { + if (auto Err = config.loadFromFile(ConfigFile).takeError()) { + errs() << "Error loading config: " << toString(std::move(Err)) << "\n"; + return 1; + } + } + + if (!OutputDir.empty()) { + config.setOutputDir(OutputDir); + } else { + config.setOutputDir(".llvm-advisor"); // Default hidden directory + } + + config.setVerbose(Verbose); + config.setKeepTemps(KeepTemps); + config.setRunProfiler(!NoProfiler); + + // Create output directory + if (auto EC = 
sys::fs::create_directories(config.getOutputDir())) { + errs() << "Error creating output directory: " << EC.message() << "\n"; + return 1; + } + + if (config.getVerbose()) { + outs() << "LLVM Compilation Advisor\n"; + outs() << "Compiler: " << compiler << "\n"; + outs() << "Output: " << config.getOutputDir() << "\n"; + } + + // Execute with data collection + CompilationManager manager(config); + auto result = manager.executeWithDataCollection(compiler, compilerArgs); + + if (result) { + if (config.getVerbose()) { + outs() << "Compilation completed (exit code: " << *result << ")\n"; + } + return *result; + } else { + errs() << "Error: " << toString(result.takeError()) << "\n"; + return 1; + } +}