|
| 1 | +/// \file RootObjTree.cxx |
| 2 | +/// |
| 3 | +/// \author Giacomo Parolini <[email protected]> |
| 4 | +/// \date 2025-10-14 |
| 5 | + |
| 6 | +#include "RootObjTree.hxx" |
| 7 | + |
| 8 | +#include "wildcards.hpp" |
| 9 | + |
| 10 | +#include <TFile.h> |
| 11 | + |
| 12 | +#include <ROOT/StringUtils.hxx> |
| 13 | + |
| 14 | +#include <algorithm> |
| 15 | +#include <deque> |
| 16 | +#include <iostream> |
| 17 | + |
| 18 | +static bool MatchesGlob(std::string_view haystack, std::string_view pattern) |
| 19 | +{ |
| 20 | + return wildcards::match(haystack, pattern); |
| 21 | +} |
| 22 | + |
| 23 | +ROOT::CmdLine::RootSource |
| 24 | +ROOT::CmdLine::GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags) |
| 25 | +{ |
| 26 | + ROOT::CmdLine::RootSource source; |
| 27 | + source.fFileName = fileName; |
| 28 | + auto &nodeTree = source.fObjectTree; |
| 29 | + nodeTree.fFile = std::unique_ptr<TFile>(TFile::Open(std::string(fileName).c_str(), "READ")); |
| 30 | + if (!nodeTree.fFile) { |
| 31 | + source.fErrors.push_back("Failed to open file"); |
| 32 | + return source; |
| 33 | + } |
| 34 | + |
| 35 | + const auto patternSplits = pattern.empty() ? std::vector<std::string>{} : ROOT::Split(pattern, "/"); |
| 36 | + |
| 37 | + // Match all objects at all nesting levels down to the deepest nesting level of `pattern` (or all nesting levels |
| 38 | + // if we have the "recursive listing" flag). The nodes are visited breadth-first. |
| 39 | + { |
| 40 | + ROOT::CmdLine::RootObjNode rootNode = {}; |
| 41 | + rootNode.fName = std::string(fileName); |
| 42 | + rootNode.fClassName = nodeTree.fFile->Class()->GetName(); |
| 43 | + rootNode.fDir = nodeTree.fFile.get(); |
| 44 | + nodeTree.fNodes.emplace_back(std::move(rootNode)); |
| 45 | + } |
| 46 | + std::deque<NodeIdx_t> nodesToVisit{0}; |
| 47 | + |
| 48 | + // Keep track of the object names found at every nesting level and only add the first one. |
| 49 | + std::unordered_set<std::string> namesFound; |
| 50 | + |
| 51 | + const bool isRecursive = flags & EGetMatchingPathsFlags::kRecursive; |
| 52 | + do { |
| 53 | + NodeIdx_t curIdx = nodesToVisit.front(); |
| 54 | + nodesToVisit.pop_front(); |
| 55 | + ROOT::CmdLine::RootObjNode *cur = &nodeTree.fNodes[curIdx]; |
| 56 | + assert(cur->fDir); |
| 57 | + |
| 58 | + // Sort the keys by name |
| 59 | + std::vector<TKey *> keys; |
| 60 | + keys.reserve(cur->fDir->GetListOfKeys()->GetEntries()); |
| 61 | + for (TKey *key : ROOT::Detail::TRangeStaticCast<TKey>(cur->fDir->GetListOfKeys())) |
| 62 | + keys.push_back(key); |
| 63 | + |
| 64 | + std::sort(keys.begin(), keys.end(), |
| 65 | + [](const auto *a, const auto *b) { return strcmp(a->GetName(), b->GetName()) < 0; }); |
| 66 | + |
| 67 | + namesFound.clear(); |
| 68 | + |
| 69 | + for (TKey *key : keys) { |
| 70 | + // Don't recurse lower than requested by `pattern` unless we explicitly have the `recursive listing` flag. |
| 71 | + if (cur->fNesting < patternSplits.size() && !MatchesGlob(key->GetName(), patternSplits[cur->fNesting])) |
| 72 | + continue; |
| 73 | + |
| 74 | + if (namesFound.count(key->GetName()) > 0) { |
| 75 | + std::cerr << "WARNING: Several versions of '" << key->GetName() << "' are present in '" << fileName |
| 76 | + << "'. Only the most recent will be considered.\n"; |
| 77 | + continue; |
| 78 | + } |
| 79 | + namesFound.insert(key->GetName()); |
| 80 | + |
| 81 | + auto &newChild = nodeTree.fNodes.emplace_back(NodeFromKey(*key)); |
| 82 | + // Need to get back cur since the emplace_back() may have moved it. |
| 83 | + cur = &nodeTree.fNodes[curIdx]; |
| 84 | + newChild.fNesting = cur->fNesting + 1; |
| 85 | + newChild.fParent = curIdx; |
| 86 | + if (!cur->fNChildren) |
| 87 | + cur->fFirstChild = nodeTree.fNodes.size() - 1; |
| 88 | + cur->fNChildren++; |
| 89 | + |
| 90 | + const auto *cl = TClass::GetClass(key->GetClassName()); |
| 91 | + if (cl && cl->InheritsFrom("TDirectory")) |
| 92 | + newChild.fDir = cur->fDir->GetDirectory(key->GetName()); |
| 93 | + } |
| 94 | + |
| 95 | + // Only recurse into subdirectories that are up to the deepest level we ask for through `pattern`. |
| 96 | + if (cur->fNesting < patternSplits.size() || isRecursive) { |
| 97 | + for (auto childIdx = cur->fFirstChild; childIdx < cur->fFirstChild + cur->fNChildren; ++childIdx) { |
| 98 | + auto &child = nodeTree.fNodes[childIdx]; |
| 99 | + if (child.fDir) |
| 100 | + nodesToVisit.push_back(childIdx); |
| 101 | + else if (cur->fNesting < patternSplits.size()) |
| 102 | + nodeTree.fLeafList.push_back(childIdx); |
| 103 | + } |
| 104 | + } |
| 105 | + if (cur->fNesting == patternSplits.size()) { |
| 106 | + if (cur->fDir) |
| 107 | + nodeTree.fDirList.push_back(curIdx); |
| 108 | + else |
| 109 | + nodeTree.fLeafList.push_back(curIdx); |
| 110 | + } |
| 111 | + } while (!nodesToVisit.empty()); |
| 112 | + |
| 113 | + return source; |
| 114 | +} |
| 115 | + |
| 116 | +ROOT::CmdLine::RootSource ROOT::CmdLine::ParseRootSource(std::string_view sourceRaw, std::uint32_t flags) |
| 117 | +{ |
| 118 | + ROOT::CmdLine::RootSource source; |
| 119 | + const char *str = sourceRaw.data(); |
| 120 | + |
| 121 | + // Handle known URI prefixes |
| 122 | + static const char *const specialPrefixes[] = {"http", "https", "root", "gs", "s3"}; |
| 123 | + for (const char *prefix : specialPrefixes) { |
| 124 | + const auto prefixLen = strlen(prefix); |
| 125 | + if (strncmp(str, prefix, prefixLen) == 0 && strncmp(str + prefixLen, "://", 3) == 0) { |
| 126 | + source.fFileName = std::string(prefix) + "://"; |
| 127 | + str += prefixLen + 3; |
| 128 | + break; |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + auto tokens = ROOT::Split(str, ":"); |
| 133 | + if (tokens.empty()) |
| 134 | + return source; |
| 135 | + |
| 136 | + source.fFileName += tokens[0]; |
| 137 | + if (tokens.size() > 1) { |
| 138 | + source = ROOT::CmdLine::GetMatchingPathsInFile(source.fFileName, tokens[1], flags); |
| 139 | + } else { |
| 140 | + source = ROOT::CmdLine::GetMatchingPathsInFile(source.fFileName, "", flags); |
| 141 | + } |
| 142 | + |
| 143 | + return source; |
| 144 | +} |
| 145 | + |
| 146 | +std::vector<ROOT::CmdLine::RootSource> |
| 147 | +ROOT::CmdLine::ParseRootSources(const std::vector<std::string> &sourcesRaw, std::uint32_t flags) |
| 148 | +{ |
| 149 | + std::vector<ROOT::CmdLine::RootSource> sources; |
| 150 | + sources.reserve(sourcesRaw.size()); |
| 151 | + |
| 152 | + for (const auto &srcRaw : sourcesRaw) { |
| 153 | + sources.push_back(ParseRootSource(srcRaw, flags)); |
| 154 | + } |
| 155 | + |
| 156 | + return sources; |
| 157 | +} |
0 commit comments