Skip to content

Commit d4c65c5

Browse files
committed
[main] Extract root obj tree matching code from rootls
so that it can be reused by other utilities (rootcp, rootmv, ...)
1 parent 37bee09 commit d4c65c5

File tree

3 files changed

+289
-185
lines changed

3 files changed

+289
-185
lines changed

main/src/RootObjTree.cxx

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/// \file RootObjTree.cxx
2+
///
3+
/// \author Giacomo Parolini <[email protected]>
4+
/// \date 2025-10-14
5+
6+
#include "RootObjTree.hxx"
7+
8+
#include "wildcards.hpp"
9+
10+
#include <TFile.h>
11+
12+
#include <ROOT/StringUtils.hxx>
13+
14+
#include <algorithm>
15+
#include <deque>
16+
#include <iostream>
17+
18+
static bool MatchesGlob(std::string_view haystack, std::string_view pattern)
19+
{
20+
return wildcards::match(haystack, pattern);
21+
}
22+
23+
/// Opens `fileName` for reading and collects the objects whose path inside the file matches `pattern`.
///
/// `pattern` is split on '/' and the i-th segment is glob-matched against the key names found at nesting
/// level i. Directories are visited breadth-first, down to the deepest level named by `pattern`, or through
/// every level when `flags` contains kRecursive. Within a directory the keys are visited in name order and
/// only one key per name (the one with the highest cycle) is added to the tree.
///
/// \param fileName Name of the file; it is handed to TFile::Open in "READ" mode.
/// \param pattern  '/'-separated glob pattern. An empty pattern has no segments, so no key is filtered out.
/// \param flags    Bitmask of EGetMatchingPathsFlags.
/// \return A RootSource owning the opened file and the node tree. If the file could not be opened the tree
///         is empty and fErrors contains one message.
ROOT::CmdLine::RootSource
ROOT::CmdLine::GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags)
{
   ROOT::CmdLine::RootSource source;
   source.fFileName = fileName;
   auto &nodeTree = source.fObjectTree;
   nodeTree.fFile = std::unique_ptr<TFile>(TFile::Open(std::string(fileName).c_str(), "READ"));
   if (!nodeTree.fFile) {
      source.fErrors.push_back("Failed to open file");
      return source;
   }

   const auto patternSplits = pattern.empty() ? std::vector<std::string>{} : ROOT::Split(pattern, "/");

   // Match all objects at all nesting levels down to the deepest nesting level of `pattern` (or all nesting levels
   // if we have the "recursive listing" flag). The nodes are visited breadth-first.
   {
      // Node 0 is the file itself: it is the only node without a key.
      ROOT::CmdLine::RootObjNode rootNode = {};
      rootNode.fName = std::string(fileName);
      rootNode.fClassName = nodeTree.fFile->Class()->GetName();
      rootNode.fDir = nodeTree.fFile.get();
      nodeTree.fNodes.emplace_back(std::move(rootNode));
   }
   std::deque<NodeIdx_t> nodesToVisit{0};

   // Keep track of the object names found at every nesting level and only add the first one.
   std::unordered_set<std::string> namesFound;

   const bool isRecursive = flags & EGetMatchingPathsFlags::kRecursive;
   do {
      NodeIdx_t curIdx = nodesToVisit.front();
      nodesToVisit.pop_front();
      ROOT::CmdLine::RootObjNode *cur = &nodeTree.fNodes[curIdx];
      // Only directory nodes are ever queued for a visit.
      assert(cur->fDir);

      std::vector<TKey *> keys;
      keys.reserve(cur->fDir->GetListOfKeys()->GetEntries());
      for (TKey *key : ROOT::Detail::TRangeStaticCast<TKey>(cur->fDir->GetListOfKeys()))
         keys.push_back(key);

      // Sort the keys by name. std::sort is not stable, so keys sharing a name are explicitly ordered by
      // descending cycle: this guarantees that the first one we meet below is the most recent version,
      // which is what the duplicate warning promises.
      std::sort(keys.begin(), keys.end(), [](const auto *a, const auto *b) {
         const int nameOrder = strcmp(a->GetName(), b->GetName());
         if (nameOrder != 0)
            return nameOrder < 0;
         return a->GetCycle() > b->GetCycle();
      });

      // Duplicate names are tracked per directory.
      namesFound.clear();

      for (TKey *key : keys) {
         // Don't recurse lower than requested by `pattern` unless we explicitly have the `recursive listing` flag.
         if (cur->fNesting < patternSplits.size() && !MatchesGlob(key->GetName(), patternSplits[cur->fNesting]))
            continue;

         if (namesFound.count(key->GetName()) > 0) {
            std::cerr << "WARNING: Several versions of '" << key->GetName() << "' are present in '" << fileName
                      << "'. Only the most recent will be considered.\n";
            continue;
         }
         namesFound.insert(key->GetName());

         auto &newChild = nodeTree.fNodes.emplace_back(NodeFromKey(*key));
         // Need to get back cur since the emplace_back() may have moved it.
         cur = &nodeTree.fNodes[curIdx];
         newChild.fNesting = cur->fNesting + 1;
         newChild.fParent = curIdx;
         // Children of a node are appended back to back, so remembering the first index is enough.
         if (!cur->fNChildren)
            cur->fFirstChild = nodeTree.fNodes.size() - 1;
         cur->fNChildren++;

         const auto *cl = TClass::GetClass(key->GetClassName());
         if (cl && cl->InheritsFrom("TDirectory"))
            newChild.fDir = cur->fDir->GetDirectory(key->GetName());
      }

      // Only recurse into subdirectories that are up to the deepest level we ask for through `pattern`.
      if (cur->fNesting < patternSplits.size() || isRecursive) {
         for (auto childIdx = cur->fFirstChild; childIdx < cur->fFirstChild + cur->fNChildren; ++childIdx) {
            auto &child = nodeTree.fNodes[childIdx];
            if (child.fDir)
               nodesToVisit.push_back(childIdx);
            else if (cur->fNesting < patternSplits.size())
               nodeTree.fLeafList.push_back(childIdx);
         }
      }
      // A node sitting exactly at the depth of `pattern` is itself one of the matched results.
      if (cur->fNesting == patternSplits.size()) {
         if (cur->fDir)
            nodeTree.fDirList.push_back(curIdx);
         else
            nodeTree.fLeafList.push_back(curIdx);
      }
   } while (!nodesToVisit.empty());

   return source;
}
115+
116+
/// Converts a string like "file.root:dir/obj" into a RootSource.
///
/// A leading "<scheme>://" with one of the known schemes ("http", "https", "root", "gs", "s3") is kept as
/// part of the file name. The remainder is split on ':'; the first token completes the file name and the
/// second token, if any, is the object pattern. Further tokens are ignored.
/// NOTE(review): because of that, a URL carrying an explicit port ("root://host:1094//f.root") appears to
/// be cut at the port separator -- confirm whether such inputs need to be supported.
///
/// The parsing is done on the string_view itself: a string_view is not guaranteed to be NUL-terminated, so
/// its data() must not be handed to strncmp/strlen-style functions.
///
/// \param sourceRaw The raw "file[:pattern]" string.
/// \param flags A bitmask of EGetMatchingPathsFlags, forwarded to GetMatchingPathsInFile.
/// \return The source produced by GetMatchingPathsInFile for the parsed file name and pattern.
ROOT::CmdLine::RootSource ROOT::CmdLine::ParseRootSource(std::string_view sourceRaw, std::uint32_t flags)
{
   ROOT::CmdLine::RootSource source;
   std::string_view remaining = sourceRaw;

   // Handle known URI prefixes
   static const char *const specialPrefixes[] = {"http", "https", "root", "gs", "s3"};
   const std::string_view schemeSeparator = "://";
   for (const std::string_view prefix : specialPrefixes) {
      // The first test guarantees remaining.size() >= prefix.size(), so the second substr() cannot throw.
      if (remaining.substr(0, prefix.size()) == prefix &&
          remaining.substr(prefix.size(), schemeSeparator.size()) == schemeSeparator) {
         source.fFileName = std::string(prefix) + "://";
         remaining.remove_prefix(prefix.size() + schemeSeparator.size());
         break;
      }
   }

   auto tokens = ROOT::Split(remaining, ":");
   if (tokens.empty())
      return source;

   source.fFileName += tokens[0];

   // No second token means "no pattern": an empty pattern matches anything.
   const std::string_view pattern = tokens.size() > 1 ? std::string_view(tokens[1]) : std::string_view();
   return ROOT::CmdLine::GetMatchingPathsInFile(source.fFileName, pattern, flags);
}
145+
146+
std::vector<ROOT::CmdLine::RootSource>
147+
ROOT::CmdLine::ParseRootSources(const std::vector<std::string> &sourcesRaw, std::uint32_t flags)
148+
{
149+
std::vector<ROOT::CmdLine::RootSource> sources;
150+
sources.reserve(sourcesRaw.size());
151+
152+
for (const auto &srcRaw : sourcesRaw) {
153+
sources.push_back(ParseRootSource(srcRaw, flags));
154+
}
155+
156+
return sources;
157+
}

main/src/RootObjTree.hxx

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/// \file RootObjTree.hxx
2+
///
3+
/// Utility functions used by command line tools to parse "path-like" strings like: "foo.root:dir/obj*" into a
4+
/// tree structure usable to iterate the matched objects.
5+
///
6+
/// For example usage, see rootls.cxx
7+
///
8+
/// \author Giacomo Parolini <[email protected]>
9+
/// \date 2025-10-14
10+
11+
#ifndef ROOT_CMDLINE_OBJTREE
12+
#define ROOT_CMDLINE_OBJTREE
13+
14+
#include <cstdint>
15+
#include <memory>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <TKey.h>
20+
21+
class TDirectory;
22+
class TFile;
23+
24+
namespace ROOT::CmdLine {
25+
26+
/// Index of a node inside RootObjTree::fNodes.
using NodeIdx_t = std::uint32_t;
27+
28+
struct RootObjNode {
29+
std::string fName;
30+
std::string fClassName;
31+
TKey *fKey = nullptr; // This is non-null for all nodes except the root node (which is the file itself)
32+
33+
TDirectory *fDir = nullptr; // This is null for all non-directory nodes
34+
// NOTE: by construction of the tree, all children of the same node are contiguous.
35+
NodeIdx_t fFirstChild = 0;
36+
std::uint32_t fNChildren = 0;
37+
std::uint32_t fNesting = 0;
38+
NodeIdx_t fParent = 0;
39+
};
40+
41+
/// Builds the node describing `key`: name and class name are copied from the key and the node keeps a
/// pointer to it. All the other fields are left at their default values.
inline RootObjNode NodeFromKey(TKey &key)
{
   RootObjNode result;
   result.fKey = &key;
   result.fClassName = key.GetClassName();
   result.fName = key.GetName();
   return result;
}
49+
50+
struct RootObjTree {
51+
// 0th node is the root node
52+
std::vector<RootObjNode> fNodes;
53+
std::vector<NodeIdx_t> fDirList;
54+
std::vector<NodeIdx_t> fLeafList;
55+
// The file must be kept alive in order to access the nodes' keys
56+
std::unique_ptr<TFile> fFile;
57+
};
58+
59+
struct RootSource {
60+
std::string fFileName;
61+
RootObjTree fObjectTree;
62+
std::vector<std::string> fErrors;
63+
};
64+
65+
/// Bit flags accepted by the `flags` parameter of the functions declared in this header.
enum EGetMatchingPathsFlags {
   /// Recurse into subdirectories when matching objects
   kRecursive = 0x1,
};
69+
70+
/// Given a file and a "path pattern", returns a RootSource containing the tree of matched objects.
71+
///
72+
/// \param fileName The name of the ROOT file to look into
73+
/// \param pattern A glob-like pattern (basically a `ls` pattern). May be empty to match anything.
74+
/// \param flags A bitmask of EGetMatchingPathsFlags
75+
RootSource GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags);
76+
77+
/// Given a string like "file.root:dir/obj", converts it to a RootSource.
78+
/// The string may start with one of the known file protocols: "http", "https", "root", "gs", "s3"
79+
/// (e.g. "https://file.root").
80+
///
81+
/// If the source fails to get created, its fErrors list will be non-empty.
82+
///
83+
/// \param flags A bitmask of EGetMatchingPathsFlags
84+
/// \return The converted source.
85+
RootSource ParseRootSource(std::string_view sourceRaw, std::uint32_t flags);
86+
87+
/// Given a list of strings like "file.root:dir/obj", converts each string to a RootSource.
88+
/// The string may start with one of the known file protocols: "http", "https", "root", "gs", "s3"
89+
/// (e.g. "https://file.root").
90+
///
91+
/// If a source fails to get created, that source's fErrors list will be non-empty.
92+
///
93+
/// \param flags A bitmask of EGetMatchingPathsFlags
94+
/// \return The list of converted sources.
95+
std::vector<ROOT::CmdLine::RootSource>
96+
ParseRootSources(const std::vector<std::string> &sourcesRaw, std::uint32_t flags);
97+
98+
} // namespace ROOT::CmdLine
99+
100+
#endif

0 commit comments

Comments
 (0)