Skip to content

Commit 6d2e143

Browse files
committed
Try to respect RPATHS of calling dlopen modules with dlinfo
This commit is just for backup on our main strategy of using dlinfo instead of dlopen. Signed-off-by: Pablo Galindo <[email protected]>
1 parent 91d7016 commit 6d2e143

File tree

2 files changed

+66
-66
lines changed

2 files changed

+66
-66
lines changed

src/memray/_memray/hooks.cpp

Lines changed: 64 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -306,75 +306,76 @@ posix_memalign(void** memptr, size_t alignment, size_t size) noexcept
306306
return ret;
307307
}
308308

309-
// We need to override dlopen/dlclose to account for new shared libraries being
310-
// loaded in the process memory space. This is needed so we can correctly track
311-
// allocations in those libraries by overriding their PLT entries and also so we
312-
// can properly map the addresses of the symbols in those libraries when we
313-
// resolve later native traces. Unfortunately, we can't just override dlopen
314-
// directly because of the following edge case: when a shared library dlopen's
315-
// another by name (e.g. dlopen("libfoo.so")), the dlopen call will honor the
316-
// RPATH/RUNPATH of the calling library if it's set. Some libraries set an
317-
// RPATH/RUNPATH based on $ORIGIN (the path of the calling library) to load
318-
// dependencies from a relative directory based on the location of the calling
319-
// library. This means that if we override dlopen, we'll end up loading the
320-
// library from the wrong path or more likely, not loading it at all because the
321-
// dynamic loader will think the memray extension is the calling library and
322-
// the RPATH of the real calling library will not be honoured.
323-
//
324-
// To work around this, we override dlsym instead and override the symbols in
325-
// the loaded libraries only the first time we have seen a handle passed to
326-
// dlsym. This works because for a symbol from a given dlopen-ed library to
327-
// appear in a call stack, *something* from that library has to be dlsym-ed
328-
// first. The only exception to this are static initializers, but we cannot
329-
// track those anyway by overriding dlopen as they run within the dlopen call
330-
// itself.
331-
// There's another set of cases we would miss: if library A has a static initializer
332-
// that passes a pointer to one of its functions to library B, and library B stores
333-
// that function pointer, then we could see calls into library A via the function pointer
334-
// held by library B, even though dlsym was never called on library A. This should be
335-
// very rare and will be corrected the next time library B calls dlsym so this should
336-
// not be a problem in practice.
337-
338-
class DlsymCache
339-
{
340-
public:
341-
auto insert(const void* handle)
342-
{
343-
std::unique_lock lock(mutex_);
344-
return d_handles.insert(handle);
345-
}
346-
347-
void erase(const void* handle)
348-
{
349-
std::unique_lock lock(mutex_);
350-
d_handles.erase(handle);
351-
}
352-
353-
private:
354-
mutable std::mutex mutex_;
355-
std::unordered_set<const void*> d_handles;
356-
};
357-
358-
static DlsymCache dlsym_cache;
359-
360309
void*
361-
dlsym(void* handle, const char* symbol) noexcept
310+
dlopen(const char* filename, int flag) noexcept
362311
{
363-
assert(MEMRAY_ORIG(dlsym));
364-
void* ret;
312+
assert(MEMRAY_ORIG(dlopen));
313+
void* ret = nullptr;
365314
{
366315
tracking_api::RecursionGuard guard;
367-
ret = MEMRAY_ORIG(dlsym)(handle, symbol);
316+
#if defined(__GLIBC__)
317+
if (filename != nullptr && filename[0] != '\0' && std::strchr(filename, '/') == nullptr) {
318+
void* const callerAddr = __builtin_extract_return_addr(__builtin_return_address(0));
319+
320+
Dl_info info;
321+
if (dladdr(callerAddr, &info)) {
322+
const char* dlname = info.dli_fname;
323+
{
324+
struct stat dlstat, exestat;
325+
// Check if we are being called from the main executable
326+
void* main_sym = dlsym(dlopen(nullptr, RTLD_LAZY | RTLD_NOLOAD), "main");
327+
if (main_sym && dladdr(main_sym, &main_info)
328+
&& strcmp(main_info.dli_fname, info.dli_fname) == 0)
329+
{
330+
dlname = nullptr;
331+
}
332+
}
333+
334+
void* caller = MEMRAY_ORIG(dlopen)(dlname, RTLD_LAZY | RTLD_NOLOAD);
335+
if (caller != nullptr) {
336+
Dl_serinfo size;
337+
if (dlinfo(caller, RTLD_DI_SERINFOSIZE, &size) == 0) {
338+
auto* paths = reinterpret_cast<Dl_serinfo*>(new char[size.dls_size]);
339+
*paths = size;
340+
if (dlinfo(caller, RTLD_DI_SERINFO, paths) == 0) {
341+
for (unsigned i = 0; i != paths->dls_cnt; ++i) {
342+
const char* name = paths->dls_serpath[i].dls_name;
343+
std::string path;
344+
if (name == nullptr || name[0] == '\0') {
345+
// In the dynamic linking search path, an
346+
// empty entry typically represents the
347+
// current working directory ($PWD).
348+
path = filename;
349+
}
350+
path = name;
351+
if (path.back() != '/') {
352+
path += '/';
353+
}
354+
path += filename;
355+
ret = MEMRAY_ORIG(dlopen)(path.c_str(), flag);
356+
if (ret) {
357+
break;
358+
}
359+
}
360+
}
361+
delete[] reinterpret_cast<char*>(paths);
362+
}
363+
dlclose(caller);
364+
}
365+
}
366+
}
367+
#endif
368+
// Fallback if we found nothing
369+
if (ret == nullptr) {
370+
ret = MEMRAY_ORIG(dlopen)(filename, flag);
371+
}
368372
}
369373
if (ret) {
370-
auto [_, inserted] = dlsym_cache.insert(handle);
371-
if (inserted) {
372-
tracking_api::Tracker::invalidate_module_cache();
373-
if (symbol
374-
&& (0 == strcmp(symbol, "PyInit_greenlet") || 0 == strcmp(symbol, "PyInit__greenlet")))
375-
{
376-
tracking_api::Tracker::beginTrackingGreenlets();
377-
}
374+
tracking_api::Tracker::invalidate_module_cache();
375+
if (filename
376+
&& (nullptr != strstr(filename, "/_greenlet.") || nullptr != strstr(filename, "/greenlet.")))
377+
{
378+
tracking_api::Tracker::beginTrackingGreenlets();
378379
}
379380
}
380381
return ret;
@@ -390,7 +391,6 @@ dlclose(void* handle) noexcept
390391
tracking_api::RecursionGuard guard;
391392
ret = MEMRAY_ORIG(dlclose)(handle);
392393
}
393-
dlsym_cache.erase(handle);
394394
tracking_api::NativeTrace::flushCache();
395395
if (!ret) tracking_api::Tracker::invalidate_module_cache();
396396
return ret;

src/memray/_memray/hooks.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
FOR_EACH_HOOKED_FUNCTION(aligned_alloc) \
4444
FOR_EACH_HOOKED_FUNCTION(mmap) \
4545
FOR_EACH_HOOKED_FUNCTION(munmap) \
46-
FOR_EACH_HOOKED_FUNCTION(dlsym) \
46+
FOR_EACH_HOOKED_FUNCTION(dlopen) \
4747
FOR_EACH_HOOKED_FUNCTION(dlclose) \
4848
FOR_EACH_HOOKED_FUNCTION(PyGILState_Ensure) \
4949
MEMRAY_PLATFORM_HOOKED_FUNCTIONS
@@ -179,7 +179,7 @@ void*
179179
pvalloc(size_t size) noexcept;
180180

181181
void*
182-
dlsym(void* handle, const char* symbol) noexcept;
182+
dlopen(const char* filename, int flag) noexcept;
183183

184184
int
185185
dlclose(void* handle) noexcept;

0 commit comments

Comments
 (0)