-
Notifications
You must be signed in to change notification settings - Fork 358
Add RHI Device Caching and Test Prefix Exclusion #8448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2d0ac96
0ab4eae
2a164dc
97464c0
2560207
ae6de8b
7093023
0e48251
5ac16bd
2bf2d1e
fe4f205
dfecbd2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#include "slang-test-device-cache.h" | ||
|
||
#include <algorithm> | ||
|
||
// Static member accessor functions (Meyer's singleton pattern) | ||
// This ensures proper destruction order - function-local statics are destroyed | ||
// in reverse order of first access, avoiding the static destruction order fiasco | ||
std::mutex& DeviceCache::getMutex() | ||
{ | ||
static std::mutex instance; | ||
return instance; | ||
} | ||
|
||
std::unordered_map< | ||
DeviceCache::DeviceCacheKey, | ||
DeviceCache::CachedDevice, | ||
DeviceCache::DeviceCacheKeyHash>& | ||
DeviceCache::getDeviceCache() | ||
{ | ||
static std::unordered_map<DeviceCacheKey, CachedDevice, DeviceCacheKeyHash> instance; | ||
return instance; | ||
} | ||
|
||
uint64_t& DeviceCache::getNextCreationOrder() | ||
{ | ||
static uint64_t instance = 0; | ||
return instance; | ||
} | ||
|
||
bool DeviceCache::DeviceCacheKey::operator==(const DeviceCacheKey& other) const | ||
{ | ||
return deviceType == other.deviceType && enableValidation == other.enableValidation && | ||
enableRayTracingValidation == other.enableRayTracingValidation && | ||
profileName == other.profileName && requiredFeatures == other.requiredFeatures; | ||
} | ||
|
||
std::size_t DeviceCache::DeviceCacheKeyHash::operator()(const DeviceCacheKey& key) const | ||
{ | ||
std::size_t h1 = std::hash<int>{}(static_cast<int>(key.deviceType)); | ||
std::size_t h2 = std::hash<bool>{}(key.enableValidation); | ||
std::size_t h3 = std::hash<bool>{}(key.enableRayTracingValidation); | ||
std::size_t h4 = std::hash<std::string>{}(key.profileName); | ||
|
||
std::size_t h5 = 0; | ||
for (const auto& feature : key.requiredFeatures) | ||
{ | ||
h5 ^= std::hash<std::string>{}(feature) + 0x9e3779b9 + (h5 << 6) + (h5 >> 2); | ||
} | ||
|
||
return h1 ^ (h2 << 1) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4); | ||
} | ||
Comment on lines
+37
to
+51
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI, chatGPT suggested the following as a cleaner alternative,
I haven't tried it. |
||
|
||
DeviceCache::CachedDevice::CachedDevice() | ||
: creationOrder(0) | ||
{ | ||
} | ||
|
||
void DeviceCache::evictOldestDeviceIfNeeded() | ||
{ | ||
auto& deviceCache = getDeviceCache(); | ||
if (deviceCache.size() < MAX_CACHED_DEVICES) | ||
return; | ||
|
||
// Find the oldest device to evict | ||
auto oldestIt = deviceCache.end(); | ||
uint64_t oldestCreationOrder = UINT64_MAX; | ||
|
||
for (auto it = deviceCache.begin(); it != deviceCache.end(); ++it) | ||
{ | ||
if (it->second.creationOrder < oldestCreationOrder) | ||
{ | ||
oldestCreationOrder = it->second.creationOrder; | ||
oldestIt = it; | ||
} | ||
} | ||
|
||
// Remove the oldest device - ComPtr will handle the actual device release | ||
if (oldestIt != deviceCache.end()) | ||
{ | ||
deviceCache.erase(oldestIt); | ||
} | ||
} | ||
|
||
SlangResult DeviceCache::acquireDevice(const rhi::DeviceDesc& desc, rhi::IDevice** outDevice) | ||
{ | ||
if (!outDevice) | ||
return SLANG_E_INVALID_ARG; | ||
|
||
*outDevice = nullptr; | ||
|
||
// Skip caching for CUDA devices due to crashes | ||
if (desc.deviceType == rhi::DeviceType::CUDA) | ||
Comment on lines
+91
to
+92
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are not caching for CUDA? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. During the CUDA device destroying, it calls the debugCall func pointer. I tried a few approaches and but seems there is no easy fix. That requires a careful design to be threadsafe. I will create another issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why can't we make debug callback also have the same lifetime as the device? |
||
{ | ||
return rhi::getRHI()->createDevice(desc, outDevice); | ||
} | ||
|
||
std::lock_guard<std::mutex> lock(getMutex()); | ||
auto& deviceCache = getDeviceCache(); | ||
auto& nextCreationOrder = getNextCreationOrder(); | ||
|
||
// Create cache key | ||
DeviceCacheKey key; | ||
key.deviceType = desc.deviceType; | ||
key.enableValidation = desc.enableValidation; | ||
key.enableRayTracingValidation = desc.enableRayTracingValidation; | ||
key.profileName = desc.slang.targetProfile ? desc.slang.targetProfile : "Unknown"; | ||
|
||
// Add required features to key | ||
for (int i = 0; i < desc.requiredFeatureCount; ++i) | ||
{ | ||
key.requiredFeatures.push_back(desc.requiredFeatures[i]); | ||
} | ||
std::sort(key.requiredFeatures.begin(), key.requiredFeatures.end()); | ||
Comment on lines
+109
to
+113
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't it make sense to use |
||
|
||
// Evict oldest device if we've reached the limit | ||
evictOldestDeviceIfNeeded(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we may want to print some message when the verbose mode is enabled for a debugging purpose. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be good, but I haven't seen |
||
|
||
// Check if we have a cached device | ||
auto it = deviceCache.find(key); | ||
if (it != deviceCache.end()) | ||
{ | ||
// Return the cached device - COM reference counting handles the references | ||
*outDevice = it->second.device.get(); | ||
if (*outDevice) | ||
{ | ||
(*outDevice)->addRef(); | ||
return SLANG_OK; | ||
} | ||
} | ||
|
||
// Create new device | ||
Slang::ComPtr<rhi::IDevice> device; | ||
auto result = rhi::getRHI()->createDevice(desc, device.writeRef()); | ||
if (SLANG_FAILED(result)) | ||
{ | ||
return result; | ||
} | ||
|
||
// Cache the device | ||
CachedDevice& cached = deviceCache[key]; | ||
cached.device = device; | ||
cached.creationOrder = nextCreationOrder++; | ||
|
||
// Return the device with proper reference counting | ||
*outDevice = device.get(); | ||
if (*outDevice) | ||
{ | ||
(*outDevice)->addRef(); | ||
} | ||
|
||
return SLANG_OK; | ||
} | ||
|
||
|
||
void DeviceCache::cleanCache() | ||
{ | ||
std::lock_guard<std::mutex> lock(getMutex()); | ||
auto& deviceCache = getDeviceCache(); | ||
deviceCache.clear(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#pragma once | ||
|
||
#include <mutex> | ||
#include <slang-rhi.h> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
// Device Cache for preventing NVIDIA Tegra driver state corruption | ||
// This cache reuses Vulkan instances and devices to avoid the VK_ERROR_INCOMPATIBLE_DRIVER | ||
// issue that occurs after ~19 device creation/destruction cycles on Tegra platforms. | ||
// Uses ComPtr for automatic device lifecycle management - devices are released when removed from | ||
// cache. | ||
class DeviceCache | ||
{ | ||
public: | ||
struct DeviceCacheKey | ||
{ | ||
rhi::DeviceType deviceType; | ||
bool enableValidation; | ||
bool enableRayTracingValidation; | ||
std::string profileName; | ||
std::vector<std::string> requiredFeatures; | ||
|
||
bool operator==(const DeviceCacheKey& other) const; | ||
}; | ||
|
||
struct DeviceCacheKeyHash | ||
{ | ||
std::size_t operator()(const DeviceCacheKey& key) const; | ||
}; | ||
|
||
struct CachedDevice | ||
{ | ||
Slang::ComPtr<rhi::IDevice> device; | ||
uint64_t creationOrder; | ||
|
||
CachedDevice(); | ||
}; | ||
|
||
private: | ||
static constexpr int MAX_CACHED_DEVICES = 10; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feel free to ignore this comment, but we may want to set the value from the command-line argument for a debugging purpose. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do that in a follow up PR, as this will require we adding another option to both render-test-tool and cmdline arg to slang-test. |
||
|
||
// Use function-local statics to control destruction order (Meyer's singleton pattern) | ||
static std::mutex& getMutex(); | ||
static std::unordered_map<DeviceCacheKey, CachedDevice, DeviceCacheKeyHash>& getDeviceCache(); | ||
static uint64_t& getNextCreationOrder(); | ||
|
||
static void evictOldestDeviceIfNeeded(); | ||
|
||
public: | ||
static SlangResult acquireDevice(const rhi::DeviceDesc& desc, rhi::IDevice** outDevice); | ||
static void cleanCache(); | ||
}; | ||
|
||
// RAII wrapper for cached devices to ensure proper cleanup | ||
class CachedDeviceWrapper | ||
{ | ||
private: | ||
Slang::ComPtr<rhi::IDevice> m_device; | ||
|
||
public: | ||
CachedDeviceWrapper() = default; | ||
|
||
CachedDeviceWrapper(Slang::ComPtr<rhi::IDevice> device) | ||
: m_device(device) | ||
{ | ||
} | ||
|
||
~CachedDeviceWrapper() {} | ||
|
||
// Move constructor | ||
CachedDeviceWrapper(CachedDeviceWrapper&& other) noexcept | ||
: m_device(std::move(other.m_device)) | ||
{ | ||
} | ||
|
||
// Move assignment | ||
CachedDeviceWrapper& operator=(CachedDeviceWrapper&& other) noexcept | ||
{ | ||
if (this != &other) | ||
{ | ||
m_device = std::move(other.m_device); | ||
} | ||
return *this; | ||
} | ||
|
||
// Delete copy constructor and assignment | ||
CachedDeviceWrapper(const CachedDeviceWrapper&) = delete; | ||
CachedDeviceWrapper& operator=(const CachedDeviceWrapper&) = delete; | ||
|
||
rhi::IDevice* get() const { return m_device.get(); } | ||
rhi::IDevice* operator->() const { return m_device.get(); } | ||
operator bool() const { return m_device != nullptr; } | ||
|
||
Slang::ComPtr<rhi::IDevice>& getComPtr() { return m_device; } | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes sense to have a function scoped static for "getMutex".
But when the type is just a primitive type, it seems unnecessary.
Feel free to ignore this comment but I would go with a simpler/traditional code like,