From 485c66cc46d915ddbc557a35a3c1f0c3958899d5 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 16 Sep 2025 10:46:09 -0700 Subject: [PATCH 1/9] Optimize Load/Store pairs --- source/slang/slang-ir-dce.cpp | 175 +++++++++++++++++++ source/slang/slang-ir-dce.h | 9 + source/slang/slang-ir-ssa-simplification.cpp | 1 + 3 files changed, 185 insertions(+) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 0b89baf64c..1735263e0f 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -512,6 +512,181 @@ bool trimOptimizableTypes(IRModule* module) return changed; } +static bool eliminateLoadStorePairsInFunc(IRFunc* func, HashSet& processedFuncs) +{ + // Avoid infinite recursion by tracking processed functions + if (processedFuncs.contains(func)) + return false; + + processedFuncs.add(func); + + bool changed = false; + List toRemove; + + for (auto block : func->getBlocks()) + { + for (auto blockInst : block->getChildren()) + { + // First, recursively process any function calls in this block + if (auto call = as(blockInst)) + { + auto callee = call->getCallee(); + if (auto calleeFunc = as(callee)) + { + changed |= eliminateLoadStorePairsInFunc(calleeFunc, processedFuncs); + } + } + + auto storeInst = as(blockInst); + if (!storeInst) + continue; + + auto storedValue = storeInst->getVal(); + auto destPtr = storeInst->getPtr(); + + // Only optimize if destPtr is a variable (kIROp_Var). + // Don't optimize stores to buffer elements or other meaningful destinations. + if (!as(destPtr)) + continue; + + // Check if we're storing a load result + auto loadInst = as(storedValue); + if (!loadInst) + continue; + + // Do not optimize the primitive types because the legalization step may assume the + // existance of Load instructions when the entry parameters are replaced with the + // builtin variables. + auto loadInstType = loadInst->getDataType(); + switch (loadInstType->getOp()) + { + case kIROp_Int8Type: + case kIROp_Int16Type: + case kIROp_IntType: + case kIROp_Int64Type: + case kIROp_UInt8Type: + case kIROp_UInt16Type: + case kIROp_UIntType: + case kIROp_UInt64Type: + case kIROp_IntPtrType: + case kIROp_UIntPtrType: + case kIROp_FloatType: + case kIROp_DoubleType: + case kIROp_HalfType: + case kIROp_BoolType: + continue; + } + + auto loadedPtr = loadInst->getPtr(); + + // Optimize only when the type is immutable (ConstRef). + auto loadedPtrType = loadedPtr->getDataType(); + if (!as(loadedPtrType) && !as(loadedPtrType) && + !as(loadedPtrType)) + continue; + + // We cannot optimize if the variable is passed to a function call that treats the + // parameter as mutable. + bool isSafeToOptimize = true; + + // Check if variable is only used in function calls with ConstRef parameters + for (auto use = destPtr->firstUse; use; use = use->nextUse) + { + auto user = use->getUser(); + if (user == storeInst) + continue; // Skip the store itself + + auto call = as(user); + if (!call) + { + isSafeToOptimize = false; + break; + } + + auto callee = call->getCallee(); + auto funcInst = as(callee); + if (!funcInst) + { + isSafeToOptimize = false; + break; + } + + // Find which argument position this variable is used in + UInt argIndex = 0; + bool foundArg = false; + for (UInt i = 0; i < call->getArgCount(); i++) + { + if (call->getArg(i) == destPtr) + { + argIndex = i; + foundArg = true; + break; + } + } + + if (!foundArg) + { + isSafeToOptimize = false; + break; + } + + // Check if the corresponding parameter is ConstRef + auto param = funcInst->getFirstParam(); + for (UInt i = 0; i < argIndex && param; i++) + { + param = param->getNextParam(); + } + if (!param || !as(param->getDataType())) + { + // Unsafe, because the parameter might be used as mutable. + isSafeToOptimize = false; + break; + } + } + + if (!isSafeToOptimize) + continue; + + // Replace all uses of destPtr with loadedPtr + destPtr->replaceUsesWith(loadedPtr); + + // Mark both instructions for removal + toRemove.add(storeInst); + toRemove.add(loadInst); + toRemove.add(destPtr); + changed = true; + } + } + + // Remove marked instructions + for (auto instToRemove : toRemove) + { + instToRemove->removeAndDeallocate(); + } + + return changed; +} + +bool eliminateLoadStorePairs(IRModule* module) +{ + bool changed = false; + HashSet processedFuncs; + + // Look for patterns: load(ptr) followed by store(var, load_result) + // This can be optimized to direct pointer usage when safe + // Process recursively through function calls + for (auto inst : module->getGlobalInsts()) + { + auto func = as(inst); + if (!func) + continue; + + changed |= eliminateLoadStorePairsInFunc(func, processedFuncs); + } + + return changed; +} + bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options) { // The main source of confusion/complexity here is that diff --git a/source/slang/slang-ir-dce.h b/source/slang/slang-ir-dce.h index 869ebc36ae..f9e367163a 100644 --- a/source/slang/slang-ir-dce.h +++ b/source/slang/slang-ir-dce.h @@ -35,4 +35,13 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o bool isWeakReferenceOperand(IRInst* inst, UInt operandIndex); bool trimOptimizableTypes(IRModule* module); + +/// Eliminate unnecessary load+store pairs when safe to do so. +/// This optimization looks for patterns where a value is loaded from a ConstRef +/// parameter and immediately stored to a temporary variable, then only passed +/// to functions that accept ConstRef parameters. In such cases, the temporary +/// variable can be eliminated and the original ConstRef parameter used directly. +/// Returns true if any changes were made. +bool eliminateLoadStorePairs(IRModule* module); + } // namespace Slang diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp index 41da868b65..3a41cb332c 100644 --- a/source/slang/slang-ir-ssa-simplification.cpp +++ b/source/slang/slang-ir-ssa-simplification.cpp @@ -74,6 +74,7 @@ void simplifyIR( changed |= applySparseConditionalConstantPropagationForGlobalScope(module, sink); changed |= peepholeOptimizeGlobalScope(target, module); changed |= trimOptimizableTypes(module); + changed |= eliminateLoadStorePairs(module); for (auto inst : module->getGlobalInsts()) { From 244e17e3b0f9b5ce06d3bdb2056c727a5c5fa31e Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 16 Sep 2025 15:12:44 -0700 Subject: [PATCH 2/9] Fix the failing/crashing test --- source/slang/slang-ir-dce.cpp | 7 +++++-- .../slang-ir-transform-params-to-constref.cpp | 17 ++++------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 1735263e0f..3ca472a0e0 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -521,7 +521,7 @@ static bool eliminateLoadStorePairsInFunc(IRFunc* func, HashSet& proces processedFuncs.add(func); bool changed = false; - List toRemove; + HashSet toRemove; for (auto block : func->getBlocks()) { @@ -652,8 +652,11 @@ static bool eliminateLoadStorePairsInFunc(IRFunc* func, HashSet& proces // Mark both instructions for removal toRemove.add(storeInst); - toRemove.add(loadInst); toRemove.add(destPtr); + + // Note: loadInst might be still in use. + // We need to rely on DCE to delete it if unused. + changed = true; } } diff --git a/source/slang/slang-ir-transform-params-to-constref.cpp b/source/slang/slang-ir-transform-params-to-constref.cpp index 8f4bcd0379..9328a1de11 100644 --- a/source/slang/slang-ir-transform-params-to-constref.cpp +++ b/source/slang/slang-ir-transform-params-to-constref.cpp @@ -60,7 +60,7 @@ struct TransformParamsToConstRefContext case kIROp_FieldExtract: { // Transform the IRFieldExtract into a IRFieldAddress - auto fieldExtract = as(use->getUser()); + auto fieldExtract = as(user); builder.setInsertBefore(fieldExtract); auto fieldAddr = builder.emitFieldAddress( fieldExtract->getBase(), @@ -73,8 +73,7 @@ struct TransformParamsToConstRefContext case kIROp_GetElement: { // Transform the IRGetElement into a IRGetElementPtr - auto getElement = as(use->getUser()); - + auto getElement = as(user); builder.setInsertBefore(getElement); auto elemAddr = builder.emitElementAddress( getElement->getBase(), @@ -111,14 +110,8 @@ struct TransformParamsToConstRefContext List newArgs; // Transform arguments to match the updated-parameter - IRParam* param = func->getFirstParam(); UInt i = 0; - auto iterate = [&]() - { - param = param->getNextParam(); - i++; - }; - for (; param; iterate()) + for (IRParam* param = func->getFirstParam(); param; param = param->getNextParam(), i++) { auto arg = call->getArg(i); if (!updatedParams.contains(param)) @@ -183,7 +176,6 @@ struct TransformParamsToConstRefContext void processFunc(IRFunc* func) { HashSet updatedParams; - bool hasTransformedParams = false; // First pass: Transform parameter types for (auto param = func->getFirstParam(); param; param = param->getNextParam()) @@ -203,13 +195,12 @@ struct TransformParamsToConstRefContext auto constRefType = builder.getConstRefType(paramType, AddressSpace::ThreadLocal); param->setFullType(constRefType); - hasTransformedParams = true; changed = true; updatedParams.add(param); } } - if (!hasTransformedParams) + if (updatedParams.getCount() == 0) { return; } From 6322903b68ca2d59b1ed0077898b4839f97864d3 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:03:14 -0700 Subject: [PATCH 3/9] Refactor based on the feedback --- source/slang/slang-ir-dce.cpp | 289 ++++++++++--------- source/slang/slang-ir-dce.h | 13 +- source/slang/slang-ir-ssa-simplification.cpp | 2 +- tests/metal/vector-get-element-ptr.slang | 6 +- 4 files changed, 164 insertions(+), 146 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 3ca472a0e0..4d1677dcde 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -512,182 +512,199 @@ bool trimOptimizableTypes(IRModule* module) return changed; } -static bool eliminateLoadStorePairsInFunc(IRFunc* func, HashSet& processedFuncs) +static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) { - // Avoid infinite recursion by tracking processed functions - if (processedFuncs.contains(func)) - return false; - - processedFuncs.add(func); - - bool changed = false; - HashSet toRemove; - - for (auto block : func->getBlocks()) + // Consider the following IR pattern: + // ``` + // let %temp = var + // let %value = load(%sourcePtr) + // store(%temp, %value) + // ``` + // We can replace "%temp" with "%sourcePtr" without the load and store indirection + // if "%temp" is used only in read-only contexts. + + bool overallChanged = false; + for (bool changed = true; changed;) { - for (auto blockInst : block->getChildren()) - { - // First, recursively process any function calls in this block - if (auto call = as(blockInst)) - { - auto callee = call->getCallee(); - if (auto calleeFunc = as(callee)) - { - changed |= eliminateLoadStorePairsInFunc(calleeFunc, processedFuncs); - } - } - - auto storeInst = as(blockInst); - if (!storeInst) - continue; + changed = false; - auto storedValue = storeInst->getVal(); - auto destPtr = storeInst->getPtr(); - - // Only optimize if destPtr is a variable (kIROp_Var). - // Don't optimize stores to buffer elements or other meaningful destinations. - if (!as(destPtr)) - continue; - - // Check if we're storing a load result - auto loadInst = as(storedValue); - if (!loadInst) - continue; + HashSet toRemove; - // Do not optimize the primitive types because the legalization step may assume the - // existance of Load instructions when the entry parameters are replaced with the - // builtin variables. - auto loadInstType = loadInst->getDataType(); - switch (loadInstType->getOp()) + for (auto block : func->getBlocks()) + { + for (auto blockInst : block->getChildren()) { - case kIROp_Int8Type: - case kIROp_Int16Type: - case kIROp_IntType: - case kIROp_Int64Type: - case kIROp_UInt8Type: - case kIROp_UInt16Type: - case kIROp_UIntType: - case kIROp_UInt64Type: - case kIROp_IntPtrType: - case kIROp_UIntPtrType: - case kIROp_FloatType: - case kIROp_DoubleType: - case kIROp_HalfType: - case kIROp_BoolType: - continue; - } - - auto loadedPtr = loadInst->getPtr(); + auto storeInst = as(blockInst); + if (!storeInst) + continue; - // Optimize only when the type is immutable (ConstRef). - auto loadedPtrType = loadedPtr->getDataType(); - if (!as(loadedPtrType) && !as(loadedPtrType) && - !as(loadedPtrType)) - continue; + auto storedValue = storeInst->getVal(); + auto destPtr = storeInst->getPtr(); - // We cannot optimize if the variable is passed to a function call that treats the - // parameter as mutable. - bool isSafeToOptimize = true; + // Only optimize temporary variable. + // Don't optimize stores to permanent memory locations. + if (destPtr->getOp() != kIROp_Var) + continue; - // Check if variable is only used in function calls with ConstRef parameters - for (auto use = destPtr->firstUse; use; use = use->nextUse) - { - auto user = use->getUser(); - if (user == storeInst) - continue; // Skip the store itself + // Check if we're storing a load result + auto loadInst = as(storedValue); + if (!loadInst) + continue; - auto call = as(user); - if (!call) + auto loadedPtr = loadInst->getPtr(); + + // Do not optimize loads from semantic parameters because some semantics have + // builtin types that are vector types but pretend to be scalar types (e.g., + // SV_DispatchThreadID is used as 'int id' but maps to 'float3 + // gl_GlobalInvocationID'). The legalization step must remove the load instruction + // to maintain this pretense, which breaks our load/store optimization assumptions. + // Skip optimization when loading from semantics to let legalization handle the load + // removal. + if (auto param = as(loadedPtr)) + if (param->findDecoration()) + continue; + + // We cannot optimize if the variable is passed to a function call that treats the + // parameter as mutable, or if there are multiple stores to the variable. + + // If the pointer is re-used with IRStore more than once, + // handling the case will become more complex. + // TODO: we may want to handle the complex cases as well later. + UInt storeCount = 0; + for (auto use = destPtr->firstUse; use; use = use->nextUse) { - isSafeToOptimize = false; - break; + if (as(use->getUser())) + { + storeCount++; + if (storeCount > 1) + goto unsafeToOptimize; + } } - auto callee = call->getCallee(); - auto funcInst = as(callee); - if (!funcInst) + // Check all uses of the destination variable + for (auto use = destPtr->firstUse; use; use = use->nextUse) { - isSafeToOptimize = false; - break; - } + auto user = use->getUser(); + if (user == storeInst) + continue; // Skip the store itself - // Find which argument position this variable is used in - UInt argIndex = 0; - bool foundArg = false; - for (UInt i = 0; i < call->getArgCount(); i++) - { - if (call->getArg(i) == destPtr) + // Allow loads - they are safe since we're not changing the data + if (as(user)) + continue; // Check the next use + + // For function calls, check if the parameter is ConstRef + if (auto call = as(user)) { - argIndex = i; - foundArg = true; - break; + auto callee = call->getCallee(); + auto funcInst = as(callee); + if (!funcInst) + goto unsafeToOptimize; + + // Build parameter list once for efficient indexing + List params; + for (auto param = funcInst->getFirstParam(); param; + param = param->getNextParam()) + { + params.add(param); + } + + // Check ALL argument positions where this variable is used + for (UInt i = 0; i < call->getArgCount(); i++) + { + if (call->getArg(i) == destPtr) + { + if (i >= (UInt)params.getCount()) + goto unsafeToOptimize; + + // Check if this parameter position is ConstRef + auto param = params[i]; + if (!as(param->getDataType())) + goto unsafeToOptimize; + } + } + continue; // Safe so far and check the next use } - } - if (!foundArg) - { - isSafeToOptimize = false; - break; - } + // TODO: there might be more cases that is safe to optimize + // We need to add more cases here as needed. - // Check if the corresponding parameter is ConstRef - auto param = funcInst->getFirstParam(); - for (UInt i = 0; i < argIndex && param; i++) - { - param = param->getNextParam(); - } - if (!param || !as(param->getDataType())) - { - // Unsafe, because the parameter might be used as mutable. - isSafeToOptimize = false; - break; + goto unsafeToOptimize; } - } - if (!isSafeToOptimize) - continue; + // If we get here, all uses are safe to optimize + // safeToOptimize: - // Replace all uses of destPtr with loadedPtr - destPtr->replaceUsesWith(loadedPtr); + // Replace all uses of destPtr with loadedPtr + destPtr->replaceUsesWith(loadedPtr); - // Mark both instructions for removal - toRemove.add(storeInst); - toRemove.add(destPtr); + // Mark instructions for removal + toRemove.add(storeInst); + toRemove.add(destPtr); - // Note: loadInst might be still in use. - // We need to rely on DCE to delete it if unused. + // Note: loadInst might be still in use. + // We need to rely on DCE to delete it if unused. - changed = true; + changed = true; + overallChanged = true; + + unsafeToOptimize:; + } } - } - // Remove marked instructions - for (auto instToRemove : toRemove) - { - instToRemove->removeAndDeallocate(); + // Remove marked instructions + for (auto instToRemove : toRemove) + { + instToRemove->removeAndDeallocate(); + } } - return changed; + return overallChanged; } -bool eliminateLoadStorePairs(IRModule* module) +bool eliminateRedundantTemporaryCopy(IRModule* module) { - bool changed = false; - HashSet processedFuncs; + bool overallChanged = false; - // Look for patterns: load(ptr) followed by store(var, load_result) - // This can be optimized to direct pointer usage when safe - // Process recursively through function calls + // Populate work list with all functions and any functions they call + HashSet workListSet; + List workList; + + // Start with all global functions for (auto inst : module->getGlobalInsts()) { auto func = as(inst); if (!func) continue; + if (workListSet.add(func)) + workList.add(func); + } - changed |= eliminateLoadStorePairsInFunc(func, processedFuncs); + // Recursively add called functions + for (Index i = 0; i < workList.getCount(); i++) + { + auto func = workList[i]; + for (auto block : func->getBlocks()) + { + for (auto inst : block->getChildren()) + { + if (auto call = as(inst)) + { + if (auto calledFunc = as(call->getCallee())) + { + if (workListSet.add(calledFunc)) + workList.add(calledFunc); + } + } + } + } } - return changed; + for (auto func : workList) + { + overallChanged |= eliminateRedundantTemporaryCopyInFunc(func); + } + + return overallChanged; } bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options) diff --git a/source/slang/slang-ir-dce.h b/source/slang/slang-ir-dce.h index f9e367163a..08475313be 100644 --- a/source/slang/slang-ir-dce.h +++ b/source/slang/slang-ir-dce.h @@ -36,12 +36,13 @@ bool isWeakReferenceOperand(IRInst* inst, UInt operandIndex); bool trimOptimizableTypes(IRModule* module); -/// Eliminate unnecessary load+store pairs when safe to do so. -/// This optimization looks for patterns where a value is loaded from a ConstRef -/// parameter and immediately stored to a temporary variable, then only passed -/// to functions that accept ConstRef parameters. In such cases, the temporary -/// variable can be eliminated and the original ConstRef parameter used directly. +/// Eliminate redundant temporary variable copies in load-store patterns. +/// This optimization looks for patterns where a value is loaded from memory +/// and immediately stored to a temporary variable, which is then only used +/// in read-only contexts. In such cases, the temporary variable and the +/// load-store indirection can be eliminated by using the original memory +/// location directly. /// Returns true if any changes were made. -bool eliminateLoadStorePairs(IRModule* module); +bool eliminateRedundantTemporaryCopy(IRModule* module); } // namespace Slang diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp index 3a41cb332c..94dbe57de7 100644 --- a/source/slang/slang-ir-ssa-simplification.cpp +++ b/source/slang/slang-ir-ssa-simplification.cpp @@ -74,7 +74,7 @@ void simplifyIR( changed |= applySparseConditionalConstantPropagationForGlobalScope(module, sink); changed |= peepholeOptimizeGlobalScope(target, module); changed |= trimOptimizableTypes(module); - changed |= eliminateLoadStorePairs(module); + changed |= eliminateRedundantTemporaryCopy(module); for (auto inst : module->getGlobalInsts()) { diff --git a/tests/metal/vector-get-element-ptr.slang b/tests/metal/vector-get-element-ptr.slang index af2acabbce..1c616b37e6 100644 --- a/tests/metal/vector-get-element-ptr.slang +++ b/tests/metal/vector-get-element-ptr.slang @@ -14,11 +14,11 @@ void modify(inout int v) void computeMain(int3 v : SV_DispatchThreadID) { int3 u = v; - // CHECK: int [[TEMP:[a-zA-Z0-9_]+]] = u{{.*}}.x; + // CHECK: int [[TEMP:[a-zA-Z0-9_]+]] = [[OUT:[a-zA-Z0-9_]+]].x; // CHECK: modify{{.*}}(&[[TEMP]]) - // CHECK: u{{.*}}.x = [[TEMP]]; + // CHECK: [[OUT]].x = [[TEMP]]; modify(u.x); // BUF: 2 outputBuffer[0] = u.x + u.y; -} \ No newline at end of file +} From ed51278cba297c86aaa1cf729b4b67b0a533d98e Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 22 Sep 2025 22:16:48 -0700 Subject: [PATCH 4/9] Fix Metal test failures --- source/slang/slang-ir-dce.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 4d1677dcde..39da0ced60 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -544,15 +544,29 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) // Only optimize temporary variable. // Don't optimize stores to permanent memory locations. if (destPtr->getOp() != kIROp_Var) - continue; + continue; // skip because it is not the pattern we are looking for // Check if we're storing a load result auto loadInst = as(storedValue); if (!loadInst) - continue; + continue; // skip because it is not the pattern we are looking for auto loadedPtr = loadInst->getPtr(); + // Check address space compatibility + if (auto destPtrType = as(getRootAddr(destPtr)->getDataType())) + { + if (auto loadedPtrType = + as(getRootAddr(loadedPtr)->getDataType())) + { + if (destPtrType->getAddressSpace() != loadedPtrType->getAddressSpace()) + { + // skip because the address space is not compatible. + continue; + } + } + } + // Do not optimize loads from semantic parameters because some semantics have // builtin types that are vector types but pretend to be scalar types (e.g., // SV_DispatchThreadID is used as 'int id' but maps to 'float3 @@ -631,8 +645,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) goto unsafeToOptimize; } - // If we get here, all uses are safe to optimize - // safeToOptimize: + // If we get here, all uses are safe to optimize. // Replace all uses of destPtr with loadedPtr destPtr->replaceUsesWith(loadedPtr); From b39da60e0c410b3528d21eed6df01e2178e15058 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:29:43 -0700 Subject: [PATCH 5/9] Check compatibility of address space for function call --- source/slang/slang-ir-dce.cpp | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 39da0ced60..97cd852c59 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -551,19 +551,21 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) if (!loadInst) continue; // skip because it is not the pattern we are looking for - auto loadedPtr = loadInst->getPtr(); + auto loadPtr = loadInst->getPtr(); + + AddressSpace loadAddressSpace = AddressSpace::Generic; + if (auto loadPtrType = as(getRootAddr(loadPtr)->getDataType())) + { + loadAddressSpace = loadPtrType->getAddressSpace(); + } // Check address space compatibility if (auto destPtrType = as(getRootAddr(destPtr)->getDataType())) { - if (auto loadedPtrType = - as(getRootAddr(loadedPtr)->getDataType())) + if (destPtrType->getAddressSpace() != loadAddressSpace) { - if (destPtrType->getAddressSpace() != loadedPtrType->getAddressSpace()) - { - // skip because the address space is not compatible. - continue; - } + // skip because the address space is not compatible. + continue; } } @@ -574,7 +576,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) // to maintain this pretense, which breaks our load/store optimization assumptions. // Skip optimization when loading from semantics to let legalization handle the load // removal. - if (auto param = as(loadedPtr)) + if (auto param = as(loadPtr)) if (param->findDecoration()) continue; @@ -631,9 +633,15 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) goto unsafeToOptimize; // Check if this parameter position is ConstRef - auto param = params[i]; - if (!as(param->getDataType())) - goto unsafeToOptimize; + auto paramType = as(params[i]->getDataType()); + if (!paramType) + goto unsafeToOptimize; // must be a pointer + + if (!as(paramType)) + goto unsafeToOptimize; // must be const-ref + + if (paramType->getAddressSpace() != loadAddressSpace) + goto unsafeToOptimize; // incompatible address space } } continue; // Safe so far and check the next use @@ -648,7 +656,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) // If we get here, all uses are safe to optimize. // Replace all uses of destPtr with loadedPtr - destPtr->replaceUsesWith(loadedPtr); + destPtr->replaceUsesWith(loadPtr); // Mark instructions for removal toRemove.add(storeInst); From ba02075070b644c36adb26d40df900e950816954 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:21:35 -0700 Subject: [PATCH 6/9] Remove too much restriction --- source/slang/slang-ir-dce.cpp | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 97cd852c59..73a52101cf 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -541,15 +541,17 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) auto storedValue = storeInst->getVal(); auto destPtr = storeInst->getPtr(); - // Only optimize temporary variable. - // Don't optimize stores to permanent memory locations. if (destPtr->getOp() != kIROp_Var) - continue; // skip because it is not the pattern we are looking for + { + // Only optimize temporary variable. + // Don't optimize stores to permanent memory locations. + continue; + } // Check if we're storing a load result auto loadInst = as(storedValue); if (!loadInst) - continue; // skip because it is not the pattern we are looking for + continue; auto loadPtr = loadInst->getPtr(); @@ -559,16 +561,6 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) loadAddressSpace = loadPtrType->getAddressSpace(); } - // Check address space compatibility - if (auto destPtrType = as(getRootAddr(destPtr)->getDataType())) - { - if (destPtrType->getAddressSpace() != loadAddressSpace) - { - // skip because the address space is not compatible. - continue; - } - } - // Do not optimize loads from semantic parameters because some semantics have // builtin types that are vector types but pretend to be scalar types (e.g., // SV_DispatchThreadID is used as 'int id' but maps to 'float3 @@ -580,9 +572,6 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) if (param->findDecoration()) continue; - // We cannot optimize if the variable is passed to a function call that treats the - // parameter as mutable, or if there are multiple stores to the variable. - // If the pointer is re-used with IRStore more than once, // handling the case will become more complex. // TODO: we may want to handle the complex cases as well later. @@ -608,7 +597,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) if (as(user)) continue; // Check the next use - // For function calls, check if the parameter is ConstRef + // For function calls, check if the pointer is treated as immutable. if (auto call = as(user)) { auto callee = call->getCallee(); @@ -633,10 +622,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) goto unsafeToOptimize; // Check if this parameter position is ConstRef - auto paramType = as(params[i]->getDataType()); - if (!paramType) - goto unsafeToOptimize; // must be a pointer - + auto paramType = cast(params[i]->getDataType()); if (!as(paramType)) goto unsafeToOptimize; // must be const-ref @@ -650,6 +636,7 @@ static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) // TODO: there might be more cases that is safe to optimize // We need to add more cases here as needed. + // If we get here, the pointer is used with an unexpected IR. goto unsafeToOptimize; } From 77702f68539c08482ff8bbd591c1e4e2738420e9 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:55:39 -0700 Subject: [PATCH 7/9] Refactor based on review feedbacks --- source/slang/slang-ir-dce.cpp | 203 ------------------- source/slang/slang-ir-dce.h | 9 - source/slang/slang-ir-redundancy-removal.cpp | 192 ++++++++++++++++++ source/slang/slang-ir-ssa-simplification.cpp | 1 - 4 files changed, 192 insertions(+), 213 deletions(-) diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 73a52101cf..0b89baf64c 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -512,209 +512,6 @@ bool trimOptimizableTypes(IRModule* module) return changed; } -static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) -{ - // Consider the following IR pattern: - // ``` - // let %temp = var - // let %value = load(%sourcePtr) - // store(%temp, %value) - // ``` - // We can replace "%temp" with "%sourcePtr" without the load and store indirection - // if "%temp" is used only in read-only contexts. - - bool overallChanged = false; - for (bool changed = true; changed;) - { - changed = false; - - HashSet toRemove; - - for (auto block : func->getBlocks()) - { - for (auto blockInst : block->getChildren()) - { - auto storeInst = as(blockInst); - if (!storeInst) - continue; - - auto storedValue = storeInst->getVal(); - auto destPtr = storeInst->getPtr(); - - if (destPtr->getOp() != kIROp_Var) - { - // Only optimize temporary variable. - // Don't optimize stores to permanent memory locations. - continue; - } - - // Check if we're storing a load result - auto loadInst = as(storedValue); - if (!loadInst) - continue; - - auto loadPtr = loadInst->getPtr(); - - AddressSpace loadAddressSpace = AddressSpace::Generic; - if (auto loadPtrType = as(getRootAddr(loadPtr)->getDataType())) - { - loadAddressSpace = loadPtrType->getAddressSpace(); - } - - // Do not optimize loads from semantic parameters because some semantics have - // builtin types that are vector types but pretend to be scalar types (e.g., - // SV_DispatchThreadID is used as 'int id' but maps to 'float3 - // gl_GlobalInvocationID'). The legalization step must remove the load instruction - // to maintain this pretense, which breaks our load/store optimization assumptions. - // Skip optimization when loading from semantics to let legalization handle the load - // removal. - if (auto param = as(loadPtr)) - if (param->findDecoration()) - continue; - - // If the pointer is re-used with IRStore more than once, - // handling the case will become more complex. - // TODO: we may want to handle the complex cases as well later. - UInt storeCount = 0; - for (auto use = destPtr->firstUse; use; use = use->nextUse) - { - if (as(use->getUser())) - { - storeCount++; - if (storeCount > 1) - goto unsafeToOptimize; - } - } - - // Check all uses of the destination variable - for (auto use = destPtr->firstUse; use; use = use->nextUse) - { - auto user = use->getUser(); - if (user == storeInst) - continue; // Skip the store itself - - // Allow loads - they are safe since we're not changing the data - if (as(user)) - continue; // Check the next use - - // For function calls, check if the pointer is treated as immutable. - if (auto call = as(user)) - { - auto callee = call->getCallee(); - auto funcInst = as(callee); - if (!funcInst) - goto unsafeToOptimize; - - // Build parameter list once for efficient indexing - List params; - for (auto param = funcInst->getFirstParam(); param; - param = param->getNextParam()) - { - params.add(param); - } - - // Check ALL argument positions where this variable is used - for (UInt i = 0; i < call->getArgCount(); i++) - { - if (call->getArg(i) == destPtr) - { - if (i >= (UInt)params.getCount()) - goto unsafeToOptimize; - - // Check if this parameter position is ConstRef - auto paramType = cast(params[i]->getDataType()); - if (!as(paramType)) - goto unsafeToOptimize; // must be const-ref - - if (paramType->getAddressSpace() != loadAddressSpace) - goto unsafeToOptimize; // incompatible address space - } - } - continue; // Safe so far and check the next use - } - - // TODO: there might be more cases that is safe to optimize - // We need to add more cases here as needed. - - // If we get here, the pointer is used with an unexpected IR. - goto unsafeToOptimize; - } - - // If we get here, all uses are safe to optimize. - - // Replace all uses of destPtr with loadedPtr - destPtr->replaceUsesWith(loadPtr); - - // Mark instructions for removal - toRemove.add(storeInst); - toRemove.add(destPtr); - - // Note: loadInst might be still in use. - // We need to rely on DCE to delete it if unused. - - changed = true; - overallChanged = true; - - unsafeToOptimize:; - } - } - - // Remove marked instructions - for (auto instToRemove : toRemove) - { - instToRemove->removeAndDeallocate(); - } - } - - return overallChanged; -} - -bool eliminateRedundantTemporaryCopy(IRModule* module) -{ - bool overallChanged = false; - - // Populate work list with all functions and any functions they call - HashSet workListSet; - List workList; - - // Start with all global functions - for (auto inst : module->getGlobalInsts()) - { - auto func = as(inst); - if (!func) - continue; - if (workListSet.add(func)) - workList.add(func); - } - - // Recursively add called functions - for (Index i = 0; i < workList.getCount(); i++) - { - auto func = workList[i]; - for (auto block : func->getBlocks()) - { - for (auto inst : block->getChildren()) - { - if (auto call = as(inst)) - { - if (auto calledFunc = as(call->getCallee())) - { - if (workListSet.add(calledFunc)) - workList.add(calledFunc); - } - } - } - } - } - - for (auto func : workList) - { - overallChanged |= eliminateRedundantTemporaryCopyInFunc(func); - } - - return overallChanged; -} - bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options) { // The main source of confusion/complexity here is that diff --git a/source/slang/slang-ir-dce.h b/source/slang/slang-ir-dce.h index 08475313be..4df491881a 100644 --- a/source/slang/slang-ir-dce.h +++ b/source/slang/slang-ir-dce.h @@ -36,13 +36,4 @@ bool isWeakReferenceOperand(IRInst* inst, UInt operandIndex); bool trimOptimizableTypes(IRModule* module); -/// Eliminate redundant temporary variable copies in load-store patterns. -/// This optimization looks for patterns where a value is loaded from memory -/// and immediately stored to a temporary variable, which is then only used -/// in read-only contexts. In such cases, the temporary variable and the -/// load-store indirection can be eliminated by using the original memory -/// location directly. -/// Returns true if any changes were made. -bool eliminateRedundantTemporaryCopy(IRModule* module); - } // namespace Slang diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index c52ab7bcae..346ce44a9c 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -126,6 +126,197 @@ bool removeRedundancy(IRModule* module, bool hoistLoopInvariantInsts) return changed; } +bool isAddressMutable(IRInst* inst) +{ + auto rootType = getRootAddr(inst)->getDataType(); + switch (rootType->getOp()) + { + case kIROp_ParameterBlockType: + case kIROp_ConstantBufferType: + case kIROp_StructuredBufferLoad: + case kIROp_GetStructuredBufferPtr: + case kIROp_ConstRefType: + return false; // immutable + } + + if (auto ptr = as(rootType)) + return ptr->getAccessQualifier() == AccessQualifier::ReadWrite; + + return true; // mutable +} + +/// Eliminate redundant temporary variable copies in load-store patterns. +/// This optimization looks for patterns where a value is loaded from memory +/// and immediately stored to a temporary variable, which is then only used +/// in read-only contexts. In such cases, the temporary variable and the +/// load-store indirection can be eliminated by using the original memory +/// location directly. +/// Returns true if any changes were made. +static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func) +{ + // Consider the following IR pattern: + // ``` + // let %temp = var + // let %value = load(%sourcePtr) + // store(%temp, %value) + // ``` + // We can replace "%temp" with "%sourcePtr" without the load and store indirection + // if "%temp" is used only in read-only contexts. + + bool overallChanged = false; + for (bool changed = true; changed;) + { + changed = false; + + HashSet toRemove; + + for (auto block : func->getBlocks()) + { + for (auto blockInst : block->getChildren()) + { + auto storeInst = as(blockInst); + if (!storeInst) + { + // We are interested only in IRStore. + continue; + } + + auto storedValue = storeInst->getVal(); + auto destPtr = storeInst->getPtr(); + + if (destPtr->getOp() != kIROp_Var) + { + // Only optimize temporary variable. + // Don't optimize stores to permanent memory locations. + continue; + } + + // Check if we're storing a load result + auto loadInst = as(storedValue); + if (!loadInst) + { + // Skip because only IRLoad is expected for the optimization. + continue; + } + + auto loadPtr = loadInst->getPtr(); + + if (isAddressMutable(loadPtr)) + { + // If the input is mutable, we cannot optimize, + // because any function calls may alter the content of the input + // and we cannot replace the temporary copy with a memory pointer. + continue; + } + + // Storing address-sapce for later use. + AddressSpace loadAddressSpace = AddressSpace::Generic; + if (auto rootPtrType = as(getRootAddr(loadPtr)->getDataType())) + { + loadAddressSpace = rootPtrType->getAddressSpace(); + } + + // Do not optimize loads from semantic parameters because some semantics have + // builtin types that are vector types but pretend to be scalar types (e.g., + // SV_DispatchThreadID is used as 'int id' but maps to 'float3 + // gl_GlobalInvocationID'). The legalization step must remove the load instruction + // to maintain this pretense, which breaks our load/store optimization assumptions. + // Skip optimization when loading from semantics to let legalization handle the load + // removal. + if (auto param = as(loadPtr)) + if (param->findDecoration()) + continue; + + // Check all uses of the destination variable + for (auto use = destPtr->firstUse; use; use = use->nextUse) + { + auto user = use->getUser(); + if (user == storeInst) + { + // Skip the store itself + continue; // check the next use + } + + if (as(use->getUser())) + { + // We cannot optimize when the variable is reused + // with another store. + goto unsafeToOptimize; + } + + if (as(user)) + { + // Allow loads because IRLoad is read-only operation + continue; // Check the next use + } + + // For function calls, check if the pointer is treated as immutable. + if (auto call = as(user)) + { + auto callee = call->getCallee(); + auto funcInst = as(callee); + if (!funcInst) + goto unsafeToOptimize; + + UIndex argIndex = (UIndex)(use - call->getArgs()); + SLANG_ASSERT(argIndex < call->getArgCount()); + SLANG_ASSERT(call->getArg(argIndex) == destPtr); + + IRParam* param = funcInst->getFirstParam(); + for (UIndex i = 0; i < argIndex; i++) + { + if (param) + param = param->getNextParam(); + } + if (nullptr == param) + goto unsafeToOptimize; // IRFunc might be incomplete yet + + if (auto paramPtrType = as(param->getFullType())) + { + if (paramPtrType->getAddressSpace() != loadAddressSpace) + goto unsafeToOptimize; // incompatible address space + + continue; // safe so far and check the next use + } + goto unsafeToOptimize; // must be const-ref + } + + // TODO: there might be more cases that is safe to optimize + // We need to add more cases here as needed. + + // If we get here, the pointer is used with an unexpected IR. + goto unsafeToOptimize; + } + + // If we get here, all uses are safe to optimize. + + // Replace all uses of destPtr with loadedPtr + destPtr->replaceUsesWith(loadPtr); + + // Mark instructions for removal + toRemove.add(storeInst); + toRemove.add(destPtr); + + // Note: loadInst might be still in use. + // We need to rely on DCE to delete it if unused. + + changed = true; + overallChanged = true; + + unsafeToOptimize:; + } + } + + // Remove marked instructions + for (auto instToRemove : toRemove) + { + instToRemove->removeAndDeallocate(); + } + } + + return overallChanged; +} + bool removeRedundancyInFunc(IRGlobalValueWithCode* func, bool hoistLoopInvariantInsts) { auto root = func->getFirstBlock(); @@ -163,6 +354,7 @@ bool removeRedundancyInFunc(IRGlobalValueWithCode* func, bool hoistLoopInvariant if (auto normalFunc = as(func)) { result |= eliminateRedundantLoadStore(normalFunc); + result |= eliminateRedundantTemporaryCopyInFunc(normalFunc); } return result; } diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp index 94dbe57de7..41da868b65 100644 --- a/source/slang/slang-ir-ssa-simplification.cpp +++ b/source/slang/slang-ir-ssa-simplification.cpp @@ -74,7 +74,6 @@ void simplifyIR( changed |= applySparseConditionalConstantPropagationForGlobalScope(module, sink); changed |= peepholeOptimizeGlobalScope(target, module); changed |= trimOptimizableTypes(module); - changed |= eliminateRedundantTemporaryCopy(module); for (auto inst : module->getGlobalInsts()) { From 8e13179b9d29fb2c726101d303443d9db0730524 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 23 Sep 2025 21:23:07 -0700 Subject: [PATCH 8/9] Treat a few aliasing cases as mutable --- source/slang/slang-ir-redundancy-removal.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index 346ce44a9c..cbea6ee916 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -133,14 +133,19 @@ bool isAddressMutable(IRInst* inst) { case kIROp_ParameterBlockType: case kIROp_ConstantBufferType: - case kIROp_StructuredBufferLoad: - case kIROp_GetStructuredBufferPtr: case kIROp_ConstRefType: return false; // immutable + + // We should consider StructuredBuffer as mutable by default, since the resources may alias. + // There could be anotherRWStructuredBuffer pointing to the same memory location as the + // structured buffer. + case kIROp_StructuredBufferLoad: + case kIROp_GetStructuredBufferPtr: + return true; // mutable } - if (auto ptr = as(rootType)) - return ptr->getAccessQualifier() == AccessQualifier::ReadWrite; + // Similarly, IRPtrTypeBase should also be considered writable always, + // because there can be aliasing. return true; // mutable } From dbe1992a1f456c1389f6a667d1e6689e8adc7dd4 Mon Sep 17 00:00:00 2001 From: slangbot Date: Wed, 24 Sep 2025 12:25:46 +0800 Subject: [PATCH 9/9] format code (#104) --- source/slang/slang-ir-redundancy-removal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index cbea6ee916..94bb6b67c6 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -141,7 +141,7 @@ bool isAddressMutable(IRInst* inst) // structured buffer. case kIROp_StructuredBufferLoad: case kIROp_GetStructuredBufferPtr: - return true; // mutable + return true; // mutable } // Similarly, IRPtrTypeBase should also be considered writable always,