From 3254815539b36049fb074bf3cb18ee8bbb2ffdb6 Mon Sep 17 00:00:00 2001 From: Adam Yang <31109344+adam-yang@users.noreply.github.com> Date: Fri, 20 Jun 2025 16:52:37 -0700 Subject: [PATCH 01/11] Made llvm-debuginfo-analyzer work for AMDGPU. A few changes to generate DWARF correctly in AMDGPU --- .../LogicalView/Readers/LVBinaryReader.h | 3 +- .../LogicalView/Readers/LVBinaryReader.cpp | 4 +- .../LogicalView/Readers/LVCodeViewReader.cpp | 10 +- .../LogicalView/Readers/LVDWARFReader.cpp | 12 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 3 +- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 3 + .../llvm-debuginfo-analyzer/DWARF/amdgpu.ll | 103 ++++++++++++++++++ 7 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h index 1847fa8323480..1f8b884bc1b5d 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h @@ -159,7 +159,8 @@ class LVBinaryReader : public LVReader { LVAddress WasmCodeSectionOffset = 0; // Loads all info for the architecture of the provided object file. - Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures); + Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures, + StringRef CPU); virtual void mapRangeAddress(const object::ObjectFile &Obj) {} virtual void mapRangeAddress(const object::ObjectFile &Obj, diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp index 80b4185b7c600..414f0f3efc82d 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp @@ -275,7 +275,8 @@ void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { } Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, - StringRef TheFeatures) { + StringRef TheFeatures, + StringRef CPU) { std::string TargetLookupError; const Target *TheTarget = TargetRegistry::lookupTarget(TheTriple, TargetLookupError); @@ -298,7 +299,6 @@ Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, MAI.reset(AsmInfo); // Target subtargets. - StringRef CPU; MCSubtargetInfo *SubtargetInfo( TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); if (!SubtargetInfo) diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp index e5895516b5e77..2ff70816b4bf1 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp @@ -1190,7 +1190,12 @@ Error LVCodeViewReader::loadTargetInfo(const ObjectFile &Obj) { FeaturesValue = SubtargetFeatures(); } FeaturesValue = *Features; - return loadGenericTargetInfo(TT.str(), FeaturesValue.getString()); + + StringRef CPU; + if (auto OptCPU = Obj.tryGetCPUName()) + CPU = *OptCPU; + + return loadGenericTargetInfo(TT.str(), FeaturesValue.getString(), CPU); } Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) { @@ -1200,8 +1205,9 @@ Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) { TT.setOS(Triple::Win32); StringRef TheFeature = ""; + StringRef TheCPU = ""; - return loadGenericTargetInfo(TT.str(), TheFeature); + return loadGenericTargetInfo(TT.str(), TheFeature, TheCPU); } std::string LVCodeViewReader::getRegisterName(LVSmall Opcode, diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp index 696e2bc948a2e..62134dfdadf46 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp @@ -956,10 +956,7 @@ LVElement *LVDWARFReader::getElementForOffset(LVOffset Offset, Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) { // Detect the architecture from the object file. We usually don't need OS // info to lookup a target and create register info. - Triple TT; - TT.setArch(Triple::ArchType(Obj.getArch())); - TT.setVendor(Triple::UnknownVendor); - TT.setOS(Triple::UnknownOS); + Triple TT = Obj.makeTriple(); // Features to be passed to target/subtarget Expected Features = Obj.getFeatures(); @@ -969,7 +966,12 @@ Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) { FeaturesValue = SubtargetFeatures(); } FeaturesValue = *Features; - return loadGenericTargetInfo(TT.str(), FeaturesValue.getString()); + + StringRef CPU; + if (auto OptCPU = Obj.tryGetCPUName()) + CPU = *OptCPU; + + return loadGenericTargetInfo(TT.str(), FeaturesValue.getString(), CPU); } void LVDWARFReader::mapRangeAddress(const ObjectFile &Obj) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 8f89168754180..bf390e836078e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -226,7 +226,8 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) : AMDGPUAsmBackend(T), Is64Bit(TT.isAMDGCN()), - HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { + HasRelocationAddend(TT.getOS() == Triple::AMDHSA || + TT.getOS() == Triple::AMDPAL) { switch (TT.getOS()) { case Triple::AMDHSA: OSABI = ELF::ELFOSABI_AMDGPU_HSA; diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 205a45a045a42..469a6525b4ac0 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -130,6 +130,9 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { if (VirtReg.isPhysical()) continue; + if (MI.isDebugInstr() && VirtReg == AMDGPU::NoRegister) + continue; + if (!VRM->hasPhys(VirtReg)) continue; diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll new file mode 100644 index 0000000000000..1d031979309a6 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll @@ -0,0 +1,103 @@ +; RUN: llc %s -o %t.o -mcpu=gfx1030 -filetype=obj -O0 +; RUN: llvm-debuginfo-analyzer %t.o --print=all --attribute=all | FileCheck %s + +; This test compiles this module with AMDGPU backend under -O0, +; and makes sure llvm-debuginfo-analzyer works for it. + +; Simple checks to make sure llvm-debuginfo-analzyer didn't fail early. +; CHECK: Logical View: +; CHECK: {CompileUnit} +; CHECK: {Code} 's_endpgm' + +source_filename = "module" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-p10:32:32-p11:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32" +target triple = "amdgcn-amd-amdpal" + +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } + +; Function Attrs: memory(readwrite) +define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { + %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 + %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 + %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 + %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28 + %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28 + #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) + %4 = and i64 %1, -4294967296, !dbg !30 + %5 = zext i32 %userdata4 to i64, !dbg !30 + %6 = or disjoint i64 %4, %5, !dbg !30 + %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30 + call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %7, i32 4), "dereferenceable"(ptr addrspace(4) %7, i32 -1) ], !dbg !30 + %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2 + %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30 + #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32) + %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33 + #dbg_value(float %10, !34, !DIExpression(), !35) + call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %7, i32 4), "dereferenceable"(ptr addrspace(4) %7, i32 -1) ], !dbg !36 + %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36 + %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36 + %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 + %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2 + call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36 + ret void, !dbg !37 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare noundef i64 @llvm.amdgcn.s.getpc() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 + +attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } +attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12, !13} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3) +!1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "") +!2 = !{} +!3 = !{!4, !10} +!4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) +!5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true) +!6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7) +!7 = !{!8} +!8 = !DITemplateTypeParameter(name: "element", type: !9) +!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +!10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) +!11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true) +!12 = !{i32 2, !"Dwarf Version", i32 5} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!15 = !DISubroutineType(types: !16) +!16 = !{null, !17} +!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18) +!18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 96, align: 32, elements: !19, templateParams: !24) +!19 = !{!20, !22, !23} +!20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic) +!21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) +!22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!24 = !{!25, !26} +!25 = !DITemplateTypeParameter(name: "element", type: !21) +!26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3) +!27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!28 = !DILocation(line: 7, column: 17, scope: !14) +!29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17) +!30 = !DILocation(line: 11, column: 18, scope: !14) +!31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9) +!32 = !DILocation(line: 11, column: 9, scope: !14) +!33 = !DILocation(line: 14, column: 26, scope: !14) +!34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9) +!35 = !DILocation(line: 14, column: 9, scope: !14) +!36 = !DILocation(line: 17, column: 14, scope: !14) +!37 = !DILocation(line: 19, column: 1, scope: !14) \ No newline at end of file From c6bacae4803be21e4204a202d4c2b1c4a5559bb6 Mon Sep 17 00:00:00 2001 From: Adam Yang <31109344+adam-yang@users.noreply.github.com> Date: Fri, 20 Jun 2025 18:21:29 -0700 Subject: [PATCH 02/11] Moved the test to amdgpu target tests --- .../AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/{tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll => CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll} (100%) diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll similarity index 100% rename from llvm/test/tools/llvm-debuginfo-analyzer/DWARF/amdgpu.ll rename to llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll From a4277f436bbfdbabeac50aa84c8c8d7f2c97b116 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Fri, 25 Jul 2025 12:00:53 -0700 Subject: [PATCH 03/11] Addressed feedback --- .../LogicalView/Readers/LVBinaryReader.h | 2 +- .../LogicalView/Readers/LVBinaryReader.cpp | 4 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 3 +- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 4 +- .../AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll | 12 +- .../si-pre-allocate-wwwmregs-dbg-noreg.mir | 210 ++++++++++++++++++ 6 files changed, 222 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h index 1f8b884bc1b5d..2cf4a8ec6a37f 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h @@ -160,7 +160,7 @@ class LVBinaryReader : public LVReader { // Loads all info for the architecture of the provided object file. Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures, - StringRef CPU); + StringRef TheCPU); virtual void mapRangeAddress(const object::ObjectFile &Obj) {} virtual void mapRangeAddress(const object::ObjectFile &Obj, diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp index 414f0f3efc82d..0df9137a3bd37 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp @@ -276,7 +276,7 @@ void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures, - StringRef CPU) { + StringRef TheCPU) { std::string TargetLookupError; const Target *TheTarget = TargetRegistry::lookupTarget(TheTriple, TargetLookupError); @@ -300,7 +300,7 @@ Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, // Target subtargets. MCSubtargetInfo *SubtargetInfo( - TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); + TheTarget->createMCSubtargetInfo(TheTriple, TheCPU, TheFeatures)); if (!SubtargetInfo) return createStringError(errc::invalid_argument, "no subtarget info for target " + TheTriple); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index bf390e836078e..8f89168754180 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -226,8 +226,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) : AMDGPUAsmBackend(T), Is64Bit(TT.isAMDGCN()), - HasRelocationAddend(TT.getOS() == Triple::AMDHSA || - TT.getOS() == Triple::AMDPAL) { + HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { switch (TT.getOS()) { case Triple::AMDHSA: OSABI = ELF::ELFOSABI_AMDGPU_HSA; diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 469a6525b4ac0..f807c567efa2f 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -130,8 +130,10 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { if (VirtReg.isPhysical()) continue; - if (MI.isDebugInstr() && VirtReg == AMDGPU::NoRegister) + if (!VirtReg.isValid()) { + assert(MI.isDebugInstr() && "non-debug use of noreg"); continue; + } if (!VRM->hasPhys(VirtReg)) continue; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll index 1d031979309a6..2cff21c66172d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll @@ -2,11 +2,14 @@ ; RUN: llvm-debuginfo-analyzer %t.o --print=all --attribute=all | FileCheck %s ; This test compiles this module with AMDGPU backend under -O0, -; and makes sure llvm-debuginfo-analzyer works for it. +; and makes sure llvm-debuginfo-analyzer works for it. ; Simple checks to make sure llvm-debuginfo-analzyer didn't fail early. ; CHECK: Logical View: ; CHECK: {CompileUnit} +; CHECK-DAG: {Parameter} 'dtid' -> [0x{{[a-f0-9]+}}]'uint3' +; CHECK-DAG: {Variable} 'my_var2' -> [0x{{[a-f0-9]+}}]'float' +; CHECK-DAG: {Line} {{.+}}basic_var.hlsl ; CHECK: {Code} 's_endpgm' source_filename = "module" @@ -15,7 +18,6 @@ target triple = "amdgcn-amd-amdpal" %dx.types.ResRet.f32 = type { float, float, float, float, i32 } -; Function Attrs: memory(readwrite) define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 @@ -42,16 +44,12 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, ret void, !dbg !37 } -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare noundef i64 @llvm.amdgcn.s.getpc() #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) declare void @llvm.assume(i1 noundef) #2 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } @@ -100,4 +98,4 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) } !34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9) !35 = !DILocation(line: 14, column: 9, scope: !14) !36 = !DILocation(line: 17, column: 14, scope: !14) -!37 = !DILocation(line: 19, column: 1, scope: !14) \ No newline at end of file +!37 = !DILocation(line: 19, column: 1, scope: !14) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir new file mode 100644 index 0000000000000..4b5fea863289b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -0,0 +1,210 @@ +# RUN: llc %s -o - -mcpu=gfx1030 -O0 -run-pass=si-pre-allocate-wwm-regs | FileCheck %s + +# Simple regression test to make sure DBG_VALUE $noreg does not assert in the pass + +# CHECK: S_ENDPGM + +--- | + source_filename = "module" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + target triple = "amdgcn-amd-amdpal" + + %dx.types.ResRet.f32 = type { float, float, float, float, i32 } + + define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { + %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 + %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 + %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 + %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28 + %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28 + #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) + %4 = and i64 %1, -4294967296, !dbg !30 + %5 = zext i32 %userdata4 to i64, !dbg !30 + %6 = or disjoint i64 %4, %5, !dbg !30 + %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30, !amdgpu.uniform !2 + %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2 + %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30 + #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32) + %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33 + #dbg_value(float %10, !34, !DIExpression(), !35) + %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36, !amdgpu.uniform !2 + %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36 + %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 + %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2 + call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36 + ret void, !dbg !37 + } + + declare noundef i64 @llvm.amdgcn.s.getpc() #1 + declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3 + declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 + + attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } + attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" } + attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx1030" } + attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx1030" } + attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx1030" } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!12, !13} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3) + !1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "") + !2 = !{} + !3 = !{!4, !10} + !4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true) + !6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7) + !7 = !{!8} + !8 = !DITemplateTypeParameter(name: "element", type: !9) + !9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) + !10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) + !11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true) + !12 = !{i32 2, !"Dwarf Version", i32 5} + !13 = !{i32 2, !"Debug Info Version", i32 3} + !14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) + !15 = !DISubroutineType(types: !16) + !16 = !{null, !17} + !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18) + !18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 96, align: 32, elements: !19, templateParams: !24) + !19 = !{!20, !22, !23} + !20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic) + !21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) + !22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic) + !23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic) + !24 = !{!25, !26} + !25 = !DITemplateTypeParameter(name: "element", type: !21) + !26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3) + !27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !28 = !DILocation(line: 7, column: 17, scope: !14) + !29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17) + !30 = !DILocation(line: 11, column: 18, scope: !14) + !31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9) + !32 = !DILocation(line: 11, column: 9, scope: !14) + !33 = !DILocation(line: 14, column: 26, scope: !14) + !34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9) + !35 = !DILocation(line: 14, column: 9, scope: !14) + !36 = !DILocation(line: 17, column: 14, scope: !14) + !37 = !DILocation(line: 19, column: 1, scope: !14) +... +--- +name: _amdgpu_cs_main +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: true +isSSA: false +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 4 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + isChainFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: true + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sgpr32' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr6' } + psInputAddr: 0 + psInputEnable: 0 + maxMemoryClusterDWords: 8 + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 16 + vgprForAGPRCopy: '' + sgprForEXECCopy: '$sgpr12_sgpr13' + longBranchReservedReg: '' + hasInitWholeWave: false + dynamicVGPRBlockSize: 0 + scratchReservedForDynamicVGPRs: 0 +body: | + bb.0 (%ir-block.0): + liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + + %8:vgpr_32 = COPY killed $vgpr2 + %7:vgpr_32 = COPY killed $vgpr1 + %6:vgpr_32 = COPY killed $vgpr0 + renamable $sgpr0 = COPY killed $sgpr4 + %39:vgpr_32 = IMPLICIT_DEF + %39:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %39 + renamable $sgpr3 = COPY killed $sgpr2 + renamable $sgpr2 = COPY $sgpr1 + $sgpr1 = SI_RESTORE_S32_FROM_VGPR %39, 0 + dead renamable $sgpr4 = IMPLICIT_DEF + dead renamable $sgpr4 = IMPLICIT_DEF + dead renamable $sgpr4 = IMPLICIT_DEF + undef %38.sub0:vreg_96 = COPY %6 + %38.sub1:vreg_96 = COPY %7 + dead %38.sub2:vreg_96 = COPY %8 + undef renamable $sgpr4 = COPY renamable $sgpr3, implicit-def $sgpr4_sgpr5_sgpr6 + renamable $sgpr5 = COPY killed renamable $sgpr1 + renamable $sgpr6 = COPY killed renamable $sgpr0 + dead renamable $sgpr8_sgpr9_sgpr10 = IMPLICIT_DEF + renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo debug-location !28 + renamable $sgpr4 = S_MOV_B32 6 + %16:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %6, implicit $exec, debug-location !28 + DBG_VALUE %16, $noreg, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !28 + renamable $sgpr3 = S_MOV_B32 -1 + renamable $sgpr4 = S_MOV_B32 0 + undef renamable $sgpr6 = COPY renamable $sgpr4, implicit-def $sgpr6_sgpr7 + renamable $sgpr7 = COPY killed renamable $sgpr3 + renamable $sgpr0_sgpr1 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr6_sgpr7, implicit-def dead $scc, debug-location !30 + renamable $sgpr5 = S_MOV_B32 0, debug-location !30 + undef renamable $sgpr2 = COPY killed renamable $sgpr2, implicit-def $sgpr2_sgpr3, debug-location !30 + renamable $sgpr3 = COPY killed renamable $sgpr5, debug-location !30 + renamable $sgpr0_sgpr1 = disjoint S_OR_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr2_sgpr3, implicit-def dead $scc, debug-location !30 + renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, debug-location !30 :: (invariant load (s128) from %ir.7, align 4, addrspace 4) + renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 32, 0, debug-location !36 :: (invariant load (s128) from %ir.11, align 4, addrspace 4) + %26:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %16, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !30 :: (dereferenceable load (s32), align 1, addrspace 8) + DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32 + %27:vgpr_32 = arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %26, 0, %26, 0, 0, implicit $mode, implicit $exec, debug-location !33 + DBG_VALUE %27, $noreg, !34, !DIExpression(), debug-location !35 + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 + dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 + undef %37.sub0:vreg_128 = COPY %27, debug-location !36 + %37.sub1:vreg_128 = COPY %27, debug-location !36 + %37.sub2:vreg_128 = COPY %27, debug-location !36 + %37.sub3:vreg_128 = COPY %27, debug-location !36 + %29:vreg_128 = COPY %37, debug-location !36 + BUFFER_STORE_FORMAT_XYZW_IDXEN_exact %29, %16, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !36 :: (dereferenceable store (s128), align 1, addrspace 8) + S_ENDPGM 0, debug-location !37 +... From c938125a081f5335948c39a915555261eabdeda5 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Mon, 28 Jul 2025 14:50:18 -0700 Subject: [PATCH 04/11] Addressed feedback --- .../AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll index 2cff21c66172d..89fc6c062c29d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-llvm-debuginfo-analyzer.ll @@ -13,7 +13,6 @@ ; CHECK: {Code} 's_endpgm' source_filename = "module" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-p10:32:32-p11:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32" target triple = "amdgcn-amd-amdpal" %dx.types.ResRet.f32 = type { float, float, float, float, i32 } @@ -21,26 +20,26 @@ target triple = "amdgcn-amd-amdpal" define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 - %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 - %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28 - %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28 - #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) - %4 = and i64 %1, -4294967296, !dbg !30 - %5 = zext i32 %userdata4 to i64, !dbg !30 - %6 = or disjoint i64 %4, %5, !dbg !30 - %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30 - call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %7, i32 4), "dereferenceable"(ptr addrspace(4) %7, i32 -1) ], !dbg !30 - %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2 - %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30 + %pc = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 + %offset = shl i32 %WorkgroupId.i0, 6, !dbg !28 + %dtid = add i32 %LocalInvocationId.i0, %offset, !dbg !28 + #dbg_value(i32 %dtid, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) + %pc_hi = and i64 %pc, -4294967296, !dbg !30 + %zext = zext i32 %userdata4 to i64, !dbg !30 + %ptr_val = or disjoint i64 %pc_hi, %zext, !dbg !30 + %ptr = inttoptr i64 %ptr_val to ptr addrspace(4), !dbg !30 + call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %ptr, i32 4), "dereferenceable"(ptr addrspace(4) %ptr, i32 -1) ], !dbg !30 + %uav_0 = load <4 x i32>, ptr addrspace(4) %ptr, align 4, !dbg !30, !invariant.load !2 + %uav_load_1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %uav_0, i32 %dtid, i32 0, i32 0, i32 0), !dbg !30 #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32) - %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33 - #dbg_value(float %10, !34, !DIExpression(), !35) - call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %7, i32 4), "dereferenceable"(ptr addrspace(4) %7, i32 -1) ], !dbg !36 - %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36 - %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36 - %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 - %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2 - call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36 + %mul = fmul reassoc arcp contract afn float %uav_load_1, 2.000000e+00, !dbg !33 + #dbg_value(float %mul, !34, !DIExpression(), !35) + call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) %ptr, i32 4), "dereferenceable"(ptr addrspace(4) %ptr, i32 -1) ], !dbg !36 + %uav_1_ptr = getelementptr i8, ptr addrspace(4) %ptr, i64 32, !dbg !36 + %.upto01 = insertelement <4 x float> poison, float %mul, i64 0, !dbg !36 + %filled_vector = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 + %uav_1 = load <4 x i32>, ptr addrspace(4) %uav_1_ptr, align 4, !dbg !36, !invariant.load !2 + call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %filled_vector, <4 x i32> %uav_1, i32 %dtid, i32 0, i32 0, i32 0), !dbg !36 ret void, !dbg !37 } @@ -52,7 +51,7 @@ declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32 declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 -attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +attributes #0 = { memory(readwrite) } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } From dea0f42bccae023b525163eb57e293a9f8dfd8c4 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Thu, 7 Aug 2025 16:08:13 -0700 Subject: [PATCH 05/11] Removed the assert that the instruction that $noreg is part of has to be a debug inst --- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index f807c567efa2f..38d9a4b82daee 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -130,10 +130,8 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { if (VirtReg.isPhysical()) continue; - if (!VirtReg.isValid()) { - assert(MI.isDebugInstr() && "non-debug use of noreg"); + if (!VirtReg.isValid()) continue; - } if (!VRM->hasPhys(VirtReg)) continue; From af4f40e0905be9d32b754c79d2924a700e637b11 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Fri, 8 Aug 2025 16:09:17 -0700 Subject: [PATCH 06/11] Further reduced test. --- .../si-pre-allocate-wwwmregs-dbg-noreg.mir | 169 +++++------------- 1 file changed, 45 insertions(+), 124 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index 4b5fea863289b..b825d3b42998d 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -2,90 +2,50 @@ # Simple regression test to make sure DBG_VALUE $noreg does not assert in the pass -# CHECK: S_ENDPGM +# CHECK: $vgpr0 = IMPLICIT_DEF +# CHECK: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, $vgpr0 --- | - source_filename = "module" - target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdpal" - %dx.types.ResRet.f32 = type { float, float, float, float, i32 } - - define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 { - %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28 - %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28 - %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28 - %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28 - %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28 - #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28) - %4 = and i64 %1, -4294967296, !dbg !30 - %5 = zext i32 %userdata4 to i64, !dbg !30 - %6 = or disjoint i64 %4, %5, !dbg !30 - %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30, !amdgpu.uniform !2 - %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2 - %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30 - #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32) - %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33 - #dbg_value(float %10, !34, !DIExpression(), !35) - %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36, !amdgpu.uniform !2 - %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36 - %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36 - %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2 - call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36 - ret void, !dbg !37 + + ; Function Attrs: memory(readwrite) + define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !5 { + #dbg_value(i32 poison, !19, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !20) + #dbg_value(%dx.types.ResRet.f32 poison, !21, !DIExpression(), !23) + ret void, !dbg !24 } - - declare noundef i64 @llvm.amdgcn.s.getpc() #1 - declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3 - declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4 - - attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } - attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" } - attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx1030" } - attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx1030" } - attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx1030" } - + + attributes #0 = { memory(readwrite) "amdgpu-prealloc-sgpr-spill-vgprs" } + !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!12, !13} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3) + !llvm.module.flags = !{!3, !4} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2) !1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "") !2 = !{} - !3 = !{!4, !10} - !4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) - !5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true) - !6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7) - !7 = !{!8} - !8 = !DITemplateTypeParameter(name: "element", type: !9) - !9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) - !10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) - !11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true) - !12 = !{i32 2, !"Dwarf Version", i32 5} - !13 = !{i32 2, !"Debug Info Version", i32 3} - !14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) - !15 = !DISubroutineType(types: !16) - !16 = !{null, !17} - !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18) - !18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 96, align: 32, elements: !19, templateParams: !24) - !19 = !{!20, !22, !23} - !20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic) - !21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) - !22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic) - !23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic) - !24 = !{!25, !26} - !25 = !DITemplateTypeParameter(name: "element", type: !21) - !26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3) - !27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !28 = !DILocation(line: 7, column: 17, scope: !14) - !29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17) - !30 = !DILocation(line: 11, column: 18, scope: !14) - !31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9) - !32 = !DILocation(line: 11, column: 9, scope: !14) - !33 = !DILocation(line: 14, column: 26, scope: !14) - !34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9) - !35 = !DILocation(line: 14, column: 9, scope: !14) - !36 = !DILocation(line: 17, column: 14, scope: !14) - !37 = !DILocation(line: 19, column: 1, scope: !14) + !3 = !{i32 2, !"Dwarf Version", i32 5} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !6, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) + !6 = !DISubroutineType(types: !7) + !7 = !{null, !8} + !8 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !9) + !9 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 96, align: 32, elements: !10, templateParams: !15) + !10 = !{!11, !13, !14} + !11 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !9, file: !1, baseType: !12, size: 32, align: 32, flags: DIFlagPublic) + !12 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) + !13 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !9, file: !1, baseType: !12, size: 32, align: 32, offset: 32, flags: DIFlagPublic) + !14 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !9, file: !1, baseType: !12, size: 32, align: 32, offset: 64, flags: DIFlagPublic) + !15 = !{!16, !17} + !16 = !DITemplateTypeParameter(name: "element", type: !12) + !17 = !DITemplateValueParameter(name: "element_count", type: !18, value: i32 3) + !18 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !19 = !DILocalVariable(name: "dtid", arg: 1, scope: !5, file: !1, line: 7, type: !8) + !20 = !DILocation(line: 7, column: 17, scope: !5) + !21 = !DILocalVariable(name: "my_var", scope: !5, file: !1, line: 11, type: !22) + !22 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) + !23 = !DILocation(line: 11, column: 9, scope: !5) + !24 = !DILocation(line: 19, column: 1, scope: !5) ... --- name: _amdgpu_cs_main @@ -110,6 +70,7 @@ isOutlined: false debugInstrRef: false failsVerification: false tracksDebugUserValues: false +liveins: [] fixedStack: [] stack: [] entry_values: [] @@ -157,54 +118,14 @@ machineFunctionInfo: body: | bb.0 (%ir-block.0): liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - - %8:vgpr_32 = COPY killed $vgpr2 - %7:vgpr_32 = COPY killed $vgpr1 - %6:vgpr_32 = COPY killed $vgpr0 - renamable $sgpr0 = COPY killed $sgpr4 - %39:vgpr_32 = IMPLICIT_DEF - %39:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %39 + + %0:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 0, %2 renamable $sgpr3 = COPY killed $sgpr2 - renamable $sgpr2 = COPY $sgpr1 - $sgpr1 = SI_RESTORE_S32_FROM_VGPR %39, 0 - dead renamable $sgpr4 = IMPLICIT_DEF - dead renamable $sgpr4 = IMPLICIT_DEF - dead renamable $sgpr4 = IMPLICIT_DEF - undef %38.sub0:vreg_96 = COPY %6 - %38.sub1:vreg_96 = COPY %7 - dead %38.sub2:vreg_96 = COPY %8 - undef renamable $sgpr4 = COPY renamable $sgpr3, implicit-def $sgpr4_sgpr5_sgpr6 - renamable $sgpr5 = COPY killed renamable $sgpr1 - renamable $sgpr6 = COPY killed renamable $sgpr0 - dead renamable $sgpr8_sgpr9_sgpr10 = IMPLICIT_DEF - renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo debug-location !28 renamable $sgpr4 = S_MOV_B32 6 - %16:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %6, implicit $exec, debug-location !28 - DBG_VALUE %16, $noreg, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !28 - renamable $sgpr3 = S_MOV_B32 -1 - renamable $sgpr4 = S_MOV_B32 0 - undef renamable $sgpr6 = COPY renamable $sgpr4, implicit-def $sgpr6_sgpr7 - renamable $sgpr7 = COPY killed renamable $sgpr3 - renamable $sgpr0_sgpr1 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr6_sgpr7, implicit-def dead $scc, debug-location !30 - renamable $sgpr5 = S_MOV_B32 0, debug-location !30 - undef renamable $sgpr2 = COPY killed renamable $sgpr2, implicit-def $sgpr2_sgpr3, debug-location !30 - renamable $sgpr3 = COPY killed renamable $sgpr5, debug-location !30 - renamable $sgpr0_sgpr1 = disjoint S_OR_B64 killed renamable $sgpr0_sgpr1, killed renamable $sgpr2_sgpr3, implicit-def dead $scc, debug-location !30 - renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, debug-location !30 :: (invariant load (s128) from %ir.7, align 4, addrspace 4) - renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 32, 0, debug-location !36 :: (invariant load (s128) from %ir.11, align 4, addrspace 4) - %26:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %16, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !30 :: (dereferenceable load (s32), align 1, addrspace 8) - DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32 - %27:vgpr_32 = arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %26, 0, %26, 0, 0, implicit $mode, implicit $exec, debug-location !33 - DBG_VALUE %27, $noreg, !34, !DIExpression(), debug-location !35 - dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 - dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 - dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 - dead renamable $sgpr5 = IMPLICIT_DEF debug-location !36 - undef %37.sub0:vreg_128 = COPY %27, debug-location !36 - %37.sub1:vreg_128 = COPY %27, debug-location !36 - %37.sub2:vreg_128 = COPY %27, debug-location !36 - %37.sub3:vreg_128 = COPY %27, debug-location !36 - %29:vreg_128 = COPY %37, debug-location !36 - BUFFER_STORE_FORMAT_XYZW_IDXEN_exact %29, %16, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec, debug-location !36 :: (dereferenceable store (s128), align 1, addrspace 8) - S_ENDPGM 0, debug-location !37 + %3:vgpr_32 = V_LSHL_ADD_U32_e64 killed $sgpr3, killed $sgpr4, %0, implicit $exec + DBG_VALUE %3, $noreg, !19, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !20 + DBG_VALUE $noreg, $noreg, !21, !DIExpression(), debug-location !23 + S_ENDPGM 0 ... From bce027af9174ca7ea6c2b436e170418423df26c3 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 12 Aug 2025 14:28:19 -0700 Subject: [PATCH 07/11] Update llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir Co-authored-by: Matt Arsenault --- .../CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index b825d3b42998d..b65ae0d37276f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -49,12 +49,6 @@ ... --- name: _amdgpu_cs_main -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true hasWinCFI: false noPhis: true From bdcbdc2fd3705b4bd41f3b516ceee4cc5d10b280 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 12 Aug 2025 14:28:32 -0700 Subject: [PATCH 08/11] Update llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir Co-authored-by: Matt Arsenault --- llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index b65ae0d37276f..decc604acb755 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -9,7 +9,6 @@ target triple = "amdgcn-amd-amdpal" %dx.types.ResRet.f32 = type { float, float, float, float, i32 } - ; Function Attrs: memory(readwrite) define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !5 { #dbg_value(i32 poison, !19, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !20) #dbg_value(%dx.types.ResRet.f32 poison, !21, !DIExpression(), !23) From 70dd9534070abd02a9c90903147ee189202636b3 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 12 Aug 2025 14:29:24 -0700 Subject: [PATCH 09/11] Update llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir Co-authored-by: Matt Arsenault --- .../si-pre-allocate-wwwmregs-dbg-noreg.mir | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index decc604acb755..6f30b6e835bb0 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -49,27 +49,6 @@ --- name: _amdgpu_cs_main tracksRegLiveness: true -hasWinCFI: false -noPhis: true -isSSA: false -noVRegs: false -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -liveins: [] -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] machineFunctionInfo: explicitKernArgSize: 0 maxKernArgAlign: 4 From 9e014b81d36842232b5499d9b79de1f7cb9d3050 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 12 Aug 2025 14:32:33 -0700 Subject: [PATCH 10/11] Update llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir Co-authored-by: Matt Arsenault --- .../si-pre-allocate-wwwmregs-dbg-noreg.mir | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index 6f30b6e835bb0..ab3dc841db9be 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -50,36 +50,9 @@ name: _amdgpu_cs_main tracksRegLiveness: true machineFunctionInfo: - explicitKernArgSize: 0 - maxKernArgAlign: 4 - ldsSize: 0 - gdsSize: 0 - dynLDSAlign: 1 isEntryFunction: true - isChainFunction: false - noSignedZerosFPMath: false - memoryBound: false - waveLimiter: false hasSpilledSGPRs: true - hasSpilledVGPRs: false - scratchRSrcReg: '$private_rsrc_reg' - frameOffsetReg: '$fp_reg' stackPtrOffsetReg: '$sgpr32' - bytesInStackArgArea: 0 - returnsVoid: true - argumentInfo: - privateSegmentWaveByteOffset: { reg: '$sgpr6' } - psInputAddr: 0 - psInputEnable: 0 - maxMemoryClusterDWords: 8 - mode: - ieee: false - dx10-clamp: true - fp32-input-denormals: false - fp32-output-denormals: false - fp64-fp16-input-denormals: true - fp64-fp16-output-denormals: true - highBitsOf32BitAddress: 0 occupancy: 16 vgprForAGPRCopy: '' sgprForEXECCopy: '$sgpr12_sgpr13' From f54cb3d28e5779214a22a2e5b9adbcf93d83caa4 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 12 Aug 2025 14:32:54 -0700 Subject: [PATCH 11/11] Update llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir Co-authored-by: Matt Arsenault --- .../CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir index ab3dc841db9be..cb515f880d0a0 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwwmregs-dbg-noreg.mir @@ -54,12 +54,7 @@ machineFunctionInfo: hasSpilledSGPRs: true stackPtrOffsetReg: '$sgpr32' occupancy: 16 - vgprForAGPRCopy: '' sgprForEXECCopy: '$sgpr12_sgpr13' - longBranchReservedReg: '' - hasInitWholeWave: false - dynamicVGPRBlockSize: 0 - scratchReservedForDynamicVGPRs: 0 body: | bb.0 (%ir-block.0): liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2