Skip to content

Commit 4ab1468

Browse files
authored
[AMDGPU] Narrow only on store to pow of 2 mem location (#150093)
Lowering in GlobalISel for AMDGPU previously always narrows to i32 on truncating store regardless of mem size or scalar size, causing issues with types like i65 which is first extended to i128 then stored as i64 + i8 to i128 locations. Narrowing only on store to pow of 2 mem location ensures only narrowing to mem size near end of legalization. This LLVM defect was identified via the AMD Fuzzing project.
1 parent 7c53c61 commit 4ab1468

File tree

4 files changed

+299
-47
lines changed

4 files changed

+299
-47
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/ADT/ScopeExit.h"
2727
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2828
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
29+
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
2930
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
3031
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
3132
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -137,6 +138,14 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
137138
};
138139
}
139140

141+
// Retrieves the scalar type that's the same size as the mem desc
142+
static LegalizeMutation getScalarTypeFromMemDesc(unsigned TypeIdx) {
143+
return [=](const LegalityQuery &Query) {
144+
unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
145+
return std::make_pair(TypeIdx, LLT::scalar(MemSize));
146+
};
147+
}
148+
140149
// Increase the number of vector elements to reach the next legal RegClass.
141150
static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
142151
return [=](const LegalityQuery &Query) {
@@ -384,6 +393,16 @@ static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
384393
};
385394
}
386395

396+
// If we have a truncating store or an extending load with a data size larger
397+
// than 32-bits and mem location is a power of 2
398+
static LegalityPredicate isTruncStoreToSizePowerOf2(unsigned TypeIdx) {
399+
return [=](const LegalityQuery &Query) {
400+
unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
401+
return isWideScalarExtLoadTruncStore(TypeIdx)(Query) &&
402+
isPowerOf2_64(MemSize);
403+
};
404+
}
405+
387406
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
388407
// handle some operations by just promoting the register during
389408
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
@@ -1635,11 +1654,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
16351654
// May need relegalization for the scalars.
16361655
return std::pair(0, EltTy);
16371656
})
1638-
.minScalar(0, S32)
1639-
.narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
1640-
.widenScalarToNextPow2(0)
1641-
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
1642-
.lower();
1657+
.minScalar(0, S32)
1658+
.narrowScalarIf(isTruncStoreToSizePowerOf2(0),
1659+
getScalarTypeFromMemDesc(0))
1660+
.widenScalarToNextPow2(0)
1661+
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
1662+
.lower();
16431663
}
16441664

16451665
// FIXME: Unaligned accesses not lowered.

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -886,33 +886,34 @@ body: |
886886
; SI-NEXT: {{ $}}
887887
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
888888
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
889-
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
889+
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
890890
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
891-
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
891+
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
892892
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
893893
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
894-
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
894+
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
895+
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
895896
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
896-
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
897+
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
897898
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
898899
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
899900
; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
900901
; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
901-
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
902+
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
902903
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32)
903904
; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
904905
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64)
905-
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
906+
; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
906907
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
907-
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
908-
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32)
908+
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
909+
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32)
909910
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C6]](s64)
910911
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
911912
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
912913
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
913-
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
914+
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
914915
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]]
915-
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32)
916+
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
916917
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C6]](s64)
917918
; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
918919
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
@@ -922,11 +923,12 @@ body: |
922923
; CI-NEXT: {{ $}}
923924
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
924925
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
925-
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
926+
; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
926927
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
927-
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
928+
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
928929
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
929930
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
931+
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
930932
; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
931933
; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
932934
; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
@@ -936,22 +938,23 @@ body: |
936938
; VI-NEXT: {{ $}}
937939
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
938940
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
939-
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
941+
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
940942
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
941-
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
943+
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
942944
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
943945
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
944-
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
946+
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
947+
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
945948
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
946-
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
949+
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
947950
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
948951
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
949-
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
952+
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64)
950953
; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
951954
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16)
952955
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
953956
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
954-
; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
957+
; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
955958
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
956959
; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
957960
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
@@ -960,11 +963,11 @@ body: |
960963
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
961964
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
962965
; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
963-
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
964-
; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
965-
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16)
966+
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
967+
; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
968+
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C4]](s16)
966969
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C5]](s64)
967-
; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
970+
; VI-NEXT: G_STORE [[TRUNC4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
968971
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
969972
; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
970973
;
@@ -973,11 +976,12 @@ body: |
973976
; GFX9-NEXT: {{ $}}
974977
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
975978
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
976-
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
979+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
977980
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
978-
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
981+
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
979982
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
980983
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
984+
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
981985
; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
982986
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
983987
; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
@@ -998,17 +1002,18 @@ body: |
9981002
; SI-NEXT: {{ $}}
9991003
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
10001004
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
1001-
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
1005+
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
10021006
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
1003-
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
1007+
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
10041008
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
10051009
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
1006-
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
1010+
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
1011+
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
10071012
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1008-
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
1013+
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
10091014
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
10101015
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
1011-
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
1016+
; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
10121017
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
10131018
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
10141019
; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1018,11 +1023,12 @@ body: |
10181023
; CI-NEXT: {{ $}}
10191024
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
10201025
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
1021-
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
1026+
; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
10221027
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
1023-
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
1028+
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
10241029
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
10251030
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
1031+
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
10261032
; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
10271033
; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
10281034
; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1032,17 +1038,18 @@ body: |
10321038
; VI-NEXT: {{ $}}
10331039
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
10341040
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
1035-
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
1041+
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
10361042
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
1037-
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
1043+
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
10381044
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
10391045
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
1040-
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
1046+
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
1047+
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
10411048
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1042-
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
1049+
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
10431050
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
10441051
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
1045-
; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
1052+
; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
10461053
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
10471054
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
10481055
; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1052,11 +1059,12 @@ body: |
10521059
; GFX9-NEXT: {{ $}}
10531060
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
10541061
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
1055-
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
1062+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
10561063
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
1057-
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
1064+
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
10581065
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
10591066
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
1067+
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
10601068
; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
10611069
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
10621070
; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,13 @@ body: |
285285
; VI-NEXT: {{ $}}
286286
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
287287
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
288-
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
289-
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
288+
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
289+
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
290290
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
291-
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C]](s16)
291+
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
292292
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
293293
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
294-
; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
294+
; VI-NEXT: G_STORE [[TRUNC1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
295295
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
296296
; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
297297
%0:_(p1) = COPY $vgpr0_vgpr1

0 commit comments

Comments
 (0)