Commit 94aa08a

[LLVM][CodeGen][SVE] Don't combine shifts at the expense of addressing modes. (#149873)
Fixes #149654
1 parent d52675e · commit 94aa08a

3 files changed: +65 -5 lines changed


llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 5 deletions
@@ -18022,11 +18022,14 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     unsigned ShlAmt = C2->getZExtValue();
     if (auto ShouldADD = *N->user_begin();
         ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
-      if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
-        unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
-        if ((1ULL << ShlAmt) == ByteVT &&
-            isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
-          return false;
+      if (auto Load = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
+        EVT MemVT = Load->getMemoryVT();
+
+        if (Load->getValueType(0).isScalableVector())
+          return (8ULL << ShlAmt) != MemVT.getScalarSizeInBits();
+
+        if (isIndexedLoadLegal(ISD::PRE_INC, MemVT))
+          return (8ULL << ShlAmt) != MemVT.getFixedSizeInBits();
       }
     }
   }
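
In effect, the hook now refuses to turn the srl/shl pair into a mask whenever the left-shift amount matches the scale of the load that consumes the address: the element size for scalable-vector loads, or the full access size for fixed-length loads with a legal pre-indexed form. The sketch below (a standalone illustrative helper, not LLVM API) plugs the commit's two test shapes into that decision:

#include <cassert>
#include <cstdint>

// Mirror of the new decision in shouldFoldConstantShiftPairToMask, written as
// a standalone helper for illustration only. ShlAmt is the left shift the GEP
// scaling introduces; BitSize is the load's element size (scalable vectors)
// or total access size (fixed-length vectors with legal indexed loads).
static bool foldShiftPairToMask(uint64_t ShlAmt, uint64_t BitSize) {
  // Keep the shift pair (return false) exactly when shifting by ShlAmt is the
  // same scaling the addressing mode provides: 2^ShlAmt bytes == BitSize bits.
  return (8ULL << ShlAmt) != BitSize;
}

int main() {
  // ld1_nxv2bf16_double_shift below: bfloat GEP => ShlAmt = 1, nxv2bf16
  // element = 16 bits, so the fold is rejected and codegen keeps
  // "lsr x8, x1, #6" + "ld1h { z0.d }, p0/z, [x0, x8, lsl #1]".
  assert(!foldShiftPairToMask(1, 16));

  // loadv2i64_shr1 below: <2 x i64> GEP => ShlAmt = 4, 128-bit access, so the
  // fold is rejected and codegen keeps "lsr" + "ldr q0, [x2, x8, lsl #4]".
  assert(!foldShiftPairToMask(4, 128));
  return 0;
}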

llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll

Lines changed: 15 additions & 0 deletions
@@ -136,3 +136,18 @@ entry:
   %0 = load i64, ptr %arrayidx, align 8
   ret i64 %0
 }
+
+define <2 x i64> @loadv2i64_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: loadv2i64_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr q0, [x2, x8, lsl #4]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds <2 x i64>, ptr %table, i64 %shr
+  %0 = load <2 x i64>, ptr %arrayidx, align 16
+  ret <2 x i64> %0
+}

llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll

Lines changed: 42 additions & 0 deletions
@@ -268,6 +268,20 @@ define <vscale x 2 x bfloat> @ld1_nxv2bf16(ptr %addr, i64 %off) {
   ret <vscale x 2 x bfloat> %val
 }
 
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x bfloat> @ld1_nxv2bf16_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2bf16_double_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsr x8, x1, #6
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT:    ret
+  %off2 = lshr i64 %off, 6
+  %ptr = getelementptr inbounds bfloat, ptr %addr, i64 %off2
+  %val = load volatile <vscale x 2 x bfloat>, ptr %ptr
+  ret <vscale x 2 x bfloat> %val
+}
+
 ; LD1W
 
 define <vscale x 4 x i32> @ld1_nxv4i32(ptr %addr, i64 %off) {
@@ -327,6 +341,20 @@ define <vscale x 2 x float> @ld1_nxv2f32(ptr %addr, i64 %off) {
   ret <vscale x 2 x float> %val
 }
 
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x float> @ld1_nxv2f32_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2f32_double_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsr x8, x1, #6
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT:    ret
+  %off2 = lshr i64 %off, 6
+  %ptr = getelementptr inbounds float, ptr %addr, i64 %off2
+  %val = load volatile <vscale x 2 x float>, ptr %ptr
+  ret <vscale x 2 x float> %val
+}
+
 ; LD1D
 
 define <vscale x 2 x i64> @ld1_nxv2i64(ptr %addr, i64 %off) {
@@ -350,3 +378,17 @@ define <vscale x 2 x double> @ld1_nxv2f64(ptr %addr, i64 %off) {
   %val = load volatile <vscale x 2 x double>, ptr %ptr
   ret <vscale x 2 x double> %val
 }
+
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x double> @ld1_nxv2f64_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2f64_double_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsr x8, x1, #6
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
+; CHECK-NEXT:    ret
+  %off2 = lshr i64 %off, 6
+  %ptr = getelementptr inbounds double, ptr %addr, i64 %off2
+  %val = load volatile <vscale x 2 x double>, ptr %ptr
+  ret <vscale x 2 x double> %val
+}
