@@ -268,6 +268,20 @@ define <vscale x 2 x bfloat> @ld1_nxv2bf16(ptr %addr, i64 %off) {
268
268
ret <vscale x 2 x bfloat> %val
269
269
}
270
270
271
+ ; Ensure we don't lose the free shift when using indexed addressing: the index is %off >> 6, and the checks expect that lshr to stay a separate lsr while the bfloat element scaling is folded into the load as "lsl #1".
272
+ define <vscale x 2 x bfloat> @ld1_nxv2bf16_double_shift (ptr %addr , i64 %off ) {
273
+ ; CHECK-LABEL: ld1_nxv2bf16_double_shift:
274
+ ; CHECK: // %bb.0:
275
+ ; CHECK-NEXT: ptrue p0.d
276
+ ; CHECK-NEXT: lsr x8, x1, #6
277
+ ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
278
+ ; CHECK-NEXT: ret
279
+ %off2 = lshr i64 %off , 6
280
+ %ptr = getelementptr inbounds bfloat, ptr %addr , i64 %off2
281
+ %val = load volatile <vscale x 2 x bfloat>, ptr %ptr
282
+ ret <vscale x 2 x bfloat> %val
283
+ }
284
+
271
285
; LD1W
272
286
273
287
define <vscale x 4 x i32 > @ld1_nxv4i32 (ptr %addr , i64 %off ) {
@@ -327,6 +341,20 @@ define <vscale x 2 x float> @ld1_nxv2f32(ptr %addr, i64 %off) {
327
341
ret <vscale x 2 x float > %val
328
342
}
329
343
344
+ ; Ensure we don't lose the free shift when using indexed addressing: the index is %off >> 6, and the checks expect that lshr to stay a separate lsr while the float element scaling is folded into the load as "lsl #2".
345
+ define <vscale x 2 x float > @ld1_nxv2f32_double_shift (ptr %addr , i64 %off ) {
346
+ ; CHECK-LABEL: ld1_nxv2f32_double_shift:
347
+ ; CHECK: // %bb.0:
348
+ ; CHECK-NEXT: ptrue p0.d
349
+ ; CHECK-NEXT: lsr x8, x1, #6
350
+ ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
351
+ ; CHECK-NEXT: ret
352
+ %off2 = lshr i64 %off , 6
353
+ %ptr = getelementptr inbounds float , ptr %addr , i64 %off2
354
+ %val = load volatile <vscale x 2 x float >, ptr %ptr
355
+ ret <vscale x 2 x float > %val
356
+ }
357
+
330
358
; LD1D
331
359
332
360
define <vscale x 2 x i64 > @ld1_nxv2i64 (ptr %addr , i64 %off ) {
@@ -350,3 +378,17 @@ define <vscale x 2 x double> @ld1_nxv2f64(ptr %addr, i64 %off) {
350
378
%val = load volatile <vscale x 2 x double >, ptr %ptr
351
379
ret <vscale x 2 x double > %val
352
380
}
381
+
382
+ ; Ensure we don't lose the free shift when using indexed addressing: the index is %off >> 6, and the checks expect that lshr to stay a separate lsr while the double element scaling is folded into the load as "lsl #3".
383
+ define <vscale x 2 x double > @ld1_nxv2f64_double_shift (ptr %addr , i64 %off ) {
384
+ ; CHECK-LABEL: ld1_nxv2f64_double_shift:
385
+ ; CHECK: // %bb.0:
386
+ ; CHECK-NEXT: ptrue p0.d
387
+ ; CHECK-NEXT: lsr x8, x1, #6
388
+ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
389
+ ; CHECK-NEXT: ret
390
+ %off2 = lshr i64 %off , 6
391
+ %ptr = getelementptr inbounds double , ptr %addr , i64 %off2
392
+ %val = load volatile <vscale x 2 x double >, ptr %ptr
393
+ ret <vscale x 2 x double > %val
394
+ }
0 commit comments