Skip to content

Commit 664f9ef

Browse files
author
Martien de Jong
committed
[AIE][WAWRewriter] Auto select latency/swpaware and swpaware bias
1 parent a5f9660 commit 664f9ef

File tree

1 file changed: +12 / -13 lines changed

1 file changed

+12
-13
lines changed

llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
; (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
88

99
; RUN: llc -O2 -mtriple=aie2 \
10-
; RUN: --aie-reg-rewrite-mode=latencyaware \
1110
; RUN: %s -o - | FileCheck %s
1211

1312
; The test is meant as a quick way to spot QoR regressions. In this test, the
@@ -112,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
112111
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
113112
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
114113
; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
115-
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
116-
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1
114+
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1
115+
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1
117116
; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
118-
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1
119-
; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0
120-
; CHECK-NEXT: vmul.f bmh3, x6, x9, r1
121-
; CHECK-NEXT: vmul.f bmh7, x0, x7, r1
122-
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0
117+
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
118+
; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0
119+
; CHECK-NEXT: vmul.f bmh7, x6, x9, r1
120+
; CHECK-NEXT: vmul.f bmh6, x0, x7, r1
121+
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0
123122
; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
124-
; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
125-
; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
126-
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
123+
; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
124+
; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
127125
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
128-
; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
126+
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
127+
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
129128
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
130-
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
129+
; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
131130
; CHECK-NEXT: .L_LEnd0:
132131
; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
133132
; CHECK-NEXT: // %bb.2:

0 commit comments

Comments
 (0)