|
7 | 7 | ; (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates |
8 | 8 |
|
9 | 9 | ; RUN: llc -O2 -mtriple=aie2 \ |
10 | | -; RUN: --aie-reg-rewrite-mode=latencyaware \ |
11 | 10 | ; RUN: %s -o - | FileCheck %s |
12 | 11 |
|
13 | 12 | ; The test is meant as a quick way to spot QoR regressions. In this test, the |
@@ -112,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non |
112 | 111 | ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
113 | 112 | ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv |
114 | 113 | ; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2 |
115 | | -; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1 |
116 | | -; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1 |
| 114 | +; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1 |
| 115 | +; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1 |
117 | 116 | ; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1 |
118 | | -; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1 |
119 | | -; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0 |
120 | | -; CHECK-NEXT: vmul.f bmh3, x6, x9, r1 |
121 | | -; CHECK-NEXT: vmul.f bmh7, x0, x7, r1 |
122 | | -; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0 |
| 117 | +; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1 |
| 118 | +; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0 |
| 119 | +; CHECK-NEXT: vmul.f bmh7, x6, x9, r1 |
| 120 | +; CHECK-NEXT: vmul.f bmh6, x0, x7, r1 |
| 121 | +; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0 |
123 | 122 | ; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1 |
124 | | -; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1 |
125 | | -; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1 |
126 | | -; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3 |
| 123 | +; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1 |
| 124 | +; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1 |
127 | 125 | ; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7 |
128 | | -; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1 |
| 126 | +; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6 |
| 127 | +; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1 |
129 | 128 | ; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1 |
130 | | -; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8 |
| 129 | +; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8 |
131 | 130 | ; CHECK-NEXT: .L_LEnd0: |
132 | 131 | ; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv |
133 | 132 | ; CHECK-NEXT: // %bb.2: |
|
0 commit comments