|
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
3 | 3 |
|
4 | 4 | ; TODO: Add a global-isel RUN line once global-isel supports bf16.
|
| | +; Calls llvm.sqrt.bf16 on a bfloat argument without inreg and stores the result to %out; the checks expect a single v_sqrt_bf16_e32 reading v2. |
| 5 | +define amdgpu_ps void @llvm_sqrt_bf16_v(ptr addrspace(1) %out, bfloat %src) { |
| 6 | +; GCN-LABEL: llvm_sqrt_bf16_v: |
| 7 | +; GCN: ; %bb.0: |
| 8 | +; GCN-NEXT: v_sqrt_bf16_e32 v2, v2 |
| 9 | +; GCN-NEXT: global_store_b16 v[0:1], v2, off |
| 10 | +; GCN-NEXT: s_endpgm |
| 11 | + %sqrt = call bfloat @llvm.sqrt.bf16(bfloat %src) |
| 12 | + store bfloat %sqrt, ptr addrspace(1) %out, align 2 |
| 13 | + ret void |
| 14 | +} |
| 15 | + |
| | +; Calls llvm.sqrt.bf16 on a bfloat argument marked inreg and stores the result to %out; the checks expect v_sqrt_bf16_e32 to take s0 as its source operand. |
| 16 | +define amdgpu_ps void @llvm_sqrt_bf16_s(ptr addrspace(1) %out, bfloat inreg %src) { |
| 17 | +; GCN-LABEL: llvm_sqrt_bf16_s: |
| 18 | +; GCN: ; %bb.0: |
| 19 | +; GCN-NEXT: v_sqrt_bf16_e32 v2, s0 |
| 20 | +; GCN-NEXT: global_store_b16 v[0:1], v2, off |
| 21 | +; GCN-NEXT: s_endpgm |
| 22 | + %sqrt = call bfloat @llvm.sqrt.bf16(bfloat %src) |
| 23 | + store bfloat %sqrt, ptr addrspace(1) %out, align 2 |
| 24 | + ret void |
| 25 | +} |
5 | 26 |
|
6 | 27 | define amdgpu_ps void @llvm_log2_bf16_v(ptr addrspace(1) %out, bfloat %src) {
|
7 | 28 | ; GCN-LABEL: llvm_log2_bf16_v:
|
@@ -47,5 +68,6 @@ define amdgpu_ps void @llvm_exp2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
|
47 | 68 | ret void
|
48 | 69 | }
|
49 | 70 |
|
| 71 | +declare bfloat @llvm.sqrt.bf16(bfloat) |
50 | 72 | declare bfloat @llvm.log2.bf16(bfloat)
|
51 | 73 | declare bfloat @llvm.exp2.bf16(bfloat)
|
0 commit comments