Skip to content

Commit c883f82

Browse files
authored
[PowerPC][CLANG] DMF VSX Vector float GER 2x (rank-2 update) (#147383)
Add clang builtins for DMF VSX Vector floats:

```
void __builtin_mma_dmxvf16gerx2 (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvf16gerx2nn (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvf16gerx2np (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvf16gerx2pn (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvf16gerx2pp (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_pmdmxvf16gerx2 (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvf16gerx2nn (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvf16gerx2np (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvf16gerx2pn (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvf16gerx2pp (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_dmxvbf16gerx2 (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvbf16gerx2nn (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvbf16gerx2np (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvbf16gerx2pn (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_dmxvbf16gerx2pp (__dmr1024 *, __vector_pair, vec_t);
void __builtin_mma_pmdmxvbf16gerx2 (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvbf16gerx2nn (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvbf16gerx2np (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvbf16gerx2pn (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
void __builtin_mma_pmdmxvbf16gerx2pp (__dmr1024 *, __vector_pair, vec_t, uint8, uint4, uint2);
```
1 parent c4181e5 commit c883f82

File tree

3 files changed

+380
-10
lines changed

3 files changed

+380
-10
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,14 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvbf16ger2, "vW512*VV",
11221122
"mma,paired-vector-memops")
11231123
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3",
11241124
"mma,paired-vector-memops")
1125+
// DMF VSX vector float GER 2x (rank-2 update) builtins, gated on the
// "mma,isa-future-instructions" features. The plain forms use the signature
// string "vW1024*W256V"; the pm* forms append "i255i15i3".
// NOTE(review): presumably W1024*/W256/V correspond to the __dmr1024 *,
// __vector_pair and vector operands, and i255/i15/i3 to the 8-, 4- and 2-bit
// immediates listed in the commit description -- confirm against the
// signature encoding documented for this .def file.
// NOTE(review): only the four base names are registered here; the
// nn/np/pn/pp variants are presumably produced by the
// UNALIASED_CUSTOM_MMA_BUILTIN macro itself -- confirm in its definition.
UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvbf16gerx2, "vW1024*W256V",
1126+
"mma,isa-future-instructions")
1127+
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvbf16gerx2, "vW1024*W256Vi255i15i3",
1128+
"mma,isa-future-instructions")
1129+
UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V",
1130+
"mma,isa-future-instructions")
1131+
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
1132+
"mma,isa-future-instructions")
11251133

11261134
// FIXME: Obviously incomplete.
11271135

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// The autogenerated checks were then manually updated to commonize them for AIX and LoP.
3+
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
4+
// RUN: -emit-llvm %s -o - | FileCheck %s
5+
// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \
6+
// RUN: -emit-llvm %s -o - | FileCheck %s
7+
8+
// CHECK-LABEL: void @test_dmxvbf16gerx2(
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
11+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
12+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
13+
// CHECK-NEXT: ret void
14+
//
15+
// Codegen test for __builtin_mma_dmxvbf16gerx2: calls the builtin with &vdmr,
// the __vector_pair read from vpp, and vc, then stores vdmr to resp.
// The expected IR for this function contains no load from vdmrp; only the
// pair and vc are passed to the intrinsic.
void test_dmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
16+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
17+
__vector_pair vp = *((__vector_pair *)vpp);
18+
__builtin_mma_dmxvbf16gerx2(&vdmr, vp, vc);
19+
*((__dmr1024 *)resp) = vdmr;
20+
}
21+
22+
// CHECK-LABEL: void @test_dmxvbf16gerx2nn(
23+
// CHECK-NEXT: [[ENTRY:.*:]]
24+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
25+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
26+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
27+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
28+
// CHECK-NEXT: ret void
29+
//
30+
// Codegen test for __builtin_mma_dmxvbf16gerx2nn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
31+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
32+
__vector_pair vp = *((__vector_pair *)vpp);
33+
__builtin_mma_dmxvbf16gerx2nn(&vdmr, vp, vc);
34+
*((__dmr1024 *)resp) = vdmr;
35+
}
36+
37+
// CHECK-LABEL: void @test_dmxvbf16gerx2np(
38+
// CHECK-NEXT: [[ENTRY:.*:]]
39+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
40+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
41+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
42+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
43+
// CHECK-NEXT: ret void
44+
//
45+
// Codegen test for __builtin_mma_dmxvbf16gerx2np: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
46+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
47+
__vector_pair vp = *((__vector_pair *)vpp);
48+
__builtin_mma_dmxvbf16gerx2np(&vdmr, vp, vc);
49+
*((__dmr1024 *)resp) = vdmr;
50+
}
51+
52+
// CHECK-LABEL: void @test_dmxvbf16gerx2pn(
53+
// CHECK-NEXT: [[ENTRY:.*:]]
54+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
55+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
56+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
57+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
58+
// CHECK-NEXT: ret void
59+
//
60+
// Codegen test for __builtin_mma_dmxvbf16gerx2pn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
61+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
62+
__vector_pair vp = *((__vector_pair *)vpp);
63+
__builtin_mma_dmxvbf16gerx2pn(&vdmr, vp, vc);
64+
*((__dmr1024 *)resp) = vdmr;
65+
}
66+
67+
// CHECK-LABEL: void @test_dmxvbf16gerx2pp(
68+
// CHECK-NEXT: [[ENTRY:.*:]]
69+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
70+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
71+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
72+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
73+
// CHECK-NEXT: ret void
74+
//
75+
// Codegen test for __builtin_mma_dmxvbf16gerx2pp: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
76+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
77+
__vector_pair vp = *((__vector_pair *)vpp);
78+
__builtin_mma_dmxvbf16gerx2pp(&vdmr, vp, vc);
79+
*((__dmr1024 *)resp) = vdmr;
80+
}
81+
82+
// CHECK-LABEL: void @test_pmdmxvbf16gerx2(
83+
// CHECK-NEXT: [[ENTRY:.*:]]
84+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
85+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
86+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
87+
// CHECK-NEXT: ret void
88+
//
89+
// Codegen test for __builtin_mma_pmdmxvbf16gerx2: calls the builtin with
// &vdmr, the __vector_pair read from vpp, vc, and three trailing constant
// arguments (all 0 here), then stores vdmr to resp.
// The expected IR for this function contains no load from vdmrp; the
// intrinsic receives the pair, vc and the three i32 0 operands.
void test_pmdmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
90+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
91+
__vector_pair vp = *((__vector_pair *)vpp);
92+
__builtin_mma_pmdmxvbf16gerx2(&vdmr, vp, vc, 0, 0, 0);
93+
*((__dmr1024 *)resp) = vdmr;
94+
}
95+
96+
// CHECK-LABEL: void @test_pmdmxvbf16gerx2nn(
97+
// CHECK-NEXT: [[ENTRY:.*:]]
98+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
99+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
100+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
101+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
102+
// CHECK-NEXT: ret void
103+
//
104+
// Codegen test for __builtin_mma_pmdmxvbf16gerx2nn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
105+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
106+
__vector_pair vp = *((__vector_pair *)vpp);
107+
__builtin_mma_pmdmxvbf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
108+
*((__dmr1024 *)resp) = vdmr;
109+
}
110+
111+
// CHECK-LABEL: void @test_pmdmxvbf16gerx2np(
112+
// CHECK-NEXT: [[ENTRY:.*:]]
113+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
114+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
115+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
116+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
117+
// CHECK-NEXT: ret void
118+
//
119+
// Codegen test for __builtin_mma_pmdmxvbf16gerx2np: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
120+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
121+
__vector_pair vp = *((__vector_pair *)vpp);
122+
__builtin_mma_pmdmxvbf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
123+
*((__dmr1024 *)resp) = vdmr;
124+
}
125+
126+
// CHECK-LABEL: void @test_pmdmxvbf16gerx2pn(
127+
// CHECK-NEXT: [[ENTRY:.*:]]
128+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
129+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
130+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
131+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
132+
// CHECK-NEXT: ret void
133+
//
134+
// Codegen test for __builtin_mma_pmdmxvbf16gerx2pn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
135+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
136+
__vector_pair vp = *((__vector_pair *)vpp);
137+
__builtin_mma_pmdmxvbf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
138+
*((__dmr1024 *)resp) = vdmr;
139+
}
140+
141+
// CHECK-LABEL: void @test_pmdmxvbf16gerx2pp(
142+
// CHECK-NEXT: [[ENTRY:.*:]]
143+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
144+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
145+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
146+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
147+
// CHECK-NEXT: ret void
148+
//
149+
// Codegen test for __builtin_mma_pmdmxvbf16gerx2pp: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
150+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
151+
__vector_pair vp = *((__vector_pair *)vpp);
152+
__builtin_mma_pmdmxvbf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
153+
*((__dmr1024 *)resp) = vdmr;
154+
}
155+
156+
// CHECK-LABEL: void @test_dmxvf16gerx2(
157+
// CHECK-NEXT: [[ENTRY:.*:]]
158+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
159+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
160+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
161+
// CHECK-NEXT: ret void
162+
//
163+
// Codegen test for __builtin_mma_dmxvf16gerx2: calls the builtin with &vdmr,
// the __vector_pair read from vpp, and vc, then stores vdmr to resp.
// The expected IR for this function contains no load from vdmrp; only the
// pair and vc are passed to the intrinsic.
void test_dmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
164+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
165+
__vector_pair vp = *((__vector_pair *)vpp);
166+
__builtin_mma_dmxvf16gerx2(&vdmr, vp, vc);
167+
*((__dmr1024 *)resp) = vdmr;
168+
}
169+
170+
// CHECK-LABEL: void @test_dmxvf16gerx2nn(
171+
// CHECK-NEXT: [[ENTRY:.*:]]
172+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
173+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
174+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
175+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
176+
// CHECK-NEXT: ret void
177+
//
178+
// Codegen test for __builtin_mma_dmxvf16gerx2nn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
179+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
180+
__vector_pair vp = *((__vector_pair *)vpp);
181+
__builtin_mma_dmxvf16gerx2nn(&vdmr, vp, vc);
182+
*((__dmr1024 *)resp) = vdmr;
183+
}
184+
185+
// CHECK-LABEL: void @test_dmxvf16gerx2np(
186+
// CHECK-NEXT: [[ENTRY:.*:]]
187+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
188+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
189+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
190+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
191+
// CHECK-NEXT: ret void
192+
//
193+
// Codegen test for __builtin_mma_dmxvf16gerx2np: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
194+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
195+
__vector_pair vp = *((__vector_pair *)vpp);
196+
__builtin_mma_dmxvf16gerx2np(&vdmr, vp, vc);
197+
*((__dmr1024 *)resp) = vdmr;
198+
}
199+
200+
// CHECK-LABEL: void @test_dmxvf16gerx2pn(
201+
// CHECK-NEXT: [[ENTRY:.*:]]
202+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
203+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
204+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
205+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
206+
// CHECK-NEXT: ret void
207+
//
208+
// Codegen test for __builtin_mma_dmxvf16gerx2pn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
209+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
210+
__vector_pair vp = *((__vector_pair *)vpp);
211+
__builtin_mma_dmxvf16gerx2pn(&vdmr, vp, vc);
212+
*((__dmr1024 *)resp) = vdmr;
213+
}
214+
215+
// CHECK-LABEL: void @test_dmxvf16gerx2pp(
216+
// CHECK-NEXT: [[ENTRY:.*:]]
217+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
218+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
219+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
220+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
221+
// CHECK-NEXT: ret void
222+
//
223+
// Codegen test for __builtin_mma_dmxvf16gerx2pp: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with (&vdmr, vp, vc),
// and stores vdmr to resp. The expected IR passes the loaded __dmr1024 value
// to the intrinsic as its first operand.
void test_dmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
224+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
225+
__vector_pair vp = *((__vector_pair *)vpp);
226+
__builtin_mma_dmxvf16gerx2pp(&vdmr, vp, vc);
227+
*((__dmr1024 *)resp) = vdmr;
228+
}
229+
230+
// CHECK-LABEL: void @test_pmdmxvf16gerx2(
231+
// CHECK-NEXT: [[ENTRY:.*:]]
232+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
233+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
234+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
235+
// CHECK-NEXT: ret void
236+
//
237+
// Codegen test for __builtin_mma_pmdmxvf16gerx2: calls the builtin with
// &vdmr, the __vector_pair read from vpp, vc, and three trailing constant
// arguments (all 0 here), then stores vdmr to resp.
// The expected IR for this function contains no load from vdmrp; the
// intrinsic receives the pair, vc and the three i32 0 operands.
void test_pmdmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
238+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
239+
__vector_pair vp = *((__vector_pair *)vpp);
240+
__builtin_mma_pmdmxvf16gerx2(&vdmr, vp, vc, 0, 0, 0);
241+
*((__dmr1024 *)resp) = vdmr;
242+
}
243+
244+
// CHECK-LABEL: void @test_pmdmxvf16gerx2nn(
245+
// CHECK-NEXT: [[ENTRY:.*:]]
246+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
247+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
248+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
249+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
250+
// CHECK-NEXT: ret void
251+
//
252+
// Codegen test for __builtin_mma_pmdmxvf16gerx2nn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
253+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
254+
__vector_pair vp = *((__vector_pair *)vpp);
255+
__builtin_mma_pmdmxvf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
256+
*((__dmr1024 *)resp) = vdmr;
257+
}
258+
259+
// CHECK-LABEL: void @test_pmdmxvf16gerx2np(
260+
// CHECK-NEXT: [[ENTRY:.*:]]
261+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
262+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
263+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
264+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
265+
// CHECK-NEXT: ret void
266+
//
267+
// Codegen test for __builtin_mma_pmdmxvf16gerx2np: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
268+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
269+
__vector_pair vp = *((__vector_pair *)vpp);
270+
__builtin_mma_pmdmxvf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
271+
*((__dmr1024 *)resp) = vdmr;
272+
}
273+
274+
// CHECK-LABEL: void @test_pmdmxvf16gerx2pn(
275+
// CHECK-NEXT: [[ENTRY:.*:]]
276+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
277+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
278+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
279+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
280+
// CHECK-NEXT: ret void
281+
//
282+
// Codegen test for __builtin_mma_pmdmxvf16gerx2pn: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
283+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
284+
__vector_pair vp = *((__vector_pair *)vpp);
285+
__builtin_mma_pmdmxvf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
286+
*((__dmr1024 *)resp) = vdmr;
287+
}
288+
289+
// CHECK-LABEL: void @test_pmdmxvf16gerx2pp(
290+
// CHECK-NEXT: [[ENTRY:.*:]]
291+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
292+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
293+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
294+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
295+
// CHECK-NEXT: ret void
296+
//
297+
// Codegen test for __builtin_mma_pmdmxvf16gerx2pp: reads a __dmr1024 from
// vdmrp and a __vector_pair from vpp, calls the builtin with
// (&vdmr, vp, vc, 0, 0, 0), and stores vdmr to resp. The expected IR passes
// the loaded __dmr1024 value first, followed by the pair, vc and three i32 0
// operands.
void test_pmdmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
298+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
299+
__vector_pair vp = *((__vector_pair *)vpp);
300+
__builtin_mma_pmdmxvf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
301+
*((__dmr1024 *)resp) = vdmr;
302+
}
303+
304+
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
305+
// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
306+
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
307+
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
308+
// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
309+
// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0}

0 commit comments

Comments
 (0)