Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,12 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4spp, "vW1024*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4spp, "vW1024*W256Vi255i15i15", true,
"mma,paired-vector-memops")
// DMR (__dmr1024) helper builtins. Each takes a pointer to the destination
// __dmr1024 as its first argument (the leading "W1024*" in the type string);
// mma_dmmr and mma_dmxor take a second __dmr1024 pointer as the source.
// NOTE(review): the third macro argument is `true` only for mma_dmxor;
// presumably it marks builtins whose destination is also read as an input
// operand -- confirm against the UNALIASED_CUSTOM_BUILTIN definition.
// NOTE(review): all three are gated on "mma,paired-vector-memops"; whether a
// feature specific to the future CPU should be used instead is an open
// question -- confirm before relying on this gating.
UNALIASED_CUSTOM_BUILTIN(mma_dmsetdmrz, "vW1024*", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
"mma,paired-vector-memops")
Copy link
Contributor

@lei137 lei137 Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

paired-vector-memops is specifically defined for P10+ to deal with MMA instructions. Since these builtins deal with __dmr1024 types for cpu=future, should a new feature be created so we can diagnose this within PPCTargetInfo::initFeatureMap()?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, there is already a subtarget feature called isa-future-instructions. For the DMR integer builtins I did not use it because #144594 (comment) had not landed, and without that change -target-cpu future did not imply isa-future-instructions unless extra hard-coding was added in clang. Maybe we can check whether that works now.


// FIXME: Obviously incomplete.

Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/PPC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *Acc = Builder.CreateLoad(Addr);
CallOps.push_back(Acc);
}
if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
BuiltinID == PPC::BI__builtin_mma_dmxor) {
Address Addr = EmitPointerWithAlignment(E->getArg(1));
Ops[1] = Builder.CreateLoad(Addr);
}
for (unsigned i=1; i<Ops.size(); i++)
CallOps.push_back(Ops[i]);
llvm::Function *F = CGM.getIntrinsic(ID);
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,19 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
__builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
*((__dmr1024 *)resp) = vdmr;
}

// CHECK-LABEL: @test_dmf_basic
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128
// Exercises __builtin_mma_dmsetdmrz, __builtin_mma_dmmr and
// __builtin_mma_dmxor in sequence. Per the FileCheck expectations above this
// function, the lowered IR is expected to:
//   - call the dmsetdmrz intrinsic with no operands,
//   - pass that result to the dmmr intrinsic and store the outcome to res1,
//   - load res2 and p, pass both to the dmxor intrinsic, and store the
//     outcome back to res2.
// The expected IR contains no loads or stores for the local `x`.
void test_dmf_basic(char *p, char *res1, char *res2) {
// Only x[0] is referenced below.
// NOTE(review): the second array element looks unused -- confirm whether the
// array form is intentional.
__dmr1024 x[2];
// Initialise x[0] (presumably to zero, going by the builtin's name).
__builtin_mma_dmsetdmrz(&x[0]);
// First argument is the destination, second is the source.
__builtin_mma_dmmr((__dmr1024*)res1, &x[0]);
// The destination res2 is also an input: the expected IR loads it before the
// call and stores the result back to it.
__builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
}
Loading