Skip to content

Commit 7bf867f

Browse files
authored
Merge branch 'main' into gfx11-disallow-dpp-in-v_pk_fmac_f16
2 parents f57fcbb + d992382 commit 7bf867f

File tree

60 files changed

+633
-168
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+633
-168
lines changed

compiler-rt/test/fuzzer/sig-trap.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Check that libFuzzer handles SIGTRAP; disabled on Windows due to reliance on
2+
# POSIX-only features
3+
UNSUPPORTED: target={{.*windows.*}}
4+
15
RUN: %cpp_compiler %S/SigTrapTest.cpp -o %t
26

37
RUN: not %run %t 2>&1 | FileCheck %s

libc/test/src/__support/FPUtil/comparison_operations_test.cpp

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,15 @@ template <typename T>
2525
class ComparisonOperationsTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
2626
DECLARE_SPECIAL_CONSTANTS(T)
2727

28-
// TODO: Make these constexpr once quick_get_round is made constexpr.
29-
T normal1;
30-
T neg_normal1;
31-
T normal2;
32-
T small;
33-
T neg_small;
34-
T large;
35-
T neg_large;
28+
static constexpr T normal1 = T(3.14);
29+
static constexpr T neg_normal1 = T(-3.14);
30+
static constexpr T normal2 = T(2.71);
31+
static constexpr T small = T(0.1);
32+
static constexpr T neg_small = T(-0.1);
33+
static constexpr T large = T(10000.0);
34+
static constexpr T neg_large = T(-10000.0);
3635

3736
public:
38-
void SetUp() override {
39-
with_fenv_preserved([this]() {
40-
normal1 = T(3.14);
41-
neg_normal1 = T(-3.14);
42-
normal2 = T(2.71);
43-
small = T(0.1);
44-
neg_small = T(-0.1);
45-
large = T(10000.0);
46-
neg_large = T(-10000.0);
47-
});
48-
}
49-
5037
void test_equals() {
5138
EXPECT_TRUE(equals(neg_zero, neg_zero));
5239
EXPECT_TRUE(equals(zero, neg_zero));

lldb/source/API/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ foreach(header
327327
endif()
328328

329329
add_custom_target(liblldb-stage-header-${basename} DEPENDS ${staged_header})
330+
add_dependencies(liblldb-stage-header-${basename} lldb-sbapi-dwarf-enums)
330331
add_dependencies(liblldb-header-staging liblldb-stage-header-${basename})
331332
add_custom_command(
332333
DEPENDS ${header} OUTPUT ${staged_header}
@@ -339,6 +340,7 @@ foreach(header
339340
set(output_header $<TARGET_FILE_DIR:liblldb>/Headers/${basename})
340341

341342
add_custom_target(lldb-framework-fixup-header-${basename} DEPENDS ${staged_header})
343+
add_dependencies(lldb-framework-fixup-header-${basename} liblldb-stage-header-${basename})
342344
add_dependencies(lldb-framework-fixup-all-headers lldb-framework-fixup-header-${basename})
343345

344346
add_custom_command(TARGET lldb-framework-fixup-header-${basename} POST_BUILD

llvm/include/llvm/BinaryFormat/COFF.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,24 @@ enum DLLCharacteristics : unsigned {
694694

695695
enum ExtendedDLLCharacteristics : unsigned {
696696
/// Image is CET compatible
697-
IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT = 0x0001
697+
IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT = 0x0001,
698+
/// Image is CET compatible in strict mode
699+
IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE = 0x0002,
700+
/// Image is CET compatible in such a way that context IP validation is
701+
/// relaxed
702+
IMAGE_DLL_CHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE =
703+
0x0004,
704+
/// Image is CET compatible in such a way that the use of
705+
/// dynamic APIs is restricted to in-process use only
706+
IMAGE_DLL_CHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC_ONLY = 0x0008,
707+
/// Reserved for future use. Not used by MSVC link.exe
708+
IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_1 = 0x0010,
709+
/// Reserved for future use. Not used by MSVC link.exe
710+
IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_2 = 0x0020,
711+
/// Image is CFI compatible.
712+
IMAGE_DLL_CHARACTERISTICS_EX_FORWARD_CFI_COMPAT = 0x0040,
713+
/// Image is hotpatch compatible.
714+
IMAGE_DLL_CHARACTERISTICS_EX_HOTPATCH_COMPATIBLE = 0x0080,
698715
};
699716

700717
enum DebugType : unsigned {

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,6 +1389,9 @@ def FeatureAddSubU64Insts
13891389
: SubtargetFeature<"add-sub-u64-insts", "HasAddSubU64Insts", "true",
13901390
"Has v_add_u64 and v_sub_u64 instructions">;
13911391

1392+
def FeatureMadU32Inst : SubtargetFeature<"mad-u32-inst", "HasMadU32Inst",
1393+
"true", "Has v_mad_u32 instruction">;
1394+
13921395
def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts",
13931396
"HasVMemToLDSLoad",
13941397
"true",
@@ -2049,6 +2052,7 @@ def FeatureISAVersion12_50 : FeatureSet<
20492052
FeatureVmemPrefInsts,
20502053
FeatureLshlAddU64Inst,
20512054
FeatureAddSubU64Insts,
2055+
FeatureMadU32Inst,
20522056
FeatureLdsBarrierArriveAtomic,
20532057
FeatureSetPrioIncWgInst,
20542058
]>;
@@ -2839,6 +2843,9 @@ def HasLshlAddU64Inst : Predicate<"Subtarget->hasLshlAddU64Inst()">,
28392843
def HasAddSubU64Insts : Predicate<"Subtarget->hasAddSubU64Insts()">,
28402844
AssemblerPredicate<(all_of FeatureAddSubU64Insts)>;
28412845

2846+
def HasMadU32Inst : Predicate<"Subtarget->hasMadU32Inst()">,
2847+
AssemblerPredicate<(all_of FeatureMadU32Inst)>;
2848+
28422849
def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic()">,
28432850
AssemblerPredicate<(all_of FeatureLdsBarrierArriveAtomic)>;
28442851

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,15 +1134,26 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
11341134
SDLoc SL(N);
11351135
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
11361136
unsigned Opc;
1137+
bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !N->hasAnyUseOfValue(1);
11371138
if (Subtarget->hasMADIntraFwdBug())
11381139
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
11391140
: AMDGPU::V_MAD_U64_U32_gfx11_e64;
1141+
else if (UseNoCarry)
1142+
Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
11401143
else
11411144
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
11421145

11431146
SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
11441147
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
11451148
Clamp };
1149+
1150+
if (UseNoCarry) {
1151+
MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);
1152+
ReplaceUses(SDValue(N, 0), SDValue(Mad, 0));
1153+
CurDAG->RemoveDeadNode(N);
1154+
return;
1155+
}
1156+
11461157
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
11471158
}
11481159

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,13 +574,22 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
574574
MachineBasicBlock *BB = I.getParent();
575575
MachineFunction *MF = BB->getParent();
576576
const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
577+
bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
578+
MRI->use_nodbg_empty(I.getOperand(1).getReg());
577579

578580
unsigned Opc;
579581
if (Subtarget->hasMADIntraFwdBug())
580582
Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
581583
: AMDGPU::V_MAD_I64_I32_gfx11_e64;
584+
else if (UseNoCarry)
585+
Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
586+
: AMDGPU::V_MAD_NC_I64_I32_e64;
582587
else
583588
Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
589+
590+
if (UseNoCarry)
591+
I.removeOperand(1);
592+
584593
I.setDesc(TII.get(Opc));
585594
I.addOperand(*MF, MachineOperand::CreateImm(0));
586595
I.addImplicitDefUseOperands(*MF);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
273273
bool HasMinimum3Maximum3PKF16 = false;
274274
bool HasLshlAddU64Inst = false;
275275
bool HasAddSubU64Insts = false;
276+
bool HasMadU32Inst = false;
276277
bool HasPointSampleAccel = false;
277278
bool HasLdsBarrierArriveAtomic = false;
278279
bool HasSetPrioIncWgInst = false;
@@ -1521,9 +1522,16 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15211522
// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
15221523
bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
15231524

1525+
// \returns true if the target has V_MAD_U32 instruction.
1526+
bool hasMadU32Inst() const { return HasMadU32Inst; }
1527+
15241528
// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
15251529
bool hasVectorMulU64() const { return GFX1250Insts; }
15261530

1531+
// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1532+
// instructions.
1533+
bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1534+
15271535
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
15281536
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
15291537

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
5757
def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
5858

5959
def VOP_F64_F64_F64_F64_DPP_PROF : VOP3_Profile<VOP_F64_F64_F64_F64>;
60+
def V_MAD_U32_PROF: VOP3_Profile<VOP_I32_I32_I32_I32> {
61+
let HasExtVOP3DPP = 0;
62+
let HasExt64BitDPP = 1;
63+
}
64+
def VOP_I64_I64_I64_DPP : VOP3_Profile<VOP_I64_I64_I64>;
65+
def VOP_I32_I32_I64_DPP : VOP3_Profile<VOPProfile<[i64, i32, i32, i64]>> {
66+
let HasClamp = 1;
67+
}
6068
} // End HasExt64BitDPP = 1;
6169

6270
//===----------------------------------------------------------------------===//
@@ -152,6 +160,15 @@ defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32
152160
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
153161
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
154162

163+
let SchedRW = [WriteIntMul] in {
164+
let SubtargetPredicate = HasMadU32Inst in
165+
defm V_MAD_U32 : VOP3Inst <"v_mad_u32", V_MAD_U32_PROF>;
166+
let SubtargetPredicate = isGFX1250Plus in {
167+
defm V_MAD_NC_U64_U32 : VOP3Inst<"v_mad_nc_u64_u32", VOP_I32_I32_I64_DPP>;
168+
defm V_MAD_NC_I64_I32 : VOP3Inst<"v_mad_nc_i64_i32", VOP_I32_I32_I64_DPP>;
169+
}
170+
}
171+
155172
let SchedRW = [WriteDoubleAdd] in {
156173
let FPDPRounding = 1 in {
157174
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP_F64_F64_F64_F64_DPP_PROF, any_fma>, VOPD_Component<0x20, "v_fma_f64">;
@@ -848,6 +865,9 @@ def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
848865
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
849866
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
850867

868+
let SubtargetPredicate = HasMadU32Inst, AddedComplexity = 10 in
869+
def : ThreeOp_i32_Pats<mul, add, V_MAD_U32_e64>;
870+
851871
def : GCNPat<
852872
(DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
853873
(V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
@@ -1746,6 +1766,10 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_m
17461766
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
17471767
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
17481768

1769+
defm V_MAD_U32 : VOP3Only_Realtriple_gfx1250<0x235>;
1770+
defm V_MAD_NC_U64_U32 : VOP3Only_Realtriple_gfx1250<0x2fa>;
1771+
defm V_MAD_NC_I64_I32 : VOP3Only_Realtriple_gfx1250<0x2fb>;
1772+
17491773
defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">;
17501774
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">;
17511775
defm V_CVT_SR_FP8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36b, "V_CVT_SR_FP8_F32_gfx12", "v_cvt_sr_fp8_f32" >;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20751,6 +20751,53 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2075120751
return DAG.getAllOnesConstant(DL, VT);
2075220752
return DAG.getConstant(0, DL, VT);
2075320753
}
20754+
case Intrinsic::riscv_vsseg2_mask:
20755+
case Intrinsic::riscv_vsseg3_mask:
20756+
case Intrinsic::riscv_vsseg4_mask:
20757+
case Intrinsic::riscv_vsseg5_mask:
20758+
case Intrinsic::riscv_vsseg6_mask:
20759+
case Intrinsic::riscv_vsseg7_mask:
20760+
case Intrinsic::riscv_vsseg8_mask: {
20761+
SDValue Tuple = N->getOperand(2);
20762+
unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
20763+
20764+
if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
20765+
Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
20766+
!Tuple.getOperand(0).isUndef())
20767+
return SDValue();
20768+
20769+
SDValue Val = Tuple.getOperand(1);
20770+
unsigned Idx = Tuple.getConstantOperandVal(2);
20771+
20772+
unsigned SEW = Val.getValueType().getScalarSizeInBits();
20773+
assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
20774+
"Type mismatch without bitcast?");
20775+
unsigned Stride = SEW / 8 * NF;
20776+
unsigned Offset = SEW / 8 * Idx;
20777+
20778+
SDValue Ops[] = {
20779+
/*Chain=*/N->getOperand(0),
20780+
/*IntID=*/
20781+
DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
20782+
/*StoredVal=*/Val,
20783+
/*Ptr=*/
20784+
DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
20785+
DAG.getConstant(Offset, DL, XLenVT)),
20786+
/*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
20787+
/*Mask=*/N->getOperand(4),
20788+
/*VL=*/N->getOperand(5)};
20789+
20790+
auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
20791+
// Match getTgtMemIntrinsic for non-unit stride case
20792+
EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
20793+
MachineFunction &MF = DAG.getMachineFunction();
20794+
MachineMemOperand *MMO = MF.getMachineMemOperand(
20795+
OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
20796+
20797+
SDVTList VTs = DAG.getVTList(MVT::Other);
20798+
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
20799+
MMO);
20800+
}
2075420801
}
2075520802
}
2075620803
case ISD::EXPERIMENTAL_VP_REVERSE:
@@ -20899,6 +20946,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2089920946
DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
2090020947
return Result.getValue(0);
2090120948
}
20949+
case RISCVISD::TUPLE_INSERT: {
20950+
// tuple_insert tuple, undef, idx -> tuple
20951+
if (N->getOperand(1).isUndef())
20952+
return N->getOperand(0);
20953+
break;
20954+
}
2090220955
}
2090320956

2090420957
return SDValue();

0 commit comments

Comments
 (0)