diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index f9c0b54be7a27..52ac49d3cfb9d 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1700,6 +1700,18 @@ def TuneNLogNVRGather def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; +def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering", + "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">; + +def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering", + "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">; + +def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering", + "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">; + +def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering", + "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">; + def TuneDisableLatencySchedHeuristic : SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 838edf6c57250..8445730446dd9 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -590,6 +590,9 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", FeatureStdExtZicboz, FeatureVendorXVentanaCondOps], [TuneVentanaVeyron, + TuneDisableMISchedLoadClustering, + TuneDisablePostMISchedLoadClustering, + TuneDisablePostMISchedStoreClustering, TuneLUIADDIFusion, TuneAUIPCADDIFusion, TuneZExtHFusion, diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index b43b915d0ad4f..c5f36c5a459f3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -94,16 +94,6 @@ static cl::opt cl::desc("Enable the loop data prefetch pass"), cl::init(true)); -static cl::opt EnableMISchedLoadStoreClustering( - "riscv-misched-load-store-clustering", cl::Hidden, - cl::desc("Enable load and store clustering in the machine scheduler"), - cl::init(true)); - -static cl::opt EnablePostMISchedLoadStoreClustering( - "riscv-postmisched-load-store-clustering", cl::Hidden, - cl::desc("Enable PostRA load and store clustering in the machine scheduler"), - cl::init(true)); - static cl::opt EnableVLOptimizer("riscv-enable-vl-optimizer", cl::desc("Enable the RISC-V VL Optimizer pass"), @@ -299,15 +289,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, ScheduleDAGInstrs * RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = createSchedLive(C); - if (EnableMISchedLoadStoreClustering) { + + if (ST.enableMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enableMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } - const RISCVSubtarget &ST = C->MF->getSubtarget(); if (!DisableVectorMaskMutation && ST.hasVInstructions()) DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI)); @@ -316,13 +308,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { ScheduleDAGInstrs * RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMI *DAG = createSchedPostRA(C); - if (EnablePostMISchedLoadStoreClustering) { + + if (ST.enablePostMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enablePostMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } return DAG; } diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index b94665b718ae7..a5ee41281607b 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -13,6 +13,10 @@ ; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion. ; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point). ; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic. +; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler. +; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler. +; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler. +; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler. ; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN. ; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs). ; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation). diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index 160f0aefa36a7..abdc1bad787ae 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -1,17 +1,42 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \ +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \ +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only ; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s ; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s - +; +; Default misched cluster settings (i.e. both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: ********** MI Scheduling ********** @@ -22,6 +47,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 ; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 ; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: load_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; ; LDCLUSTER: ********** MI Scheduling ********** ; LDCLUSTER-LABEL: load_clustering_1:%bb.0 ; LDCLUSTER: *** Final schedule for %bb.0 *** @@ -29,6 +62,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 ; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 ; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 entry: %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 %val0 = load i32, ptr %arrayidx0 diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir index 21398d315ec93..01960f9d99a8b 100644 --- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir +++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir @@ -1,10 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOCLUSTER %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ diff --git a/llvm/test/CodeGen/RISCV/misched-store-clustering.ll b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll new file mode 100644 index 0000000000000..02e853d2217cc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll @@ -0,0 +1,83 @@ +; REQUIRES: asserts +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; +; Default misched cluster settings (i.e. both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s + +define i32 @store_clustering_1(ptr nocapture %p, i32 %v) { +; NOCLUSTER: ********** MI Scheduling ********** +; NOCLUSTER-LABEL: store_clustering_1:%bb.0 +; NOCLUSTER: *** Final schedule for %bb.0 *** +; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: store_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; LDCLUSTER: ********** MI Scheduling ********** +; LDCLUSTER-LABEL: store_clustering_1:%bb.0 +; LDCLUSTER: *** Final schedule for %bb.0 *** +; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +entry: + %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 + store i32 %v, ptr %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2 + store i32 %v, ptr %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1 + store i32 %v, ptr %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4 + store i32 %v, ptr %arrayidx3 + ret i32 %v +}