Skip to content

[RISCV] add load/store misched/PostRA subtarget features #149409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -1700,6 +1700,18 @@ def TuneNLogNVRGather
// Tuning feature "use-postra-scheduler": sets the UsePostRAScheduler
// subtarget attribute to "true".
def TunePostRAScheduler
    : SubtargetFeature<
          "use-postra-scheduler",
          "UsePostRAScheduler", "true",
          "Schedule again after register allocation">;

def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @topperc meant to mimic something like "bool RISCVSubtarget::useAA()", for example.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I meant something we could set per-CPU, like this.

"EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">;

// Tuning feature "disable-misched-store-clustering": sets the
// EnableMISchedStoreClustering subtarget attribute to "false".
def TuneDisableMISchedStoreClustering
    : SubtargetFeature<
          "disable-misched-store-clustering",
          "EnableMISchedStoreClustering", "false",
          "Disable store clustering in the machine scheduler">;

// Tuning feature "disable-postmisched-load-clustering": sets the
// EnablePostMISchedLoadClustering subtarget attribute to "false".
def TuneDisablePostMISchedLoadClustering
    : SubtargetFeature<
          "disable-postmisched-load-clustering",
          "EnablePostMISchedLoadClustering", "false",
          "Disable PostRA load clustering in the machine scheduler">;

// Tuning feature "disable-postmisched-store-clustering": sets the
// EnablePostMISchedStoreClustering subtarget attribute to "false".
def TuneDisablePostMISchedStoreClustering
    : SubtargetFeature<
          "disable-postmisched-store-clustering",
          "EnablePostMISchedStoreClustering", "false",
          "Disable PostRA store clustering in the machine scheduler">;

// Tuning feature "disable-latency-sched-heuristic": sets the
// DisableLatencySchedHeuristic subtarget attribute to "true".
def TuneDisableLatencySchedHeuristic
    : SubtargetFeature<
          "disable-latency-sched-heuristic",
          "DisableLatencySchedHeuristic", "true",
          "Disable latency scheduling heuristic">;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,9 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
FeatureStdExtZicboz,
FeatureVendorXVentanaCondOps],
[TuneVentanaVeyron,
TuneDisableMISchedLoadClustering,
TuneDisablePostMISchedLoadClustering,
TuneDisablePostMISchedStoreClustering,
TuneLUIADDIFusion,
TuneAUIPCADDIFusion,
TuneZExtHFusion,
Expand Down
25 changes: 10 additions & 15 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,6 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));

// Hidden command-line switch, initialised to true, that toggles load and store
// clustering in the machine scheduler.
static cl::opt<bool>
    EnableMISchedLoadStoreClustering(
        "riscv-misched-load-store-clustering",
        cl::Hidden,
        cl::desc("Enable load and store clustering in the machine scheduler"),
        cl::init(true));

// Hidden command-line switch, initialised to true, that toggles PostRA load
// and store clustering in the machine scheduler.
static cl::opt<bool>
    EnablePostMISchedLoadStoreClustering(
        "riscv-postmisched-load-store-clustering",
        cl::Hidden,
        cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
        cl::init(true));

static cl::opt<bool>
EnableVLOptimizer("riscv-enable-vl-optimizer",
cl::desc("Enable the RISC-V VL Optimizer pass"),
Expand Down Expand Up @@ -299,15 +289,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,

ScheduleDAGInstrs *
RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMILive *DAG = createSchedLive(C);
if (EnableMISchedLoadStoreClustering) {

if (ST.enableMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));

if (ST.enableMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}

const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
if (!DisableVectorMaskMutation && ST.hasVInstructions())
DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));

Expand All @@ -316,13 +308,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {

/// Create the scheduling DAG used by the post-RA machine scheduler.
///
/// Builds the DAG with createSchedPostRA() and attaches the load and store
/// cluster mutations (both with ReorderWhileClustering set) according to the
/// subtarget's enablePostMISchedLoadClustering() and
/// enablePostMISchedStoreClustering() queries.
///
/// \param C scheduling context; its MachineFunction supplies the subtarget.
/// \returns the configured DAG.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The EnablePostMISchedLoadStoreClustering guard and its closing brace may be
// removed lines shown next to their per-subtarget replacements; confirm
// against the actual patch before relying on the nesting shown here.
ScheduleDAGInstrs *
RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMI *DAG = createSchedPostRA(C);
if (EnablePostMISchedLoadStoreClustering) {

// Load clustering is decided per subtarget.
if (ST.enablePostMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));

// Store clustering is decided independently of load clustering.
if (ST.enablePostMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}

return DAG;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/features-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic.
; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler.
; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler.
; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler.
; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
Expand Down
47 changes: 44 additions & 3 deletions llvm/test/CodeGen/RISCV/misched-load-clustering.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
; REQUIRES: asserts
; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
;
; Disable all misched clustering
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
;
; ST misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=STCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=STCLUSTER %s
;
; LD misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s

;
; Default misched cluster settings (i.e. both LD and ST clustering)
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s

define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: ********** MI Scheduling **********
Expand All @@ -22,13 +47,29 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
; STCLUSTER: ********** MI Scheduling **********
; STCLUSTER-LABEL: load_clustering_1:%bb.0
; STCLUSTER: *** Final schedule for %bb.0 ***
; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
; DEFAULTCLUSTER: ********** MI Scheduling **********
; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
%val0 = load i32, ptr %arrayidx0
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
# RUN: -mattr=+disable-postmisched-load-clustering \
# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
# RUN: -mattr=+disable-postmisched-load-clustering \
# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
Expand Down
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/RISCV/misched-store-clustering.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; REQUIRES: asserts
;
; Disable all misched clustering
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
;
; ST misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=STCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-load-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=STCLUSTER %s
;
; LD misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
;
; Default misched cluster settings (i.e. both LD and ST clustering)
; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s

; Stores %v through four GEPs off %p at i32 element indices 3, 2, 1 and 4, in
; that program order (SW offsets 12, 8, 4 and 16 in the checks below).
; In the runs that keep store clustering enabled (STCLUSTER, DEFAULTCLUSTER)
; the expected final schedule lists the SW instructions by ascending offset.
; In the runs that disable it (NOCLUSTER, LDCLUSTER) the expected schedule
; keeps the original 12, 8, 4, 16 order.
define i32 @store_clustering_1(ptr nocapture %p, i32 %v) {
; NOCLUSTER: ********** MI Scheduling **********
; NOCLUSTER-LABEL: store_clustering_1:%bb.0
; NOCLUSTER: *** Final schedule for %bb.0 ***
; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
;
; STCLUSTER: ********** MI Scheduling **********
; STCLUSTER-LABEL: store_clustering_1:%bb.0
; STCLUSTER: *** Final schedule for %bb.0 ***
; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: store_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
;
; DEFAULTCLUSTER: ********** MI Scheduling **********
; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0
; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
store i32 %v, ptr %arrayidx0
%arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
store i32 %v, ptr %arrayidx1
%arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
store i32 %v, ptr %arrayidx2
%arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
store i32 %v, ptr %arrayidx3
ret i32 %v
}