From c811c97f3587274aaf2078b168d5331248ae399e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 23 Jan 2025 18:24:20 +0000 Subject: [PATCH] [TailDup] Allow large number of predecessors/successors without phis. (#116072) This adjusts the threshold logic added in #78582 to only trigger for cases where there are actually phis to duplicate in either TailBB or in one of the successors. In cases where there are no phis, we only have to pay the cost of extra edges, but have no explosion in PHI-related instructions. This improves performance of Python on some inputs by 2-3% on Apple Silicon CPUs. PR: https://github.com/llvm/llvm-project/pull/116072 --- llvm/lib/CodeGen/TailDuplicator.cpp | 23 +- .../CodeGen/X86/tail-dup-pred-succ-size.mir | 501 ++++++++++++++++++ 2 files changed, 516 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index c5fa4e6211a63..9222e61b99026 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -574,14 +574,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; - // Duplicating a BB which has both multiple predecessors and successors will - // result in a complex CFG and also may cause huge amount of PHI nodes. If we - // want to remove this limitation, we have to address - // https://github.com/llvm/llvm-project/issues/78578. - if (TailBB.pred_size() > TailDupPredSize && - TailBB.succ_size() > TailDupSuccSize) - return false; - // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. @@ -621,6 +613,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. 
unsigned InstrCount = 0; + unsigned NumPhis = 0; for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated. // CFI instructions are marked as non-duplicable, because Darwin compact @@ -664,6 +657,20 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (InstrCount > MaxDuplicateCount) return false; + NumPhis += MI.isPHI(); + } + + // Duplicating a BB which has both multiple predecessors and successors + // may cause a huge number of PHI nodes. If we want to remove this limitation, + // we have to address https://github.com/llvm/llvm-project/issues/78578. + if (TailBB.pred_size() > TailDupPredSize && + TailBB.succ_size() > TailDupSuccSize) { + // If TailBB or any of its successors contains a phi, we may have to add a + // large number of additional phis with additional incoming values. + if (NumPhis != 0 || any_of(TailBB.successors(), [](MachineBasicBlock *MBB) { + return any_of(*MBB, [](MachineInstr &MI) { return MI.isPHI(); }); + })) + return false; } // Check if any of the successors of TailBB has a PHI node in which the diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir index 67f8cc72e0d72..2f1ff76fda76c 100644 --- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -258,3 +258,504 @@ body: | RET 0, $eax ... +# Based on foo, but with a phi node in a successor of %bb.7 instead of %bb.7. 
+--- +name: foo_phi_in_tailbb_successor +tracksRegLiveness: true +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5', '%bb.9' ] + - id: 1 + blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ] +body: | + ; LIMIT-LABEL: name: foo_phi_in_tailbb_successor + ; LIMIT: bb.0: + ; LIMIT-NEXT: successors: %bb.2(0x1999999a), %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.9(0x1999999a) + ; LIMIT-NEXT: liveins: $rdi, $esi + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.2: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.3: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.4: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.5: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm 
[[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.6: + ; LIMIT-NEXT: successors: + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.7: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.9: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[AND32ri]], %bb.0, [[AND32ri1]], %bb.7 + ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.10: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.11: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.12: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, 
implicit-def dead $eflags + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.13: + ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9 + ; LIMIT-NEXT: [[OR32ri:%[0-9]+]]:gr32 = OR32ri [[PHI]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: $eax = COPY [[OR32ri]] + ; LIMIT-NEXT: RET 0, $eax + ; + ; NOLIMIT-LABEL: name: foo_phi_in_tailbb_successor + ; NOLIMIT: bb.0: + ; NOLIMIT-NEXT: successors: %bb.2(0x1999999a), %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.9(0x1999999a) + ; NOLIMIT-NEXT: liveins: $rdi, $esi + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.2: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.3: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg 
+ ; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.4: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.5: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.6: + 
; NOLIMIT-NEXT: successors: + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.9: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[AND32ri]], %bb.0, [[AND32ri1]], %bb.2, [[AND32ri2]], %bb.3, [[AND32ri3]], %bb.4, [[AND32ri4]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.10: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.11: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.12: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.13: + ; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 + ; NOLIMIT-NEXT: [[OR32ri:%[0-9]+]]:gr32 = OR32ri [[PHI]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: $eax = COPY [[OR32ri]] + ; NOLIMIT-NEXT: RET 0, $eax + bb.0: + liveins: $rdi, $esi + + %11:gr32 = COPY $esi + %10:gr64 = COPY $rdi + %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags + %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags + %12:gr64_nosp = SUBREG_TO_REG 0, %14, %subreg.sub_32bit + + bb.1: + 
successors: %bb.2, %bb.3, %bb.4, %bb.5, %bb.9 + + JMP64m $noreg, 8, %12, %jump-table.0, $noreg + + bb.2: + %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.7 + + bb.3: + %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.4: + %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.5: + %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.6: + successors: + + bb.7: + %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags + %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags + %18:gr64_nosp = SUBREG_TO_REG 0, %20, %subreg.sub_32bit + + bb.8: + successors: %bb.9, %bb.10, %bb.11, %bb.12 + + JMP64m $noreg, 8, %18, %jump-table.1, $noreg + + bb.9: + %9:gr32 = PHI %14, %bb.1, %20, %bb.8 + %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.13 + + bb.10: + %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags + JMP_1 %bb.13 + + bb.11: + %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags + JMP_1 %bb.13 + + bb.12: + %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags + + bb.13: + %9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9 + %24:gr32 = OR32ri %9, 1, implicit-def dead $eflags + $eax = COPY %24 + RET 0, $eax + +... + + +# Based on foo, but without any phi nodes. 
+--- +name: foo_no_phis +tracksRegLiveness: true +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ] + - id: 1 + blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ] +body: | + ; LIMIT-LABEL: name: foo_no_phis + ; LIMIT: bb.0: + ; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; LIMIT-NEXT: liveins: $rdi, $esi + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.2: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.3: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: 
[[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.4: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.5: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.6: + ; LIMIT-NEXT: successors: + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.9: + ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV32rm4]] :: (store (s32)) + ; 
LIMIT-NEXT: $eax = COPY [[MOV32rm4]] + ; LIMIT-NEXT: RET 0, $eax + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.10: + ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri8]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri8]] + ; LIMIT-NEXT: RET 0, $eax + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.11: + ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri9]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri9]] + ; LIMIT-NEXT: RET 0, $eax + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.12: + ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri10]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri10]] + ; LIMIT-NEXT: RET 0, $eax + ; + ; NOLIMIT-LABEL: name: foo_no_phis + ; NOLIMIT: bb.0: + ; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; NOLIMIT-NEXT: liveins: $rdi, $esi + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.2: + ; NOLIMIT-NEXT: successors: 
%bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.3: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.4: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, 
[[SUBREG_TO_REG3]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.5: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.6: + ; NOLIMIT-NEXT: successors: + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.9: + ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV32rm4]] :: (store (s32)) + ; NOLIMIT-NEXT: $eax = COPY [[MOV32rm4]] + ; NOLIMIT-NEXT: RET 0, $eax + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.10: + ; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri8]] :: (store (s32)) + ; NOLIMIT-NEXT: $eax = COPY [[SHR32ri8]] + ; NOLIMIT-NEXT: RET 0, $eax + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.11: + ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri9]] :: (store (s32)) + ; NOLIMIT-NEXT: $eax = COPY [[SHR32ri9]] + ; NOLIMIT-NEXT: RET 0, $eax + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.12: + 
; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; NOLIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri10]] :: (store (s32)) + ; NOLIMIT-NEXT: $eax = COPY [[SHR32ri10]] + ; NOLIMIT-NEXT: RET 0, $eax + bb.0: + liveins: $rdi, $esi + + %11:gr32 = COPY $esi + %10:gr64 = COPY $rdi + %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags + %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags + %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit + + bb.1: + successors: %bb.2, %bb.3, %bb.4, %bb.5 + + JMP64m $noreg, 8, %12, %jump-table.0, $noreg + + bb.2: + %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.7 + + bb.3: + %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.4: + %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.5: + %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.6: + successors: + + bb.7: + %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags + %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags + %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit + + bb.8: + successors: %bb.9, %bb.10, %bb.11, %bb.12 + + JMP64m $noreg, 8, %18, %jump-table.1, $noreg + + bb.9: + %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + MOV32mr %10, 1, $noreg, 0, $noreg, %5 :: (store (s32)) + $eax = COPY %5 + RET 0, $eax + + bb.10: + %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags + MOV32mr %10, 1, $noreg, 0, $noreg, %6 :: (store (s32)) + $eax = COPY %6 + RET 0, $eax + + bb.11: + %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags + MOV32mr %10, 1, $noreg, 0, $noreg, %7 :: (store (s32)) + $eax = COPY %7 + RET 0, $eax 
+ + bb.12: + %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags + MOV32mr %10, 1, $noreg, 0, $noreg, %8 :: (store (s32)) + $eax = COPY %8 + RET 0, $eax +...