Skip to content

Commit e124649

Browse files
[AIEX] Add missing undef flags in copy expansion
This properly handles uses of non-dominating definitions. We also change the handling of the destination registers in two parts: *Copy expansion: we replace the original index by the index of the first lane copy to avoid the creation of LRs with just one instruction; in this way we keep the LI correct. *Rewrite: reset dead flags if necessary. In the test, MachineVerifier is still off, because some lane redefinitions are considered full register redefinitions, causing some inaccurate expectations around dead flags. The affected test is an extreme corner case where individual lanes are handled apart from the original 2D register. Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent f3147fa commit e124649

File tree

2 files changed

+113
-12
lines changed

2 files changed

+113
-12
lines changed

llvm/lib/Target/AIE/AIESuperRegUtils.cpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
103103
Register SrcReg = MI.getOperand(1).getReg();
104104
LaneBitmask LiveSrcLanes = getLiveLanesAt(CopyIndex, SrcReg, LIS);
105105

106-
LIS.removeVRegDefAt(LIS.getInterval(DstReg), CopyIndex.getRegSlot());
107-
106+
MachineInstr *FirstMI = nullptr;
108107
SmallSet<Register, 8> RegistersToRepair;
109108
for (int SubRegIdx : CopySubRegs) {
110109
if ((LiveSrcLanes & TRI.getSubRegIndexLaneMask(SubRegIdx)).none()) {
@@ -118,13 +117,29 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
118117
.addReg(DstReg, RegState::Define, SubRegIdx)
119118
.addReg(SrcReg, 0, SubRegIdx)
120119
.getInstr();
120+
121+
// We set to undef, otherwise, MachineVerifier will consider that we are
122+
// also reading the other lanes and, if this register is not used in a loop,
123+
// it will cause dominance problems (it will be a live out for all pred
124+
// blocks).
125+
PartCopy->getOperand(0).setIsUndef();
121126
LLVM_DEBUG(dbgs() << " to " << *PartCopy);
122127
LIS.InsertMachineInstrInMaps(*PartCopy);
123-
LIS.getInterval(PartCopy->getOperand(0).getReg());
128+
// We need to repair only the Src register. For the Dst register,
129+
// we don't need to do anything explicit, because we will replace the
130+
// original copy by the first lane copy in LIS. We avoid the explicit repair
131+
// of Dst reg because LIS will create an exclusive range for each copy,
132+
// because it considers that every sub-lane copy will make the preceding
133+
// one dead, which is not true for composite registers.
134+
// TODO: investigate why subregister liveness is being ignored by LIS
135+
// at this point.
124136
RegistersToRepair.insert(PartCopy->getOperand(1).getReg());
137+
FirstMI = FirstMI ? FirstMI : PartCopy;
125138
}
126139

127-
LIS.RemoveMachineInstrFromMaps(MI);
140+
// Replace the original copy by the first one, so we automatically repair
141+
// DstReg's LI.
142+
LIS.ReplaceMachineInstrInMaps(MI, *FirstMI);
128143
MI.eraseFromParent();
129144
// As we don't handle all registers now (selective LI filter),
130145
// We should make sure that all LiveIntervals are correct.
@@ -190,8 +205,17 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
190205

191206
// There might have been a write-undefined due to only writing one sub-lane.
192207
// Now that each sub-lane has its own VReg, the qualifier is invalid.
193-
if (RegOp.isDef())
208+
if (RegOp.isDef()) {
194209
RegOp.setIsUndef(false);
210+
// Also unset correctly the dead flag if the instruction
211+
// is not the dead slot of the live range (the def is still alive).
212+
LiveInterval &LI = LIS.getInterval(Reg);
213+
MachineInstr *DefMI = RegOp.getParent();
214+
SlotIndex Def = LIS.getInstructionIndex(*DefMI);
215+
LiveRange::iterator I = LI.FindSegmentContaining(Def);
216+
if (I->end != Def.getDeadSlot())
217+
RegOp.setIsDead(false);
218+
}
195219

196220
// Make sure the right reg class is applied, some MIs might use compound
197221
// classes with both 20 and 32 bits registers.

llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-check-undef.mir

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
12

23
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
34
# See https://llvm.org/LICENSE.txt for license information.
45
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
56
#
67
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
78
#
8-
# RUN: not --crash llc -O2 -mtriple=aie2p -start-before=greedy \
9-
# RUN: -stop-before=aie-unallocated-superreg-rewrite -o /dev/null %s 2>&1 | FileCheck %s
9+
# RUN: llc -O2 -mtriple=aie2p -start-before=greedy \
10+
# RUN: -stop-before=aie-unallocated-superreg-rewrite %s -o - | FileCheck %s
1011

1112
# The goal of this test is to check if we properly insert undef flag on the def side
1213
# of a expanded full copy. On a sub-register def operand, it refers to the part of the
@@ -15,15 +16,91 @@
1516
# force the related register to be inserted in liveout set of the predecessors block,
1617
# causing dominance problems.
1718

18-
# CHECK: LLVM ERROR: Found 1 machine code errors
19-
2019
---
2120
name: use_all_2d_regs
2221
tracksRegLiveness: true
2322
body: |
23+
; CHECK-LABEL: name: use_all_2d_regs
24+
; CHECK: bb.0:
25+
; CHECK-NEXT: successors: %bb.1(0x80000000)
26+
; CHECK-NEXT: {{ $}}
27+
; CHECK-NEXT: undef [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
28+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_mod:ed = MOV_PD_imm11_pseudo 0
29+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edn = MOV_PD_imm11_pseudo -1
30+
; CHECK-NEXT: [[COPY:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo]].sub_dim_stride
31+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY [[MOV_PD_imm11_pseudo]].sub_mod
32+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
33+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
34+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
35+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
36+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
37+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_size:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
38+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
39+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
40+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
41+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
42+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
43+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
44+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
45+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
46+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:ed = COPY [[MOV_PD_imm11_pseudo]]
47+
; CHECK-NEXT: [[COPY15:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: bb.1:
50+
; CHECK-NEXT: successors: %bb.1(0x80000000)
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
53+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
54+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
55+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
56+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep, [[COPY7:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo2]], [[COPY]], [[MOV_PD_imm11_pseudo1]], [[COPY1]], [[COPY7]]
57+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
58+
; CHECK-NEXT: [[COPY16:%[0-9]+]]:em = COPY [[COPY]]
59+
; CHECK-NEXT: [[COPY17:%[0-9]+]]:edj = COPY [[COPY1]]
60+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, [[COPY10:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY16]], [[COPY2]], [[COPY17]], [[COPY10]]
61+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
62+
; CHECK-NEXT: [[COPY18:%[0-9]+]]:em = COPY [[COPY]]
63+
; CHECK-NEXT: [[COPY19:%[0-9]+]]:edj = COPY [[COPY1]]
64+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep, [[COPY8:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY18]], [[COPY3]], [[COPY19]], [[COPY8]]
65+
; CHECK-NEXT: [[COPY20:%[0-9]+]]:em = COPY [[COPY]]
66+
; CHECK-NEXT: [[COPY21:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo1]]
67+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
68+
; CHECK-NEXT: [[COPY22:%[0-9]+]]:edj = COPY [[COPY1]]
69+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep, [[COPY11:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY20]], [[COPY21]], [[COPY22]], [[COPY11]]
70+
; CHECK-NEXT: [[COPY23:%[0-9]+]]:em = COPY [[COPY]]
71+
; CHECK-NEXT: [[COPY24:%[0-9]+]]:edj = COPY [[COPY1]]
72+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep, [[COPY9:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo6]], [[COPY23]], [[COPY4]], [[COPY24]], [[COPY9]]
73+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
74+
; CHECK-NEXT: [[COPY25:%[0-9]+]]:em = COPY [[COPY]]
75+
; CHECK-NEXT: [[COPY26:%[0-9]+]]:edj = COPY [[COPY1]]
76+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep, [[COPY12:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo7]], [[COPY25]], [[COPY5]], [[COPY26]], [[COPY12]]
77+
; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub_dim_count:ed = COPY [[COPY10]] {
78+
; CHECK-NEXT: internal [[COPY27]].sub_dim_size:ed = COPY [[COPY2]]
79+
; CHECK-NEXT: }
80+
; CHECK-NEXT: [[COPY28:%[0-9]+]]:edc = COPY [[COPY14]].sub_dim_count
81+
; CHECK-NEXT: [[COPY29:%[0-9]+]]:edn = COPY [[COPY14]].sub_dim_size
82+
; CHECK-NEXT: [[COPY30:%[0-9]+]]:edj = COPY [[COPY14]].sub_dim_stride
83+
; CHECK-NEXT: [[COPY31:%[0-9]+]]:em = COPY [[COPY14]].sub_mod
84+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, [[COPY28:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY31]], [[COPY29]], [[COPY30]], [[COPY28]]
85+
; CHECK-NEXT: [[COPY32:%[0-9]+]]:em = COPY [[COPY]]
86+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
87+
; CHECK-NEXT: [[COPY33:%[0-9]+]]:edj = COPY [[COPY1]]
88+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY13:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY32]], [[COPY6]], [[COPY33]], [[COPY13]]
89+
; CHECK-NEXT: [[COPY34:%[0-9]+]]:edn = COPY [[COPY31]]
90+
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_dim_count:ed = COPY [[COPY28]]
91+
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_dim_size:ed = COPY [[COPY29]]
92+
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_dim_stride:ed = COPY [[COPY30]]
93+
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_mod:ed = COPY [[COPY31]]
94+
; CHECK-NEXT: [[COPY35:%[0-9]+]]:em = COPY [[COPY31]]
95+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[COPY27]].sub_dim_count {
96+
; CHECK-NEXT: internal [[COPY2]]:edn = COPY [[COPY27]].sub_dim_size
97+
; CHECK-NEXT: }
98+
; CHECK-NEXT: [[COPY36:%[0-9]+]]:edj = COPY [[COPY1]]
99+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, [[COPY15:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY35]], [[COPY34]], [[COPY36]], [[COPY15]]
100+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
24101
bb.0:
25102
successors: %bb.1(0x80000000)
26-
103+
27104
undef %80.sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
28105
%80.sub_mod:ed = MOV_PD_imm11_pseudo 0
29106
undef %105.sub_dim_size:ed = MOV_PD_imm11_pseudo -1
@@ -44,10 +121,10 @@ body: |
44121
%82.sub_dim_count:ed = COPY %80.sub_mod
45122
%80.sub_dim_count:ed = COPY %80.sub_mod
46123
undef %77.sub_dim_count:ed = COPY %80.sub_mod
47-
124+
48125
bb.1:
49126
successors: %bb.1(0x80000000)
50-
127+
51128
%10:ep = MOV_PD_imm11_pseudo 0
52129
%18:ep = MOV_PD_imm11_pseudo 0
53130
%22:ep = MOV_PD_imm11_pseudo 0

0 commit comments

Comments
 (0)