Skip to content

Commit 234ec1a

Browse files
committed
Clean up coalescing pass
1 parent 1313bbf commit 234ec1a

File tree

1 file changed

+17
-82
lines changed

1 file changed

+17
-82
lines changed

llvm/lib/Target/NVPTX/NVPTXMemOpts.cpp

Lines changed: 17 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ std::string NVPTXMemOpts::NVVM_READ_SREG_INTRINSIC_NAME = "llvm.nvvm.read.ptx.sr
6969

7070
// A common pattern to calculate the absolute index of a thread is:
7171
// idx = tid + ctaid * ntid
72-
// This function will check if the index is calculated in this way
72+
// This function will check if an index is calculated in this way
7373
bool isAbsoluteThreadIndex(Value *idx) {
7474
auto sext = dyn_cast<SExtInst>(idx);
7575
if (!sext) { return false; }
@@ -114,8 +114,9 @@ void getIndexValues(GetElementPtrInst *GEP, std::vector<NVPTXMemOpts::IndexType>
114114
return;
115115
}
116116

117-
// Return dimension's indexes for an array load instruction
118-
// return 0 if the value is not an array
117+
// This function will check if the load instruction is loading from an array
118+
// If it is, it will return the index value types used to access the array
119+
// If not, it will return an empty vector
119120
std::vector<NVPTXMemOpts::IndexType> NVPTXMemOpts::isLoadingFromArray(LoadInst *LI) {
120121

121122
std::vector<NVPTXMemOpts::IndexType> indexValues;
@@ -130,103 +131,43 @@ std::vector<NVPTXMemOpts::IndexType> NVPTXMemOpts::isLoadingFromArray(LoadInst *
130131
// get index value. There should be exactly one
131132
auto idx = GEP->idx_begin();
132133
assert(idx != GEP->idx_end() && "No index found");
133-
134-
// TODO:: if more than one index, it is probably coalesced already
135-
// if (++idx != GEP->idx_end()) {
136-
// return indexValues;
137-
// }
138134

139135
getIndexValues(GEP, indexValues);
140136
return indexValues;
141137
}
142138

143-
int isStoringToArray(StoreInst *SI) {
144-
assert(SI && "SI is null");
145-
auto GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand());
146-
if (!GEP) { return 0; }
147-
148-
return 0;
149-
}
150-
151-
// Check if the index is a constant
152-
bool isIndexConstant(Value *idx) {
153-
return isa<ConstantInt>(idx);
154-
}
155-
156-
// Check if the index is a thread constant.
157-
// ie. the thread id. this is not a constant for all threads in a warp
158-
bool isIndexThreadConstant(Value *idx) {
159-
return false;
160-
}
161-
139+
/*
140+
Rules regarding coalescing:
141+
- if the index is a constant for all threads in a warp, it cannot be coalesced
142+
- if the index is a constant for one thread but contiguous across a warp, it can be coalesced
143+
- if the index is a loop induction variable, it can be coalesced
162144
145+
Other memory accesses will be ignored for now
146+
*/
163147
bool NVPTXMemOpts::isCallCoalescable(LoadInst *LI, std::vector<IndexType> &indexValues) {
164-
// Check if the call is already coalesced
165-
// We can do this by seeing if the call is already a load from shared memory
166-
// If it is, we can skip this call
167148
auto GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
168149
assert(GEP && "GEP is null");
169150
auto ptr = GEP->getPointerOperand();
170151
auto ptrGEP = dyn_cast<GetElementPtrInst>(ptr);
171152
assert(!ptrGEP && "Nested GEP not supported");
172153

173-
// check if the loaded float is being used by a store into shared memory (addressspace 3)
174-
// if it is, we can skip this call
175-
176-
// check if the gep is loading from global memory
154+
// We only consider loads from global memory. Filters out already coalesced loads
177155
if (GEP->getPointerOperand()->getType()->getPointerAddressSpace() != 1) {
178156
return false;
179157
}
180158

159+
// If the loaded value is stored to shared memory, we do not coalesce it
160+
// It is probably already coalesced
181161
auto storeInst = dyn_cast<StoreInst>(LI->user_back());
182162
if (storeInst && storeInst->getPointerAddressSpace() == 3) {
183163
return false;
184164
}
185165

186-
// TODO:: otherwise, for now, we will assume that the call is coalescable
166+
// TODO:: there will be other considerations
167+
// otherwise, we assume the call is coalescable
187168
return true;
188169
}
189170

190-
/*
191-
This function will coalesce memory calls.
192-
Example:
193-
194-
%0 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
195-
%1 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
196-
%mul = mul i32 %0, %1
197-
%2 = call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x()
198-
%add = add i32 %mul, %2
199-
%3 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
200-
%4 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
201-
%mul5 = mul i32 %3, %4
202-
%5 = call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y()
203-
%add7 = add i32 %mul5, %5
204-
%idxprom = sext i32 %add7 to i64
205-
%arrayidx = getelementptr inbounds ptr, ptr %A2, i64 %idxprom
206-
%6 = load ptr, ptr %arrayidx, align 8
207-
%idxprom8 = sext i32 %i.0 to i64
208-
%arrayidx9 = getelementptr inbounds float, ptr %6, i64 %idxprom8
209-
%7 = load float, ptr %arrayidx9, align 4
210-
211-
Will give the following parameters:
212-
LI = %6
213-
indexValues = { %add7, %add5, %add, %mul, %add7, %add, %mul }
214-
215-
Will be coalesced to:
216-
217-
218-
219-
220-
*/
221-
222-
/*
223-
Rules regarding coalescing:
224-
- if the index is a constant for all threads in a warp, it cannot be coalesced
225-
- if the index is a constant for one thread but contiguous across a warp, it can be coalesced
226-
- if the index is a loop induction variable, it can be coalesced
227-
228-
Other memory accesses will be ignored for now
229-
*/
230171
void NVPTXMemOpts::CoalesceMemCalls(LoadInst *LI, std::vector<IndexType> &indexValues) {
231172
assert (LI && "LI is null");
232173
assert (indexValues.size() > 0 && "indexValues is empty");
@@ -278,7 +219,6 @@ void NVPTXMemOpts::CoalesceMemCalls(LoadInst *LI, std::vector<IndexType> &indexV
278219
Builder.SetInsertPoint(LI);
279220
auto SharedLoad = Builder.CreateLoad(GEP->getSourceElementType(), SharedGEP);
280221
LI->replaceAllUsesWith(SharedLoad);
281-
// LI->eraseFromParent();
282222

283223
}
284224

@@ -299,14 +239,9 @@ bool NVPTXMemOpts::runOnFunction(Function &F) {
299239
toDelete.push_back(LI);
300240
}
301241
}
302-
if (auto *SI = dyn_cast<StoreInst>(&*I)) {
303-
if (isStoringToArray(SI) > 0) {
304-
errs() << "Found a store instruction: " << *SI << "\n";
305-
}
306-
}
307242
}
308243
for (auto LI : toDelete) {
309-
// asser that the LI has no uses
244+
// assert that the LI has no uses
310245
assert(LI->use_empty());
311246
LI->eraseFromParent();
312247
}

0 commit comments

Comments
 (0)