@@ -69,7 +69,7 @@ std::string NVPTXMemOpts::NVVM_READ_SREG_INTRINSIC_NAME = "llvm.nvvm.read.ptx.sr
6969
7070// A common pattern to calculate the absolute index of a thread is:
7171// idx = tid + ctaid * ntid
72- // This function will check if the index is calculated in this way
72+ // This function will check if an index is calculated in this way
7373bool isAbsoluteThreadIndex (Value *idx) {
7474 auto sext = dyn_cast<SExtInst>(idx);
7575 if (!sext) { return false ; }
@@ -114,8 +114,9 @@ void getIndexValues(GetElementPtrInst *GEP, std::vector<NVPTXMemOpts::IndexType>
114114 return ;
115115}
116116
117- // Return dimension's indexes for an array load instruction
118- // return 0 if the value is not an array
117+ // This function will check if the load instruction is loading from an array
118+ // If it is, it will return the index value types used to access the array
119+ // If not, it will return an empty vector
119120std::vector<NVPTXMemOpts::IndexType> NVPTXMemOpts::isLoadingFromArray (LoadInst *LI) {
120121
121122 std::vector<NVPTXMemOpts::IndexType> indexValues;
@@ -130,103 +131,43 @@ std::vector<NVPTXMemOpts::IndexType> NVPTXMemOpts::isLoadingFromArray(LoadInst *
130131 // get index value. There should be exactly one
131132 auto idx = GEP->idx_begin ();
132133 assert (idx != GEP->idx_end () && " No index found" );
133-
134- // TODO:: if more than one index, it is probably coalesced already
135- // if (++idx != GEP->idx_end()) {
136- // return indexValues;
137- // }
138134
139135 getIndexValues (GEP, indexValues);
140136 return indexValues;
141137}
142138
143- int isStoringToArray (StoreInst *SI) {
144- assert (SI && " SI is null" );
145- auto GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand ());
146- if (!GEP) { return 0 ; }
147-
148- return 0 ;
149- }
150-
151- // Check if the index is a constant
152- bool isIndexConstant (Value *idx) {
153- return isa<ConstantInt>(idx);
154- }
155-
156- // Check if the index is a thread constant.
157- // ie. the thread id. this is not a constant for all threads in a warp
158- bool isIndexThreadConstant (Value *idx) {
159- return false ;
160- }
161-
139+ /*
140+ Rules regarding coalescing:
141+ - if the index is a constant for all threads in a warp, it cannot be coalesced
142+ - if the index is a constant for one thread but contiguous across a warp, it can be coalesced
143+ - if the index is a loop induction variable, it can be coalesced
162144
145+ Other memory accesses will be ignored for now
146+ */
163147bool NVPTXMemOpts::isCallCoalescable (LoadInst *LI, std::vector<IndexType> &indexValues) {
164- // Check if the call is already coalesced
165- // We can do this by seeing if the call is already a load from shared memory
166- // If it is, we can skip this call
167148 auto GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand ());
168149 assert (GEP && " GEP is null" );
169150 auto ptr = GEP->getPointerOperand ();
170151 auto ptrGEP = dyn_cast<GetElementPtrInst>(ptr);
171152 assert (!ptrGEP && " Nested GEP not supported" );
172153
173- // check if the loaded float is being used by a store into shared memory (addressspace 3)
174- // if it is, we can skip this call
175-
176- // check if the gep is loading from global memory
154+ // We only consider loads from global memory. Filters out already coalesced loads
177155 if (GEP->getPointerOperand ()->getType ()->getPointerAddressSpace () != 1 ) {
178156 return false ;
179157 }
180158
159+ // If the loaded value is stored to shared memory, skip this load:
160+ // it is probably already coalesced
181161 auto storeInst = dyn_cast<StoreInst>(LI->user_back ());
182162 if (storeInst && storeInst->getPointerAddressSpace () == 3 ) {
183163 return false ;
184164 }
185165
186- // TODO:: otherwise, for now, we will assume that the call is coalescable
166+ // TODO:: there will be other considerations
167+ // otherwise, we assume the call is coalescable
187168 return true ;
188169}
189170
190- /*
191- This function will coalesce memory calls.
192- Example:
193-
194- %0 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
195- %1 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
196- %mul = mul i32 %0, %1
197- %2 = call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x()
198- %add = add i32 %mul, %2
199- %3 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
200- %4 = call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
201- %mul5 = mul i32 %3, %4
202- %5 = call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y()
203- %add7 = add i32 %mul5, %5
204- %idxprom = sext i32 %add7 to i64
205- %arrayidx = getelementptr inbounds ptr, ptr %A2, i64 %idxprom
206- %6 = load ptr, ptr %arrayidx, align 8
207- %idxprom8 = sext i32 %i.0 to i64
208- %arrayidx9 = getelementptr inbounds float, ptr %6, i64 %idxprom8
209- %7 = load float, ptr %arrayidx9, align 4
210-
211- Will give the following parameters:
212- LI = %6
213- indexValues = { %add7, %add5, %add, %mul, %add7, %add, %mul }
214-
215- Will be coalesced to:
216-
217-
218-
219-
220- */
221-
222- /*
223- Rules regarding coalescing:
224- - if the index is a constant for all threads in a warp, it cannot be coalesced
225- - if the index is a constant for one thread but contiguous across a warp, it can be coalesced
226- - if the index is a loop induction variable, it can be coalesced
227-
228- Other memory accesses will be ignored for now
229- */
230171void NVPTXMemOpts::CoalesceMemCalls (LoadInst *LI, std::vector<IndexType> &indexValues) {
231172 assert (LI && " LI is null" );
232173 assert (indexValues.size () > 0 && " indexValues is empty" );
@@ -278,7 +219,6 @@ void NVPTXMemOpts::CoalesceMemCalls(LoadInst *LI, std::vector<IndexType> &indexV
278219 Builder.SetInsertPoint (LI);
279220 auto SharedLoad = Builder.CreateLoad (GEP->getSourceElementType (), SharedGEP);
280221 LI->replaceAllUsesWith (SharedLoad);
281- // LI->eraseFromParent();
282222
283223}
284224
@@ -299,14 +239,9 @@ bool NVPTXMemOpts::runOnFunction(Function &F) {
299239 toDelete.push_back (LI);
300240 }
301241 }
302- if (auto *SI = dyn_cast<StoreInst>(&*I)) {
303- if (isStoringToArray (SI) > 0 ) {
304- errs () << " Found a store instruction: " << *SI << " \n " ;
305- }
306- }
307242 }
308243 for (auto LI : toDelete) {
309- // asser that the LI has no uses
244+ // assert that the LI has no uses
310245 assert (LI->use_empty ());
311246 LI->eraseFromParent ();
312247 }
0 commit comments