@@ -1872,6 +1872,23 @@ static SDValue matchZExtFromI32(SDValue Op) {
1872
1872
return (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
1873
1873
}
1874
1874
1875
+ // If this matches *_extend i32:x, return x
1876
+ // Otherwise if the value is I32 returns x.
1877
+ static SDValue matchExtFromI32orI32 (SDValue Op, bool IsSigned,
1878
+ const SelectionDAG *DAG) {
1879
+ if (Op.getValueType () == MVT::i32 )
1880
+ return Op;
1881
+
1882
+ if (Op.getOpcode () != (IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND) &&
1883
+ Op.getOpcode () != ISD::ANY_EXTEND &&
1884
+ !(DAG->SignBitIsZero (Op) &&
1885
+ Op.getOpcode () == (IsSigned ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND)))
1886
+ return SDValue ();
1887
+
1888
+ SDValue ExtSrc = Op.getOperand (0 );
1889
+ return (ExtSrc.getValueType () == MVT::i32 ) ? ExtSrc : SDValue ();
1890
+ }
1891
+
1875
1892
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1876
1893
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr (SDNode *N,
1877
1894
SDValue Addr,
@@ -2159,17 +2176,59 @@ bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
2159
2176
return true ;
2160
2177
}
2161
2178
2179
+ // Given \p Offset and load node \p N check if an \p Offset is a multiple of
2180
+ // the load byte size. If it is update \p Offset to a pre-scaled value and
2181
+ // return true.
2182
+ bool AMDGPUDAGToDAGISel::SelectScaleOffset (SDNode *N, SDValue &Offset,
2183
+ bool IsSigned) const {
2184
+ bool ScaleOffset = false ;
2185
+ if (!Subtarget->hasScaleOffset () || !Offset)
2186
+ return false ;
2187
+
2188
+ unsigned Size =
2189
+ (unsigned )cast<MemSDNode>(N)->getMemoryVT ().getFixedSizeInBits () / 8 ;
2190
+
2191
+ SDValue Off = Offset;
2192
+ if (SDValue Ext = matchExtFromI32orI32 (Offset, IsSigned, CurDAG))
2193
+ Off = Ext;
2194
+
2195
+ if (isPowerOf2_32 (Size) && Off.getOpcode () == ISD::SHL) {
2196
+ if (auto *C = dyn_cast<ConstantSDNode>(Off.getOperand (1 )))
2197
+ ScaleOffset = C->getZExtValue () == Log2_32 (Size);
2198
+ } else if (Offset.getOpcode () == ISD::MUL ||
2199
+ (IsSigned && Offset.getOpcode () == AMDGPUISD::MUL_I24) ||
2200
+ Offset.getOpcode () == AMDGPUISD::MUL_U24 ||
2201
+ (Offset.isMachineOpcode () &&
2202
+ Offset.getMachineOpcode () ==
2203
+ (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2204
+ : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2205
+ if (auto *C = dyn_cast<ConstantSDNode>(Offset.getOperand (1 )))
2206
+ ScaleOffset = C->getZExtValue () == Size;
2207
+ }
2208
+
2209
+ if (ScaleOffset)
2210
+ Offset = Off.getOperand (0 );
2211
+
2212
+ return ScaleOffset;
2213
+ }
2214
+
2162
2215
// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
2163
2216
// not null) offset. If Imm32Only is true, match only 32-bit immediate
2164
2217
// offsets available on CI.
2165
- bool AMDGPUDAGToDAGISel::SelectSMRDOffset (SDValue ByteOffsetNode,
2218
+ bool AMDGPUDAGToDAGISel::SelectSMRDOffset (SDNode *N, SDValue ByteOffsetNode,
2166
2219
SDValue *SOffset, SDValue *Offset,
2167
2220
bool Imm32Only, bool IsBuffer,
2168
- bool HasSOffset,
2169
- int64_t ImmOffset ) const {
2221
+ bool HasSOffset, int64_t ImmOffset,
2222
+ bool *ScaleOffset ) const {
2170
2223
assert ((!SOffset || !Offset) &&
2171
2224
" Cannot match both soffset and offset at the same time!" );
2172
2225
2226
+ if (ScaleOffset) {
2227
+ assert (N && SOffset);
2228
+
2229
+ *ScaleOffset = SelectScaleOffset (N, ByteOffsetNode, false /* IsSigned */ );
2230
+ }
2231
+
2173
2232
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
2174
2233
if (!C) {
2175
2234
if (!SOffset)
@@ -2254,24 +2313,25 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
2254
2313
// Match a base and an immediate (if Offset is not null) or an SGPR (if
2255
2314
// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
2256
2315
// true, match only 32-bit immediate offsets available on CI.
2257
- bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset (SDValue Addr, SDValue &SBase,
2258
- SDValue *SOffset, SDValue *Offset,
2259
- bool Imm32Only, bool IsBuffer,
2260
- bool HasSOffset,
2261
- int64_t ImmOffset) const {
2316
+ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset (SDNode *N, SDValue Addr,
2317
+ SDValue &SBase, SDValue *SOffset,
2318
+ SDValue *Offset, bool Imm32Only,
2319
+ bool IsBuffer, bool HasSOffset,
2320
+ int64_t ImmOffset,
2321
+ bool *ScaleOffset) const {
2262
2322
if (SOffset && Offset) {
2263
2323
assert (!Imm32Only && !IsBuffer);
2264
2324
SDValue B;
2265
2325
2266
- if (!SelectSMRDBaseOffset (Addr, B, nullptr , Offset, false , false , true ))
2326
+ if (!SelectSMRDBaseOffset (N, Addr, B, nullptr , Offset, false , false , true ))
2267
2327
return false ;
2268
2328
2269
2329
int64_t ImmOff = 0 ;
2270
2330
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
2271
2331
ImmOff = C->getSExtValue ();
2272
2332
2273
- return SelectSMRDBaseOffset (B, SBase, SOffset, nullptr , false , false , true ,
2274
- ImmOff);
2333
+ return SelectSMRDBaseOffset (N, B, SBase, SOffset, nullptr , false , false ,
2334
+ true , ImmOff, ScaleOffset );
2275
2335
}
2276
2336
2277
2337
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2291,23 +2351,25 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
2291
2351
if (!N0 || !N1)
2292
2352
return false ;
2293
2353
2294
- if (SelectSMRDOffset (N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2295
- ImmOffset)) {
2354
+ if (SelectSMRDOffset (N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2355
+ ImmOffset, ScaleOffset )) {
2296
2356
SBase = N0;
2297
2357
return true ;
2298
2358
}
2299
- if (SelectSMRDOffset (N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2300
- ImmOffset)) {
2359
+ if (SelectSMRDOffset (N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2360
+ ImmOffset, ScaleOffset )) {
2301
2361
SBase = N1;
2302
2362
return true ;
2303
2363
}
2304
2364
return false ;
2305
2365
}
2306
2366
2307
- bool AMDGPUDAGToDAGISel::SelectSMRD (SDValue Addr, SDValue &SBase,
2367
+ bool AMDGPUDAGToDAGISel::SelectSMRD (SDNode *N, SDValue Addr, SDValue &SBase,
2308
2368
SDValue *SOffset, SDValue *Offset,
2309
- bool Imm32Only) const {
2310
- if (SelectSMRDBaseOffset (Addr, SBase, SOffset, Offset, Imm32Only)) {
2369
+ bool Imm32Only, bool *ScaleOffset) const {
2370
+ if (SelectSMRDBaseOffset (N, Addr, SBase, SOffset, Offset, Imm32Only,
2371
+ /* IsBuffer */ false , /* HasSOffset */ false ,
2372
+ /* ImmOffset */ 0 , ScaleOffset)) {
2311
2373
SBase = Expand32BitAddress (SBase);
2312
2374
return true ;
2313
2375
}
@@ -2323,36 +2385,51 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2323
2385
2324
2386
bool AMDGPUDAGToDAGISel::SelectSMRDImm (SDValue Addr, SDValue &SBase,
2325
2387
SDValue &Offset) const {
2326
- return SelectSMRD (Addr, SBase, /* SOffset */ nullptr , &Offset);
2388
+ return SelectSMRD (/* N */ nullptr , Addr, SBase, /* SOffset */ nullptr ,
2389
+ &Offset);
2327
2390
}
2328
2391
2329
2392
bool AMDGPUDAGToDAGISel::SelectSMRDImm32 (SDValue Addr, SDValue &SBase,
2330
2393
SDValue &Offset) const {
2331
2394
assert (Subtarget->getGeneration () == AMDGPUSubtarget::SEA_ISLANDS);
2332
- return SelectSMRD (Addr, SBase, /* SOffset */ nullptr , &Offset ,
2333
- /* Imm32Only */ true );
2395
+ return SelectSMRD (/* N */ nullptr , Addr, SBase, /* SOffset */ nullptr ,
2396
+ &Offset, /* Imm32Only */ true );
2334
2397
}
2335
2398
2336
- bool AMDGPUDAGToDAGISel::SelectSMRDSgpr (SDValue Addr, SDValue &SBase,
2337
- SDValue &SOffset) const {
2338
- return SelectSMRD (Addr, SBase, &SOffset, /* Offset */ nullptr );
2399
+ bool AMDGPUDAGToDAGISel::SelectSMRDSgpr (SDNode *N, SDValue Addr, SDValue &SBase,
2400
+ SDValue &SOffset, SDValue &CPol) const {
2401
+ bool ScaleOffset;
2402
+ if (!SelectSMRD (N, Addr, SBase, &SOffset, /* Offset */ nullptr ,
2403
+ /* Imm32Only */ false , &ScaleOffset))
2404
+ return false ;
2405
+
2406
+ CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2407
+ SDLoc (N), MVT::i32 );
2408
+ return true ;
2339
2409
}
2340
2410
2341
- bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm (SDValue Addr, SDValue &SBase,
2342
- SDValue &SOffset,
2343
- SDValue &Offset) const {
2344
- return SelectSMRD (Addr, SBase, &SOffset, &Offset);
2411
+ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm (SDNode *N, SDValue Addr,
2412
+ SDValue &SBase, SDValue &SOffset,
2413
+ SDValue &Offset,
2414
+ SDValue &CPol) const {
2415
+ bool ScaleOffset;
2416
+ if (!SelectSMRD (N, Addr, SBase, &SOffset, &Offset, false , &ScaleOffset))
2417
+ return false ;
2418
+
2419
+ CPol = CurDAG->getTargetConstant (ScaleOffset ? AMDGPU::CPol::SCAL : 0 ,
2420
+ SDLoc (N), MVT::i32 );
2421
+ return true ;
2345
2422
}
2346
2423
2347
2424
// Match a buffer SMRD offset \p N as an immediate (\p Offset). No memory node
// and no SGPR offset output are supplied, and IsBuffer is set.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
  SDNode *const NoMemNode = nullptr;
  SDValue *const NoSOffset = nullptr;
  const bool Imm32Only = false;
  const bool IsBuffer = true;
  return SelectSMRDOffset(NoMemNode, N, NoSOffset, &Offset, Imm32Only,
                          IsBuffer);
}
2351
2428
2352
2429
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32 (SDValue N,
2353
2430
SDValue &Offset) const {
2354
2431
assert (Subtarget->getGeneration () == AMDGPUSubtarget::SEA_ISLANDS);
2355
- return SelectSMRDOffset (N, /* SOffset */ nullptr , &Offset,
2432
+ return SelectSMRDOffset (/* N */ nullptr , N, /* SOffset */ nullptr , &Offset,
2356
2433
/* Imm32Only */ true , /* IsBuffer */ true );
2357
2434
}
2358
2435
@@ -2361,9 +2438,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2361
2438
// Match the (soffset + offset) pair as a 32-bit register base and
2362
2439
// an immediate offset.
2363
2440
return N.getValueType () == MVT::i32 &&
2364
- SelectSMRDBaseOffset (N, /* SBase */ SOffset, /* SOffset */ nullptr ,
2365
- &Offset, /* Imm32Only */ false ,
2366
- /* IsBuffer */ true );
2441
+ SelectSMRDBaseOffset (/* N */ nullptr , N, /* SBase */ SOffset ,
2442
+ /* SOffset */ nullptr , &Offset ,
2443
+ /* Imm32Only */ false , /* IsBuffer */ true );
2367
2444
}
2368
2445
2369
2446
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset (SDValue Index,
0 commit comments