Skip to content

Commit 6fad44f

Browse files
vsemenov368 authored and pszymich committed
Revert back vector decomposer behavior in VC
. (cherry picked from commit 7901165)
1 parent 69abf4b commit 6fad44f

File tree

5 files changed

+53
-104
lines changed

5 files changed

+53
-104
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPostLegalization.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2022 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -100,7 +100,7 @@ bool GenXPostLegalization::runOnFunction(Function &F)
100100
.getTM<GenXTargetMachine>()
101101
.getGenXSubtarget();
102102

103-
VectorDecomposer VD;
103+
VectorDecomposer VD(ST);
104104

105105
bool Modified = false;
106106
Modified |= vc::breakConstantExprs(&F, vc::LegalizationStage::Legalized);

IGC/VectorCompiler/lib/GenXCodeGen/GenXVectorDecomposer.cpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2022 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -167,15 +167,15 @@ bool VectorDecomposer::determineDecomposition(Instruction *Inst) {
167167
NotDecomposingReportInst = Inst;
168168
Web.clear();
169169
Decomposition.clear();
170-
unsigned ChunkWidth = genx::ByteBits * ChunkByteSize;
171-
unsigned NumChunks =
172-
alignTo(DL->getTypeSizeInBits(Inst->getType()), ChunkWidth) / ChunkWidth;
173-
if (NumChunks == 1)
174-
return false; // Ignore single chunk vector.
170+
unsigned GRFWidth = genx::ByteBits * GRFByteSize;
171+
unsigned NumGrfs =
172+
alignTo(DL->getTypeSizeInBits(Inst->getType()), GRFWidth) / GRFWidth;
173+
if (NumGrfs == 1)
174+
return false; // Ignore single GRF vector.
175175
LLVM_DEBUG(dbgs() << "VectorDecomposer::determineDecomposition(" << *Inst
176-
<< " NumChunks: " << NumChunks << ")\n");
176+
<< ")\n");
177177
NotDecomposing = false;
178-
for (unsigned i = 0; i != NumChunks; ++i)
178+
for (unsigned i = 0; i != NumGrfs; ++i)
179179
Decomposition.push_back(i);
180180
addToWeb(Inst);
181181
for (unsigned Idx = 0; Idx != Web.size(); ++Idx) {
@@ -263,7 +263,7 @@ bool VectorDecomposer::determineDecomposition(Instruction *Inst) {
263263
//
264264
// Change Decomposition[] so the indices used are contiguous, changing the
265265
// example above to { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3 }, and create the Offsets[]
266-
// array to translate a value from Decomposition[] into the chunk offset, so
266+
// array to translate a value from Decomposition[] into the GRF offset, so
267267
// for this example { 0, 2, 4, 8 }.
268268
Offsets.clear();
269269
for (unsigned Last = UINT_MAX, i = 0, e = Decomposition.size(); i != e; ++i) {
@@ -349,15 +349,15 @@ void VectorDecomposer::adjustDecomposition(Instruction *Inst) {
349349
Last = (R.NumElements / R.Width - 1) * R.VStride;
350350
Last += (R.Width - 1) * R.Stride;
351351
Last = R.Offset + Last * R.ElementBytes;
352-
// Compute the chunk number of the first and last byte of the region.
353-
unsigned First = R.Offset / ChunkByteSize;
354-
Last /= ChunkByteSize;
352+
// Compute the GRF number of the first and last byte of the region.
353+
unsigned First = R.Offset / GRFByteSize;
354+
Last /= GRFByteSize;
355355
if ((First >= Decomposition.size()) || (Last >= Decomposition.size())) {
356356
setNotDecomposing(Inst, "out-of-bounds");
357357
return; // don't attempt to decompose out-of-bounds accesses
358358
}
359359
if (First != Last) {
360-
// This region spans more than one chunk. Ensure they are all in the same
360+
// This region spans more than one GRF. Ensure they are all in the same
361361
// decomposed vector.
362362
for (unsigned i = Last + 1;
363363
i != Decomposition.size() && Decomposition[i] == Decomposition[Last];
@@ -705,28 +705,28 @@ void VectorDecomposer::decomposeBitCast(Instruction *Inst,
705705
* VectorDecomposer::getPartIndex : get the part index for the region
706706
*/
707707
unsigned VectorDecomposer::getPartIndex(vc::Region *R) {
708-
return Decomposition[R->Offset / ChunkByteSize];
708+
return Decomposition[R->Offset / GRFByteSize];
709709
}
710710

711711
/***********************************************************************
712712
* VectorDecomposer::getPartOffset : get the byte offset of a part
713713
*/
714714
unsigned VectorDecomposer::getPartOffset(unsigned PartIndex) {
715-
// Offsets[] has the index in chunks.
716-
return Offsets[PartIndex] * ChunkByteSize;
715+
// Offsets[] has the index in GRFs.
716+
return Offsets[PartIndex] * GRFByteSize;
717717
}
718718

719719
/***********************************************************************
720720
* VectorDecomposer::getPartNumBytes : get the size of a part in bytes
721721
*/
722722
unsigned VectorDecomposer::getPartNumBytes(Type *WholeTy, unsigned PartIndex) {
723723
if (PartIndex + 1 != Offsets.size()) {
724-
// Not the last part. We can use the offset (in chunks) difference.
725-
return ChunkByteSize * (Offsets[PartIndex + 1] - Offsets[PartIndex]);
724+
// Not the last part. We can use the offset (in GRFs) difference.
725+
return GRFByteSize * (Offsets[PartIndex + 1] - Offsets[PartIndex]);
726726
}
727727
// For the last part, we need to get the total size from WholeTy.
728728
return DL->getTypeSizeInBits(WholeTy) / genx::ByteBits -
729-
ChunkByteSize * Offsets[PartIndex];
729+
GRFByteSize * Offsets[PartIndex];
730730
}
731731

732732
/***********************************************************************

IGC/VectorCompiler/lib/GenXCodeGen/GenXVectorDecomposer.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2022 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -68,7 +68,10 @@ class VectorDecomposer {
6868
using Value = llvm::Value;
6969
using VectorType = llvm::VectorType;
7070

71+
using GenXSubtarget = llvm::GenXSubtarget;
72+
7173
const DataLayout *DL = nullptr;
74+
const GenXSubtarget *ST;
7275

7376
llvm::SmallVector<Instruction *, 16> StartWrRegions;
7477
std::set<Instruction *> Seen;
@@ -81,9 +84,11 @@ class VectorDecomposer {
8184
std::map<PHINode *, llvm::SmallVector<Value *, 8>> PhiParts;
8285
llvm::SmallVector<Instruction *, 8> NewInsts;
8386
unsigned DecomposedCount = 0;
84-
static constexpr unsigned ChunkByteSize = 32;
87+
const unsigned GRFByteSize;
8588

8689
public:
90+
explicit VectorDecomposer(const GenXSubtarget *ST)
91+
: ST(ST), GRFByteSize(ST ? ST->getGRFByteSize() : llvm::genx::defaultGRFByteSize) {}
8792

8893
// clear : clear anything stored
8994
void clear() {

IGC/VectorCompiler/test/PostLegalization/decomp.ll

Lines changed: 0 additions & 69 deletions
This file was deleted.

IGC/VectorCompiler/test/PostLegalization/wrregion.ll

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66
;
77
;============================ end_copyright_notice =============================
88

9-
; RUN: %opt %use_old_pass_manager% -GenXPostLegalization -march=genx64 -mcpu=XeHPC -mtriple=spir64 -S < %s | FileCheck %s
9+
; RUN: %opt %use_old_pass_manager% -GenXPostLegalization -march=genx64 -mcpu=XeHPC -mtriple=spir64 -S < %s | FileCheck %s --check-prefix=XeHPC
10+
; RUN: %opt %use_old_pass_manager% -GenXPostLegalization -march=genx64 -mcpu=XeHPG -mtriple=spir64 -S < %s | FileCheck %s --check-prefix=XeHPG
1011

1112
declare <48 x i32> @llvm.genx.wrregioni.v48i32.v32i32.i16.i1(<48 x i32>, <32 x i32>, i32, i32, i32, i16, i32, i1)
1213
declare <32 x i16> @llvm.genx.rdregioni.v32i16.v96i16.i16(<96 x i16>, i32, i32, i32, i16, i32)
1314

14-
; CHECK-LABEL: test1
15+
; XeHPC-LABEL: test1
16+
; XeHPG-LABEL: test1
1517
define <32 x i16> @test1(<32 x i32> %arg) {
16-
; CHECK-NEXT: call <40 x i32> @llvm.genx.wrregioni.v40i32.v32i32.i16.i1(<40 x i32> zeroinitializer, <32 x i32> %arg, i32 0, i32 32, i32 1, i16 32, i32 undef, i1 true)
18+
; XeHPC-NEXT: call <48 x i32> @llvm.genx.wrregioni.v48i32.v32i32.i16.i1(<48 x i32> zeroinitializer, <32 x i32> %arg, i32 0, i32 32, i32 1, i16 64, i32 undef, i1 true)
19+
; XeHPG-NEXT: call <40 x i32> @llvm.genx.wrregioni.v40i32.v32i32.i16.i1(<40 x i32> zeroinitializer, <32 x i32> %arg, i32 0, i32 32, i32 1, i16 32, i32 undef, i1 true)
1720
%1 = call <48 x i32> @llvm.genx.wrregioni.v48i32.v32i32.i16.i1(<48 x i32> zeroinitializer, <32 x i32> %arg, i32 0, i32 32, i32 1, i16 64, i32 undef, i1 true)
1821
%cast = bitcast <48 x i32> %1 to <192 x i8>
1922
%postcast = bitcast <192 x i8> %cast to <96 x i16>
@@ -29,16 +32,26 @@ declare <128 x float> @llvm.genx.dpas2.v128f32.v128f32.v128i32.v64i32(<128 x flo
2932
declare <256 x float> @llvm.genx.wrregionf.v256f32.v128f32.i16.i1(<256 x float>, <128 x float>, i32, i32, i32, i16, i32, i1)
3033
declare <64 x i64> @llvm.genx.rdregioni.v64i64.v128i64.i16(<128 x i64>, i32, i32, i32, i16, i32)
3134

32-
; CHECK-LABEL: test2
35+
; XeHPC-LABEL: test2
36+
; XeHPG-LABEL: test2
3337
define <64 x i64> @test2(<256 x i32> %src1, <128 x i32> %src2) {
34-
; CHECK-NEXT: %[[DPAS1_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 0, i32 undef)
35-
; CHECK-NEXT: %[[DPAS1_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 0, i32 undef)
36-
; CHECK-NEXT: %[[DPAS1_D:[^ ]+]] = call <128 x float> @llvm.genx.dpas.nosrc0.v128f32.v128i32.v64i32(<128 x i32> %[[DPAS1_1]], <64 x i32> %[[DPAS1_2]], i32 134744329)
37-
; CHECK-NEXT: %[[DPAS2_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 512, i32 undef)
38-
; CHECK-NEXT: %[[DPAS2_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 256, i32 undef)
39-
; CHECK-NEXT: tail call <128 x float> @llvm.genx.dpas2.v128f32.v128f32.v128i32.v64i32(<128 x float> zeroinitializer, <128 x i32> %[[DPAS2_1]], <64 x i32> %[[DPAS2_2]], i32 9, i32 9, i32 8, i32 8, i32 0, i32 0)
40-
; CHECK-NEXT: %[[RET:[^ ]+]] = bitcast <128 x float> %[[DPAS1_D]] to <64 x i64>
41-
; CHECK-NEXT: ret <64 x i64> %[[RET]]
38+
; XeHPC-NEXT: %[[DPAS1_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 0, i32 undef)
39+
; XeHPC-NEXT: %[[DPAS1_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 0, i32 undef)
40+
; XeHPC-NEXT: %[[DPAS1_D:[^ ]+]] = call <128 x float> @llvm.genx.dpas.nosrc0.v128f32.v128i32.v64i32(<128 x i32> %[[DPAS1_1]], <64 x i32> %[[DPAS1_2]], i32 134744329)
41+
; XeHPC-NEXT: %[[DPAS2_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 512, i32 undef)
42+
; XeHPC-NEXT: %[[DPAS2_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 256, i32 undef)
43+
; XeHPC-NEXT: tail call <128 x float> @llvm.genx.dpas2.v128f32.v128f32.v128i32.v64i32(<128 x float> zeroinitializer, <128 x i32> %[[DPAS2_1]], <64 x i32> %[[DPAS2_2]], i32 9, i32 9, i32 8, i32 8, i32 0, i32 0)
44+
; XeHPC-NEXT: %[[RET:[^ ]+]] = bitcast <128 x float> %[[DPAS1_D]] to <64 x i64>
45+
; XeHPC-NEXT: ret <64 x i64> %[[RET]]
46+
47+
; XeHPG-NEXT: %[[DPAS1_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 0, i32 undef)
48+
; XeHPG-NEXT: %[[DPAS1_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 0, i32 undef)
49+
; XeHPG-NEXT: %[[DPAS1_D:[^ ]+]] = call <128 x float> @llvm.genx.dpas.nosrc0.v128f32.v128i32.v64i32(<128 x i32> %[[DPAS1_1]], <64 x i32> %[[DPAS1_2]], i32 134744329)
50+
; XeHPG-NEXT: %[[DPAS2_1:[^ ]+]] = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 512, i32 undef)
51+
; XeHPG-NEXT: %[[DPAS2_2:[^ ]+]] = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 256, i32 undef)
52+
; XeHPG-NEXT: tail call <128 x float> @llvm.genx.dpas2.v128f32.v128f32.v128i32.v64i32(<128 x float> zeroinitializer, <128 x i32> %[[DPAS2_1]], <64 x i32> %[[DPAS2_2]], i32 9, i32 9, i32 8, i32 8, i32 0, i32 0)
53+
; XeHPG-NEXT: %[[RET:[^ ]+]] = bitcast <128 x float> %[[DPAS1_D]] to <64 x i64>
54+
; XeHPG-NEXT: ret <64 x i64> %[[RET]]
4255

4356
%1 = tail call <128 x i32> @llvm.genx.rdregioni.v128i32.v256i32.i16(<256 x i32> %src1, i32 0, i32 128, i32 1, i16 0, i32 undef)
4457
%2 = tail call <64 x i32> @llvm.genx.rdregioni.v64i32.v128i32.i16(<128 x i32> %src2, i32 0, i32 64, i32 1, i16 0, i32 undef)

0 commit comments

Comments
 (0)