Skip to content

Commit d33224e

Browse files
esukhov authored and igcbot committed
IGCVectorizer only supports floats; an explicit check has been added
IGCVectorizer vector emission only supports floats, so anything that is not a float is not added to the vectorization tree. This check is now explicit.
1 parent 2f124cb commit d33224e

File tree

2 files changed

+90
-1
lines changed

2 files changed

+90
-1
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ bool isBinarySafe(Instruction *I) {
198198

199199
bool Result = false;
200200
auto* Binary = llvm::dyn_cast<BinaryOperator>(I);
201+
201202
if (Binary) {
202203
auto OpCode = Binary->getOpcode();
203204
Result |= OpCode == Instruction::FMul;
@@ -214,7 +215,22 @@ bool isPHISafe(Instruction *I) {
214215
return false;
215216
}
216217

218+
219+
// Reports whether the value produced by instruction I is float-typed:
// either its type is an llvm::FixedVectorType whose element type satisfies
// isFloatTy(), or the type itself satisfies isFloatTy(). Returns false for
// every other type. isSafeToVectorize() ANDs this into its final result.
// NOTE(review): in LLVM, isFloatTy() presumably matches only the 32-bit
// float type, so half/double (scalar or vector) would be rejected — confirm.
bool isFloatTyped(Instruction* I) {
220+
221+
// dyn_cast yields null when I's type is not a fixed-width vector.
const auto* fixedVecType = llvm::dyn_cast<llvm::FixedVectorType>(I->getType());
222+
if (fixedVecType) {
223+
if (fixedVecType->getElementType()->isFloatTy())
224+
return true;
225+
}
226+
227+
// Reached for non-vector types and for fixed vectors whose element type
// failed the check above; a vector type itself is not a float type.
return I->getType()->isFloatTy();
228+
}
229+
217230
bool isSafeToVectorize(Instruction *I) {
231+
232+
bool isFloat = isFloatTyped(I);
233+
218234
// this is a very limited approach for vectorizing but it's safe
219235
bool Result =
220236
isPHISafe(I) ||
@@ -223,7 +239,7 @@ bool isSafeToVectorize(Instruction *I) {
223239
(llvm::isa<FPTruncInst>(I) && IGC_GET_FLAG_VALUE(VectorizerAllowFPTRUNC)) ||
224240
isBinarySafe(I);
225241

226-
return Result;
242+
return Result && isFloat;
227243
}
228244

229245
bool IGCVectorizer::handlePHI(VecArr &Slice) {
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; UNSUPPORTED: system-windows
2+
3+
; RUN: igc_opt -S --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
4+
; CHECK-NOT: %vectorized_phi
5+
6+
; ModuleID = 'reduced.ll'
7+
source_filename = "initial_test.ll"
8+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
9+
target triple = "spir64-unknown-unknown"
10+
11+
; Function Attrs: convergent nounwind
12+
define spir_kernel void @_attn_fwd() #0 {
13+
br label %._crit_edge
14+
15+
._crit_edge: ; preds = %._crit_edge, %0
16+
%1 = phi half [ 0.000000e+00, %0 ], [ %35, %._crit_edge ]
17+
%2 = phi half [ 0.000000e+00, %0 ], [ %36, %._crit_edge ]
18+
%3 = phi half [ 0.000000e+00, %0 ], [ %37, %._crit_edge ]
19+
%4 = phi half [ 0.000000e+00, %0 ], [ %38, %._crit_edge ]
20+
%5 = phi half [ 0.000000e+00, %0 ], [ %39, %._crit_edge ]
21+
%6 = phi half [ 0.000000e+00, %0 ], [ %40, %._crit_edge ]
22+
%7 = phi half [ 0.000000e+00, %0 ], [ %41, %._crit_edge ]
23+
%8 = phi half [ 0.000000e+00, %0 ], [ %42, %._crit_edge ]
24+
%9 = call half @llvm.exp2.f32(half 0.000000e+00)
25+
%10 = call half @llvm.exp2.f32(half 0.000000e+00)
26+
%11 = call half @llvm.exp2.f32(half 0.000000e+00)
27+
%12 = call half @llvm.exp2.f32(half 0.000000e+00)
28+
%13 = call half @llvm.exp2.f32(half 0.000000e+00)
29+
%14 = call half @llvm.exp2.f32(half 0.000000e+00)
30+
%15 = call half @llvm.exp2.f32(half 0.000000e+00)
31+
%16 = call half @llvm.exp2.f32(half 0.000000e+00)
32+
%17 = fmul fast half %9, %1
33+
%18 = fmul fast half %10, %2
34+
%19 = fmul fast half %11, %3
35+
%20 = fmul fast half %12, %4
36+
%21 = fmul fast half %13, %5
37+
%22 = fmul fast half %14, %6
38+
%23 = fmul fast half %15, %7
39+
%24 = fmul fast half %16, %8
40+
%25 = insertelement <8 x half> zeroinitializer, half %17, i64 0
41+
%26 = insertelement <8 x half> %25, half %18, i64 1
42+
%27 = insertelement <8 x half> %26, half %19, i64 2
43+
%28 = insertelement <8 x half> %27, half %20, i64 3
44+
%29 = insertelement <8 x half> %28, half %21, i64 4
45+
%30 = insertelement <8 x half> %29, half %22, i64 5
46+
%31 = insertelement <8 x half> %30, half %23, i64 6
47+
%32 = insertelement <8 x half> %31, half %24, i64 7
48+
%33 = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x half> %32, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
49+
%34 = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x half> %33, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
50+
%35 = extractelement <8 x half> %34, i64 0
51+
%36 = extractelement <8 x half> %34, i64 1
52+
%37 = extractelement <8 x half> %34, i64 2
53+
%38 = extractelement <8 x half> %34, i64 3
54+
%39 = extractelement <8 x half> %34, i64 4
55+
%40 = extractelement <8 x half> %34, i64 5
56+
%41 = extractelement <8 x half> %34, i64 6
57+
%42 = extractelement <8 x half> %34, i64 7
58+
br label %._crit_edge
59+
}
60+
61+
; Function Attrs: convergent nounwind readnone willreturn
62+
declare <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x half>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #1
63+
64+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
65+
declare half @llvm.exp2.f32(half) #2
66+
67+
; uselistorder directives
68+
uselistorder <8 x half> (<8 x half>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)* @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32, { 1, 0 }
69+
uselistorder half (half)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
70+
71+
attributes #0 = { convergent nounwind }
72+
attributes #1 = { convergent nounwind readnone willreturn }
73+
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)