Skip to content

Commit 7485f34

Browse files
authored
[X86] X86ISelDAGToDAG - don't let ADD/SUB(X,1) -> SUB/ADD(X,-1) constant fold (llvm#169217)
Extension to llvm#168726 - ensure we peek through bitcasts to look for constants (as constant folding will). DAG should have constant folded this, but we're still fighting the lack of proper topological sorting. Fixes llvm#169205.
1 parent 06fc87b commit 7485f34

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10041004
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
10051005
N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
10061006
APInt SplatVal;
1007-
if (!ISD::isBuildVectorOfConstantSDNodes(N->getOperand(0).getNode()) &&
1007+
if (!ISD::isBuildVectorOfConstantSDNodes(
1008+
peekThroughBitcasts(N->getOperand(0)).getNode()) &&
10081009
X86::isConstantSplat(N->getOperand(1), SplatVal) &&
10091010
SplatVal.isOne()) {
10101011
SDLoc DL(N);

llvm/test/CodeGen/X86/pr169205.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
4+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
5+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
6+
7+
define <4 x i16> @PR169205() {
8+
; SSE-LABEL: PR169205:
9+
; SSE: # %bb.0:
10+
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1,u,u,u,u]
11+
; SSE-NEXT: retq
12+
;
13+
; AVX-LABEL: PR169205:
14+
; AVX: # %bb.0:
15+
; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
16+
; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17+
; AVX-NEXT: retq
18+
%avg = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer)
19+
%shuffle24 = shufflevector <16 x i8> %avg, <16 x i8> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 9, i32 9>
20+
%conv25 = zext <4 x i8> %shuffle24 to <4 x i16>
21+
%not.neg = add <4 x i16> %conv25, splat (i16 1)
22+
ret <4 x i16> %not.neg
23+
}

0 commit comments

Comments
 (0)