Skip to content

Commit 109bc60

Browse files
committed
[X86] Remove extra MOV after widening atomic load
This change adds patterns to optimize out an extra MOV present after widening the atomic load. It also casts floats to ints in an atomic load during AtomicExpand to support 128-bit vectors in SSE/AVX. commit-id:45989503
1 parent 39f039e commit 109bc60

File tree

4 files changed

+102
-163
lines changed

4 files changed

+102
-163
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3207032070
}
3207132071
}
3207232072

32073+
TargetLowering::AtomicExpansionKind
32074+
X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
32075+
if (LI->getType()->getScalarType()->isFloatingPointTy())
32076+
return AtomicExpansionKind::CastToInteger;
32077+
return AtomicExpansionKind::None;
32078+
}
32079+
3207332080
LoadInst *
3207432081
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
3207532082
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1838,6 +1838,8 @@ namespace llvm {
18381838
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
18391839
TargetLoweringBase::AtomicExpansionKind
18401840
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1841+
TargetLoweringBase::AtomicExpansionKind
1842+
shouldCastAtomicLoadInIR(LoadInst *LI) const override;
18411843
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
18421844
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
18431845

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
12041204
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
12051205
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
12061206

1207+
def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))),
1208+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
1209+
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
1210+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
1211+
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
1212+
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
1213+
12071214
// Floating point loads/stores.
12081215
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12091216
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

0 commit comments

Comments
 (0)