diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll new file mode 100644 index 0000000000000..fe0dccf63af80 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/half.ll @@ -0,0 +1,2562 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=PPC32 +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=P8 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=SOFT +; RUN: llc -mtriple=powerpc64-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=BE + +; Tests for various operations on half precision float. Much of the test is +; copied from test/CodeGen/X86/half.ll. 
+ +define void @store(half %x, ptr %p) nounwind { +; PPC32-LABEL: store: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: store: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: store: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: store: +; SOFT: # %bb.0: +; SOFT-NEXT: sth r3, 0(r4) +; SOFT-NEXT: blr +; +; BE-LABEL: store: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + store half %x, ptr %p + ret void +} + +define half @return(ptr %p) nounwind { +; PPC32-LABEL: return: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: return: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz 
r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: return: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: return: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: return: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %r = load half, ptr %p + ret half %r +} + +define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; PPC32-LABEL: loadd: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 2(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: loadd: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: loadd: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 2(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: loadd: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: 
std r0, 128(r1) +; BE-NEXT: lhz r3, 2(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %0 = load i16, ptr %arrayidx, align 2 + %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) + ret double %1 +} + +declare double @llvm.convert.from.fp16.f64(i16) + +define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; PPC32-LABEL: loadf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 2(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: loadf: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: loadf: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 2(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: loadf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 2(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %0 = load i16, ptr %arrayidx, align 2 + %1 = tail call float 
@llvm.convert.from.fp16.f32(i16 %0) + ret float %1 +} + +declare float @llvm.convert.from.fp16.f32(i16) + +define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { +; PPC32-LABEL: stored: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: stored: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: stored: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +; +; SOFT-LABEL: stored: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: mr r3, r4 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: stored: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __truncdfhf2 
+; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) + store i16 %0, ptr %a, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16.f64(double) + +define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { +; PPC32-LABEL: storef: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: storef: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: storef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +; +; SOFT-LABEL: storef: +; SOFT: # %bb.0: # %entry +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: clrldi r3, r4, 32 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr 
r0 +; SOFT-NEXT: blr +; +; BE-LABEL: storef: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) + store i16 %0, ptr %a, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16.f32(float) +define void @test_load_store(ptr %in, ptr %out) nounwind { +; PPC32-LABEL: test_load_store: +; PPC32: # %bb.0: +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: sth r3, 0(r4) +; PPC32-NEXT: blr +; +; P8-LABEL: test_load_store: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: sth r3, 0(r4) +; P8-NEXT: blr +; +; CHECK-LABEL: test_load_store: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_load_store: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_load_store: +; BE: # %bb.0: +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: sth r3, 0(r4) +; BE-NEXT: blr + %val = load half, ptr %in + store half %val, ptr %out + ret void +} +define i16 @test_bitcast_from_half(ptr %addr) nounwind { +; PPC32-LABEL: test_bitcast_from_half: +; PPC32: # %bb.0: +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: blr +; +; P8-LABEL: test_bitcast_from_half: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: blr 
+; +; CHECK-LABEL: test_bitcast_from_half: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_from_half: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: test_bitcast_from_half: +; BE: # %bb.0: +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: blr + %val = load half, ptr %addr + %val_int = bitcast half %val to i16 + ret i16 %val_int +} +define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind { +; PPC32-LABEL: test_bitcast_to_half: +; PPC32: # %bb.0: +; PPC32-NEXT: sth r4, 0(r3) +; PPC32-NEXT: blr +; +; P8-LABEL: test_bitcast_to_half: +; P8: # %bb.0: +; P8-NEXT: sth r4, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: test_bitcast_to_half: +; CHECK: # %bb.0: +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_to_half: +; SOFT: # %bb.0: +; SOFT-NEXT: sth r4, 0(r3) +; SOFT-NEXT: blr +; +; BE-LABEL: test_bitcast_to_half: +; BE: # %bb.0: +; BE-NEXT: sth r4, 0(r3) +; BE-NEXT: blr + %val_fp = bitcast i16 %in to half + store half %val_fp, ptr %addr + ret void +} + + +; Checks for https://github.com/llvm/llvm-project/issues/97981 +define half @from_bits(i16 %x) nounwind { +; PPC32-LABEL: from_bits: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: from_bits: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: from_bits: +; CHECK: # %bb.0: +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: from_bits: +; SOFT: # %bb.0: +; SOFT-NEXT: blr +; +; BE-LABEL: 
from_bits: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %res = bitcast i16 %x to half + ret half %res +} + +define i16 @to_bits(half %x) nounwind { +; PPC32-LABEL: to_bits: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: to_bits: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: to_bits: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: blr +; +; SOFT-LABEL: to_bits: +; SOFT: # %bb.0: +; SOFT-NEXT: blr +; +; BE-LABEL: to_bits: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %res = bitcast half %x to i16 + ret i16 %res +} + +define float @test_extend32(ptr %addr) nounwind { +; PPC32-LABEL: test_extend32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) 
+; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to float + ret float %val32 +} +define double @test_extend64(ptr %addr) nounwind { +; PPC32-LABEL: test_extend64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; 
SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to double + ret double %val32 +} +define void @test_trunc32(float %in, ptr %addr) nounwind { +; PPC32-LABEL: test_trunc32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc32: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: 
ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = fptrunc float %in to half + store half %val16, ptr %addr + ret void +} +define void @test_trunc64(double %in, ptr %addr) nounwind { +; PPC32-LABEL: test_trunc64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl 
__truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %val16 = fptrunc double %in to half + store half %val16, ptr %addr + ret void +} +define i64 @test_fptosi_i64(ptr %p) nounwind { +; PPC32-LABEL: test_fptosi_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: bl __fixsfdi +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_fptosi_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xscvdpsxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptosi_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpsxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptosi_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixsfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; 
SOFT-NEXT: blr +; +; BE-LABEL: test_fptosi_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fctidz f0, f1 +; BE-NEXT: stfd f0, 120(r1) +; BE-NEXT: ld r3, 120(r1) +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load half, ptr %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} +define void @test_sitofp_i64(i64 %a, ptr %p) nounwind { +; PPC32-LABEL: test_sitofp_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r5 +; PPC32-NEXT: bl __floatdisf +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_sitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvsxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatdisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __truncsfhf2 +; 
SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_sitofp_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: sradi r5, r3, 53 +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: ble cr0, .LBB16_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: clrldi r4, r3, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r3, r4, r3 +; BE-NEXT: rldicr r3, r3, 0, 52 +; BE-NEXT: .LBB16_2: +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: lfd f0, 120(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f1, f0 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %r = sitofp i64 %a to half + store half %r, ptr %p + ret void +} +define i64 @test_fptoui_i64(ptr %p) nounwind { +; PPC32-LABEL: test_fptoui_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: bl __fixunssfdi +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_fptoui_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xscvdpuxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptoui_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; 
CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpuxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptoui_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixunssfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_fptoui_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; BE-NEXT: lfs f0, .LCPI17_0@toc@l(r3) +; BE-NEXT: fsubs f2, f1, f0 +; BE-NEXT: fcmpu cr0, f1, f0 +; BE-NEXT: fctidz f2, f2 +; BE-NEXT: stfd f2, 120(r1) +; BE-NEXT: fctidz f2, f1 +; BE-NEXT: stfd f2, 112(r1) +; BE-NEXT: blt cr0, .LBB17_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ld r3, 120(r1) +; BE-NEXT: li r4, 1 +; BE-NEXT: rldic r4, r4, 63, 0 +; BE-NEXT: xor r3, r3, r4 +; BE-NEXT: b .LBB17_3 +; BE-NEXT: .LBB17_2: +; BE-NEXT: ld r3, 112(r1) +; BE-NEXT: .LBB17_3: +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load half, ptr %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} +define void @test_uitofp_i64(i64 %a, ptr %p) nounwind { +; PPC32-LABEL: test_uitofp_i64: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r5 +; PPC32-NEXT: bl __floatundisf +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_uitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte 
Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvuxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_uitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvuxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_uitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatundisf +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_uitofp_i64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: sradi r5, r3, 53 +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r4 +; BE-NEXT: bgt cr0, .LBB18_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: mr r4, r3 +; BE-NEXT: b .LBB18_3 +; BE-NEXT: .LBB18_2: +; BE-NEXT: clrldi r4, r3, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r4, r4, r3 +; BE-NEXT: rldicr r4, r4, 0, 52 +; BE-NEXT: .LBB18_3: +; BE-NEXT: rldicl r5, r3, 10, 54 +; BE-NEXT: clrldi r6, r3, 63 +; BE-NEXT: std r4, 112(r1) +; BE-NEXT: addi r5, r5, 1 +; BE-NEXT: cmpldi r5, 1 +; BE-NEXT: rldicl r5, r3, 63, 1 +; BE-NEXT: or r4, r6, r5 +; BE-NEXT: ble cr0, .LBB18_5 +; BE-NEXT: # %bb.4: 
+; BE-NEXT: clrldi r4, r4, 53 +; BE-NEXT: addi r4, r4, 2047 +; BE-NEXT: or r4, r4, r5 +; BE-NEXT: rldicl r4, r4, 53, 11 +; BE-NEXT: rldicl r4, r4, 11, 1 +; BE-NEXT: .LBB18_5: +; BE-NEXT: cmpdi r3, 0 +; BE-NEXT: std r4, 120(r1) +; BE-NEXT: bc 12, lt, .LBB18_7 +; BE-NEXT: # %bb.6: +; BE-NEXT: lfd f0, 112(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f1, f0 +; BE-NEXT: b .LBB18_8 +; BE-NEXT: .LBB18_7: +; BE-NEXT: lfd f0, 120(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f0, f0 +; BE-NEXT: fadds f1, f0, f0 +; BE-NEXT: .LBB18_8: +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r3, 0(r30) +; BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %r = uitofp i64 %a to half + store half %r, ptr %p + ret void +} +define <4 x float> @test_extend32_vec4(ptr %p) nounwind { +; PPC32-LABEL: test_extend32_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -48(r1) +; PPC32-NEXT: stw r0, 52(r1) +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 2(r30) +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 4(r30) +; PPC32-NEXT: fmr f30, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 6(r30) +; PPC32-NEXT: fmr f29, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fmr f4, f1 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: fmr f2, f30 +; PPC32-NEXT: fmr f3, f29 +; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 52(r1) +; PPC32-NEXT: addi r1, r1, 48 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: 
blr +; +; P8-LABEL: test_extend32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: li r4, 48 +; P8-NEXT: std r0, 128(r1) +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 64 +; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 80 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: xxlor vs63, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: xxlor vs62, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: xxlor vs61, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: xxmrghd vs0, vs61, vs1 +; P8-NEXT: xxmrghd vs1, vs63, vs62 +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 64 +; P8-NEXT: xvcvdpsp vs34, vs0 +; P8-NEXT: xvcvdpsp vs35, vs1 +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload +; P8-NEXT: vmrgew v2, v3, v2 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r4, 6(r3) +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: lhz r4, 2(r3) +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: lhz r4, 4(r3) +; CHECK-NEXT: mtfprwz f2, r4 +; CHECK-NEXT: xscvhpdp f2, f2 +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-NEXT: mtfprwz f3, r3 +; CHECK-NEXT: xvcvdpsp vs35, vs0 +; CHECK-NEXT: xscvhpdp f3, f3 +; CHECK-NEXT: xxmrghd vs2, vs2, vs3 +; CHECK-NEXT: xvcvdpsp vs34, vs2 +; CHECK-NEXT: vmrgew v2, v3, v2 +; CHECK-NEXT: blr +; +; SOFT-LABEL: 
test_extend32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend32_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: lhz r3, 0(r3) +; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 2(r30) +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 4(r30) +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 6(r30) +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: stfs f29, 120(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stfs f30, 116(r1) +; BE-NEXT: stfs f31, 112(r1) +; 
BE-NEXT: stfs f1, 124(r1) +; BE-NEXT: lvx v2, 0, r3 +; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x float> + ret <4 x float> %b +} +define <4 x double> @test_extend64_vec4(ptr %p) nounwind { +; PPC32-LABEL: test_extend64_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -48(r1) +; PPC32-NEXT: stw r0, 52(r1) +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: stfd f29, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill +; PPC32-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 2(r30) +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 4(r30) +; PPC32-NEXT: fmr f30, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lhz r3, 6(r30) +; PPC32-NEXT: fmr f29, f1 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fmr f4, f1 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: fmr f2, f30 +; PPC32-NEXT: fmr f3, f29 +; PPC32-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 52(r1) +; PPC32-NEXT: addi r1, r1, 48 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_extend64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: li r4, 48 +; P8-NEXT: std r0, 128(r1) +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 64 +; P8-NEXT: stxvd2x 
vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 80 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: xxlor vs63, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: xxlor vs62, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: xxlor vs61, f1, f1 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: xxmrghd vs35, vs63, vs62 +; P8-NEXT: xxmrghd vs34, vs61, vs1 +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 64 +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r4, 6(r3) +; CHECK-NEXT: lhz r5, 4(r3) +; CHECK-NEXT: lhz r6, 2(r3) +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: mtfprwz f1, r6 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs34, vs1, vs0 +; CHECK-NEXT: mtfprwz f0, r5 +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs35, vs1, vs0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: 
bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_extend64_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: lhz r3, 6(r3) +; BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 4(r30) +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 2(r30) +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: lhz r3, 0(r30) +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fmr f2, f29 +; BE-NEXT: fmr f3, f30 +; BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f4, f31 +; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x double> + ret <4 x double> %b +} 
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind { +; PPC32-LABEL: test_trunc32_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -64(r1) +; PPC32-NEXT: stw r0, 68(r1) +; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f29, f2 +; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f30, f3 +; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f31, f4 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f29 +; PPC32-NEXT: mr r29, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f30 +; PPC32-NEXT: mr r28, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: mr r27, r3 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: sth r27, 4(r30) +; PPC32-NEXT: sth r28, 2(r30) +; PPC32-NEXT: sth r3, 6(r30) +; PPC32-NEXT: sth r29, 0(r30) +; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 68(r1) +; PPC32-NEXT: addi r1, r1, 64 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: xxsldwi vs0, vs34, vs34, 3 +; P8-NEXT: li r3, 48 +; P8-NEXT: std r0, 128(r1) +; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; 
P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: mr r30, r5 +; P8-NEXT: vmr v31, v2 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xxswapd vs0, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xxsldwi vs0, vs63, vs63, 1 +; P8-NEXT: mr r28, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: xscvspdpn f1, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 48 +; P8-NEXT: sth r27, 4(r30) +; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 2(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvspdpn f1, vs1 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvspdpn f1, vs34 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: sth r4, 4(r5) +; CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r3, 0(r5) +; CHECK-NEXT: sth r4, 2(r5) +; CHECK-NEXT: mffprwz r6, f1 +; CHECK-NEXT: sth r6, 6(r5) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte 
Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r6, 32 +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r5 +; SOFT-NEXT: mr r28, r4 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: clrldi r3, r29, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r28, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r27, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r28, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r29, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r26, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 4(r30) +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 2(r30) +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc32_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r27, 136(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 144(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 
152(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r5 +; BE-NEXT: stvx v2, 0, r3 +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r27, 4(r30) +; BE-NEXT: sth r28, 2(r30) +; BE-NEXT: sth r3, 6(r30) +; BE-NEXT: sth r29, 0(r30) +; BE-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 144(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 136(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %v = fptrunc <4 x float> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} +define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind { +; PPC32-LABEL: test_trunc64_vec4: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -64(r1) +; PPC32-NEXT: stw r0, 68(r1) +; PPC32-NEXT: stw r27, 20(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r28, 24(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r29, 28(r1) # 4-byte Folded Spill +; PPC32-NEXT: stw r30, 32(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f29, f2 +; PPC32-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f30, f3 +; PPC32-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f31, f4 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f29 +; PPC32-NEXT: mr r29, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f30 +; PPC32-NEXT: mr r28, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: fmr f1, f31 +; PPC32-NEXT: mr r27, r3 +; PPC32-NEXT: bl __truncdfhf2 +; PPC32-NEXT: sth r27, 4(r30) +; PPC32-NEXT: 
sth r28, 2(r30) +; PPC32-NEXT: sth r3, 6(r30) +; PPC32-NEXT: sth r29, 0(r30) +; PPC32-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload +; PPC32-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 32(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r29, 28(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r28, 24(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r27, 20(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 68(r1) +; PPC32-NEXT: addi r1, r1, 64 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_trunc64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: li r3, 48 +; P8-NEXT: std r0, 144(r1) +; P8-NEXT: xxswapd vs1, vs34 +; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r7 +; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 64 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxswapd vs1, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs62, vs62 +; P8-NEXT: mr r28, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs63, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 64 +; P8-NEXT: sth r27, 2(r30) +; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 4(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 128 +; P8-NEXT: ld 
r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs35 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, vs34 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvdphp f1, vs35 +; CHECK-NEXT: sth r3, 0(r7) +; CHECK-NEXT: sth r4, 2(r7) +; CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r4, 4(r7) +; CHECK-NEXT: mffprwz r5, f1 +; CHECK-NEXT: sth r5, 6(r7) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: mr r3, r6 +; SOFT-NEXT: std r0, 96(r1) +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r5 +; SOFT-NEXT: mr r28, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r28, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r29, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r26, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 
4(r30) +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 2(r30) +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr +; +; BE-LABEL: test_trunc64_vec4: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -176(r1) +; BE-NEXT: std r0, 192(r1) +; BE-NEXT: std r27, 112(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 120(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 128(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r7 +; BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f29, f2 +; BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f30, f3 +; BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f4 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __truncdfhf2 +; BE-NEXT: nop +; BE-NEXT: sth r27, 4(r30) +; BE-NEXT: sth r28, 2(r30) +; BE-NEXT: sth r3, 6(r30) +; BE-NEXT: sth r29, 0(r30) +; BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 128(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 120(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 112(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 176 +; BE-NEXT: ld r0, 
16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %v = fptrunc <4 x double> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} +define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { +; PPC32-LABEL: test_sitofp_fadd_i32: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -32(r1) +; PPC32-NEXT: stw r0, 36(r1) +; PPC32-NEXT: stw r30, 16(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: lhz r3, 0(r4) +; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lis r3, 17200 +; PPC32-NEXT: stw r3, 8(r1) +; PPC32-NEXT: xoris r3, r30, 32768 +; PPC32-NEXT: stw r3, 12(r1) +; PPC32-NEXT: lis r3, .LCPI23_0@ha +; PPC32-NEXT: fmr f31, f1 +; PPC32-NEXT: lfd f0, 8(r1) +; PPC32-NEXT: lfs f1, .LCPI23_0@l(r3) +; PPC32-NEXT: fsub f0, f0, f1 +; PPC32-NEXT: frsp f1, f0 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fadds f1, f31, f1 +; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r30, 16(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 36(r1) +; PPC32-NEXT: addi r1, r1, 32 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: test_sitofp_fadd_i32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill +; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -64(r1) +; P8-NEXT: std r0, 80(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 0(r4) +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: mtfprwa f0, r30 +; P8-NEXT: fmr f31, f1 +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xsaddsp f1, f31, f1 +; P8-NEXT: addi r1, r1, 64 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_fadd_i32: +; CHECK: # %bb.0: +; 
CHECK-NEXT: mtfprwa f1, r3 +; CHECK-NEXT: lhz r4, 0(r4) +; CHECK-NEXT: xscvsxdsp f1, f1 +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: mffprwz r3, f1 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f1, r3 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xsaddsp f1, f0, f1 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_fadd_i32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -64(r1) +; SOFT-NEXT: std r0, 80(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r4) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: extsw r3, r30 +; SOFT-NEXT: bl __floatsisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r4, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __addsf3 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 64 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: test_sitofp_fadd_i32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: std r30, 120(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: lhz r3, 0(r4) +; BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: extsw r3, r30 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: lfd f0, 112(r1) +; BE-NEXT: fcfid f0, f0 +; BE-NEXT: frsp f1, f0 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fadds f1, f31, f1 +; BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 120(r1) # 8-byte Folded Reload 
+; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %tmp0 = load half, ptr %b + %tmp1 = sitofp i32 %a to half + %tmp2 = fadd half %tmp0, %tmp1 + %tmp3 = fpext half %tmp2 to float + ret float %tmp3 +} +define half @PR40273(half) nounwind { +; PPC32-LABEL: PR40273: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: lis r3, .LCPI24_0@ha +; PPC32-NEXT: lfs f0, .LCPI24_0@l(r3) +; PPC32-NEXT: li r3, 0 +; PPC32-NEXT: fcmpu cr0, f1, f0 +; PPC32-NEXT: bc 12, eq, .LBB24_2 +; PPC32-NEXT: # %bb.1: +; PPC32-NEXT: li r3, 4 +; PPC32-NEXT: .LBB24_2: +; PPC32-NEXT: li r4, .LCPI24_1@l +; PPC32-NEXT: addis r4, r4, .LCPI24_1@ha +; PPC32-NEXT: lfsx f1, r4, r3 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: PR40273: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: fmr f0, f1 +; P8-NEXT: xxlxor f1, f1, f1 +; P8-NEXT: fcmpu cr0, f0, f1 +; P8-NEXT: beq cr0, .LBB24_2 +; P8-NEXT: # %bb.1: +; P8-NEXT: vspltisw v2, 1 +; P8-NEXT: xvcvsxwdp vs1, vs34 +; P8-NEXT: .LBB24_2: +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: PR40273: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: xxlxor f1, f1, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: fcmpu cr0, f0, f1 +; CHECK-NEXT: beqlr cr0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vspltisw v2, 1 +; CHECK-NEXT: xvcvsxwdp vs1, vs34 +; CHECK-NEXT: blr +; +; SOFT-LABEL: PR40273: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: stdu r1, -32(r1) +; 
SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: std r0, 48(r1) +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: li r4, 0 +; SOFT-NEXT: bl __nesf2 +; SOFT-NEXT: nop +; SOFT-NEXT: cmplwi r3, 0 +; SOFT-NEXT: lis r3, 16256 +; SOFT-NEXT: iseleq r3, 0, r3 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: PR40273: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; BE-NEXT: lfs f0, .LCPI24_0@toc@l(r3) +; BE-NEXT: li r3, 0 +; BE-NEXT: fcmpu cr0, f1, f0 +; BE-NEXT: bc 12, eq, .LBB24_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: li r3, 4 +; BE-NEXT: .LBB24_2: +; BE-NEXT: addis r4, r2, .LCPI24_1@toc@ha +; BE-NEXT: addi r4, r4, .LCPI24_1@toc@l +; BE-NEXT: lfsx f1, r4, r3 +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %2 = fcmp une half %0, 0xH0000 + %3 = uitofp i1 %2 to half + ret half %3 +} + +; Trivial operations shouldn't need a libcall + +define half @fabs(half %x) nounwind { +; PPC32-LABEL: fabs: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: fabs f1, f1 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: fabs: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: std r0, 48(r1) +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: xsabsdp f1, f1 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: fabs: +; CHECK: # %bb.0: +; 
CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xsabsdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: fabs: +; SOFT: # %bb.0: +; SOFT-NEXT: clrldi r3, r3, 49 +; SOFT-NEXT: blr +; +; BE-LABEL: fabs: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: fabs f1, f1 +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = call half @llvm.fabs.f16(half %x) + ret half %a +} + +define half @fcopysign(half %x, half %y) nounwind { +; PPC32-LABEL: fcopysign: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -32(r1) +; PPC32-NEXT: stw r0, 36(r1) +; PPC32-NEXT: stfd f31, 24(r1) # 8-byte Folded Spill +; PPC32-NEXT: fmr f31, f2 +; PPC32-NEXT: bl __truncsfhf2 +; PPC32-NEXT: clrlwi r3, r3, 16 +; PPC32-NEXT: bl __extendhfsf2 +; PPC32-NEXT: stfs f31, 20(r1) +; PPC32-NEXT: lwz r3, 20(r1) +; PPC32-NEXT: srwi r3, r3, 31 +; PPC32-NEXT: andi. 
r3, r3, 1 +; PPC32-NEXT: bc 12, gt, .LBB26_2 +; PPC32-NEXT: # %bb.1: +; PPC32-NEXT: fabs f1, f1 +; PPC32-NEXT: b .LBB26_3 +; PPC32-NEXT: .LBB26_2: +; PPC32-NEXT: fnabs f1, f1 +; PPC32-NEXT: .LBB26_3: +; PPC32-NEXT: lfd f31, 24(r1) # 8-byte Folded Reload +; PPC32-NEXT: lwz r0, 36(r1) +; PPC32-NEXT: addi r1, r1, 32 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; P8-LABEL: fcopysign: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: std r0, 64(r1) +; P8-NEXT: fmr f31, f2 +; P8-NEXT: bl __truncsfhf2 +; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 +; P8-NEXT: bl __extendhfsf2 +; P8-NEXT: nop +; P8-NEXT: fcpsgn f1, f31, f1 +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: fcopysign: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: fcpsgn f1, f2, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: fcopysign: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: std r0, 64(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __extendhfsf2 +; SOFT-NEXT: nop +; SOFT-NEXT: rlwimi r3, r30, 16, 0, 0 +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __truncsfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr +; +; BE-LABEL: fcopysign: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -128(r1) +; BE-NEXT: std r0, 144(r1) +; BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f2 +; BE-NEXT: bl __truncsfhf2 +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __extendhfsf2 +; BE-NEXT: nop +; BE-NEXT: stfs f31, 
116(r1) +; BE-NEXT: lwz r3, 116(r1) +; BE-NEXT: srwi r3, r3, 31 +; BE-NEXT: andi. r3, r3, 1 +; BE-NEXT: bc 12, gt, .LBB26_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: fabs f1, f1 +; BE-NEXT: b .LBB26_3 +; BE-NEXT: .LBB26_2: +; BE-NEXT: fnabs f1, f1 +; BE-NEXT: .LBB26_3: +; BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 128 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr + %a = call half @llvm.copysign.f16(half %x, half %y) + ret half %a +} diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll deleted file mode 100644 index 50f05cca80458..0000000000000 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ /dev/null @@ -1,1281 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ -; RUN: --check-prefix=P8 -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \ -; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ -; RUN: --check-prefix=SOFT - -; Tests for various operations on half precison float. Much of the test is -; copied from test/CodeGen/X86/half.ll. 
-define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0 { -; P8-LABEL: loadd: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: loadd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi r3, r3, 2 -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: loadd: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) - ret double %1 -} - -declare double @llvm.convert.from.fp16.f64(i16) - -define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 { -; P8-LABEL: loadf: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 2(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: loadf: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi r3, r3, 2 -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: loadf: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 2(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %arrayidx = 
getelementptr inbounds i16, ptr %a, i64 1 - %0 = load i16, ptr %arrayidx, align 2 - %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) - ret float %1 -} - -declare float @llvm.convert.from.fp16.f32(i16) - -define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0 { -; P8-LABEL: stored: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: stored: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r3 -; CHECK-NEXT: blr -; -; SOFT-LABEL: stored: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: mr r3, r4 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f64(double) - -define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0 { -; P8-LABEL: storef: -; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 
16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: storef: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r3 -; CHECK-NEXT: blr -; -; SOFT-LABEL: storef: -; SOFT: # %bb.0: # %entry -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: clrldi r3, r4, 32 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr -entry: - %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) - store i16 %0, ptr %a, align 2 - ret void -} - -declare i16 @llvm.convert.to.fp16.f32(float) -define void @test_load_store(ptr %in, ptr %out) #0 { -; P8-LABEL: test_load_store: -; P8: # %bb.0: -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: sth r3, 0(r4) -; P8-NEXT: blr -; -; CHECK-LABEL: test_load_store: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_load_store: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val = load half, ptr %in - store half %val, ptr %out - ret void -} -define i16 @test_bitcast_from_half(ptr %addr) #0 { -; P8-LABEL: test_bitcast_from_half: -; P8: # %bb.0: -; P8-NEXT: 
lhz r3, 0(r3) -; P8-NEXT: blr -; -; CHECK-LABEL: test_bitcast_from_half: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_bitcast_from_half: -; SOFT: # %bb.0: -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: blr - %val = load half, ptr %addr - %val_int = bitcast half %val to i16 - ret i16 %val_int -} -define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 { -; P8-LABEL: test_bitcast_to_half: -; P8: # %bb.0: -; P8-NEXT: sth r4, 0(r3) -; P8-NEXT: blr -; -; CHECK-LABEL: test_bitcast_to_half: -; CHECK: # %bb.0: -; CHECK-NEXT: sth r4, 0(r3) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_bitcast_to_half: -; SOFT: # %bb.0: -; SOFT-NEXT: sth r4, 0(r3) -; SOFT-NEXT: blr - %val_fp = bitcast i16 %in to half - store half %val_fp, ptr %addr - ret void -} -define float @test_extend32(ptr %addr) #0 { -; P8-LABEL: test_extend32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend32: -; CHECK: # %bb.0: -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = load half, ptr %addr - %val32 = fpext half %val16 to float - ret float %val32 -} -define double @test_extend64(ptr %addr) #0 { -; P8-LABEL: test_extend64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: 
test_extend64: -; CHECK: # %bb.0: -; CHECK-NEXT: lxsihzx f0, 0, r3 -; CHECK-NEXT: xscvhpdp f1, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = load half, ptr %addr - %val32 = fpext half %val16 to double - ret double %val32 -} -define void @test_trunc32(float %in, ptr %addr) #0 { -; P8-LABEL: test_trunc32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc32: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r4 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = fptrunc float %in to half - store half %val16, ptr %addr - ret void -} -define void @test_trunc64(double %in, ptr %addr) #0 { -; P8-LABEL: test_trunc64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte 
Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc64: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: stxsihx f0, 0, r4 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %val16 = fptrunc double %in to half - store half %val16, ptr %addr - ret void -} -define i64 @test_fptosi_i64(ptr %p) #0 { -; P8-LABEL: test_fptosi_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xscvdpsxds f0, f1 -; P8-NEXT: mffprd r3, f0 -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_fptosi_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdpsxds f0, f0 -; CHECK-NEXT: mffprd r3, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_fptosi_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __fixsfdi -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld 
r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %a = load half, ptr %p, align 2 - %r = fptosi half %a to i64 - ret i64 %r -} -define void @test_sitofp_i64(i64 %a, ptr %p) #0 { -; P8-LABEL: test_sitofp_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: mtfprd f0, r3 -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: xscvsxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_sitofp_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xscvsxdsp f0, f0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_sitofp_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __floatdisf -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %r = sitofp i64 %a to half - store half %r, ptr %p - ret void -} -define i64 @test_fptoui_i64(ptr %p) #0 { -; P8-LABEL: test_fptoui_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: lhz r3, 0(r3) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xscvdpuxds f0, f1 -; P8-NEXT: mffprd r3, f0 -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: 
test_fptoui_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdpuxds f0, f0 -; CHECK-NEXT: mffprd r3, f0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_fptoui_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __fixunssfdi -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %a = load half, ptr %p, align 2 - %r = fptoui half %a to i64 - ret i64 %r -} -define void @test_uitofp_i64(i64 %a, ptr %p) #0 { -; P8-LABEL: test_uitofp_i64: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -48(r1) -; P8-NEXT: mtfprd f0, r3 -; P8-NEXT: std r0, 64(r1) -; P8-NEXT: mr r30, r4 -; P8-NEXT: xscvuxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 0(r30) -; P8-NEXT: addi r1, r1, 48 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_uitofp_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: xscvuxdsp f0, f0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: sth r3, 0(r4) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_uitofp_i64: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -48(r1) -; SOFT-NEXT: std r0, 64(r1) -; SOFT-NEXT: mr r30, r4 -; SOFT-NEXT: bl __floatundisf -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 48 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: 
blr - %r = uitofp i64 %a to half - store half %r, ptr %p - ret void -} -define <4 x float> @test_extend32_vec4(ptr %p) #0 { -; P8-LABEL: test_extend32_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: li r4, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 6(r3) -; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 64 -; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 80 -; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 2(r30) -; P8-NEXT: xxlor vs63, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 4(r30) -; P8-NEXT: xxlor vs62, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 0(r30) -; P8-NEXT: xxlor vs61, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: li r3, 80 -; P8-NEXT: xxmrghd vs0, vs61, vs1 -; P8-NEXT: xxmrghd vs1, vs63, vs62 -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 64 -; P8-NEXT: xvcvdpsp vs34, vs0 -; P8-NEXT: xvcvdpsp vs35, vs1 -; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload -; P8-NEXT: vmrgew v2, v3, v2 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend32_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r4, 6(r3) -; CHECK-NEXT: mtfprwz f0, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: lhz r4, 2(r3) -; CHECK-NEXT: mtfprwz f1, r4 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: lhz r4, 4(r3) -; CHECK-NEXT: mtfprwz f2, r4 -; CHECK-NEXT: xscvhpdp f2, f2 -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: xxmrghd vs0, vs0, vs1 -; CHECK-NEXT: mtfprwz f3, r3 -; CHECK-NEXT: xvcvdpsp vs35, vs0 -; CHECK-NEXT: xscvhpdp f3, f3 -; CHECK-NEXT: xxmrghd vs2, vs2, 
vs3 -; CHECK-NEXT: xvcvdpsp vs34, vs2 -; CHECK-NEXT: vmrgew v2, v3, v2 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend32_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: lhz r3, 2(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: lhz r3, 4(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: lhz r3, 6(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r6, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: mr r4, r28 -; SOFT-NEXT: mr r5, r27 -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %a = load <4 x half>, ptr %p, align 8 - %b = fpext <4 x half> %a to <4 x float> - ret <4 x float> %b -} -define <4 x double> @test_extend64_vec4(ptr %p) #0 { -; P8-LABEL: test_extend64_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: li r4, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 6(r3) -; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 64 -; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill -; P8-NEXT: li r4, 80 -; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 4(r30) -; P8-NEXT: xxlor vs63, f1, f1 -; P8-NEXT: bl 
__extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 2(r30) -; P8-NEXT: xxlor vs62, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: lhz r3, 0(r30) -; P8-NEXT: xxlor vs61, f1, f1 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: li r3, 80 -; P8-NEXT: xxmrghd vs35, vs63, vs62 -; P8-NEXT: xxmrghd vs34, vs61, vs1 -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 64 -; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_extend64_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: lhz r4, 6(r3) -; CHECK-NEXT: lhz r5, 4(r3) -; CHECK-NEXT: lhz r6, 2(r3) -; CHECK-NEXT: lhz r3, 0(r3) -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: mtfprwz f1, r6 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xxmrghd vs34, vs1, vs0 -; CHECK-NEXT: mtfprwz f0, r5 -; CHECK-NEXT: mtfprwz f1, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xxmrghd vs35, vs1, vs0 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_extend64_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r3) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: lhz r3, 2(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: lhz r3, 4(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: 
nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: lhz r3, 6(r30) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __extendsfdf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r6, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: mr r4, r28 -; SOFT-NEXT: mr r5, r27 -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %a = load <4 x half>, ptr %p, align 8 - %b = fpext <4 x half> %a to <4 x double> - ret <4 x double> %b -} -define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 { -; P8-LABEL: test_trunc32_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: xxsldwi vs0, vs34, vs34, 3 -; P8-NEXT: li r3, 48 -; P8-NEXT: std r0, 128(r1) -; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill -; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill -; P8-NEXT: mr r30, r5 -; P8-NEXT: vmr v31, v2 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xxswapd vs0, vs63 -; P8-NEXT: mr r29, r3 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xxsldwi vs0, vs63, vs63, 1 -; P8-NEXT: mr r28, r3 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: xscvspdpn f1, vs63 -; P8-NEXT: mr r27, r3 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 6(r30) -; P8-NEXT: li r3, 48 -; P8-NEXT: sth r27, 4(r30) -; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload -; P8-NEXT: sth r28, 2(r30) -; P8-NEXT: sth r29, 0(r30) -; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload 
-; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc32_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: xscvspdpn f1, vs1 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: mffprwz r4, f1 -; CHECK-NEXT: xscvspdpn f1, vs34 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: sth r4, 4(r5) -; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: sth r3, 0(r5) -; CHECK-NEXT: sth r4, 2(r5) -; CHECK-NEXT: mffprwz r6, f1 -; CHECK-NEXT: sth r6, 6(r5) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc32_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: clrldi r3, r6, 32 -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r7 -; SOFT-NEXT: mr r29, r5 -; SOFT-NEXT: mr r28, r4 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r26, r3 -; SOFT-NEXT: clrldi r3, r29, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r28, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r27, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: clrldi r3, r28, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r29, 48 -; SOFT-NEXT: bl 
__extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r26, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 6(r30) -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 4(r30) -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 2(r30) -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %v = fptrunc <4 x float> %a to <4 x half> - store <4 x half> %v, ptr %p - ret void -} -define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 { -; P8-LABEL: test_trunc64_vec4: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -128(r1) -; P8-NEXT: li r3, 48 -; P8-NEXT: std r0, 144(r1) -; P8-NEXT: xxswapd vs1, vs34 -; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill -; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r7 -; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 64 -; P8-NEXT: vmr v30, v2 -; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxswapd vs1, vs63 -; P8-NEXT: mr r29, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxlor f1, vs62, vs62 -; P8-NEXT: mr r28, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: xxlor f1, vs63, vs63 -; P8-NEXT: mr r27, r3 -; P8-NEXT: bl __truncdfhf2 -; P8-NEXT: nop -; P8-NEXT: sth r3, 6(r30) -; P8-NEXT: 
li r3, 64 -; P8-NEXT: sth r27, 2(r30) -; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload -; P8-NEXT: sth r28, 4(r30) -; P8-NEXT: sth r29, 0(r30) -; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 48 -; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload -; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 128 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_trunc64_vec4: -; CHECK: # %bb.0: -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: xxswapd vs0, vs35 -; CHECK-NEXT: xscvdphp f0, f0 -; CHECK-NEXT: xscvdphp f1, vs34 -; CHECK-NEXT: mffprwz r4, f1 -; CHECK-NEXT: xscvdphp f1, vs35 -; CHECK-NEXT: sth r3, 0(r7) -; CHECK-NEXT: sth r4, 2(r7) -; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: sth r4, 4(r7) -; CHECK-NEXT: mffprwz r5, f1 -; CHECK-NEXT: sth r5, 6(r7) -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_trunc64_vec4: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -80(r1) -; SOFT-NEXT: mr r27, r3 -; SOFT-NEXT: mr r3, r6 -; SOFT-NEXT: std r0, 96(r1) -; SOFT-NEXT: mr r30, r7 -; SOFT-NEXT: mr r29, r5 -; SOFT-NEXT: mr r28, r4 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r26, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncdfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r27, 
r3 -; SOFT-NEXT: clrldi r3, r28, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r28, r3 -; SOFT-NEXT: clrldi r3, r29, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: clrldi r3, r26, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 6(r30) -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 4(r30) -; SOFT-NEXT: mr r3, r28 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 2(r30) -; SOFT-NEXT: mr r3, r27 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: sth r3, 0(r30) -; SOFT-NEXT: addi r1, r1, 80 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; SOFT-NEXT: blr - %v = fptrunc <4 x double> %a to <4 x half> - store <4 x half> %v, ptr %p - ret void -} -define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 { -; P8-LABEL: test_sitofp_fadd_i32: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; P8-NEXT: stdu r1, -64(r1) -; P8-NEXT: std r0, 80(r1) -; P8-NEXT: mr r30, r3 -; P8-NEXT: lhz r3, 0(r4) -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: mtfprwa f0, r30 -; P8-NEXT: fmr f31, f1 -; P8-NEXT: xscvsxdsp f1, f0 -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: clrldi r3, r3, 48 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: xsaddsp f1, f31, f1 -; P8-NEXT: addi r1, r1, 64 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; CHECK-LABEL: test_sitofp_fadd_i32: -; CHECK: # %bb.0: 
-; CHECK-NEXT: mtfprwa f1, r3 -; CHECK-NEXT: lhz r4, 0(r4) -; CHECK-NEXT: xscvsxdsp f1, f1 -; CHECK-NEXT: mtfprwz f0, r4 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: xscvdphp f1, f1 -; CHECK-NEXT: mffprwz r3, f1 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f1, r3 -; CHECK-NEXT: xscvhpdp f1, f1 -; CHECK-NEXT: xsaddsp f1, f0, f1 -; CHECK-NEXT: blr -; -; SOFT-LABEL: test_sitofp_fadd_i32: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; SOFT-NEXT: stdu r1, -64(r1) -; SOFT-NEXT: std r0, 80(r1) -; SOFT-NEXT: mr r30, r3 -; SOFT-NEXT: lhz r3, 0(r4) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r29, r3 -; SOFT-NEXT: extsw r3, r30 -; SOFT-NEXT: bl __floatsisf -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 32 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: mr r4, r3 -; SOFT-NEXT: mr r3, r29 -; SOFT-NEXT: bl __addsf3 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 64 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %tmp0 = load half, ptr %b - %tmp1 = sitofp i32 %a to half - %tmp2 = fadd half %tmp0, %tmp1 - %tmp3 = fpext half %tmp2 to float - ret float %tmp3 -} -define half @PR40273(half) #0 { -; P8-LABEL: PR40273: -; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -32(r1) -; P8-NEXT: std r0, 48(r1) -; P8-NEXT: bl __truncsfhf2 -; P8-NEXT: nop -; P8-NEXT: clrldi r3, r3, 48 -; P8-NEXT: bl __extendhfsf2 -; P8-NEXT: nop -; P8-NEXT: fmr f0, f1 -; P8-NEXT: xxlxor f1, f1, f1 -; P8-NEXT: fcmpu cr0, f0, f1 -; P8-NEXT: beq cr0, .LBB20_2 -; P8-NEXT: # %bb.1: -; P8-NEXT: vspltisw v2, 1 -; P8-NEXT: xvcvsxwdp vs1, vs34 -; P8-NEXT: .LBB20_2: -; P8-NEXT: addi r1, r1, 32 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 -; P8-NEXT: blr -; -; 
CHECK-LABEL: PR40273: -; CHECK: # %bb.0: -; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: xxlxor f1, f1, f1 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrlwi r3, r3, 16 -; CHECK-NEXT: mtfprwz f0, r3 -; CHECK-NEXT: xscvhpdp f0, f0 -; CHECK-NEXT: fcmpu cr0, f0, f1 -; CHECK-NEXT: beqlr cr0 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vspltisw v2, 1 -; CHECK-NEXT: xvcvsxwdp vs1, vs34 -; CHECK-NEXT: blr -; -; SOFT-LABEL: PR40273: -; SOFT: # %bb.0: -; SOFT-NEXT: mflr r0 -; SOFT-NEXT: stdu r1, -32(r1) -; SOFT-NEXT: clrldi r3, r3, 48 -; SOFT-NEXT: std r0, 48(r1) -; SOFT-NEXT: bl __extendhfsf2 -; SOFT-NEXT: nop -; SOFT-NEXT: li r4, 0 -; SOFT-NEXT: bl __nesf2 -; SOFT-NEXT: nop -; SOFT-NEXT: cmplwi r3, 0 -; SOFT-NEXT: lis r3, 16256 -; SOFT-NEXT: iseleq r3, 0, r3 -; SOFT-NEXT: bl __truncsfhf2 -; SOFT-NEXT: nop -; SOFT-NEXT: addi r1, r1, 32 -; SOFT-NEXT: ld r0, 16(r1) -; SOFT-NEXT: mtlr r0 -; SOFT-NEXT: blr - %2 = fcmp une half %0, 0xH0000 - %3 = uitofp i1 %2 to half - ret half %3 -} -attributes #0 = { nounwind }