Merge remote-tracking branch 'upstream/release/13.x' into ldc-release/13.x

kinke · kinke · commit 18f343a606a0 · 2022-01-29T12:36:43.000+01:00
diff --git a/clang/test/Driver/undefined-libs.cpp b/clang/test/Driver/undefined-libs.cpp
@@ -5,7 +5,7 @@
 // STDLIB: error: invalid library name in argument '-stdlib=nostdlib'
 // STDLIB-EMPTY:
 
-// RUN: not %clangxx --target=i386-unknown-linux -rtlib=nortlib %s 2>&1 | FileCheck --check-prefix=RTLIB %s
+// RUN: not %clangxx --target=i386-unknown-linux -rtlib=nortlib --unwindlib=libgcc %s 2>&1 | FileCheck --check-prefix=RTLIB %s
 // RTLIB: error: invalid runtime library name in argument '-rtlib=nortlib'
 // RTLIB-EMPTY:
 
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -316,12 +316,11 @@ if( LLVM_ENABLE_PIC )
     # Note: GCC<10.3 has a bug on SystemZ.
     #
     # Note: Clang allows IPO for -fPIC so this optimization is less effective.
-    # Older Clang may support -fno-semantic-interposition but it used local
-    # aliases to optimize global variables, which is incompatible with copy
-    # relocations due to -fno-pic.
+    # Clang 13 has a bug when -fsanitize-coverage is combined with
+    # -fno-semantic-interposition (https://reviews.llvm.org/D117183).
     if ((CMAKE_COMPILER_IS_GNUCXX AND
          NOT (LLVM_NATIVE_ARCH STREQUAL "SystemZ" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.3))
-       OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 13))
+       OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 14))
       add_flag_if_supported("-fno-semantic-interposition" FNO_SEMANTIC_INTERPOSITION)
     endif()
   endif()
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -79,6 +79,10 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
 
   void Select(SDNode *N) override;
 
+  /// Return true because some complex patterns, such as those that call
+  /// canExtractShiftFromMul, can modify the DAG in place.
+  bool ComplexPatternFuncMutatesDAG() const override { return true; }
+
   bool hasNoVMLxHazardUse(SDNode *N) const;
   bool isShifterOpProfitable(const SDValue &Shift,
                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44079,32 +44079,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
          "Unexpected horizontal add/sub opcode");
 
   if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
-    // For slow-hop targets, if we have a hop with a single op, see if we already
-    // have another user that we can reuse and shuffle the result.
     MVT VT = N->getSimpleValueType(0);
     SDValue LHS = N->getOperand(0);
     SDValue RHS = N->getOperand(1);
-    if (VT.is128BitVector() && LHS == RHS) {
-      for (SDNode *User : LHS->uses()) {
-        if (User != N && User->getOpcode() == N->getOpcode()) {
-          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
-          if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
-          }
-          if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
-          }
-        }
-      }
-    }
 
     // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
     if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
diff --git a/llvm/test/CodeGen/ARM/shifter_operand.ll b/llvm/test/CodeGen/ARM/shifter_operand.ll
@@ -256,3 +256,108 @@ define { i32, i32 } @test_multi_use_add(i32 %base, i32 %offset) {
 
   ret { i32, i32 } %ret
 }
+
+define i32 @test_new(i32 %x, i32 %y) {
+; CHECK-ARM-LABEL: test_new:
+; CHECK-ARM:       @ %bb.0: @ %entry
+; CHECK-ARM-NEXT:    movw r2, #48047
+; CHECK-ARM-NEXT:    mul r1, r1, r2
+; CHECK-ARM-NEXT:    add r0, r0, r1, lsl #1
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: test_new:
+; CHECK-THUMB:       @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT:    movw r2, #48047
+; CHECK-THUMB-NEXT:    muls r1, r2, r1
+; CHECK-THUMB-NEXT:    add.w r0, r0, r1, lsl #1
+; CHECK-THUMB-NEXT:    bx lr
+entry:
+  %mul = mul i32 %y, 96094
+  %conv = add i32 %mul, %x
+  ret i32 %conv
+}
+
+; This test used to hit issues with deleted nodes because the ARM selector did
+; not override ComplexPatternFuncMutatesDAG.
+@arr_9 = external dso_local local_unnamed_addr global [15 x [25 x [18 x i8]]], align 1
+define void @test_mutateddag(i32 %b, i32 %c, i32 %d, i1 %cc) {
+; CHECK-THUMB-LABEL: test_mutateddag:
+; CHECK-THUMB:       @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT:    .save {r4, lr}
+; CHECK-THUMB-NEXT:    push {r4, lr}
+; CHECK-THUMB-NEXT:    movw r12, #50608
+; CHECK-THUMB-NEXT:    movw r4, #51512
+; CHECK-THUMB-NEXT:    movt r12, #17917
+; CHECK-THUMB-NEXT:    movt r4, #52
+; CHECK-THUMB-NEXT:    mla r12, r1, r4, r12
+; CHECK-THUMB-NEXT:    mov.w r4, #450
+; CHECK-THUMB-NEXT:    lsls r3, r3, #31
+; CHECK-THUMB-NEXT:    mul lr, r0, r4
+; CHECK-THUMB-NEXT:    movw r0, #48047
+; CHECK-THUMB-NEXT:    muls r0, r1, r0
+; CHECK-THUMB-NEXT:    movw r1, :lower16:arr_9
+; CHECK-THUMB-NEXT:    movt r1, :upper16:arr_9
+; CHECK-THUMB-NEXT:    add.w r0, r2, r0, lsl #1
+; CHECK-THUMB-NEXT:    movw r2, #24420
+; CHECK-THUMB-NEXT:    movt r2, #19356
+; CHECK-THUMB-NEXT:    add.w r0, r0, r0, lsl #3
+; CHECK-THUMB-NEXT:    add.w r0, r1, r0, lsl #1
+; CHECK-THUMB-NEXT:    movw r1, #60920
+; CHECK-THUMB-NEXT:    movt r1, #64028
+; CHECK-THUMB-NEXT:    add r2, r0
+; CHECK-THUMB-NEXT:    add r1, r0
+; CHECK-THUMB-NEXT:    movs r0, #0
+; CHECK-THUMB-NEXT:    b .LBB19_2
+; CHECK-THUMB-NEXT:  .LBB19_1: @ %for.cond1.for.cond.cleanup_crit_edge
+; CHECK-THUMB-NEXT:    @ in Loop: Header=BB19_2 Depth=1
+; CHECK-THUMB-NEXT:    add r1, lr
+; CHECK-THUMB-NEXT:    add r2, lr
+; CHECK-THUMB-NEXT:  .LBB19_2: @ %for.cond
+; CHECK-THUMB-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB-NEXT:    @ Child Loop BB19_3 Depth 2
+; CHECK-THUMB-NEXT:    movs r4, #0
+; CHECK-THUMB-NEXT:  .LBB19_3: @ %for.cond2.preheader
+; CHECK-THUMB-NEXT:    @ Parent Loop BB19_2 Depth=1
+; CHECK-THUMB-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB-NEXT:    cmp r3, #0
+; CHECK-THUMB-NEXT:    str r0, [r1, r4]
+; CHECK-THUMB-NEXT:    bne .LBB19_1
+; CHECK-THUMB-NEXT:  @ %bb.4: @ %for.cond2.preheader.2
+; CHECK-THUMB-NEXT:    @ in Loop: Header=BB19_3 Depth=2
+; CHECK-THUMB-NEXT:    str r0, [r2, r4]
+; CHECK-THUMB-NEXT:    add r4, r12
+; CHECK-THUMB-NEXT:    b .LBB19_3
+entry:
+  %0 = add i32 %d, -4
+  %1 = mul i32 %c, 864846
+  %2 = add i32 %1, 1367306604
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond1.for.cond.cleanup_crit_edge, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %for.cond1.for.cond.cleanup_crit_edge ]
+  %3 = mul i32 %indvar, %b
+  %4 = add i32 %3, -2
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.cond2.preheader.2, %for.cond
+  %indvar24 = phi i32 [ 0, %for.cond ], [ %indvar.next25.3, %for.cond2.preheader.2 ]
+  %indvar.next25 = or i32 %indvar24, 1
+  %l5 = mul i32 %2, %indvar.next25
+  %scevgep.1 = getelementptr [15 x [25 x [18 x i8]]], [15 x [25 x [18 x i8]]]* @arr_9, i32 -217196, i32 %4, i32 %0, i32 %l5
+  %l7 = bitcast i8* %scevgep.1 to i32*
+  store i32 0, i32* %l7, align 1
+  br i1 %cc, label %for.cond1.for.cond.cleanup_crit_edge, label %for.cond2.preheader.2
+
+for.cond2.preheader.2:                            ; preds = %for.cond2.preheader
+  %indvar.next25.1 = or i32 %indvar24, 2
+  %l8 = mul i32 %2, %indvar.next25.1
+  %scevgep.2 = getelementptr [15 x [25 x [18 x i8]]], [15 x [25 x [18 x i8]]]* @arr_9, i32 -217196, i32 %4, i32 %0, i32 %l8
+  %l10 = bitcast i8* %scevgep.2 to i32*
+  store i32 0, i32* %l10, align 1
+  %indvar.next25.3 = add i32 %indvar24, 4
+  br label %for.cond2.preheader
+
+for.cond1.for.cond.cleanup_crit_edge:             ; preds = %for.cond2.preheader
+  %indvar.next = add i32 %indvar, 1
+  br label %for.cond
+}
diff --git a/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll b/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
@@ -171,6 +171,25 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
   ret <4 x float> %3
 }
 
+define <8 x i16> @PR51974(<8 x i16> %a0) {
+; SSE-LABEL: PR51974:
+; SSE:       ## %bb.0:
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    phaddw %xmm0, %xmm1
+; SSE-NEXT:    phaddw %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    ret{{[l|q]}}
+;
+; AVX-LABEL: PR51974:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm1
+; AVX-NEXT:    vphaddw %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    ret{{[l|q]}}
+  %r0 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a0)
+  %r1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %r0, <8 x i16> %a0)
+  ret <8 x i16> %r1
+}
+
 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)