Skip to content

Commit ef98e24

Browse files
committed
[SLP]Initial support for copyable elements (non-schedulable only)
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, reported crashes, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: #140279
1 parent ce535c8 commit ef98e24

11 files changed

+675
-164
lines changed

llvm/docs/ReleaseNotes.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ Changes to TableGen
6868
Changes to Interprocedural Optimizations
6969
----------------------------------------
7070

71+
Changes to Vectorizers
72+
----------------------------------------
73+
74+
* Added initial support for copyable elements in SLP, which models copyable
75+
elements as add <element>, 0, i.e. uses identity constants for missing lanes.
76+
7177
Changes to the AArch64 Backend
7278
------------------------------
7379

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 595 additions & 108 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
define void @test() {
55
; CHECK-LABEL: define void @test() {
66
; CHECK-NEXT: [[BB:.*:]]
7-
; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0
8-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
9-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], zeroinitializer
107
; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0
118
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
129
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
@@ -17,8 +14,7 @@ define void @test() {
1714
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
1815
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
1916
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
20-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
17+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2218
; CHECK-NEXT: ret void
2319
;
2420
bb:

llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,13 @@ define i64 @foo(i32 %tmp7) {
66
; CHECK-NEXT: bb:
77
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
88
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
9-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
10-
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
11-
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
12-
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
9+
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 poison, i32 0>, [[TMP4]]
10+
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 poison, i32 0>, [[TMP4]]
1311
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
14-
; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
15-
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
16-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
17-
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 0, [[TMP10]]
12+
; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
13+
; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i32> [[TMP9]], zeroinitializer
14+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP10]])
15+
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 0, [[TMP8]]
1816
; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64
1917
; CHECK-NEXT: ret i64 [[TMP64]]
2018
;

llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,10 @@ define i32 @test() {
77
; CHECK-NEXT: br label %[[FUNC_135_EXIT_I:.*]]
88
; CHECK: [[FUNC_135_EXIT_I]]:
99
; CHECK-NEXT: [[G_228_PROMOTED166_I1105_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
10-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
11-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
12-
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]]
13-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3>
14-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
15-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
16-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
17-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 23, i32 8, i32 9, i32 10, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
18-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <12 x i32> [[TMP3]], <12 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
19-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP17]], <16 x i32> [[TMP8]], <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 23, i32 24, i32 25, i32 26, i32 2, i32 2, i32 2, i32 2, i32 3>
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
11+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 poison, i32 poison, i32 poison>
12+
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison>, [[TMP1]]
13+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4>
2114
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
2215
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer
2316
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>

llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
define i64 @test() {
55
; CHECK-LABEL: define i64 @test() {
66
; CHECK-NEXT: [[BB:.*]]:
7-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 0, i32 1
87
; CHECK-NEXT: br label %[[BB1:.*]]
98
; CHECK: [[BB1]]:
109
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB5:.*]] ]
11-
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> zeroinitializer, [[TMP1]]
1211
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
1312
; CHECK-NEXT: [[TMP4]] = or <2 x i32> [[TMP3]], zeroinitializer
1413
; CHECK-NEXT: br label %[[BB5]]

llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,17 @@ define void @test() {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: br label %[[BB1:.*]]
99
; CHECK: [[IF_THEN_I_I:.*]]:
10-
; CHECK-NEXT: br label %[[BB5:.*]]
10+
; CHECK-NEXT: br label %[[BB3:.*]]
1111
; CHECK: [[BB1]]:
1212
; CHECK-NEXT: [[TMP0:%.*]] = zext i1 false to i64
13-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
14-
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> zeroinitializer, [[TMP1]]
15-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
16-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> <i64 0, i64 0, i64 poison, i64 poison>, <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
17-
; CHECK-NEXT: br i1 false, label %[[BB5]], label %[[BB2:.*]]
18-
; CHECK: [[BB5]]:
19-
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ]
13+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[TMP0]], i32 2
14+
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> zeroinitializer, [[TMP1]]
15+
; CHECK-NEXT: br i1 false, label %[[BB3]], label %[[BB2:.*]]
16+
; CHECK: [[BB3]]:
17+
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i64> [ [[TMP2]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ]
2018
; CHECK-NEXT: br label %[[BB2]]
2119
; CHECK: [[BB2]]:
22-
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP6]], %[[BB5]] ], [ [[TMP4]], %[[BB1]] ]
20+
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB3]] ], [ [[TMP2]], %[[BB1]] ]
2321
; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr getelementptr inbounds nuw (i8, ptr null, i64 40), align 8
2422
; CHECK-NEXT: ret void
2523
;

llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,9 @@ target triple = "x86_64-unknown-linux-gnu"
66

77
define <4 x i32> @foo(<4 x i32> %x, i32 %f) {
88
; CHECK-LABEL: @foo(
9-
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[F:%.*]], i32 0
10-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[F]], 1
11-
; CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[ADD]], i32 1
12-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[F]], i32 0
13-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
14-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 2, i32 3>
15-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
16-
; CHECK-NEXT: [[VECINIT51:%.*]] = shufflevector <4 x i32> [[VECINIT1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
9+
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[F:%.*]], i32 0
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECINIT]], <4 x i32> poison, <4 x i32> zeroinitializer
11+
; CHECK-NEXT: [[VECINIT51:%.*]] = add <4 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3>
1712
; CHECK-NEXT: ret <4 x i32> [[VECINIT51]]
1813
;
1914
%vecinit = insertelement <4 x i32> undef, i32 %f, i32 0

llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,6 @@ define <4 x i16> @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> zeroinitializer, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
88
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i16> [[TMP0]], zeroinitializer
9-
; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
10-
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP25]], i64 0
11-
; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
12-
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP28]], i64 1
13-
; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
14-
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP31]], i64 2
15-
; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
16-
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP34]], i64 3
179
; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> zeroinitializer, [[TMP1]]
1810
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
1911
; CHECK-NEXT: [[TMP37:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP36]])
@@ -28,8 +20,7 @@ define <4 x i16> @test() {
2820
; CHECK-NEXT: [[TMP46:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP45]])
2921
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i16> [[TMP44]], i16 [[TMP46]], i64 3
3022
; CHECK-NEXT: [[OP_RDX9:%.*]] = or <4 x i16> [[TMP47]], zeroinitializer
31-
; CHECK-NEXT: [[OP_RDX11:%.*]] = or <4 x i16> [[OP_RDX9]], [[TMP35]]
32-
; CHECK-NEXT: ret <4 x i16> [[OP_RDX11]]
23+
; CHECK-NEXT: ret <4 x i16> [[OP_RDX9]]
3324
;
3425
entry:
3526
%subi = add <4 x i16> zeroinitializer, zeroinitializer
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
@g = global [128 x i8] zeroinitializer, align 16
5+
6+
define i64 @test() {
7+
; CHECK-LABEL: define i64 @test() {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @g, align 8
10+
; CHECK-NEXT: br label %[[FUNC_154_EXIT_FUNC_146_EXIT_CRIT_EDGE_I:.*]]
11+
; CHECK: [[FUNC_154_EXIT_FUNC_146_EXIT_CRIT_EDGE_I]]:
12+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 80), align 16
13+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 88), align 8
14+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 32), align 16
15+
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @g, align 16
16+
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 8), align 8
17+
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr @g, align 16
18+
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 24), align 8
19+
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP1]], [[TMP2]]
20+
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], [[TMP3]]
21+
; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], [[TMP4]]
22+
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], [[TMP5]]
23+
; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], [[TMP6]]
24+
; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], [[TMP7]]
25+
; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], [[TMP0]]
26+
; CHECK-NEXT: ret i64 [[TMP14]]
27+
;
28+
entry:
29+
%0 = load i64, ptr @g, align 8
30+
br label %func_154.exit.func_146.exit_crit_edge.i
31+
32+
func_154.exit.func_146.exit_crit_edge.i:
33+
%1 = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 80), align 16
34+
%2 = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 88), align 8
35+
%3 = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 32), align 16
36+
%4 = load i64, ptr @g, align 16
37+
%5 = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 8), align 8
38+
%6 = load i64, ptr @g, align 16
39+
%7 = load i64, ptr getelementptr inbounds nuw (i8, ptr @g, i64 24), align 8
40+
%8 = xor i64 %1, %2
41+
%9 = xor i64 %8, %3
42+
%10 = xor i64 %9, %4
43+
%11 = xor i64 %10, %5
44+
%12 = xor i64 %11, %6
45+
%13 = xor i64 %12, %7
46+
%14 = xor i64 %13, %0
47+
ret i64 %14
48+
}

0 commit comments

Comments
 (0)