Skip to content

[NFC] [PowerPC] Auto-generate check-zero-vector.ll #151991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

guy-david
Copy link
Contributor

@guy-david guy-david commented Aug 4, 2025

This should make it easier to update the test and to highlight the diff against incoming patches, as seen in this PR: #146806.

@Himadhith

Should make it easier to update the test and highlight the diff against
incoming patches.
@llvmbot
Copy link
Member

llvmbot commented Aug 4, 2025

@llvm/pr-subscribers-backend-powerpc

Author: Guy David (guy-david)

Changes

This should make it easier to update the test and to highlight the diff against incoming patches, as seen in this PR: #146806.


Patch is 29.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151991.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/PowerPC/check-zero-vector.ll (+625-73)
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index 59173e22edf26..e6367e65d7200 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:    < %s | FileCheck %s --check-prefix=POWERPC_64LE
 
@@ -10,96 +11,647 @@
 define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) {
 ; This testcase is manually reduced to isolate the critical code blocks.
 ; It is designed to check for vector comparison specifically for zero vectors.
-; In the vector.body section, we are expecting a comparison instruction (vcmpequh), 
-; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors. 
-; The output of the merge instruction is being used by xxland and finally 
+; In the vector.body section, we are expecting a comparison instruction (vcmpequh),
+; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors.
+; The output of the merge instruction is being used by xxland and finally
 ; accumulated by vadduwm instruction.
-
 ; POWERPC_64LE-LABEL: test_Greater_than:
-; POWERPC_64LE:  .LBB0_6: # %vector.body
+; POWERPC_64LE:       # %bb.0: # %entry
+; POWERPC_64LE-NEXT:    cmpwi 4, 0
+; POWERPC_64LE-NEXT:    ble 0, .LBB0_3
+; POWERPC_64LE-NEXT:  # %bb.1: # %iter.check
+; POWERPC_64LE-NEXT:    clrldi 5, 4, 32
+; POWERPC_64LE-NEXT:    cmplwi 4, 7
+; POWERPC_64LE-NEXT:    bgt 0, .LBB0_4
+; POWERPC_64LE-NEXT:  # %bb.2:
+; POWERPC_64LE-NEXT:    li 4, 0
+; POWERPC_64LE-NEXT:    li 6, 0
+; POWERPC_64LE-NEXT:    b .LBB0_12
+; POWERPC_64LE-NEXT:  .LBB0_3:
+; POWERPC_64LE-NEXT:    li 4, 0
+; POWERPC_64LE-NEXT:    mr 3, 4
+; POWERPC_64LE-NEXT:    blr
+; POWERPC_64LE-NEXT:  .LBB0_4: # %vector.main.loop.iter.check
+; POWERPC_64LE-NEXT:    vspltisw 2, 1
+; POWERPC_64LE-NEXT:    li 7, 0
+; POWERPC_64LE-NEXT:    li 6, 0
+; POWERPC_64LE-NEXT:    cmplwi 4, 64
+; POWERPC_64LE-NEXT:    li 4, 0
+; POWERPC_64LE-NEXT:    blt 0, .LBB0_9
+; POWERPC_64LE-NEXT:  # %bb.5: # %vector.ph
+; POWERPC_64LE-NEXT:    rlwinm 6, 5, 0, 1, 25
+; POWERPC_64LE-NEXT:    addi 4, 3, 64
+; POWERPC_64LE-NEXT:    xxlxor 35, 35, 35
+; POWERPC_64LE-NEXT:    addi 8, 6, -64
+; POWERPC_64LE-NEXT:    xxlxor 36, 36, 36
+; POWERPC_64LE-NEXT:    rldicl 8, 8, 58, 6
+; POWERPC_64LE-NEXT:    xxlxor 37, 37, 37
+; POWERPC_64LE-NEXT:    xxlxor 32, 32, 32
+; POWERPC_64LE-NEXT:    xxlxor 39, 39, 39
+; POWERPC_64LE-NEXT:    xxlxor 33, 33, 33
+; POWERPC_64LE-NEXT:    xxlxor 42, 42, 42
+; POWERPC_64LE-NEXT:    xxlxor 40, 40, 40
+; POWERPC_64LE-NEXT:    addi 8, 8, 1
+; POWERPC_64LE-NEXT:    xxlxor 41, 41, 41
+; POWERPC_64LE-NEXT:    mtctr 8
+; POWERPC_64LE-NEXT:    xxlxor 38, 38, 38
+; POWERPC_64LE-NEXT:    xxlxor 46, 46, 46
+; POWERPC_64LE-NEXT:    xxlxor 43, 43, 43
+; POWERPC_64LE-NEXT:    xxlxor 44, 44, 44
+; POWERPC_64LE-NEXT:    xxlxor 45, 45, 45
+; POWERPC_64LE-NEXT:    xxlxor 49, 49, 49
+; POWERPC_64LE-NEXT:    xxlxor 47, 47, 47
+; POWERPC_64LE-NEXT:    xxlxor 48, 48, 48
+; POWERPC_64LE-NEXT:    .p2align 4
+; POWERPC_64LE-NEXT:  .LBB0_6: # %vector.body
 ; POWERPC_64LE-NEXT:    #
-; POWERPC_64LE-NEXT:    lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64LE-NEXT:    vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64LE-NEXT:    xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64LE-NEXT:    vmrghh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT:    vmrglh [[R2]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT:    xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64LE-NEXT:    xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64LE-NEXT:    vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64LE:  .LBB0_10: # %vec.epilog.vector.body
+; POWERPC_64LE-NEXT:    lxv 50, -64(4)
+; POWERPC_64LE-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64LE-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64LE-NEXT:    vmrghh 19, 18, 18
+; POWERPC_64LE-NEXT:    vmrglh 18, 18, 18
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 5, 5, 19
+; POWERPC_64LE-NEXT:    lxv 51, -48(4)
+; POWERPC_64LE-NEXT:    vadduwm 4, 4, 18
+; POWERPC_64LE-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64LE-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64LE-NEXT:    vmrghh 18, 19, 19
+; POWERPC_64LE-NEXT:    vmrglh 19, 19, 19
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    vadduwm 7, 7, 18
+; POWERPC_64LE-NEXT:    lxv 50, -32(4)
+; POWERPC_64LE-NEXT:    vadduwm 0, 0, 19
+; POWERPC_64LE-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64LE-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64LE-NEXT:    vmrghh 19, 18, 18
+; POWERPC_64LE-NEXT:    vmrglh 18, 18, 18
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 10, 10, 19
+; POWERPC_64LE-NEXT:    lxv 51, -16(4)
+; POWERPC_64LE-NEXT:    vadduwm 1, 1, 18
+; POWERPC_64LE-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64LE-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64LE-NEXT:    vmrghh 18, 19, 19
+; POWERPC_64LE-NEXT:    vmrglh 19, 19, 19
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    vadduwm 9, 9, 18
+; POWERPC_64LE-NEXT:    lxv 50, 0(4)
+; POWERPC_64LE-NEXT:    vadduwm 8, 8, 19
+; POWERPC_64LE-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64LE-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64LE-NEXT:    vmrghh 19, 18, 18
+; POWERPC_64LE-NEXT:    vmrglh 18, 18, 18
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 14, 14, 19
+; POWERPC_64LE-NEXT:    lxv 51, 16(4)
+; POWERPC_64LE-NEXT:    vadduwm 6, 6, 18
+; POWERPC_64LE-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64LE-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64LE-NEXT:    vmrghh 18, 19, 19
+; POWERPC_64LE-NEXT:    vmrglh 19, 19, 19
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    vadduwm 12, 12, 18
+; POWERPC_64LE-NEXT:    lxv 50, 32(4)
+; POWERPC_64LE-NEXT:    vadduwm 11, 11, 19
+; POWERPC_64LE-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64LE-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64LE-NEXT:    vmrghh 19, 18, 18
+; POWERPC_64LE-NEXT:    vmrglh 18, 18, 18
+; POWERPC_64LE-NEXT:    xxland 51, 51, 34
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 17, 17, 19
+; POWERPC_64LE-NEXT:    lxv 51, 48(4)
+; POWERPC_64LE-NEXT:    addi 4, 4, 128
+; POWERPC_64LE-NEXT:    vadduwm 13, 13, 18
+; POWERPC_64LE-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64LE-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64LE-NEXT:    vmrghh 18, 19, 19
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 16, 16, 18
+; POWERPC_64LE-NEXT:    vmrglh 18, 19, 19
+; POWERPC_64LE-NEXT:    xxland 50, 50, 34
+; POWERPC_64LE-NEXT:    vadduwm 15, 15, 18
+; POWERPC_64LE-NEXT:    bdnz .LBB0_6
+; POWERPC_64LE-NEXT:  # %bb.7: # %middle.block
+; POWERPC_64LE-NEXT:    vadduwm 3, 0, 4
+; POWERPC_64LE-NEXT:    vadduwm 4, 7, 5
+; POWERPC_64LE-NEXT:    li 4, 0
+; POWERPC_64LE-NEXT:    cmpld 6, 5
+; POWERPC_64LE-NEXT:    vadduwm 4, 10, 4
+; POWERPC_64LE-NEXT:    vadduwm 3, 1, 3
+; POWERPC_64LE-NEXT:    vadduwm 3, 8, 3
+; POWERPC_64LE-NEXT:    vadduwm 4, 9, 4
+; POWERPC_64LE-NEXT:    vadduwm 4, 14, 4
+; POWERPC_64LE-NEXT:    vadduwm 3, 6, 3
+; POWERPC_64LE-NEXT:    vadduwm 3, 11, 3
+; POWERPC_64LE-NEXT:    vadduwm 4, 12, 4
+; POWERPC_64LE-NEXT:    vadduwm 4, 17, 4
+; POWERPC_64LE-NEXT:    vadduwm 3, 13, 3
+; POWERPC_64LE-NEXT:    vadduwm 3, 15, 3
+; POWERPC_64LE-NEXT:    vadduwm 4, 16, 4
+; POWERPC_64LE-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64LE-NEXT:    xxswapd 36, 35
+; POWERPC_64LE-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64LE-NEXT:    xxspltw 36, 35, 2
+; POWERPC_64LE-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64LE-NEXT:    vextuwrx 4, 4, 3
+; POWERPC_64LE-NEXT:    beq 0, .LBB0_14
+; POWERPC_64LE-NEXT:  # %bb.8: # %vec.epilog.iter.check
+; POWERPC_64LE-NEXT:    andi. 8, 5, 56
+; POWERPC_64LE-NEXT:    beq 0, .LBB0_12
+; POWERPC_64LE-NEXT:  .LBB0_9: # %vec.epilog.ph
+; POWERPC_64LE-NEXT:    mr 8, 6
+; POWERPC_64LE-NEXT:    rlwinm 6, 5, 0, 1, 28
+; POWERPC_64LE-NEXT:    rldimi 4, 7, 32, 0
+; POWERPC_64LE-NEXT:    xxlxor 36, 36, 36
+; POWERPC_64LE-NEXT:    xxlxor 37, 37, 37
+; POWERPC_64LE-NEXT:    rldimi 7, 7, 32, 0
+; POWERPC_64LE-NEXT:    mtvsrdd 35, 7, 4
+; POWERPC_64LE-NEXT:    sub 7, 6, 8
+; POWERPC_64LE-NEXT:    sldi 4, 8, 1
+; POWERPC_64LE-NEXT:    addi 7, 7, -8
+; POWERPC_64LE-NEXT:    add 4, 3, 4
+; POWERPC_64LE-NEXT:    rldicl 7, 7, 61, 3
+; POWERPC_64LE-NEXT:    addi 7, 7, 1
+; POWERPC_64LE-NEXT:    mtctr 7
+; POWERPC_64LE-NEXT:    .p2align 4
+; POWERPC_64LE-NEXT:  .LBB0_10: # %vec.epilog.vector.body
 ; POWERPC_64LE-NEXT:    #
-; POWERPC_64LE-NEXT:    lxv [[R8:[0-9]+]], 0(4)
+; POWERPC_64LE-NEXT:    lxv 32, 0(4)
 ; POWERPC_64LE-NEXT:    addi 4, 4, 16
-; POWERPC_64LE-NEXT:    vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64LE-NEXT:    xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64LE-NEXT:    vmrglh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT:    vmrghh [[R9]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT:    xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64LE-NEXT:    xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64LE-NEXT:    vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64LE-NEXT:    vadduwm [[R3]], [[R3]], [[R11]]
+; POWERPC_64LE-NEXT:    vcmpequh 0, 0, 4
+; POWERPC_64LE-NEXT:    xxlnor 32, 32, 32
+; POWERPC_64LE-NEXT:    vmrglh 1, 0, 0
+; POWERPC_64LE-NEXT:    vmrghh 0, 0, 0
+; POWERPC_64LE-NEXT:    xxland 33, 33, 34
+; POWERPC_64LE-NEXT:    xxland 32, 32, 34
+; POWERPC_64LE-NEXT:    vadduwm 5, 5, 0
+; POWERPC_64LE-NEXT:    vadduwm 3, 3, 1
 ; POWERPC_64LE-NEXT:    bdnz .LBB0_10
-; POWERPC_64LE:    blr
+; POWERPC_64LE-NEXT:  # %bb.11: # %vec.epilog.middle.block
+; POWERPC_64LE-NEXT:    vadduwm 2, 3, 5
+; POWERPC_64LE-NEXT:    li 4, 0
+; POWERPC_64LE-NEXT:    cmpld 6, 5
+; POWERPC_64LE-NEXT:    xxswapd 35, 34
+; POWERPC_64LE-NEXT:    vadduwm 2, 2, 3
+; POWERPC_64LE-NEXT:    xxspltw 35, 34, 2
+; POWERPC_64LE-NEXT:    vadduwm 2, 2, 3
+; POWERPC_64LE-NEXT:    vextuwrx 4, 4, 2
+; POWERPC_64LE-NEXT:    beq 0, .LBB0_14
+; POWERPC_64LE-NEXT:  .LBB0_12: # %for.body.preheader
+; POWERPC_64LE-NEXT:    sldi 7, 6, 1
+; POWERPC_64LE-NEXT:    sub 5, 5, 6
+; POWERPC_64LE-NEXT:    add 3, 7, 3
+; POWERPC_64LE-NEXT:    mtctr 5
+; POWERPC_64LE-NEXT:    addi 3, 3, -2
+; POWERPC_64LE-NEXT:    .p2align 5
+; POWERPC_64LE-NEXT:  .LBB0_13: # %for.body
+; POWERPC_64LE-NEXT:    #
+; POWERPC_64LE-NEXT:    lhzu 5, 2(3)
+; POWERPC_64LE-NEXT:    cntlzw 5, 5
+; POWERPC_64LE-NEXT:    srwi 5, 5, 5
+; POWERPC_64LE-NEXT:    xori 5, 5, 1
+; POWERPC_64LE-NEXT:    add 4, 4, 5
+; POWERPC_64LE-NEXT:    bdnz .LBB0_13
+; POWERPC_64LE-NEXT:  .LBB0_14: # %for.cond.cleanup
+; POWERPC_64LE-NEXT:    mr 3, 4
+; POWERPC_64LE-NEXT:    blr
 ;
 ; POWERPC_64-LABEL: test_Greater_than:
-; POWERPC_64:  L..BB0_6: # %vector.body
+; POWERPC_64:       # %bb.0: # %entry
+; POWERPC_64-NEXT:    cmpwi 4, 1
+; POWERPC_64-NEXT:    blt 0, L..BB0_3
+; POWERPC_64-NEXT:  # %bb.1: # %iter.check
+; POWERPC_64-NEXT:    clrldi 5, 4, 32
+; POWERPC_64-NEXT:    cmplwi 4, 7
+; POWERPC_64-NEXT:    bgt 0, L..BB0_4
+; POWERPC_64-NEXT:  # %bb.2:
+; POWERPC_64-NEXT:    li 4, 0
+; POWERPC_64-NEXT:    li 6, 0
+; POWERPC_64-NEXT:    b L..BB0_12
+; POWERPC_64-NEXT:  L..BB0_3:
+; POWERPC_64-NEXT:    li 4, 0
+; POWERPC_64-NEXT:    mr 3, 4
+; POWERPC_64-NEXT:    blr
+; POWERPC_64-NEXT:  L..BB0_4: # %vector.main.loop.iter.check
+; POWERPC_64-NEXT:    vspltisw 2, 1
+; POWERPC_64-NEXT:    li 7, 0
+; POWERPC_64-NEXT:    li 6, 0
+; POWERPC_64-NEXT:    cmplwi 4, 64
+; POWERPC_64-NEXT:    li 4, 0
+; POWERPC_64-NEXT:    blt 0, L..BB0_9
+; POWERPC_64-NEXT:  # %bb.5: # %vector.ph
+; POWERPC_64-NEXT:    rlwinm 6, 5, 0, 1, 25
+; POWERPC_64-NEXT:    addi 4, 3, 64
+; POWERPC_64-NEXT:    xxlxor 35, 35, 35
+; POWERPC_64-NEXT:    addi 8, 6, -64
+; POWERPC_64-NEXT:    xxlxor 36, 36, 36
+; POWERPC_64-NEXT:    rldicl 8, 8, 58, 6
+; POWERPC_64-NEXT:    xxlxor 37, 37, 37
+; POWERPC_64-NEXT:    xxlxor 32, 32, 32
+; POWERPC_64-NEXT:    xxlxor 39, 39, 39
+; POWERPC_64-NEXT:    xxlxor 33, 33, 33
+; POWERPC_64-NEXT:    xxlxor 42, 42, 42
+; POWERPC_64-NEXT:    xxlxor 40, 40, 40
+; POWERPC_64-NEXT:    addi 8, 8, 1
+; POWERPC_64-NEXT:    xxlxor 41, 41, 41
+; POWERPC_64-NEXT:    mtctr 8
+; POWERPC_64-NEXT:    xxlxor 38, 38, 38
+; POWERPC_64-NEXT:    xxlxor 46, 46, 46
+; POWERPC_64-NEXT:    xxlxor 43, 43, 43
+; POWERPC_64-NEXT:    xxlxor 44, 44, 44
+; POWERPC_64-NEXT:    xxlxor 45, 45, 45
+; POWERPC_64-NEXT:    xxlxor 49, 49, 49
+; POWERPC_64-NEXT:    xxlxor 47, 47, 47
+; POWERPC_64-NEXT:    xxlxor 48, 48, 48
+; POWERPC_64-NEXT:    .align 4
+; POWERPC_64-NEXT:  L..BB0_6: # %vector.body
 ; POWERPC_64-NEXT:    #
-; POWERPC_64-NEXT:    lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64-NEXT:    vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64-NEXT:    xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64-NEXT:    vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64-NEXT:    vmrghh [[R2]], [[R2]], [[R2]]
-; POWERPC_64-NEXT:    xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64-NEXT:    xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64-NEXT:    vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64:  L..BB0_10: # %vec.epilog.vector.body
+; POWERPC_64-NEXT:    lxv 50, -64(4)
+; POWERPC_64-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64-NEXT:    vmrglh 19, 18, 18
+; POWERPC_64-NEXT:    vmrghh 18, 18, 18
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 5, 5, 19
+; POWERPC_64-NEXT:    lxv 51, -48(4)
+; POWERPC_64-NEXT:    vadduwm 4, 4, 18
+; POWERPC_64-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64-NEXT:    vmrglh 18, 19, 19
+; POWERPC_64-NEXT:    vmrghh 19, 19, 19
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    vadduwm 7, 7, 18
+; POWERPC_64-NEXT:    lxv 50, -32(4)
+; POWERPC_64-NEXT:    vadduwm 0, 0, 19
+; POWERPC_64-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64-NEXT:    vmrglh 19, 18, 18
+; POWERPC_64-NEXT:    vmrghh 18, 18, 18
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 10, 10, 19
+; POWERPC_64-NEXT:    lxv 51, -16(4)
+; POWERPC_64-NEXT:    vadduwm 1, 1, 18
+; POWERPC_64-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64-NEXT:    vmrglh 18, 19, 19
+; POWERPC_64-NEXT:    vmrghh 19, 19, 19
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    vadduwm 9, 9, 18
+; POWERPC_64-NEXT:    lxv 50, 0(4)
+; POWERPC_64-NEXT:    vadduwm 8, 8, 19
+; POWERPC_64-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64-NEXT:    vmrglh 19, 18, 18
+; POWERPC_64-NEXT:    vmrghh 18, 18, 18
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 14, 14, 19
+; POWERPC_64-NEXT:    lxv 51, 16(4)
+; POWERPC_64-NEXT:    vadduwm 6, 6, 18
+; POWERPC_64-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64-NEXT:    vmrglh 18, 19, 19
+; POWERPC_64-NEXT:    vmrghh 19, 19, 19
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    vadduwm 12, 12, 18
+; POWERPC_64-NEXT:    lxv 50, 32(4)
+; POWERPC_64-NEXT:    vadduwm 11, 11, 19
+; POWERPC_64-NEXT:    vcmpequh 18, 18, 3
+; POWERPC_64-NEXT:    xxlnor 50, 50, 50
+; POWERPC_64-NEXT:    vmrglh 19, 18, 18
+; POWERPC_64-NEXT:    vmrghh 18, 18, 18
+; POWERPC_64-NEXT:    xxland 51, 51, 34
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 17, 17, 19
+; POWERPC_64-NEXT:    lxv 51, 48(4)
+; POWERPC_64-NEXT:    addi 4, 4, 128
+; POWERPC_64-NEXT:    vadduwm 13, 13, 18
+; POWERPC_64-NEXT:    vcmpequh 19, 19, 3
+; POWERPC_64-NEXT:    xxlnor 51, 51, 51
+; POWERPC_64-NEXT:    vmrglh 18, 19, 19
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 16, 16, 18
+; POWERPC_64-NEXT:    vmrghh 18, 19, 19
+; POWERPC_64-NEXT:    xxland 50, 50, 34
+; POWERPC_64-NEXT:    vadduwm 15, 15, 18
+; POWERPC_64-NEXT:    bdnz L..BB0_6
+; POWERPC_64-NEXT:  # %bb.7: # %middle.block
+; POWERPC_64-NEXT:    vadduwm 3, 0, 4
+; POWERPC_64-NEXT:    vadduwm 4, 7, 5
+; POWERPC_64-NEXT:    li 4, 0
+; POWERPC_64-NEXT:    cmpld 6, 5
+; POWERPC_64-NEXT:    vadduwm 4, 10, 4
+; POWERPC_64-NEXT:    vadduwm 3, 1, 3
+; POWERPC_64-NEXT:    vadduwm 3, 8, 3
+; POWERPC_64-NEXT:    vadduwm 4, 9, 4
+; POWERPC_64-NEXT:    vadduwm 4, 14, 4
+; POWERPC_64-NEXT:    vadduwm 3, 6, 3
+; POWERPC_64-NEXT:    vadduwm 3, 11, 3
+; POWERPC_64-NEXT:    vadduwm 4, 12, 4
+; POWERPC_64-NEXT:    vadduwm 4, 17, 4
+; POWERPC_64-NEXT:    vadduwm 3, 13, 3
+; POWERPC_64-NEXT:    vadduwm 3, 15, 3
+; POWERPC_64-NEXT:    vadduwm 4, 16, 4
+; POWERPC_64-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64-NEXT:    xxswapd 36, 35
+; POWERPC_64-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64-NEXT:    xxspltw 36, 35, 1
+; POWERPC_64-NEXT:    vadduwm 3, 3, 4
+; POWERPC_64-NEXT:    vextuwlx 4, 4, 3
+; POWERPC_64-NEXT:    beq 0, L..BB0_14
+; POWERPC_64-NEXT:  # %bb.8: # %vec.epilog.iter.check
+; POWERPC_64-NEXT:    andi. 8, 5, 56
+; POWERPC_64-NEXT:    beq 0, L..BB0_12
+; POWERPC_64-NEXT:  L..BB0_9: # %vec.epilog.ph
+; POWERPC_64-NEXT:    li 9, 0
+; POWERPC_64-NEXT:    mr 8, 6
+; POWERPC_64-NEXT:    rlwinm 6, 5, 0, 1, 28
+; POWERPC_64-NEXT:    rldimi 7, 7, 32, 0
+; POWERPC_64-NEXT:    xxlxor 36, 36, 36
+; POWERPC_64-NEXT:    rldimi 9, 4, 32, 0
+; POWERPC_64-NEXT:    sldi 4, 8, 1
+; POWERPC_64-NEXT:    xxlxor 37, 37, 37
+; POWERPC_64-NEXT:    mtvsrdd 35, 9, 7
+; POWERPC_64-NEXT:    sub 7, 6, 8
+; POWERPC_64-NEXT:    add 4, 3, 4
+; POWERPC_64-NEXT:    addi 7, 7, -8
+; POWERPC_64-NEXT:    rldicl 7, 7, 61, 3
+; POWERPC_64-NEXT:    addi 7, 7, 1
+; POWERPC_64-NEXT:    mtctr 7
+; POWERPC_64-NEXT:    .align 4
+; POWERPC_64-NEXT:  L..BB0_10: # %vec.epilog.vector.body
 ; POWERPC_64-NEXT:    #
-; POWERPC_64-NEXT:    lxv [[R8:[0-9]+]], 0(4)
+; POWERPC_64-NEXT:    lxv 32, 0(4)
 ; POWERPC_64-NEXT:    addi 4, 4, 16
-; POWERPC_64-NEXT:    vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64-NEXT:    xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64-NEXT:    vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64-NEXT:    vmrglh [[R9]], [[R9]], [[R9]]
-; POWERPC_64-NEXT:    xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64-NEXT:    xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64-NEXT:    vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64-NEXT:    vadduwm [[R3]], [[R3]], [[R11]]
+; POWERPC_64-NEXT:    vcmpequh 0, 0, 4
+; POWERPC_64-NEXT:    xxlnor 32, 32, 32
+; POWERPC_64-NEXT:    vmrghh 1, 0, 0
+; POWERPC_64-NEXT:    vmrglh 0, 0, 0
+; POWERPC_64-NEXT:    xxland 33, 33, 34
+; POWERPC_64-NEXT:    xxland 32, 32, 34
+; POWERPC_64-NEXT:    vadduwm 5, 5, 0
+; POWERPC_64-NEXT:    vadduwm 3, 3, 1
 ; POWERPC_64-NEXT:    bdnz L..BB0_10
-; POWERPC_64:    blr
+; POWERPC_64-NEXT:  # %bb.11: # %vec.epilog.middle.block
+; POWERPC_64-NEXT:    vadduwm 2, 3, 5
+; POWERPC_64-NEXT:    li 4, 0
+; POWERPC_64-NEXT:    cmpld 6, 5
+; POWERPC_64-NEXT:    xxswapd 35, 34
+; POWERPC_64-NEXT:    vadduwm 2, 2, 3
+; POWERPC_64-NEXT:    xxspltw 35, 34, 1
+; POWERPC_64-NEXT:    vadduwm 2, 2, 3
+; POWERPC_64-NEXT:    vextuwlx 4, 4, 2
+; POWERPC_64-NEXT:    beq 0, L..BB0_14
+; POWERPC_64-NEXT:  L..BB0_12: # %for.body.preheader
+; POWERPC_64-NEXT:    sldi 7, 6, 1
+; POWERPC_64-NEXT:    sub 5, 5, 6
+; POWERPC_64-NEXT:    add 3, 7, 3
+; POWERPC_64-NEXT:    mtctr 5
+; POWERPC_64-NEXT:    addi 3, 3, -2
+; POWERPC_64-NEXT:    .align 5
+; POWERPC_64-NEXT:  L..BB0_13: # %for.body
+; POWERPC_64-NEXT:    #
+; POWERPC_64-NEXT:    lhzu 5, 2(3)
+; POWERPC_64-NEXT:    cntlzw 5, 5
+; POWERPC_64-NEXT:    srwi 5, 5, 5
+; POWERPC_64-NEXT:    xori 5, 5, 1
+; POWERPC_64-NEXT:    add 4, 4, 5
+; POWERPC_64-NEXT:    bdnz L..BB0_13
+; POWERPC_64-NEXT:  L..BB0_14: # %for.cond.cleanup
+; POWERPC_64-NEXT:    mr 3, 4
+; POWERPC_64-NEXT:    blr
 ;
 ; POWERPC_32-LABEL: test_Greater_than:
-; POWERPC_32:  L..BB0_7: # %vector.body
+; POWERPC_32:       # %bb.0: # %entry
+; POWERPC_32-NEXT:    cmpwi 4, 0
+; POWERPC_32-NEXT:    ble 0, L..BB0_3
+; POWERPC_32-NEXT:  # %bb.1: # %iter.check
+; POWER...
[truncated]

@Himadhith
Copy link
Contributor

Himadhith commented Aug 5, 2025

Hey @guy-david, I am currently working on an NFC patch that avoids loop unrolling. Here is the reference link: [NFC][PowerPC] Cleaning up test file and removing redundant front-end test. The assertions in that patch are auto-generated using the Python script, which should make it easier to update the test and highlight the diff.
@tonykuttai

@guy-david guy-david closed this Aug 6, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants