diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S index e98893c7ef8b8..f285fe119f4c1 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S @@ -45,6 +45,10 @@ blake3_hash_many_avx2: mov rbp, rsp sub rsp, 680 and rsp, 0xFFFFFFFFFFFFFFC0 +#ifdef _ILP32 + mov esi, esi + mov edx, edx +#endif neg r9d vmovd xmm0, r9d vpbroadcastd ymm0, xmm0 @@ -77,6 +81,7 @@ blake3_hash_many_avx2: vpbroadcastd ymm5, dword ptr [rcx+0x14] vpbroadcastd ymm6, dword ptr [rcx+0x18] vpbroadcastd ymm7, dword ptr [rcx+0x1C] +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] @@ -85,6 +90,16 @@ blake3_hash_many_avx2: mov r13, qword ptr [rdi+0x28] mov r14, qword ptr [rdi+0x30] mov r15, qword ptr [rdi+0x38] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] + mov r12d, dword ptr [rdi+0x10] + mov r13d, dword ptr [rdi+0x14] + mov r14d, dword ptr [rdi+0x18] + mov r15d, dword ptr [rdi+0x1c] +#endif movzx eax, byte ptr [rbp+0x38] movzx ebx, byte ptr [rbp+0x40] or eax, ebx @@ -1305,7 +1320,11 @@ blake3_hash_many_avx2: vmovdqa ymm0, ymmword ptr [rsp+0x260] vpsubd ymm2, ymm0, ymm2 vmovdqa ymmword ptr [rsp+0x260], ymm2 +#ifndef _ILP32 add rdi, 64 +#else + add rdi, 32 +#endif add rbx, 256 mov qword ptr [rbp+0x50], rbx sub rsi, 8 @@ -1346,10 +1365,17 @@ blake3_hash_many_avx2: vpblendd ymm15, ymm15, ymm12, 0x44 vmovdqa ymmword ptr [rsp], ymm14 vmovdqa ymmword ptr [rsp+0x20], ymm15 +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] mov r11, qword ptr [rdi+0x18] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1557,7 +1583,11 @@ blake3_hash_many_avx2: vmovaps xmmword ptr [rsp+0x240], xmm0 vmovaps xmmword ptr [rsp+0x260], xmm2 add rbx, 128 +#ifndef _ILP32 add rdi, 32 +#else + add rdi, 16 +#endif sub rsi, 4 3: test rsi, 0x2 @@ -1573,8 +1603,13 @@ blake3_hash_many_avx2: vinserti128 ymm13, ymm13, xmm14, 0x01 vbroadcasti128 ymm14, xmmword ptr [ROT16+rip] vbroadcasti128 ymm15, xmmword ptr [ROT8+rip] +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1683,7 +1718,11 @@ blake3_hash_many_avx2: vmovaps ymmword ptr [rsp+0x240], ymm0 vmovaps ymmword ptr [rsp+0x260], ymm2 add rbx, 64 +#ifndef _ILP32 add rdi, 16 +#else + add rdi, 8 +#endif sub rsi, 2 3: test rsi, 0x1 @@ -1695,7 +1734,11 @@ blake3_hash_many_avx2: vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 vmovdqa xmm14, xmmword ptr [ROT16+rip] vmovdqa xmm15, xmmword ptr [ROT8+rip] +#ifndef _ILP32 mov r8, qword ptr [rdi] +#else + mov r8d, dword ptr [rdi] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S index b4b14946de10e..709c4752d4084 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S @@ -59,6 +59,10 @@ blake3_hash_many_avx512: sub rsp, 144 and rsp, 0xFFFFFFFFFFFFFFC0 neg r9 +#ifdef _ILP32 + mov esi, esi + mov edx, edx +#endif kmovw k1, r9d vmovd xmm0, r8d vpbroadcastd ymm0, xmm0 @@ -107,6 +111,7 @@ blake3_hash_many_avx512: cmp rdx, qword ptr [rsp+0x80] cmove eax, ebx mov dword ptr [rsp+0x88], eax +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] @@ -115,6 +120,16 @@ blake3_hash_many_avx512: mov r13, qword ptr [rdi+0x48] mov r14, qword ptr [rdi+0x50] mov r15, qword ptr [rdi+0x58] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] + mov r12d, dword ptr [rdi+0x20] + mov r13d, dword ptr [rdi+0x24] + mov r14d, dword ptr [rdi+0x28] + mov r15d, dword ptr [rdi+0x2c] +#endif vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] @@ -127,6 +142,7 @@ blake3_hash_many_avx512: vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 vpunpcklqdq zmm10, zmm18, zmm19 vpunpckhqdq zmm11, zmm18, zmm19 +#ifndef _ILP32 mov r8, qword ptr [rdi+0x20] mov r9, qword ptr [rdi+0x28] mov r10, qword ptr [rdi+0x30] @@ -135,6 +151,16 @@ blake3_hash_many_avx512: mov r13, qword ptr [rdi+0x68] mov r14, qword ptr [rdi+0x70] mov r15, qword ptr [rdi+0x78] +#else + mov r8d, dword ptr [rdi+0x10] + mov r9d, dword ptr [rdi+0x14] + mov r10d, dword ptr [rdi+0x18] + mov r11d, dword ptr [rdi+0x1c] + mov r12d, dword ptr [rdi+0x30] + mov r13d, dword ptr [rdi+0x34] + mov r14d, dword ptr [rdi+0x38] + mov r15d, dword ptr [rdi+0x3c] +#endif vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] @@ -169,6 +195,7 @@ blake3_hash_many_avx512: vmovdqa32 zmm23, zmm19 vpermt2d zmm19, zmm27, zmm8 vpermt2d zmm23, zmm31, zmm8 +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] @@ -177,6 +204,16 @@ blake3_hash_many_avx512: mov r13, qword ptr [rdi+0x48] mov r14, qword ptr [rdi+0x50] mov r15, qword ptr [rdi+0x58] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] + mov r12d, dword ptr [rdi+0x20] + mov r13d, dword ptr [rdi+0x24] + mov r14d, dword ptr [rdi+0x28] + mov r15d, dword ptr [rdi+0x2c] +#endif vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] @@ -197,6 +234,7 @@ blake3_hash_many_avx512: prefetcht0 [r14+rdx+0x80] prefetcht0 [r11+rdx+0x80] prefetcht0 [r15+rdx+0x80] +#ifndef _ILP32 mov r8, qword ptr [rdi+0x20] mov r9, qword ptr [rdi+0x28] mov r10, qword ptr [rdi+0x30] @@ -205,6 +243,16 @@ blake3_hash_many_avx512: mov r13, qword ptr [rdi+0x68] mov r14, qword ptr [rdi+0x70] mov r15, qword ptr [rdi+0x78] +#else + mov r8d, dword ptr [rdi+0x10] + mov r9d, dword ptr [rdi+0x14] + mov r10d, dword ptr [rdi+0x18] + mov r11d, dword ptr [rdi+0x1c] + mov r12d, dword ptr [rdi+0x30] + mov r13d, dword ptr [rdi+0x34] + mov r14d, dword ptr [rdi+0x38] + mov r15d, dword ptr [rdi+0x3c] +#endif vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] @@ -1095,7 +1143,11 @@ blake3_hash_many_avx512: vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16} vmovdqa32 zmmword ptr [rsp], zmm2 vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1 +#ifndef _ILP32 add rdi, 128 +#else + add rdi, 64 +#endif add rbx, 512 mov qword ptr [rbp+0x50], rbx sub rsi, 16 @@ -1125,6 +1177,7 @@ blake3_hash_many_avx512: vpbroadcastd ymm5, dword ptr [rcx+0x14] vpbroadcastd ymm6, dword ptr [rcx+0x18] vpbroadcastd ymm7, dword ptr [rcx+0x1C] +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] @@ -1133,6 +1186,16 @@ blake3_hash_many_avx512: mov r13, qword ptr [rdi+0x28] mov r14, qword ptr [rdi+0x30] mov r15, qword ptr [rdi+0x38] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] + mov r12d, dword ptr [rdi+0x10] + mov r13d, dword ptr [rdi+0x14] + mov r14d, dword ptr [rdi+0x18] + mov r15d, dword ptr [rdi+0x1c] +#endif movzx eax, byte ptr [rbp+0x38] movzx ebx, byte ptr [rbp+0x40] or eax, ebx @@ -2055,7 +2118,11 @@ blake3_hash_many_avx512: vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2 add rbx, 256 mov qword ptr [rbp+0x50], rbx +#ifndef _ILP32 add rdi, 64 +#else + add rdi, 32 +#endif sub rsi, 8 3: mov rbx, qword ptr [rbp+0x50] @@ -2078,10 +2145,17 @@ blake3_hash_many_avx512: kmovw k2, eax vpblendmd zmm13 {k2}, zmm13, zmm12 vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip] +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] mov r11, qword ptr [rdi+0x18] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] +#endif mov eax, 43690 kmovw k3, eax mov eax, 34952 @@ -2195,7 +2269,11 @@ blake3_hash_many_avx512: vmovdqa xmmword ptr [rsp], xmm0 vmovdqa xmmword ptr [rsp+0x40], xmm2 add rbx, 128 +#ifndef _ILP32 add rdi, 32 +#else + add rdi, 16 +#endif sub rsi, 4 3: test esi, 0x2 @@ -2209,8 +2287,13 @@ blake3_hash_many_avx512: vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1 vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 vinserti128 ymm13, ymm13, xmm14, 0x01 +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -2308,7 +2391,11 @@ blake3_hash_many_avx512: vmovdqa xmmword ptr [rsp], xmm0 vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2 add rbx, 64 +#ifndef _ILP32 add rdi, 16 +#else + add rdi, 8 +#endif sub rsi, 2 3: test esi, 0x1 @@ -2319,7 +2406,11 @@ blake3_hash_many_avx512: vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1 vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip] +#ifndef _ILP32 mov r8, qword ptr [rdi] +#else + mov r8d, dword ptr [rdi] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S index d69a1706fefe7..85434df927cdd 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S @@ -54,6 +54,10 @@ blake3_hash_many_sse2: sub rsp, 360 and rsp, 0xFFFFFFFFFFFFFFC0 neg r9d +#ifdef _ILP32 + mov esi, esi + mov edx, edx +#endif movd xmm0, r9d pshufd xmm0, xmm0, 0x00 movdqa xmmword ptr [rsp+0x130], xmm0 @@ -91,10 +95,17 @@ blake3_hash_many_sse2: pshufd xmm5, xmm7, 0x55 pshufd xmm6, xmm7, 0xAA pshufd xmm7, xmm7, 0xFF +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] mov r11, qword ptr [rdi+0x18] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1648,7 +1659,11 @@ blake3_hash_many_sse2: psubd xmm1, xmm0 movdqa xmmword ptr [rsp+0x120], xmm1 add rbx, 128 +#ifndef _ILP32 add rdi, 32 +#else + add rdi, 16 +#endif sub rsi, 4 cmp rsi, 4 jnc 2b @@ -1679,8 +1694,13 @@ blake3_hash_many_sse2: movd xmm13, dword ptr [rsp+0x124] punpckldq xmm14, xmm13 movaps xmmword ptr [rsp+0x10], xmm14 +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1909,7 +1929,11 @@ blake3_hash_many_sse2: mov r11d, dword ptr [rsp+0x120+8*rax] mov dword ptr [rsp+0x110], r10d mov dword ptr [rsp+0x120], r11d +#ifndef _ILP32 add rdi, 16 +#else + add rdi, 8 +#endif add rbx, 64 sub rsi, 2 3: @@ -1920,7 +1944,11 @@ blake3_hash_many_sse2: movd xmm13, dword ptr [rsp+0x110] movd xmm14, dword ptr [rsp+0x120] punpckldq xmm13, xmm14 +#ifndef _ILP32 mov r8, qword ptr [rdi] +#else + mov r8d, dword ptr [rdi] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S index c5b103af61c4f..403773421587c 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S @@ -54,6 +54,10 @@ blake3_hash_many_sse41: sub rsp, 360 and rsp, 0xFFFFFFFFFFFFFFC0 neg r9d +#ifdef _ILP32 + mov esi, esi + mov edx, edx +#endif movd xmm0, r9d pshufd xmm0, xmm0, 0x00 movdqa xmmword ptr [rsp+0x130], xmm0 @@ -91,10 +95,17 @@ blake3_hash_many_sse41: pshufd xmm5, xmm7, 0x55 pshufd xmm6, xmm7, 0xAA pshufd xmm7, xmm7, 0xFF +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] mov r10, qword ptr [rdi+0x10] mov r11, qword ptr [rdi+0x18] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] + mov r10d, dword ptr [rdi+0x8] + mov r11d, dword ptr [rdi+0xc] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1452,7 +1463,11 @@ blake3_hash_many_sse41: psubd xmm1, xmm0 movdqa xmmword ptr [rsp+0x120], xmm1 add rbx, 128 +#ifndef _ILP32 add rdi, 32 +#else + add rdi, 16 +#endif sub rsi, 4 cmp rsi, 4 jnc 2b @@ -1483,8 +1498,13 @@ blake3_hash_many_sse41: pinsrd xmm14, dword ptr [rsp+0x124], 1 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 movaps xmmword ptr [rsp+0x10], xmm14 +#ifndef _ILP32 mov r8, qword ptr [rdi] mov r9, qword ptr [rdi+0x8] +#else + mov r8d, dword ptr [rdi] + mov r9d, dword ptr [rdi+0x4] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx @@ -1686,7 +1706,11 @@ blake3_hash_many_sse41: blendvps xmm2, xmm4, xmm0 movdqa xmmword ptr [rsp+0x110], xmm1 movdqa xmmword ptr [rsp+0x120], xmm2 +#ifndef _ILP32 add rdi, 16 +#else + add rdi, 8 +#endif add rbx, 64 sub rsi, 2 3: @@ -1699,7 +1723,11 @@ blake3_hash_many_sse41: pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 movaps xmm14, xmmword ptr [ROT8+rip] movaps xmm15, xmmword ptr [ROT16+rip] +#ifndef _ILP32 mov r8, qword ptr [rdi] +#else + mov r8d, dword ptr [rdi] +#endif movzx eax, byte ptr [rbp+0x40] or eax, r13d xor edx, edx