diff --git a/md5_mb/aarch64/md5_mb_sve.S b/md5_mb/aarch64/md5_mb_sve.S index 2975fd2c..7abe1acd 100644 --- a/md5_mb/aarch64/md5_mb_sve.S +++ b/md5_mb/aarch64/md5_mb_sve.S @@ -109,7 +109,8 @@ md5_mb_sve_max_lanes: mov tmpw,total_lanes,lsl 6 sub databuf,abcd_buf,tmp mov sp,databuf - adr md5key_adr,MD5_CONST_KEYS + adrp md5key_adr,MD5_CONST_KEYS + add md5key_adr,md5key_adr,:lo12:MD5_CONST_KEYS whilelo p0.s,wzr,total_lanes mov src,job_vec mov dst,abcd_buf diff --git a/mh_sha1/aarch64/mh_sha1_block_ce.S b/mh_sha1/aarch64/mh_sha1_block_ce.S index daba3e54..df0e6fa5 100644 --- a/mh_sha1/aarch64/mh_sha1_block_ce.S +++ b/mh_sha1/aarch64/mh_sha1_block_ce.S @@ -160,7 +160,8 @@ start_loop: mov msg_adr,input_data lane_loop: mov offs,64 - adr key_adr,KEY_0 + adrp key_adr,KEY_0 + add key_adr,key_adr,:lo12:KEY_0 //load msg 0 ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs @@ -252,7 +253,8 @@ lane_loop: sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 - adr key_adr,KEY_1 + adrp key_adr,KEY_1 + add key_adr,key_adr,:lo12:KEY_1 ldr key_q,[key_adr] sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */ sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 @@ -260,7 +262,8 @@ lane_loop: sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 - adr key_adr,KEY_2 + adrp key_adr,KEY_2 + add key_adr,key_adr,:lo12:KEY_2 ldr key_q,[key_adr] sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */ @@ -268,7 +271,8 @@ lane_loop: sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 - adr key_adr,KEY_3 + adrp key_adr,KEY_3 + add key_adr,key_adr,:lo12:KEY_3 ldr key_q,[key_adr] sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 diff --git a/mh_sha1/aarch64/mh_sha1_block_sve.S b/mh_sha1/aarch64/mh_sha1_block_sve.S index 52438c94..cb9dd3d5 100644 --- a/mh_sha1/aarch64/mh_sha1_block_sve.S +++ b/mh_sha1/aarch64/mh_sha1_block_sve.S @@ -217,7 +217,8 @@ and data_buf,data_buf,#-64 mov sp,data_buf mov lane_offset,0 - adr sha1key_adr, SHA1KEY + adrp sha1key_adr, SHA1KEY + add sha1key_adr, sha1key_adr, :lo12:SHA1KEY .seg_loops\sve2_flag\(): mov block_ptr,input_data mov ctr,num_blocks diff --git a/mh_sha1/aarch64/sha1_asimd_common.S b/mh_sha1/aarch64/sha1_asimd_common.S index 1381038a..9af346d3 100644 --- a/mh_sha1/aarch64/sha1_asimd_common.S +++ b/mh_sha1/aarch64/sha1_asimd_common.S @@ -228,22 +228,26 @@ mov vDD.16B, VD.16B mov vEE.16B, VE.16B - adr sha1key_adr, KEY_0 + adrp sha1key_adr, KEY_0 + add sha1key_adr, sha1key_adr, :lo12:KEY_0 ld1 {VK.4s}, [sha1key_adr] exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 // 20 ~ 39 - adr sha1key_adr, KEY_1 + adrp sha1key_adr, KEY_1 + add sha1key_adr, sha1key_adr, :lo12:KEY_1 ld1 {VK.4s}, [sha1key_adr] exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 // 40 ~ 59 - adr sha1key_adr, KEY_2 + adrp sha1key_adr, KEY_2 + add sha1key_adr, sha1key_adr, :lo12:KEY_2 ld1 {VK.4s}, [sha1key_adr] exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 // 60 ~ 79 - adr sha1key_adr, KEY_3 + adrp sha1key_adr, KEY_3 + add sha1key_adr, sha1key_adr, :lo12:KEY_3 ld1 {VK.4s}, [sha1key_adr] exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 diff --git a/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S b/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S index e30ceb9d..b166b21b 100644 --- a/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S +++ b/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S @@ -134,13 +134,17 @@ mh_sha1_murmur3_block_asimd: mov mur_data, input_data ldr mur_hash1, [mur_digest] ldr mur_hash2, [mur_digest, 8] - adr mur_c1, C1 + adrp mur_c1, C1 + add mur_c1, mur_c1, :lo12:C1 ldr mur_c1, [mur_c1] - adr mur_c2, C2 + adrp mur_c2, C2 + add mur_c2, mur_c2, :lo12:C2 ldr mur_c2, [mur_c2] - adr tmp, N1 + adrp tmp, N1 + add tmp, tmp, :lo12:N1 ldr mur_n1_w, [tmp] - adr tmp, N2 + adrp tmp, N2 + add tmp, tmp, :lo12:N2 ldr mur_n2_w, [tmp] mov mh_segs, #0 diff --git a/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S b/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S index 9a159621..da1111ca 100644 --- a/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S +++ b/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S @@ -216,13 +216,17 @@ mh_sha1_murmur3_block_ce: mov mur_data, input_data ldr mur_hash1, [mur_digest] ldr mur_hash2, [mur_digest, 8] - adr mur_c1, C1 + adrp mur_c1, C1 + add mur_c1, mur_c1, :lo12:C1 ldr mur_c1, [mur_c1] - adr mur_c2, C2 + adrp mur_c2, C2 + add mur_c2, mur_c2, :lo12:C2 ldr mur_c2, [mur_c2] - adr tmp0_adr, N1 + adrp tmp0_adr, N1 + add tmp0_adr, tmp0_adr, :lo12:N1 ldr mur_n1_w, [tmp0_adr] - adr tmp0_adr, N2 + adrp tmp0_adr, N2 + add tmp0_adr, tmp0_adr, :lo12:N2 ldr mur_n2_w, [tmp0_adr] mov tmp0_adr,frame_buffer @@ -234,7 +238,8 @@ start_loop: mov msg_adr,input_data lane_loop: mov offs,64 - adr key_adr,KEY_0 + adrp key_adr,KEY_0 + add key_adr,key_adr,:lo12:KEY_0 // load msg 0 ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs @@ -326,7 +331,8 @@ lane_loop: sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 - adr key_adr,KEY_1 + adrp key_adr,KEY_1 + add key_adr,key_adr,:lo12:KEY_1 ldr key_q,[key_adr] sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */ sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 @@ -334,7 +340,8 @@ lane_loop: sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 - adr key_adr,KEY_2 + adrp key_adr,KEY_2 + add key_adr,key_adr,:lo12:KEY_2 ldr key_q,[key_adr] sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */ @@ -342,7 +349,8 @@ lane_loop: sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 - adr key_adr,KEY_3 + adrp key_adr,KEY_3 + add key_adr,key_adr,:lo12:KEY_3 ldr key_q,[key_adr] sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 diff --git a/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S b/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S index ccc66f41..3f61862f 100644 --- a/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S +++ b/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S @@ -230,22 +230,26 @@ mov vDD.16B, VD.16B mov vEE.16B, VE.16B - adr sha1key_adr, KEY_0 + adrp sha1key_adr, KEY_0 + add sha1key_adr, sha1key_adr, :lo12:KEY_0 ld1 {VK.4s}, [sha1key_adr] exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 // 20 ~ 39 - adr sha1key_adr, KEY_1 + adrp sha1key_adr, KEY_1 + add sha1key_adr, sha1key_adr, :lo12:KEY_1 ld1 {VK.4s}, [sha1key_adr] exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 // 40 ~ 59 - adr sha1key_adr, KEY_2 + adrp sha1key_adr, KEY_2 + add sha1key_adr, sha1key_adr, :lo12:KEY_2 ld1 {VK.4s}, [sha1key_adr] exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 // 60 ~ 79 - adr sha1key_adr, KEY_3 + adrp sha1key_adr, KEY_3 + add sha1key_adr, sha1key_adr, :lo12:KEY_3 ld1 {VK.4s}, [sha1key_adr] exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 diff --git a/sha1_mb/aarch64/sha1_aarch64_x1.S b/sha1_mb/aarch64/sha1_aarch64_x1.S index 55d6f932..75626bbe 100644 --- a/sha1_mb/aarch64/sha1_aarch64_x1.S +++ b/sha1_mb/aarch64/sha1_aarch64_x1.S @@ -245,22 +245,26 @@ sha1_aarch64_x1: load_word_at 0 - adr sha1key_adr, KEY_0 + adrp sha1key_adr, KEY_0 + add sha1key_adr, sha1key_adr, :lo12:KEY_0 ldr WK, [sha1key_adr] exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 // 20 ~ 39 - adr sha1key_adr, KEY_1 + adrp sha1key_adr, KEY_1 + add sha1key_adr, sha1key_adr, :lo12:KEY_1 ldr WK, [sha1key_adr] exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 // 40 ~ 59 - adr sha1key_adr, KEY_2 + adrp sha1key_adr, KEY_2 + add sha1key_adr, sha1key_adr, :lo12:KEY_2 ldr WK, [sha1key_adr] exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 // 60 ~ 79 - adr sha1key_adr, KEY_3 + adrp sha1key_adr, KEY_3 + add sha1key_adr, sha1key_adr, :lo12:KEY_3 ldr WK, [sha1key_adr] exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 diff --git a/sha1_mb/aarch64/sha1_asimd_common.S b/sha1_mb/aarch64/sha1_asimd_common.S index 1381038a..9af346d3 100644 --- a/sha1_mb/aarch64/sha1_asimd_common.S +++ b/sha1_mb/aarch64/sha1_asimd_common.S @@ -228,22 +228,26 @@ mov vDD.16B, VD.16B mov vEE.16B, VE.16B - adr sha1key_adr, KEY_0 + adrp sha1key_adr, KEY_0 + add sha1key_adr, sha1key_adr, :lo12:KEY_0 ld1 {VK.4s}, [sha1key_adr] exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 // 20 ~ 39 - adr sha1key_adr, KEY_1 + adrp sha1key_adr, KEY_1 + add sha1key_adr, sha1key_adr, :lo12:KEY_1 ld1 {VK.4s}, [sha1key_adr] exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 // 40 ~ 59 - adr sha1key_adr, KEY_2 + adrp sha1key_adr, KEY_2 + add sha1key_adr, sha1key_adr, :lo12:KEY_2 ld1 {VK.4s}, [sha1key_adr] exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 // 60 ~ 79 - adr sha1key_adr, KEY_3 + adrp sha1key_adr, KEY_3 + add sha1key_adr, sha1key_adr, :lo12:KEY_3 ld1 {VK.4s}, [sha1key_adr] exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 diff --git a/sha1_mb/aarch64/sha1_mb_x1_ce.S b/sha1_mb/aarch64/sha1_mb_x1_ce.S index 152d5fff..80268cbd 100644 --- a/sha1_mb/aarch64/sha1_mb_x1_ce.S +++ b/sha1_mb/aarch64/sha1_mb_x1_ce.S @@ -100,7 +100,8 @@ sha1_mb_ce_x1: ldr data, [job] ldr abcd_q, [job, 64] ldr e0_s, [job, 80] - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY ld1 {key_0_v.4s-key_3_v.4s},[tmp] start_loop: diff --git a/sha1_mb/aarch64/sha1_mb_x2_ce.S b/sha1_mb/aarch64/sha1_mb_x2_ce.S index cc97e55b..09a05842 100644 --- a/sha1_mb/aarch64/sha1_mb_x2_ce.S +++ b/sha1_mb/aarch64/sha1_mb_x2_ce.S @@ -115,7 +115,8 @@ sha1_mb_ce_x2: //push d8,d9 to stack stp d8, d9, [sp, -256]! - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY ld1 {key_0_v.4s-key_3_v.4s},[tmp] ldr l0_data, [l0_job] ldr l1_data, [l1_job] diff --git a/sha256_mb/aarch64/sha256_mb_x1_ce.S b/sha256_mb/aarch64/sha256_mb_x1_ce.S index 3f70ad71..c3240bd2 100644 --- a/sha256_mb/aarch64/sha256_mb_x1_ce.S +++ b/sha256_mb/aarch64/sha256_mb_x1_ce.S @@ -112,7 +112,8 @@ sha256_mb_ce_x1: start_loop: - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] ldr key_q,[tmp] diff --git a/sha256_mb/aarch64/sha256_mb_x2_ce.S b/sha256_mb/aarch64/sha256_mb_x2_ce.S index b409b5d0..e2bb90fd 100644 --- a/sha256_mb/aarch64/sha256_mb_x2_ce.S +++ b/sha256_mb/aarch64/sha256_mb_x2_ce.S @@ -141,7 +141,8 @@ sha256_mb_ce_x2: start_loop: //load key addr - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] ld1 {l1_msg0_v.4s-l1_msg3_v.4s},[l1_data] diff --git a/sha256_mb/aarch64/sha256_mb_x3_ce.S b/sha256_mb/aarch64/sha256_mb_x3_ce.S index 4c304b3e..cd333ab2 100644 --- a/sha256_mb/aarch64/sha256_mb_x3_ce.S +++ b/sha256_mb/aarch64/sha256_mb_x3_ce.S @@ -163,7 +163,8 @@ sha256_mb_ce_x3: start_loop: //load key addr - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] ld1 {l1_msg0_v.4s-l1_msg3_v.4s},[l1_data] diff --git a/sha256_mb/aarch64/sha256_mb_x4_ce.S b/sha256_mb/aarch64/sha256_mb_x4_ce.S index 2e4f5c0f..9205cc25 100644 --- a/sha256_mb/aarch64/sha256_mb_x4_ce.S +++ b/sha256_mb/aarch64/sha256_mb_x4_ce.S @@ -192,7 +192,8 @@ sha256_mb_ce_x4: start_loop: //load key addr - adr tmp, KEY + adrp tmp, KEY + add tmp, tmp, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] ld1 {l1_msg0_v.4s-l1_msg3_v.4s},[l1_data] diff --git a/sha512_mb/aarch64/sha512_mb_x1_ce.S b/sha512_mb/aarch64/sha512_mb_x1_ce.S index 3a6e21e1..098b6121 100644 --- a/sha512_mb/aarch64/sha512_mb_x1_ce.S +++ b/sha512_mb/aarch64/sha512_mb_x1_ce.S @@ -137,7 +137,8 @@ sha512_mb_ce_x1: start_loop: - adr key_adr, KEY + adrp key_adr, KEY + add key_adr, key_adr, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] add l0_data,l0_data,64 diff --git a/sha512_mb/aarch64/sha512_mb_x2_ce.S b/sha512_mb/aarch64/sha512_mb_x2_ce.S index fd4409e0..961a947c 100644 --- a/sha512_mb/aarch64/sha512_mb_x2_ce.S +++ b/sha512_mb/aarch64/sha512_mb_x2_ce.S @@ -227,7 +227,8 @@ sha512_mb_ce_x2: start_loop: - adr key_adr, KEY + adrp key_adr, KEY + add key_adr, key_adr, :lo12:KEY //load msgs ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data] add l0_data,l0_data,64 diff --git a/sm3_mb/aarch64/sm3_mb_sve.S b/sm3_mb/aarch64/sm3_mb_sve.S index 29668545..b3d41df7 100644 --- a/sm3_mb/aarch64/sm3_mb_sve.S +++ b/sm3_mb/aarch64/sm3_mb_sve.S @@ -109,7 +109,8 @@ sm3_mb_sve_max_lanes: lsl tmp,lanes,6 sub databuf,abcd_buf,tmp mov sp,databuf - adr sm3const_adr,SM3_CONSTS + adrp sm3const_adr,SM3_CONSTS + add sm3const_adr,sm3const_adr,:lo12:SM3_CONSTS 1: mov src,job_vec mov dst,abcd_buf