Skip to content

Commit 3801d5c

Browse files
committed
[Arm64] lj_new_str() crc32 optimization
1 parent 10aeff6 commit 3801d5c

File tree

3 files changed

+277
-1
lines changed

3 files changed

+277
-1
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer
5050
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
5151
CCOPT_x64=
5252
CCOPT_arm=
53-
CCOPT_arm64=
53+
CCOPT_arm64= -march=armv8-a+crc
5454
CCOPT_ppc=
5555
CCOPT_mips=
5656
#

src/arm64/src/lj_str_hash_arm64.h

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
/*
 * This file defines a string hash function using CRC32. It takes advantage of
 * the Arm64 hardware support (crc32 instructions) to speed up the CRC32
 * computation. The hash functions try to compute the CRC32 of the length and
 * up to 128 bytes of the given string.
 */
7+
8+
#ifndef _LJ_STR_HASH_ARM64_H_
9+
#define _LJ_STR_HASH_ARM64_H_
10+
11+
#if defined(__aarch64__) && defined(__GNUC__)
12+
13+
#include <stdint.h>
14+
#include <sys/types.h>
15+
#include <unistd.h>
16+
#include <time.h>
17+
#include <sys/auxv.h>
18+
#include <stdio.h>
19+
#include <arm_acle.h>
20+
21+
#include "../../lj_def.h"
22+
23+
#ifndef HWCAP_CRC32
24+
#define HWCAP_CRC32 (1 << 7)
25+
#endif /* HWCAP for crc32 */
26+
27+
#ifndef LJ_AINLINE
28+
#define LJ_AINLINE inline __attribute__((always_inline))
29+
#endif
30+
31+
#ifdef __MINGW32__
32+
#define random() ((long) rand())
33+
#define srandom(seed) srand(seed)
34+
#endif
35+
36+
extern uint32_t lj_str_original_hash(const char *str, size_t lenx);
37+
static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len);
38+
/* lj_str hash function determined at runtime */
39+
typedef uint32_t (*lj_str_hash_func)(const char *str, size_t lenx);
40+
lj_str_hash_func LJ_STR_HASH;
41+
42+
/*
 * Reinterpret a byte pointer as a pointer to a 64-bit word.
 *
 * The intermediate cast goes through `const void *` so that the const
 * qualifier of the input is never dropped along the way.  No alignment
 * adjustment is performed: the returned pointer has the same address as
 * `str`.
 */
static const uint64_t* cast_uint64p(const char* str)
{
  return (const uint64_t *)(const void *)str;
}
46+
47+
/*
 * Reinterpret a byte pointer as a pointer to a 32-bit word.
 *
 * The intermediate cast goes through `const void *` so that the const
 * qualifier of the input is never dropped along the way.  No alignment
 * adjustment is performed: the returned pointer has the same address as
 * `str`.
 */
static const uint32_t* cast_uint32p(const char* str)
{
  return (const uint32_t *)(const void *)str;
}
51+
52+
/*
 * Hash a very short string (lj_str_hash() routes lengths below 4 here).
 *
 * The first byte, the middle byte (str[len >> 1]) and the last byte are
 * packed together with the length into one 32-bit word, which is then fed
 * through a single CRC32C word step.  For len == 1 all three picks are the
 * same byte.  `len` must not be zero, since str[len - 1] is read.
 *
 * Bytes are read as uint8_t so the result does not depend on whether plain
 * `char` is signed: a sign-extended byte >= 0x80 would otherwise OR ones
 * over the bytes that were already packed into the word.
 */
static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
{
  const uint8_t *p = (const uint8_t *)(const void *)str;
  uint32_t v = p[0];

  v = (v << 8) | p[len >> 1];
  v = (v << 8) | p[len - 1];
  v = (v << 8) | len;
  return __crc32cw(0, v);
}
60+
61+
/*
 * Hash a string whose length lies in the 4..15 byte range.
 *
 * Two words are loaded, one anchored at the start of the string and one
 * anchored at its end; they may overlap.  8-byte words are used when
 * len >= 8, otherwise 4-byte words (widened to 64 bits).  The CRC is seeded
 * with the length and then folds in both words.
 */
static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, size_t len)
{
  uint64_t head, tail;
  uint32_t crc;

  if (len < 8) {
    head = *cast_uint32p(str);
    tail = *cast_uint32p(str + len - 4);
  } else {
    head = *cast_uint64p(str);
    tail = *cast_uint64p(str + len - 8);
  }

  crc = __crc32cw(0, len);
  crc = __crc32cd(crc, head);
  crc = __crc32cd(crc, tail);
  return crc;
}
79+
80+
/*
 * Hash a string whose length lies in the 16..127 byte range.
 *
 * Two accumulators are kept: the first is seeded with the CRC of the length,
 * the second starts at zero.  The string is walked in 16-byte steps while the
 * offset is below len - 16; in each step the first accumulator absorbs the
 * low 8 bytes and the second the high 8 bytes.  Inside the loop the CRC
 * result is *added* to the accumulator rather than replacing it.  The final
 * (possibly overlapping) 16 bytes of the string are then folded in, and the
 * two accumulators are combined with one more CRC word step.
 */
static LJ_AINLINE uint32_t lj_str_hash_16_128(const char* str, size_t len)
{
  uint64_t lane_a = __crc32cw(0, len);
  uint64_t lane_b = 0;
  uint32_t off = 0;

  while (off < len - 16) {
    lane_a += __crc32cd(lane_a, *cast_uint64p(str + off));
    lane_b += __crc32cd(lane_b, *cast_uint64p(str + off + 8));
    off += 16;
  }

  /* Tail: the last 16 bytes, regardless of where the loop stopped. */
  lane_a = __crc32cd(lane_a, *cast_uint64p(str + len - 16));
  lane_b = __crc32cd(lane_b, *cast_uint64p(str + len - 8));

  return __crc32cw(lane_a, lane_b);
}
97+
98+
/* **************************************************************************
99+
*
100+
* Following is code about hashing string with length >= 128
101+
*
102+
* **************************************************************************
103+
*/
104+
105+
/* Pre-computed sample offsets, two per row; the row index is used as a
 * floor(log2(chunk size)) value by the long-string hash. */
static uint32_t random_pos[32][2];

/* log2_tab[n] == floor(log2(n)) for 1 <= n <= 127; log2_tab[0] is -1. */
static const int8_t log2_tab[128] = {
  /*   0       */ -1,
  /*   1       */ 0,
  /*   2..3    */ 1, 1,
  /*   4..7    */ 2, 2, 2, 2,
  /*   8..15   */ 3, 3, 3, 3, 3, 3, 3, 3,
  /*  16..31   */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  /*  32..47   */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  /*  48..63   */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  /*  64..79   */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  /*  80..95   */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  /*  96..111  */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  /* 112..127  */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
};
111+
112+
/* return floor(log2(n)) */
113+
static LJ_AINLINE uint32_t log2_floor(uint32_t n)
114+
{
115+
if (n <= 127) {
116+
return log2_tab[n];
117+
}
118+
119+
if ((n >> 8) <= 127) {
120+
return log2_tab[n >> 8] + 8;
121+
}
122+
123+
if ((n >> 16) <= 127) {
124+
return log2_tab[n >> 16] + 16;
125+
}
126+
127+
if ((n >> 24) <= 127) {
128+
return log2_tab[n >> 24] + 24;
129+
}
130+
131+
return 31;
132+
}
133+
134+
#define POW2_MASK(n) ((1L << (n)) - 1)
135+
/* This function is to populate `random_pos` such that random_pos[i][*]
136+
* contains random value in the range of [2**i, 2**(i+1)).
137+
*/
138+
static void arm64_init_random(void)
139+
{
140+
int i, seed, rml;
141+
142+
/* Calculate the ceil(log2(RAND_MAX)) */
143+
rml = log2_floor(RAND_MAX);
144+
if (RAND_MAX & (RAND_MAX - 1)) {
145+
rml += 1;
146+
}
147+
148+
/* Init seed */
149+
seed = 0;
150+
seed = __crc32cw(seed, getpid());
151+
seed = __crc32cw(seed, time(NULL));
152+
srandom(seed);
153+
154+
/* Now start to populate the random_pos[][]. */
155+
for (i = 0; i < 3; i++) {
156+
/* No need to provide random value for chunk smaller than 8 bytes */
157+
random_pos[i][0] = random_pos[i][1] = 0;
158+
}
159+
160+
for (; i < rml; i++) {
161+
random_pos[i][0] = random() & POW2_MASK(i+1);
162+
random_pos[i][1] = random() & POW2_MASK(i+1);
163+
}
164+
165+
for (; i < 31; i++) {
166+
int j;
167+
for (j = 0; j < 2; j++) {
168+
uint32_t v, scale;
169+
scale = random_pos[i - rml][0];
170+
if (scale == 0) {
171+
scale = 1;
172+
}
173+
v = (random() * scale) & POW2_MASK(i+1);
174+
random_pos[i][j] = v;
175+
}
176+
}
177+
}
178+
#undef POW2_MASK
179+
180+
void __attribute__((constructor)) arm64_init_constructor()
181+
{
182+
// Check if crc32 supported.
183+
unsigned long hwcap;
184+
hwcap = getauxval(AT_HWCAP);
185+
if (hwcap & HWCAP_CRC32) {
186+
LJ_STR_HASH = lj_str_hash;
187+
}
188+
else {
189+
LJ_STR_HASH = lj_str_original_hash;
190+
}
191+
192+
// init random
193+
arm64_init_random();
194+
}
195+
196+
/* Return a pre-computed random number in the range of [1**chunk_sz_order,
197+
* 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value
198+
* may be greater than chunk-size; it is up to the caller to make sure
199+
* "chunk-base + return-value-of-this-func" has valid virtual address.
200+
*/
201+
static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
202+
uint32_t idx)
203+
{
204+
uint32_t pos = random_pos[chunk_sz_order][idx & 1];
205+
return pos;
206+
}
207+
208+
/*
 * Hash a long string (lj_str_hash() routes len >= 128 here) by sampling it
 * instead of reading every byte.
 *
 * The string is viewed as 16 chunks of len / 16 bytes.  Two pre-computed
 * offsets (pos1, pos2) are fetched for the chunk-size order; 8-byte words
 * are sampled at those offsets and folded into two CRC accumulators.  h1 is
 * seeded with the CRC of the length.  Finally the first and last 8 bytes of
 * the string are mixed in and the two accumulators are combined.
 *
 * Each sample is held in a 64-bit variable so that all 8 loaded bytes reach
 * the 64-bit CRC step; a 32-bit temporary would silently drop half of them.
 */
static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
                                                  uint32_t len)
{
  uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
  uint32_t h1, h2;
  uint64_t v;
  const char* chunk_ptr;

  chunk_num = 16;
  chunk_sz = len / chunk_num;
  chunk_sz_log2 = log2_floor(chunk_sz);

  pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
  pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);

  h1 = 0;
  h1 = __crc32cw(h1, len);
  h2 = 0;

  /* chunk_num / 2 - 1 iterations; each one samples the current chunk at
   * pos1 and the following chunk at pos2, then advances by one chunk. */
  for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
       chunk_ptr += chunk_sz, i++) {

    v = *cast_uint64p(chunk_ptr + pos1);
    h1 = __crc32cd(h1, v);

    v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
    h2 = __crc32cd(h2, v);
  }

  /* Final pair of samples: one at pos1 from the current chunk start, one
   * measured backwards by pos2 from the end of the current chunk. */
  v = *cast_uint64p(chunk_ptr + pos1);
  h1 = __crc32cd(h1, v);

  v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
  h2 = __crc32cd(h2, v);

  /* Mix in the first and the last 8 bytes of the string. */
  h1 = __crc32cd(h1, *cast_uint64p(str));
  h2 = __crc32cd(h2, *cast_uint64p(str + len - 8));

  h1 = __crc32cw(h1, h2);
  return h1;
}
251+
252+
253+
/* NOTE: the "len" should not be zero */
254+
static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
255+
{
256+
if (len < 128) {
257+
if (len >= 16) {
258+
return lj_str_hash_16_128(str, len);
259+
}
260+
261+
if ((len >= 4) && (len < 16)) {
262+
return lj_str_hash_4_16(str, len);
263+
}
264+
265+
return lj_str_hash_1_4(str, len);
266+
}
267+
return lj_str_hash_128_above(str, len);
268+
}
269+
270+
#endif // defined(__aarch64__)
271+
#endif // _LJ_STR_HASH_ARM64_H_

src/lj_str.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,18 @@ lj_str_indep_hash(GCstr *str) {
163163
return lj_str_original_hash(strdata(str), str->len);
164164
}
165165

166+
#if defined(__aarch64__) && defined(__GNUC__)
/* AArch64 CRC32 support determined at runtime; the header defines
 * LJ_STR_HASH itself.  The condition mirrors the header's own
 * `__aarch64__ && __GNUC__` guard: if only __aarch64__ were tested, a
 * non-GNU compiler would get an empty header and LJ_STR_HASH would be left
 * undefined. */
#include "arm64/src/lj_str_hash_arm64.h"
#else /* x64, or any other target: compile-time selection */
#include "x64/src/lj_str_hash_x64.h"

#if defined(LJ_ARCH_STR_HASH)
#define LJ_STR_HASH LJ_ARCH_STR_HASH
#else
#define LJ_STR_HASH lj_str_original_hash
#endif
#endif
173178

174179
/* Intern a string and return string object. */
175180
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)

0 commit comments

Comments
 (0)