Skip to content

Commit eb0d45c

Browse files
committed
deps: V8: cherry-pick 67507b2a88f4
Original commit message:

    Reland "use highway to check and copy leading ascii"

    This is a reland of commit a3e84e5f01540cec142f4d4f41f1921373c220e5

    Original change's description:
    > use highway to check and copy leading ascii
    >
    > Change-Id: I065532aeeee95273821aa1f25b5ffc5c5c23cbf1
    > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7172479
    > Reviewed-by: Patrick Thier <pthier@chromium.org>
    > Reviewed-by: Toon Verwaest <verwaest@chromium.org>
    > Commit-Queue: Dan Carney <dcarney@chromium.org>
    > Cr-Commit-Position: refs/heads/main@{#103820}

    Change-Id: I43b4ad18817eb52b701e112d2d0a5f685374ae1f
    Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7184338
    Reviewed-by: Toon Verwaest <verwaest@chromium.org>
    Reviewed-by: Patrick Thier <pthier@chromium.org>
    Commit-Queue: Dan Carney <dcarney@chromium.org>
    Cr-Commit-Position: refs/heads/main@{#103865}

Refs: v8/v8@67507b2
1 parent 9cc7fcc commit eb0d45c

File tree

4 files changed

+49
-22
lines changed

4 files changed

+49
-22
lines changed

common.gypi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
# Reset this number to 0 on major V8 upgrades.
4040
# Increment by one for each non-official patch applied to deps/v8.
41-
'v8_embedder_string': '-node.12',
41+
'v8_embedder_string': '-node.13',
4242

4343
##### V8 defaults for Node.js #####
4444

deps/v8/src/strings/unicode-inl.h

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -206,16 +206,6 @@ bool Utf8::IsValidCharacter(uchar c) {
206206
c != kBadChar);
207207
}
208208

209-
template <>
210-
bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) {
211-
return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size);
212-
}
213-
214-
template <>
215-
bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) {
216-
return false;
217-
}
218-
219209
template <typename Char>
220210
Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
221211
char* buffer, size_t capacity,
@@ -234,12 +224,10 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
234224
size_t read_index = 0;
235225
if (kSourceIsOneByte) {
236226
size_t writeable = std::min(string.size(), content_capacity);
237-
// Just memcpy when possible.
238-
if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) {
239-
memcpy(buffer, characters, writeable);
240-
read_index = writeable;
241-
write_index = writeable;
242-
}
227+
size_t ascii_length =
228+
Utf8::WriteLeadingAscii(characters, buffer, writeable);
229+
read_index = ascii_length;
230+
write_index = ascii_length;
243231
}
244232
uint16_t last = Utf16::kNoPreviousCharacter;
245233
for (; read_index < string.size(); read_index++) {

deps/v8/src/strings/unicode.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,49 @@
2121
#include "unicode/uchar.h"
2222
#endif
2323

24+
#include "hwy/highway.h"
2425
#include "third_party/simdutf/simdutf.h"
2526

2627
namespace unibrow {
2728

29+
template <>
30+
size_t Utf8::WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest,
31+
size_t length) {
32+
namespace hw = hwy::HWY_NAMESPACE;
33+
const hw::ScalableTag<int8_t> d;
34+
const size_t N = hw::Lanes(d);
35+
// Don't bother with simd if the string isn't long enough. We're using 2
36+
// registers, so don't enter the loop unless we can iterate 2 times through.
37+
if (length < 4 * N) {
38+
return 0;
39+
}
40+
// We're checking ascii by checking the sign bit so make the strings signed.
41+
const int8_t* src_s = reinterpret_cast<const int8_t*>(src);
42+
int8_t* dst_s = reinterpret_cast<int8_t*>(dest);
43+
size_t i = 0;
44+
DCHECK_GE(length, 2 * N);
45+
for (; i <= length - 2 * N; i += 2 * N) {
46+
const auto v0 = hw::LoadU(d, src_s + i);
47+
const auto v1 = hw::LoadU(d, src_s + i + N);
48+
const auto combined = hw::Or(v0, v1);
49+
bool is_ascii = hw::AllTrue(d, hw::Ge(combined, hw::Zero(d)));
50+
if (is_ascii) {
51+
hw::StoreU(v0, d, dst_s + i);
52+
hw::StoreU(v1, d, dst_s + i + N);
53+
} else {
54+
break;
55+
}
56+
}
57+
return i;
58+
}
59+
60+
template <>
61+
size_t Utf8::WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest,
62+
size_t size) {
63+
// TODO(dcarney): this could be implemented similarly to the one byte variant
64+
return 0;
65+
}
66+
2867
#ifndef V8_INTL_SUPPORT
2968
static const int kStartBit = (1 << 30);
3069
static const int kChunkBits = (1 << 13);

deps/v8/src/strings/unicode.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ class V8_EXPORT_PRIVATE Utf8 {
213213
static bool ValidateEncoding(const uint8_t* str, size_t length);
214214

215215
template <typename Char>
216-
static bool IsAsciiOneByteString(const Char* buffer, size_t size);
216+
static size_t WriteLeadingAscii(const Char* src, char* dest, size_t size);
217217

218218
// Encode the given characters as Utf8 into the provided output buffer.
219219
struct EncodingResult {
@@ -227,12 +227,12 @@ class V8_EXPORT_PRIVATE Utf8 {
227227
};
228228

229229
template <>
230-
inline bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer,
231-
size_t size);
230+
size_t Utf8::WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest,
231+
size_t size);
232232

233233
template <>
234-
inline bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer,
235-
size_t size);
234+
size_t Utf8::WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest,
235+
size_t size);
236236

237237
#if V8_ENABLE_WEBASSEMBLY
238238
class V8_EXPORT_PRIVATE Wtf8 {

0 commit comments

Comments
 (0)