@@ -10,8 +10,12 @@ pub(super) fn bytes_to_words(bytes: &[u8]) -> Vec<Word> {
10
10
11
11
pub ( super ) fn string_to_words ( input : & str ) -> Vec < Word > {
12
12
let bytes = input. as_bytes ( ) ;
13
- let mut words = bytes_to_words ( bytes) ;
14
13
14
+ str_bytes_to_words ( bytes)
15
+ }
16
+
17
+ pub ( super ) fn str_bytes_to_words ( bytes : & [ u8 ] ) -> Vec < Word > {
18
+ let mut words = bytes_to_words ( bytes) ;
15
19
if bytes. len ( ) % 4 == 0 {
16
20
// nul-termination
17
21
words. push ( 0x0u32 ) ;
@@ -20,6 +24,21 @@ pub(super) fn string_to_words(input: &str) -> Vec<Word> {
20
24
words
21
25
}
22
26
27
+ /// split a string into chunks and keep utf8 valid
28
+ #[ allow( unstable_name_collisions) ]
29
+ pub ( super ) fn string_to_byte_chunks ( input : & str , limit : usize ) -> Vec < & [ u8 ] > {
30
+ let mut offset: usize = 0 ;
31
+ let mut start: usize = 0 ;
32
+ let mut words = vec ! [ ] ;
33
+ while offset < input. len ( ) {
34
+ offset = input. floor_char_boundary ( offset + limit) ;
35
+ words. push ( input[ start..offset] . as_bytes ( ) ) ;
36
+ start = offset;
37
+ }
38
+
39
+ words
40
+ }
41
+
23
42
pub ( super ) const fn map_storage_class ( space : crate :: AddressSpace ) -> spirv:: StorageClass {
24
43
match space {
25
44
crate :: AddressSpace :: Handle => spirv:: StorageClass :: UniformConstant ,
@@ -107,3 +126,35 @@ pub fn global_needs_wrapper(ir_module: &crate::Module, var: &crate::GlobalVariab
107
126
_ => true ,
108
127
}
109
128
}
129
+
130
/// HACK: this mirrors an unstable helper from std; remove it once std's
/// `floor_char_boundary` is stable.
///
/// Byte-level test for whether a byte can start a UTF-8 encoded character.
trait U8Internal {
    /// Returns `true` unless the byte is a UTF-8 continuation byte.
    fn is_utf8_char_boundary(&self) -> bool;
}

impl U8Internal for u8 {
    fn is_utf8_char_boundary(&self) -> bool {
        // Continuation bytes are exactly those of the form 0b10xx_xxxx
        // (128..=191); everything else (b < 128 || b >= 192) is a boundary.
        const TOP_TWO_BITS: u8 = 0b1100_0000;
        const CONTINUATION_TAG: u8 = 0b1000_0000;
        (*self & TOP_TWO_BITS) != CONTINUATION_TAG
    }
}
141
+
142
+ trait StrUnstable {
143
+ fn floor_char_boundary ( & self , index : usize ) -> usize ;
144
+ }
145
+
146
+ impl StrUnstable for str {
147
+ fn floor_char_boundary ( & self , index : usize ) -> usize {
148
+ if index >= self . len ( ) {
149
+ self . len ( )
150
+ } else {
151
+ let lower_bound = index. saturating_sub ( 3 ) ;
152
+ let new_index = self . as_bytes ( ) [ lower_bound..=index]
153
+ . iter ( )
154
+ . rposition ( |b| b. is_utf8_char_boundary ( ) ) ;
155
+
156
+ // SAFETY: we know that the character boundary will be within four bytes
157
+ unsafe { lower_bound + new_index. unwrap_unchecked ( ) }
158
+ }
159
+ }
160
+ }
0 commit comments