@@ -47,6 +47,9 @@ use memchr::memmem;
4747use regex:: bytes:: Regex ;
4848use stringzilla:: sz;
4949
50+ mod utils;
51+ use utils:: should_run;
52+
5053fn log_stringzilla_metadata ( ) {
5154 let v = sz:: version ( ) ;
5255 println ! ( "StringZilla v{}.{}.{}" , v. major, v. minor, v. patch) ;
@@ -104,52 +107,60 @@ fn bench_substring_forward(
104107
105108 // Benchmark for StringZilla forward search using a cycle iterator.
106109 let mut tokens = needles. iter ( ) . cycle ( ) ;
107- g. bench_function ( "stringzilla::find" , |b| {
108- b. iter ( || {
109- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
110- let mut pos: usize = 0 ;
111- while let Some ( found) = sz:: find ( & haystack[ pos..] , token) {
112- pos += found + token. len ( ) ;
113- }
114- } )
115- } ) ;
110+ if should_run ( "stringzilla::find" ) {
111+ g. bench_function ( "stringzilla::find" , |b| {
112+ b. iter ( || {
113+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
114+ let mut pos: usize = 0 ;
115+ while let Some ( found) = sz:: find ( & haystack[ pos..] , token) {
116+ pos += found + token. len ( ) ;
117+ }
118+ } )
119+ } ) ;
120+ }
116121
117122 // Benchmark for `memmem::find` forward search using a cycle iterator.
118123 let mut tokens = needles. iter ( ) . cycle ( ) ;
119- g. bench_function ( "memmem::find" , |b| {
120- b. iter ( || {
121- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
122- let mut pos: usize = 0 ;
123- while let Some ( found) = memmem:: find ( & haystack[ pos..] , token) {
124- pos += found + token. len ( ) ;
125- }
126- } )
127- } ) ;
124+ if should_run ( "memmem::find" ) {
125+ g. bench_function ( "memmem::find" , |b| {
126+ b. iter ( || {
127+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
128+ let mut pos: usize = 0 ;
129+ while let Some ( found) = memmem:: find ( & haystack[ pos..] , token) {
130+ pos += found + token. len ( ) ;
131+ }
132+ } )
133+ } ) ;
134+ }
128135
129136 // Benchmark for `memmem::Finder` forward search with pre-constructed matcher.
130137 let mut tokens = needles. iter ( ) . cycle ( ) ;
131- g. bench_function ( "memmem::Finder" , |b| {
132- b. iter ( || {
133- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
134- let finder = memmem:: Finder :: new ( token) ;
135- let mut pos: usize = 0 ;
136- while let Some ( found) = finder. find ( & haystack[ pos..] ) {
137- pos += found + token. len ( ) ;
138- }
139- } )
140- } ) ;
138+ if should_run ( "memmem::Finder" ) {
139+ g. bench_function ( "memmem::Finder" , |b| {
140+ b. iter ( || {
141+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
142+ let finder = memmem:: Finder :: new ( token) ;
143+ let mut pos: usize = 0 ;
144+ while let Some ( found) = finder. find ( & haystack[ pos..] ) {
145+ pos += found + token. len ( ) ;
146+ }
147+ } )
148+ } ) ;
149+ }
141150
142151 // Benchmark for default `std::str::find` forward search.
143152 let mut tokens = needles. iter ( ) . cycle ( ) ;
144- g. bench_function ( "std::str::find" , |b| {
145- b. iter ( || {
146- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
147- let mut pos = 0 ;
148- while let Some ( found) = haystack[ pos..] . find ( token) {
149- pos += found + token. len ( ) ;
150- }
151- } )
152- } ) ;
153+ if should_run ( "std::str::find" ) {
154+ g. bench_function ( "std::str::find" , |b| {
155+ b. iter ( || {
156+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
157+ let mut pos = 0 ;
158+ while let Some ( found) = haystack[ pos..] . find ( token) {
159+ pos += found + token. len ( ) ;
160+ }
161+ } )
162+ } ) ;
163+ }
153164}
154165
155166/// Benchmarks backward substring search using "StringZilla", "MemMem", and standard strings.
@@ -162,68 +173,76 @@ fn bench_substring_backward(
162173
163174 // Benchmark for StringZilla backward search using a cycle iterator.
164175 let mut tokens = needles. iter ( ) . cycle ( ) ;
165- g. bench_function ( "stringzilla::rfind" , |b| {
166- b. iter ( || {
167- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
168- let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
169- while let Some ( end) = pos {
170- if let Some ( found) = sz:: rfind ( & haystack[ ..end] , token) {
171- pos = Some ( found) ;
172- } else {
173- break ;
176+ if should_run ( "stringzilla::rfind" ) {
177+ g. bench_function ( "stringzilla::rfind" , |b| {
178+ b. iter ( || {
179+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
180+ let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
181+ while let Some ( end) = pos {
182+ if let Some ( found) = sz:: rfind ( & haystack[ ..end] , token) {
183+ pos = Some ( found) ;
184+ } else {
185+ break ;
186+ }
174187 }
175- }
176- } )
177- } ) ;
188+ } )
189+ } ) ;
190+ }
178191
179192 // Benchmark for `memmem::rfind` backward search using a cycle iterator.
180193 let mut tokens = needles. iter ( ) . cycle ( ) ;
181- g. bench_function ( "memmem::rfind" , |b| {
182- b. iter ( || {
183- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
184- let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
185- while let Some ( end) = pos {
186- if let Some ( found) = memmem:: rfind ( & haystack[ ..end] , token) {
187- pos = Some ( found) ;
188- } else {
189- break ;
194+ if should_run ( "memmem::rfind" ) {
195+ g. bench_function ( "memmem::rfind" , |b| {
196+ b. iter ( || {
197+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
198+ let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
199+ while let Some ( end) = pos {
200+ if let Some ( found) = memmem:: rfind ( & haystack[ ..end] , token) {
201+ pos = Some ( found) ;
202+ } else {
203+ break ;
204+ }
190205 }
191- }
192- } )
193- } ) ;
206+ } )
207+ } ) ;
208+ }
194209
195210 // Benchmark for `memmem::FinderRev` backward search with pre-constructed matcher.
196211 let mut tokens = needles. iter ( ) . cycle ( ) ;
197- g. bench_function ( "memmem::FinderRev" , |b| {
198- b. iter ( || {
199- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
200- let finder = memmem:: FinderRev :: new ( token) ;
201- let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
202- while let Some ( end) = pos {
203- if let Some ( found) = finder. rfind ( & haystack[ ..end] ) {
204- pos = Some ( found) ;
205- } else {
206- break ;
212+ if should_run ( "memmem::FinderRev" ) {
213+ g. bench_function ( "memmem::FinderRev" , |b| {
214+ b. iter ( || {
215+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
216+ let finder = memmem:: FinderRev :: new ( token) ;
217+ let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
218+ while let Some ( end) = pos {
219+ if let Some ( found) = finder. rfind ( & haystack[ ..end] ) {
220+ pos = Some ( found) ;
221+ } else {
222+ break ;
223+ }
207224 }
208- }
209- } )
210- } ) ;
225+ } )
226+ } ) ;
227+ }
211228
212229 // Benchmark for default `std::str::rfind` backward search.
213230 let mut tokens = needles. iter ( ) . cycle ( ) ;
214- g. bench_function ( "std::str::rfind" , |b| {
215- b. iter ( || {
216- let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
217- let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
218- while let Some ( end) = pos {
219- if let Some ( found) = haystack[ ..end] . rfind ( token) {
220- pos = Some ( found) ;
221- } else {
222- break ;
231+ if should_run ( "std::str::rfind" ) {
232+ g. bench_function ( "std::str::rfind" , |b| {
233+ b. iter ( || {
234+ let token = black_box ( * tokens. next ( ) . unwrap ( ) ) ;
235+ let mut pos: Option < usize > = Some ( haystack. len ( ) ) ;
236+ while let Some ( end) = pos {
237+ if let Some ( found) = haystack[ ..end] . rfind ( token) {
238+ pos = Some ( found) ;
239+ } else {
240+ break ;
241+ }
223242 }
224- }
225- } )
226- } ) ;
243+ } )
244+ } ) ;
245+ }
227246}
228247
229248/// Benchmarks byteset search using "StringZilla", "bstr", "RegEx", and "AhoCorasick"
@@ -243,62 +262,73 @@ fn bench_byteset_forward(
243262 let sz_tabs = sz:: Byteset :: from ( BYTES_TABS ) ;
244263 let sz_html = sz:: Byteset :: from ( BYTES_HTML ) ;
245264 let sz_digits = sz:: Byteset :: from ( BYTES_DIGITS ) ;
246- g. bench_function ( "stringzilla::find_byteset" , |b| {
247- b. iter ( || {
248- for token in needles. iter ( ) {
249- let mut pos: usize = 0 ;
250- while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_tabs) {
251- pos += found + 1 ;
252- }
253- pos = 0 ;
254- while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_html) {
255- pos += found + 1 ;
265+ if should_run ( "stringzilla::find_byteset" ) {
266+ g. bench_function ( "stringzilla::find_byteset" , |b| {
267+ b. iter ( || {
268+ for token in needles. iter ( ) {
269+ let mut pos: usize = 0 ;
270+ while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_tabs) {
271+ pos += found + 1 ;
272+ }
273+ pos = 0 ;
274+ while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_html) {
275+ pos += found + 1 ;
276+ }
277+ pos = 0 ;
278+ while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_digits) {
279+ pos += found + 1 ;
280+ }
256281 }
257- pos = 0 ;
258- while let Some ( found) = sz:: find_byteset ( & token[ pos..] , sz_digits) {
259- pos += found + 1 ;
260- }
261- }
262- } )
263- } ) ;
282+ } )
283+ } ) ;
284+ }
264285
265286 // Benchmark for bstr's byteset search.
266- g. bench_function ( "bstr::iter" , |b| {
267- b. iter ( || {
268- for token in needles. iter ( ) {
269- let mut pos: usize = 0 ;
270- // Inline search for `BYTES_TABS`.
271- while let Some ( found) = token[ pos..] . iter ( ) . position ( |& c| BYTES_TABS . contains ( & c) ) {
272- pos += found + 1 ;
287+ if should_run ( "bstr::iter" ) {
288+ g. bench_function ( "bstr::iter" , |b| {
289+ b. iter ( || {
290+ for token in needles. iter ( ) {
291+ let mut pos: usize = 0 ;
292+ // Inline search for `BYTES_TABS`.
293+ while let Some ( found) =
294+ token[ pos..] . iter ( ) . position ( |& c| BYTES_TABS . contains ( & c) )
295+ {
296+ pos += found + 1 ;
297+ }
298+ pos = 0 ;
299+ // Inline search for `BYTES_HTML`.
300+ while let Some ( found) =
301+ token[ pos..] . iter ( ) . position ( |& c| BYTES_HTML . contains ( & c) )
302+ {
303+ pos += found + 1 ;
304+ }
305+ pos = 0 ;
306+ // Inline search for `BYTES_DIGITS`.
307+ while let Some ( found) =
308+ token[ pos..] . iter ( ) . position ( |& c| BYTES_DIGITS . contains ( & c) )
309+ {
310+ pos += found + 1 ;
311+ }
273312 }
274- pos = 0 ;
275- // Inline search for `BYTES_HTML`.
276- while let Some ( found) = token[ pos..] . iter ( ) . position ( |& c| BYTES_HTML . contains ( & c) ) {
277- pos += found + 1 ;
278- }
279- pos = 0 ;
280- // Inline search for `BYTES_DIGITS`.
281- while let Some ( found) = token[ pos..] . iter ( ) . position ( |& c| BYTES_DIGITS . contains ( & c) )
282- {
283- pos += found + 1 ;
284- }
285- }
286- } )
287- } ) ;
313+ } )
314+ } ) ;
315+ }
288316
289317 // Benchmark for Regex-based byteset search.
290318 let re_tabs = Regex :: new ( "[\n \r \x0B \x0C ]" ) . unwrap ( ) ;
291319 let re_html = Regex :: new ( "[</>&'\" =\\ [\\ ]]" ) . unwrap ( ) ;
292320 let re_digits = Regex :: new ( "[0-9]" ) . unwrap ( ) ;
293- g. bench_function ( "regex::find_iter" , |b| {
294- b. iter ( || {
295- for token in needles. iter ( ) {
296- black_box ( re_tabs. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
297- black_box ( re_html. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
298- black_box ( re_digits. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
299- }
300- } )
301- } ) ;
321+ if should_run ( "regex::find_iter" ) {
322+ g. bench_function ( "regex::find_iter" , |b| {
323+ b. iter ( || {
324+ for token in needles. iter ( ) {
325+ black_box ( re_tabs. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
326+ black_box ( re_html. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
327+ black_box ( re_digits. find_iter ( token. as_bytes ( ) ) . count ( ) ) ;
328+ }
329+ } )
330+ } ) ;
331+ }
302332
303333 // Benchmark for Aho–Corasick-based byteset search.
304334 let ac_tabs = AhoCorasick :: new (
@@ -322,15 +352,17 @@ fn bench_byteset_forward(
322352 . collect :: < Vec < _ > > ( ) ,
323353 )
324354 . expect ( "failed to create AhoCorasick FSA" ) ;
325- g. bench_function ( "aho_corasick::find_iter" , |b| {
326- b. iter ( || {
327- for token in needles. iter ( ) {
328- black_box ( ac_tabs. find_iter ( token) . count ( ) ) ;
329- black_box ( ac_html. find_iter ( token) . count ( ) ) ;
330- black_box ( ac_digits. find_iter ( token) . count ( ) ) ;
331- }
332- } )
333- } ) ;
355+ if should_run ( "aho_corasick::find_iter" ) {
356+ g. bench_function ( "aho_corasick::find_iter" , |b| {
357+ b. iter ( || {
358+ for token in needles. iter ( ) {
359+ black_box ( ac_tabs. find_iter ( token) . count ( ) ) ;
360+ black_box ( ac_html. find_iter ( token) . count ( ) ) ;
361+ black_box ( ac_digits. find_iter ( token) . count ( ) ) ;
362+ }
363+ } )
364+ } ) ;
365+ }
334366}
335367
336368fn main ( ) {
0 commit comments