Skip to content

Commit 6b98ebd

Browse files
committed
Add: rust-bio comparison
1 parent 2b2ce46 commit 6b98ebd

File tree

5 files changed

+193
-29
lines changed

5 files changed

+193
-29
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"corasick",
1010
"CUDF",
1111
"Dataframe",
12+
"Gotoh",
1213
"gxhash",
1314
"lexsort",
1415
"Melem",

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ optional = true
7676
[dependencies.bio]
7777
version = "3.0.0"
7878
default-features = false
79+
features = ["runtime-dispatch-simd"]
7980
optional = true
8081

8182
[dependencies.bstr]

README.md

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -221,19 +221,28 @@ Performing in-place lookups in a precomputed table of 256 bytes:
221221

222222
Edit Distance calculation is a common component of Search Engines, Data Cleaning, and Natural Language Processing, as well as Bioinformatics.
223223
It's a computationally expensive operation, generally implemented using dynamic programming, with a quadratic time complexity upper bound.
224+
For biological sequences, the Needleman-Wunsch and Smith-Waterman algorithms are more appropriate, as they allow overriding the default substitution costs.
225+
Each of those has two flavors: with linear gap penalties and with affine gap penalties, the latter also known as the "Gotoh" variation.
226+
227+
- byte-level and Unicode [Levenshtein](#levenshtein) distance;
228+
- [Needleman-Wunsch](#needleman-wunsch), [Needleman-Wunsch-Gotoh](#needleman-wunsch-gotoh);
229+
- [Smith-Waterman](#smith-waterman), [Smith-Waterman-Gotoh](#smith-waterman-gotoh).
230+
231+
### Levenshtein
224232

225233
| Library | ≅ 100 bytes lines | ≅ 1'000 bytes lines |
226234
| ---------------------------------------------------- | ----------------: | ------------------: |
227235
| Rust 🦀 | | |
228-
| `rapidfuzz::levenshtein<Bytes>` | 4'633 MCUPS | 14'316 MCUPS |
236+
| `bio::levenshtein` on 1x SPR | 428 MCUPS | 823 MCUPS |
237+
| `rapidfuzz::levenshtein<Bytes>` on 1x SPR | 4'633 MCUPS | 14'316 MCUPS |
238+
| `rapidfuzz::levenshtein<Chars>` on 1x SPR | 3'877 MCUPS | 13'179 MCUPS |
229239
| `stringzillas::LevenshteinDistances` on 1x SPR | 3'315 MCUPS | 13'084 MCUPS |
240+
| `stringzillas::LevenshteinDistancesUtf8` on 1x SPR | 3'283 MCUPS | 11'690 MCUPS |
230241
| `stringzillas::LevenshteinDistances` on 16x SPR | 29'430 MCUPS | 105'400 MCUPS |
242+
| `stringzillas::LevenshteinDistancesUtf8` on 16x SPR | 38'954 MCUPS | 103'500 MCUPS |
231243
| `stringzillas::LevenshteinDistances` on RTX6000 | __32'030 MCUPS__ | __901'990 MCUPS__ |
232244
| `stringzillas::LevenshteinDistances` on H100 | __31'913 MCUPS__ | __925'890 MCUPS__ |
233245
| `stringzillas::LevenshteinDistances` on 384x GNR | __114'190 MCUPS__ | __3'084'270 MCUPS__ |
234-
| `rapidfuzz::levenshtein<Chars>` | 3'877 MCUPS | 13'179 MCUPS |
235-
| `stringzillas::LevenshteinDistancesUtf8` on 1x SPR | 3'283 MCUPS | 11'690 MCUPS |
236-
| `stringzillas::LevenshteinDistancesUtf8` on 16x SPR | 38'954 MCUPS | 103'500 MCUPS |
237246
| `stringzillas::LevenshteinDistancesUtf8` on 384x GNR | __103'590 MCUPS__ | __2'938'320 MCUPS__ |
238247
| | | |
239248
| Python 🐍 | | |
@@ -250,42 +259,61 @@ It's a computationally expensive operation, generally implemented using dynamic
250259
| `stringzillas.LevenshteinDistances` batch on 16x SPR | 3'762 MCUPS | 119'261 MCUPS |
251260
| `stringzillas.LevenshteinDistances` batch on H100 | __18'081 MCUPS__ | __320'109 MCUPS__ |
252261

253-
254-
For biological sequences, the Needleman-Wunsch and Smith-Waterman algorithms are more appropriate, as they allow overriding the default substitution costs.
255-
Another common adaptation is to used Gotoh's affine gap penalties, which better model the evolutionary events in DNA and Protein sequences.
262+
### Needleman-Wunsch
256263

257264
| Library | ≅ 100 bytes lines | ≅ 1'000 bytes lines |
258265
| ----------------------------------------------------- | ----------------: | ------------------: |
259-
| Rust 🦀 with linear gaps | |
266+
| Rust 🦀 | | |
267+
| `bio::pairwise::global` on 1x SPR | 51 MCUPS | 57 MCUPS |
260268
| `stringzillas::NeedlemanWunschScores` on 1x SPR | 278 MCUPS | 612 MCUPS |
261269
| `stringzillas::NeedlemanWunschScores` on 16x SPR | 4'057 MCUPS | 8'492 MCUPS |
262270
| `stringzillas::NeedlemanWunschScores` on 384x GNR | __64'290 MCUPS__ | __331'340 MCUPS__ |
263271
| `stringzillas::NeedlemanWunschScores` on H100 | 131 MCUPS | __12'113 MCUPS__ |
264-
| `stringzillas::SmithWatermanScores` on 1x SPR | 263 MCUPS | 552 MCUPS |
265-
| `stringzillas::SmithWatermanScores` on 16x SPR | 3'883 MCUPS | 8'011 MCUPS |
266-
| `stringzillas::SmithWatermanScores` on 384x GNR | __58'880 MCUPS__ | __285'480 MCUPS__ |
267-
| `stringzillas::SmithWatermanScores` on H100 | 143 MCUPS | __12'921 MCUPS__ |
268272
| | | |
269-
| Python 🐍 with linear gaps | | |
273+
| Python 🐍 | | |
270274
| `biopython.PairwiseAligner.score` on 1x SPR | 95 MCUPS | 557 MCUPS |
271275
| `stringzillas.NeedlemanWunschScores` on 1x SPR | 30 MCUPS | 481 MCUPS |
272276
| `stringzillas.NeedlemanWunschScores` batch on 1x SPR | 246 MCUPS | 570 MCUPS |
273277
| `stringzillas.NeedlemanWunschScores` batch on 16x SPR | 3'103 MCUPS | 9'208 MCUPS |
274278
| `stringzillas.NeedlemanWunschScores` batch on H100 | 127 MCUPS | 12'246 MCUPS |
275-
| `stringzillas.SmithWatermanScores` on 1x SPR | 28 MCUPS | 440 MCUPS |
276-
| `stringzillas.SmithWatermanScores` batch on 1x SPR | 255 MCUPS | 582 MCUPS |
277-
| `stringzillas.SmithWatermanScores` batch on 16x SPR | __3'535 MCUPS__ | 8'235 MCUPS |
278-
| `stringzillas.SmithWatermanScores` batch on H100 | 130 MCUPS | __12'702 MCUPS__ |
279-
| | | |
280-
| Rust 🦀 with affine gaps | | |
281-
| `stringzillas::NeedlemanWunschScores` on 1x SPR | 83 MCUPS | 354 MCUPS |
282-
| `stringzillas::NeedlemanWunschScores` on 16x SPR | 1'267 MCUPS | 4'694 MCUPS |
283-
| `stringzillas::NeedlemanWunschScores` on 384x GNR | __42'050 MCUPS__ | __155'920 MCUPS__ |
284-
| `stringzillas::NeedlemanWunschScores` on H100 | 128 MCUPS | __13'799 MCUPS__ |
285-
| `stringzillas::SmithWatermanScores` on 1x SPR | 79 MCUPS | 284 MCUPS |
286-
| `stringzillas::SmithWatermanScores` on 16x SPR | 1'026 MCUPS | 3'776 MCUPS |
287-
| `stringzillas::SmithWatermanScores` on 384x GNR | __38'430 MCUPS__ | __129'140 MCUPS__ |
288-
| `stringzillas::SmithWatermanScores` on H100 | 127 MCUPS | __13'205 MCUPS__ |
279+
280+
### Smith-Waterman
281+
282+
| Library | ≅ 100 bytes lines | ≅ 1'000 bytes lines |
283+
| --------------------------------------------------- | ----------------: | ------------------: |
284+
| Rust 🦀 | | |
285+
| `bio::pairwise::local` on 1x SPR | 49 MCUPS | 50 MCUPS |
286+
| `stringzillas::SmithWatermanScores` on 1x SPR | 263 MCUPS | 552 MCUPS |
287+
| `stringzillas::SmithWatermanScores` on 16x SPR | 3'883 MCUPS | 8'011 MCUPS |
288+
| `stringzillas::SmithWatermanScores` on 384x GNR | __58'880 MCUPS__ | __285'480 MCUPS__ |
289+
| `stringzillas::SmithWatermanScores` on H100 | 143 MCUPS | __12'921 MCUPS__ |
290+
| | | |
291+
| Python 🐍 | | |
292+
| `biopython.PairwiseAligner.score` on 1x SPR | 95 MCUPS | 557 MCUPS |
293+
| `stringzillas.SmithWatermanScores` on 1x SPR | 28 MCUPS | 440 MCUPS |
294+
| `stringzillas.SmithWatermanScores` batch on 1x SPR | 255 MCUPS | 582 MCUPS |
295+
| `stringzillas.SmithWatermanScores` batch on 16x SPR | __3'535 MCUPS__ | 8'235 MCUPS |
296+
| `stringzillas.SmithWatermanScores` batch on H100 | 130 MCUPS | __12'702 MCUPS__ |
297+
298+
### Needleman-Wunsch-Gotoh
299+
300+
| Library | ≅ 100 bytes lines | ≅ 1'000 bytes lines |
301+
| ------------------------------------------------- | ----------------: | ------------------: |
302+
| Rust 🦀 | | |
303+
| `stringzillas::NeedlemanWunschScores` on 1x SPR | 83 MCUPS | 354 MCUPS |
304+
| `stringzillas::NeedlemanWunschScores` on 16x SPR | 1'267 MCUPS | 4'694 MCUPS |
305+
| `stringzillas::NeedlemanWunschScores` on 384x GNR | __42'050 MCUPS__ | __155'920 MCUPS__ |
306+
| `stringzillas::NeedlemanWunschScores` on H100 | 128 MCUPS | __13'799 MCUPS__ |
307+
308+
### Smith-Waterman-Gotoh
309+
310+
| Library | ≅ 100 bytes lines | ≅ 1'000 bytes lines |
311+
| ----------------------------------------------- | ----------------: | ------------------: |
312+
| Rust 🦀 | | |
313+
| `stringzillas::SmithWatermanScores` on 1x SPR | 79 MCUPS | 284 MCUPS |
314+
| `stringzillas::SmithWatermanScores` on 16x SPR | 1'026 MCUPS | 3'776 MCUPS |
315+
| `stringzillas::SmithWatermanScores` on 384x GNR | __38'430 MCUPS__ | __129'140 MCUPS__ |
316+
| `stringzillas::SmithWatermanScores` on H100 | 127 MCUPS | __13'205 MCUPS__ |
289317

290318
## Byte-level Fingerprinting & Sketching Benchmarks
291319

bench_fingerprints.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,10 @@ fn bench_fingerprints(c: &mut Criterion<HashesWallTime>) {
387387
}
388388

389389
// StringZilla: Nx CPU
390-
if should_run(&format!("fingerprinting/stringzillas::Fingerprints({}xCPU)", num_cores)) {
390+
if should_run(&format!(
391+
"fingerprinting/stringzillas::Fingerprints({}xCPU)",
392+
num_cores
393+
)) {
391394
g.throughput(Throughput::Elements(per_batch_hash_ops));
392395
g.bench_function(
393396
&format!("stringzillas::Fingerprints({}xCPU)", num_cores),

bench_similarities.rs

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use criterion::{Criterion, Throughput};
5050
use fork_union::count_logical_cores;
5151
use stringtape::{BytesTape, BytesTapeView, CharsTapeView};
5252

53+
use bio::alignment::{distance as bio_distance, pairwise::Aligner};
5354
use rapidfuzz::distance::levenshtein;
5455
use stringzilla::szs::{
5556
error_costs_256x256_unary, AnyBytesTape, AnyCharsTape, DeviceScope, LevenshteinDistances,
@@ -434,8 +435,22 @@ fn perform_uniform_benchmarks(
434435
levenshtein::distance(a_str.chars(), b_str.chars())
435436
})
436437
});
438+
}
437439

438-
// StringZilla Binary Levenshtein Distance (uniform costs: 0,1,1,1)
440+
if should_run("uniform/bio::levenshtein(1xCPU)") {
441+
g.throughput(Throughput::Elements(per_pair_bytes));
442+
g.bench_function("bio::levenshtein(1xCPU)", |b| {
443+
let mut pair_index = 0;
444+
b.iter(|| {
445+
let a_bytes = &tape_a_view[pair_index % pairs_count];
446+
let b_bytes = &tape_b_view[pair_index % pairs_count];
447+
pair_index = (pair_index + 1) % pairs_count;
448+
std::hint::black_box(bio_distance::levenshtein(a_bytes, b_bytes))
449+
})
450+
});
451+
}
452+
453+
if should_run("uniform/stringzillas::LevenshteinDistances(1xCPU)") {
439454
g.throughput(Throughput::Elements(per_batch_bytes));
440455
g.bench_function("stringzillas::LevenshteinDistances(1xCPU)", |b| {
441456
let mut results = UnifiedVec::<usize>::with_capacity_in(batch_size, UnifiedAlloc);
@@ -644,7 +659,65 @@ fn perform_linear_benchmarks(
644659
.ok()
645660
.and_then(|gpu| SmithWatermanScores::new(gpu, &matrix, -2, -2).ok());
646661

662+
let mut max_len = 0usize;
663+
for idx in 0..pairs_count {
664+
let a_len = tape_a_view[idx].len();
665+
let b_len = tape_b_view[idx].len();
666+
if a_len > max_len {
667+
max_len = a_len;
668+
}
669+
if b_len > max_len {
670+
max_len = b_len;
671+
}
672+
}
673+
let max_len = std::cmp::max(1, max_len);
674+
647675
let per_batch = (batch_size as u64) * avg_cells_bytes;
676+
let per_pair = avg_cells_bytes;
677+
678+
if should_run("linear/bio::pairwise::global(1xCPU)") {
679+
g.throughput(Throughput::Elements(per_pair));
680+
g.bench_function("bio::pairwise::global(1xCPU)", |b| {
681+
let mut aligner =
682+
Aligner::with_capacity(
683+
max_len,
684+
max_len,
685+
-2,
686+
-2,
687+
|a: u8, b: u8| if a == b { 2 } else { -1 },
688+
);
689+
let mut pair_index = 0;
690+
b.iter(|| {
691+
let a_bytes = &tape_a_view[pair_index % pairs_count];
692+
let b_bytes = &tape_b_view[pair_index % pairs_count];
693+
pair_index = (pair_index + 1) % pairs_count;
694+
let score = aligner.global(a_bytes, b_bytes).score;
695+
std::hint::black_box(score);
696+
})
697+
});
698+
}
699+
700+
if should_run("linear/bio::pairwise::local(1xCPU)") {
701+
g.throughput(Throughput::Elements(per_pair));
702+
g.bench_function("bio::pairwise::local(1xCPU)", |b| {
703+
let mut aligner =
704+
Aligner::with_capacity(
705+
max_len,
706+
max_len,
707+
-2,
708+
-2,
709+
|a: u8, b: u8| if a == b { 2 } else { -1 },
710+
);
711+
let mut pair_index = 0;
712+
b.iter(|| {
713+
let a_bytes = &tape_a_view[pair_index % pairs_count];
714+
let b_bytes = &tape_b_view[pair_index % pairs_count];
715+
pair_index = (pair_index + 1) % pairs_count;
716+
let score = aligner.local(a_bytes, b_bytes).score;
717+
std::hint::black_box(score);
718+
})
719+
});
720+
}
648721

649722
// Needleman-Wunsch (Global alignment)
650723
if should_run("stringzillas::NeedlemanWunschScores(1xCPU)") {
@@ -890,7 +963,65 @@ fn perform_affine_benchmarks(
890963
.ok()
891964
.and_then(|gpu| SmithWatermanScores::new(gpu, &matrix, -5, -1).ok());
892965

966+
let mut max_len = 0usize;
967+
for idx in 0..pairs_count {
968+
let a_len = tape_a_view[idx].len();
969+
let b_len = tape_b_view[idx].len();
970+
if a_len > max_len {
971+
max_len = a_len;
972+
}
973+
if b_len > max_len {
974+
max_len = b_len;
975+
}
976+
}
977+
let max_len = std::cmp::max(1, max_len);
978+
893979
let per_batch = (batch_size as u64) * avg_cells_bytes;
980+
let per_pair = avg_cells_bytes;
981+
982+
if should_run("affine/bio::pairwise::global(1xCPU)") {
983+
g.throughput(Throughput::Elements(per_pair));
984+
g.bench_function("bio::pairwise::global(1xCPU)", |b| {
985+
let mut aligner =
986+
Aligner::with_capacity(
987+
max_len,
988+
max_len,
989+
-5,
990+
-1,
991+
|a: u8, b: u8| if a == b { 2 } else { -1 },
992+
);
993+
let mut pair_index = 0;
994+
b.iter(|| {
995+
let a_bytes = &tape_a_view[pair_index % pairs_count];
996+
let b_bytes = &tape_b_view[pair_index % pairs_count];
997+
pair_index = (pair_index + 1) % pairs_count;
998+
let score = aligner.global(a_bytes, b_bytes).score;
999+
std::hint::black_box(score);
1000+
})
1001+
});
1002+
}
1003+
1004+
if should_run("affine/bio::pairwise::local(1xCPU)") {
1005+
g.throughput(Throughput::Elements(per_pair));
1006+
g.bench_function("bio::pairwise::local(1xCPU)", |b| {
1007+
let mut aligner =
1008+
Aligner::with_capacity(
1009+
max_len,
1010+
max_len,
1011+
-5,
1012+
-1,
1013+
|a: u8, b: u8| if a == b { 2 } else { -1 },
1014+
);
1015+
let mut pair_index = 0;
1016+
b.iter(|| {
1017+
let a_bytes = &tape_a_view[pair_index % pairs_count];
1018+
let b_bytes = &tape_b_view[pair_index % pairs_count];
1019+
pair_index = (pair_index + 1) % pairs_count;
1020+
let score = aligner.local(a_bytes, b_bytes).score;
1021+
std::hint::black_box(score);
1022+
})
1023+
});
1024+
}
8941025

8951026
// Needleman-Wunsch (Global alignment)
8961027
if should_run("stringzillas::NeedlemanWunschScores(1xCPU)") {

0 commit comments

Comments
 (0)