Skip to content

Commit b586a24

Browse files
committed
📉 Add SequenceSet benchmarks
This adds a simple ruby script for measuring `ObjectSpace.memsize_of`, and several benchmark-driver scripts for: * `SequenceSet.new` (indirectly via `::[]`) * `SequenceSet#slice` (aka `#[]`) * Various set ops: `&`, `|`, `-`, `^`, `~` * Various set predicates: `#intersect?`, `#disjoint?`, `#cover?` * Several alternate implementations of: * AND — `#&` and `#intersect!` * NOT — `#~` and `#complement!` * XOR — `#^` and `#xor!`
1 parent a8932f6 commit b586a24

File tree

7 files changed

+463
-0
lines changed

7 files changed

+463
-0
lines changed

benchmarks/seqset-memsize.rb

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# frozen_string_literal: true
2+
3+
$LOAD_PATH.unshift "./lib"
4+
require "net/imap"
5+
require "objspace"
6+
7+
# Builds a SequenceSet from +n+ random integers, each drawn from
# <tt>min..max</tt>.  The inputs may contain duplicates.  Unless overridden,
# +max+ is 125% of +n+ (truncated to an integer).
def seqset(n, min: 1, max: (n * 1.25).to_i)
  Net::IMAP::SequenceSet[Array.new(n) { rand(min..max) }]
end
11+
12+
# Returns +obj+ followed by every object transitively reachable from it
# (per ObjectSpace.reachable_objects_from), each listed exactly once.
# Classes and modules are skipped, together with anything only reachable
# through them.
#
# +seen+ records the objects already visited.  The default compares by
# identity, so two distinct objects that merely happen to be +eql?+ are both
# listed.  A caller-supplied set is used as given.
def obj_tree(obj, seen: Set.new.compare_by_identity)
  seen << obj
  # reachable_objects_from returns nil for immediates (Integer, nil, ...).
  children = Array(ObjectSpace.reachable_objects_from(obj)).flat_map do |child|
    # Test +seen+ right before descending, not up front for the whole list:
    # an earlier sibling's subtree may already have claimed this child.
    next [] if (child in Module) || seen.include?(child)
    obj_tree(child, seen:)
  end
  [obj, *children]
end
19+
20+
# Sum of ObjectSpace.memsize_of over every object that obj_tree(obj) returns.
def memsize(obj)
  obj_tree(obj).sum { |node| ObjectSpace.memsize_of(node) }
end
21+
22+
# Arithmetic mean of +ary+, always computed with a Float divisor
# (so an empty array yields NaN rather than raising).
def avg(ary)
  total = ary.sum
  total / ary.count.to_f
end
23+
24+
# Prints one report line: the mean memsize over +count+ freshly generated
# SequenceSets of +n+ random inputs.  A "." is printed as each sample is
# measured.  Any extra keyword arguments are passed through to seqset.
def print_avg(n, count: 10, **seqset_opts)
  print format("Average memsize of SequenceSet with %6d inputs: ", n)
  sizes = count.times.map do
    print "."
    memsize(seqset(n, **seqset_opts))
  end
  puts format("%9.1f", avg(sizes))
end
32+
33+
# pp obj_tree(seqset(200, min: 1_000_000, max: 1_000_999)).to_h { [_1, memsize(_1)] }
34+
# Report the average footprint for each power of ten from 1 to 100_000 inputs.
[1, 10, 100, 1_000, 10_000, 100_000].each { |n| print_avg n }

benchmarks/sequence_set-aget.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
---
2+
prelude: |
3+
require "net/imap"
4+
5+
# (Re)builds the global fixtures that the benchmark scripts sample from.
#
# n:: number of random insertions per set (converted with +to_int+)
# d:: domain multiplier; inserted values are drawn from <tt>1..(n*d).floor</tt>
#
# Populates:
# $sets::   between 1 and 100 sets (fewer as +n+ grows), each built by
#           appending +n+ random numbers to an empty SequenceSet
# $idxs::   random indexes in <tt>0..n-1</tt>
# $lens::   random lengths in <tt>1..n</tt>
# $ranges:: random ranges, each starting at a sampled index
def init(n:, d:)
  n = n.to_int
  set_count = (1e5.to_i / n).ceil.clamp(1..100)
  domain = 1..(n * d).floor
  $sets = Array.new(set_count) do
    Net::IMAP::SequenceSet.new.tap do |seqset|
      n.times { seqset << rand(domain) }
    end
  end
  $idxs = Array.new([10_000, 2 * n].min) { rand(0..n - 1) }
  $lens = Array.new([10_000, n].min) { rand(1..n) }
  $ranges = Array.new([10_000, n * n].min) do
    first = idx
    # A negative start stops at or before -1; otherwise stop before n.
    last = first.negative? ? rand(first..-1) : rand(first...n)
    first..last
  end
end
22+
23+
# Random pick from the sets built by init.
def set
  $sets.sample
end

# Random pick from the indexes built by init.
def idx
  $idxs.sample
end

# Random pick from the lengths built by init.
def len
  $lens.sample
end

# Random pick from the ranges built by init.
def range
  $ranges.sample
end
27+
28+
# warmup
29+
init n: 100, d: 2
30+
100.times do
31+
set[idx]
32+
set[range]
33+
set[idx, len]
34+
end
35+
36+
benchmark:
37+
38+
- { name: "(N= 10 ) set[idx]", prelude: "init(n: 1e1, d: 4.0)", script: "set[idx]" }
39+
- { name: "(N=100 ) set[idx]", prelude: "init(n: 1e2, d: 4.0)", script: "set[idx]" }
40+
- { name: "(N= 1K) set[idx]", prelude: "init(n: 1e3, d: 2.0)", script: "set[idx]" }
41+
- { name: "(N= 10K) set[idx]", prelude: "init(n: 1e4, d: 2.0)", script: "set[idx]" }
42+
- { name: "(N=100K) set[idx]", prelude: "init(n: 1e5, d: 2.0)", script: "set[idx]" }
43+
- { name: "(N= 1M) set[idx]", prelude: "init(n: 1e6, d: 1.5)", script: "set[idx]" }
44+
45+
- { name: "(N= 10 ) set[idx, len]", prelude: "init(n: 1e1, d: 4.0)", script: "set[idx, len]" }
46+
- { name: "(N=100 ) set[idx, len]", prelude: "init(n: 1e2, d: 4.0)", script: "set[idx, len]" }
47+
- { name: "(N= 1K) set[idx, len]", prelude: "init(n: 1e3, d: 2.0)", script: "set[idx, len]" }
48+
- { name: "(N= 10K) set[idx, len]", prelude: "init(n: 1e4, d: 2.0)", script: "set[idx, len]" }
49+
- { name: "(N=100K) set[idx, len]", prelude: "init(n: 1e5, d: 2.0)", script: "set[idx, len]" }
50+
- { name: "(N= 1M) set[idx, len]", prelude: "init(n: 1e6, d: 1.5)", script: "set[idx, len]" }
51+
52+
- { name: "(N= 10 ) set[range]", prelude: "init(n: 1e1, d: 4.0)", script: "set[range]" }
53+
- { name: "(N=100 ) set[range]", prelude: "init(n: 1e2, d: 4.0)", script: "set[range]" }
54+
- { name: "(N= 1K) set[range]", prelude: "init(n: 1e3, d: 2.0)", script: "set[range]" }
55+
- { name: "(N= 10K) set[range]", prelude: "init(n: 1e4, d: 2.0)", script: "set[range]" }
56+
- { name: "(N=100K) set[range]", prelude: "init(n: 1e5, d: 2.0)", script: "set[range]" }
57+
- { name: "(N= 1M) set[range]", prelude: "init(n: 1e6, d: 1.5)", script: "set[range]" }

benchmarks/sequence_set-and.yml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("SEQSET_LHS_SIZE", 1000)
7+
MAX_INPUT = Integer ENV.fetch("SEQSET_LHS_MAX", 1400)
8+
9+
SETS = Array.new(1000) {
10+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
11+
}
12+
13+
# Picks two distinct entries of SETS at random and returns them as
# [lhs, rhs].  The lhs is a dup, so mutating benchmarks never alter SETS.
def sets
  SETS.sample(2).then { |lhs, rhs| [lhs.dup, rhs] }
end
17+
18+
# Benchmark-only additions to Net::IMAP::SequenceSet: three alternative ways
# of computing an intersection from other set operations, plus the in-place
# XOR that the third one relies on.
class Net::IMAP
  class SequenceSet
    # Non-mutating variants: run the matching bang method on a copy and pass
    # the result through remain_frozen.
    def and0(other)
      remain_frozen(dup.and0!(other))
    end

    def and1(other)
      remain_frozen(dup.and1!(other))
    end

    def and2(other)
      remain_frozen(dup.and2!(other))
    end

    # L - ~R
    def and0!(other)
      modifying!
      not_other = SequenceSet.new(other).complement!
      subtract(not_other)
    end

    # L - (L - R)
    def and1!(other)
      modifying!
      only_in_self = dup.subtract(SequenceSet.new(other))
      subtract(only_in_self)
    end

    # TODO: add this as a public method
    def xor!(other) # :nodoc:
      modifying!
      original = dup
      rhs = SequenceSet.new(other)
      # The merge must consume rhs before rhs is reduced in place below.
      merge(rhs)
        .subtract(rhs.subtract(original.complement!))
    end

    # L - (L ^ R)
    def and2!(other)
      modifying!
      symmetric_difference = SequenceSet.new(other).xor!(self)
      subtract(symmetric_difference)
    end
  end
end
51+
52+
# warmup for YJIT
53+
# Only warm up when YJIT is both available and switched on.  The defined?
# guard keeps the prelude loadable on builds without YJIT (or without RubyVM
# at all), where the bare constant lookup would raise NameError.
if defined?(RubyVM::YJIT.enabled?) && RubyVM::YJIT.enabled?
  300.times do
    lhs, rhs = sets
    lhs | rhs
    lhs & rhs
    lhs - rhs
    lhs ^ rhs
    ~lhs
    lhs.and0 rhs
    lhs.and1 rhs
    lhs.and2 rhs
  end
end
66+
67+
benchmark:
68+
" L & R": l, r = sets; l & r
69+
" L - ~R": l, r = sets; l - ~r
70+
"and0 L - ~R": l, r = sets; l.and0 r
71+
"and0! L - ~R": l, r = sets; l.and0! r
72+
" L - (L - R)": l, r = sets; l - (l - r)
73+
"and1 L - (L - R)": l, r = sets; l.and1 r
74+
"and1! L - (L - R)": l, r = sets; l.and1! r
75+
" L - (L ^ R)": l, r = sets; l - (l ^ r)
76+
"and2 L - (L ^ R)": l, r = sets; l.and2 r
77+
"and2! L - (L ^ R)": l, r = sets; l.and2! r

benchmarks/sequence_set-new.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
---
2+
prelude: |
3+
$LOAD_PATH.unshift "./lib"
4+
require "net/imap"
5+
SeqSet = Net::IMAP::SequenceSet
6+
7+
8+
N_RAND = 100
9+
10+
# Returns +n+ random integers drawn from <tt>min..max</tt> (duplicates
# possible).  +max+ defaults to 125% of +n+, truncated to an integer.
def rand_nums(n, min: 1, max: (n * 1.25).to_i) = Array.new(n) { rand(min..max) }
11+
# Shuffled +elements+ of a SeqSet built from rand_nums(...).
def rand_entries(...)
  SeqSet[rand_nums(...)].elements.shuffle
end

# The +string+ of a SeqSet built from rand_nums(...), with its
# comma-separated entries shuffled and re-joined.
def rand_string(...)
  entries = SeqSet[rand_nums(...)].string.split(",")
  entries.shuffle.join(",")
end
13+
14+
# Returns +n_rand+ independent results of rand_string(n, ...); extra keyword
# arguments are passed through to rand_string.
def build_string_inputs(n, n_rand, **opts)
  n_rand.times.map { rand_string(n, **opts) }
end
17+
18+
# Returns +n_rand+ independent results of rand_entries(n, ...); extra keyword
# arguments are passed through to rand_entries.
def build_int_inputs(n, n_rand, **opts)
  n_rand.times.map { rand_entries(n, **opts) }
end
21+
22+
inputs = nil
23+
i = 0
24+
25+
# warm up, especially for YJIT
26+
300.times do
27+
ints = rand_nums(1000)
28+
seqset = SeqSet[ints]
29+
string = seqset.string.split(?,).shuffle.join(?,)
30+
SeqSet[string]
31+
end
32+
33+
benchmark:
34+
35+
- name: n=10 ints
36+
prelude: inputs = build_int_inputs 10, N_RAND
37+
script: 10_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
38+
39+
- name: n=10 string
40+
prelude: inputs = build_string_inputs 10, N_RAND
41+
script: 10_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
42+
43+
- name: n=100 ints
44+
prelude: inputs = build_int_inputs 100, N_RAND
45+
script: 1_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
46+
47+
- name: n=100 string
48+
prelude: inputs = build_string_inputs 100, N_RAND
49+
script: 1_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
50+
51+
- name: n=1000 ints
52+
prelude: inputs = build_int_inputs 1000, N_RAND
53+
script: 100.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
54+
55+
- name: n=1000 string
56+
prelude: inputs = build_string_inputs 1000, N_RAND
57+
script: 100.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
58+
59+
- name: n=10,000 ints
60+
prelude: inputs = build_int_inputs 10_000, N_RAND
61+
script: 10.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
62+
63+
- name: n=10,000 string
64+
prelude: inputs = build_string_inputs 10_000, N_RAND
65+
script: 10.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
66+
67+
- name: n=100,000 ints
68+
prelude: inputs = build_int_inputs 100_000, N_RAND / 2
69+
script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]
70+
71+
- name: n=100,000 string
72+
prelude: inputs = build_string_inputs 100_000, N_RAND / 2
73+
script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]
74+
75+
# - name: n=1,000,000 ints
76+
# prelude: inputs = build_int_inputs 1_000_000
77+
# script: SeqSet[inputs[i = (i+1) % N_RAND]]
78+
79+
# - name: n=10,000,000 ints
80+
# prelude: inputs = build_int_inputs 10_000_000
81+
# script: SeqSet[inputs[i = (i+1) % N_RAND]]

benchmarks/sequence_set-not.yml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("SEQSET_LHS_SIZE", 1000)
7+
MAX_INPUT = Integer ENV.fetch("SEQSET_LHS_MAX", 1400)
8+
9+
SETS = Array.new(1000) {
10+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
11+
}
12+
13+
# Benchmark-only reopening of Net::IMAP::SequenceSet.  Adds alternative
# complement (NOT) implementations so they can be timed against each other.
class Net::IMAP
14+
class SequenceSet
15+
# Non-mutating form of orig_not!: applies it to a dup and passes the result
# through remain_frozen (library helper, not shown here).
def orig_not
16+
remain_frozen dup.orig_not!
17+
end
18+
19+
# 0.5.8 implementation, in place.  Flattens the [min, max] tuples into the
# list of numbers just outside each tuple, fixes up both ends of that list,
# then re-pairs it into the tuples of the gaps.
20+
def orig_not!
21+
return replace(self.class.full) if empty?
22+
return clear if full?
23+
flat = @tuples.flat_map { [_1 - 1, _2 + 1] }
24+
# First entry is (lowest min - 1): drop it when the set started at 1,
# otherwise the complement itself starts at 1.
if flat.first < 1 then flat.shift else flat.unshift 1 end
25+
# Last entry is (highest max + 1): drop it when that is past STAR_INT,
# otherwise the complement runs up to STAR_INT.
if STAR_INT < flat.last then flat.pop else flat.push STAR_INT end
26+
@tuples = flat.each_slice(2).to_a
27+
normalize!
28+
end
29+
30+
# enumerator based: builds a new set by appending every gap yielded by
# each_tuple_complement to the new set's tuples.
31+
def enum_not
32+
result = SequenceSet.new
33+
each_tuple_complement do |min, max| result.tuples << [min, max] end
34+
remain_frozen result
35+
end
36+
37+
# enumerator based: non-mutating wrapper around enum_not! (works on a dup).
38+
def enum_not_2
39+
remain_frozen dup.enum_not!
40+
end
41+
42+
# In-place variant: overwrites @tuples[idx] with the idx-th gap while
# each_tuple_complement is still running, then deletes the single entry
# after the last one written.
# NOTE(review): each_tuple_complement reads `min`, `tuples` and `max`
# between yields.  If those read the same @tuples array being overwritten
# here (presumably they do -- confirm), then a leading gap written to index
# 0 (when min > 1) is seen by the following each_cons and `max` reads.
# Verify that results match enum_not.
def enum_not!
43+
# Index of the last gap written; stays -1 when nothing is yielded.
last = -1
44+
each_tuple_complement.with_index do |minmax, idx|
45+
last = idx
46+
@tuples[idx] = minmax
47+
end
48+
@tuples.delete_at(last + 1)
49+
self
50+
end
51+
52+
private
53+
54+
# Yields (min, max) for each gap of the set, in ascending order: nothing
# when full?, the single pair (1, STAR_INT) when empty?, otherwise the gap
# before the first tuple, the gaps between consecutive tuples, and the gap
# after the last tuple.  Returns an Enumerator when called without a block,
# and nil otherwise.
def each_tuple_complement
55+
return to_enum(__method__) unless block_given?
56+
if full? then # no yield
57+
elsif empty? then yield 1, STAR_INT
58+
else
59+
yield 1, min - 1 unless min <= 1
60+
# a is the max of one tuple and b the min of the next.
tuples.each_cons(2) do |(_, a), (b,_)| yield a+1, b-1 end
61+
yield max + 1, STAR_INT unless max == STAR_INT
62+
end
63+
nil
64+
end
65+
66+
end
67+
end
68+
69+
# warmup for YJIT
70+
# Only warm up when YJIT is both available and switched on.  The defined?
# guard keeps the prelude loadable on builds without YJIT (or without RubyVM
# at all), where the bare constant lookup would raise NameError.
if defined?(RubyVM::YJIT.enabled?) && RubyVM::YJIT.enabled?
  300.times do
    ~SETS.sample
    SETS.sample.orig_not
    SETS.sample.enum_not
    SETS.sample.dup.orig_not!
    SETS.sample.dup.enum_not!
  end
end
79+
80+
benchmark:
81+
" ~set": ~SETS.sample
82+
"0.5.8 ~set": SETS.sample.orig_not
83+
"enum ~set": SETS.sample.enum_not
84+
"enum2 ~set": SETS.sample.enum_not_2
85+
"0.5.8 ~dup": SETS.sample.dup.orig_not!
86+
"enum ~dup": SETS.sample.dup.enum_not!

benchmarks/sequence_set-ops.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("SEQSET_LHS_SIZE", 1000)
7+
MAX_INPUT = Integer ENV.fetch("SEQSET_LHS_MAX", 1400)
8+
9+
SETS = Array.new(1000) {
10+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
11+
}
12+
13+
# Picks two distinct entries of SETS at random and returns them as
# [lhs, rhs].  The lhs is a dup, so the pooled set is never the receiver.
def sets
  SETS.sample(2).then { |lhs, rhs| [lhs.dup, rhs] }
end
17+
18+
# warmup for YJIT
19+
# Only warm up when YJIT is both available and switched on.  The defined?
# guard keeps the prelude loadable on builds without YJIT (or without RubyVM
# at all), where the bare constant lookup would raise NameError.
if defined?(RubyVM::YJIT.enabled?) && RubyVM::YJIT.enabled?
  200.times do
    lhs, rhs = sets
    lhs | rhs
    lhs & rhs
    lhs - rhs
    lhs ^ rhs
    ~lhs
  end
end
29+
30+
benchmark:
31+
union: l, r = sets; l | r
32+
intersection: l, r = sets; l & r
33+
difference: l, r = sets; l - r
34+
xor: l, r = sets; l ^ r
35+
complement: l, _ = sets; ~l
36+
37+
intersect?: l, r = sets; l.intersect? r
38+
disjoint?: l, r = sets; l.disjoint? r
39+
cover?: l, r = sets; l.cover? r

0 commit comments

Comments
 (0)