diff --git a/benchmarks/seqset-memsize.rb b/benchmarks/seqset-memsize.rb
new file mode 100644
index 000000000..7af18a14a
--- /dev/null
+++ b/benchmarks/seqset-memsize.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift "./lib"
+require "net/imap"
+require "objspace"
+
+# Builds a SequenceSet from +n+ random integers drawn from min..max.
+def seqset(n, min: 1, max: (n * 1.25).to_i)
+  inputs = Array.new(n) { rand(min..max) }
+  Net::IMAP::SequenceSet[inputs]
+end
+
+# Returns +obj+ followed by every object transitively reachable from it,
+# each listed exactly once.  Modules found among the children are skipped.
+#
+# +seen+ tracks visited objects by identity, so distinct-but-equal objects
+# are each listed and large containers are never hashed by content.
+def obj_tree(obj, seen: Set.new.compare_by_identity)
+  # Check-and-mark at visit time: an object that is reachable through two
+  # siblings must not be listed (and later sized) twice.
+  return [] unless seen.add?(obj)
+  children = Array(ObjectSpace.reachable_objects_from(obj)) # nil for immediates
+    .reject { _1 in Module }
+    .flat_map { obj_tree(_1, seen:) }
+  [obj, *children]
+end
+
+# Sum of ObjectSpace.memsize_of over obj_tree(obj).
+def memsize(obj) = obj_tree(obj).sum { ObjectSpace.memsize_of _1 }
+
+# Arithmetic mean of +ary+, as a Float.
+def avg(ary) = ary.sum / ary.count.to_f
+
+# Prints the average memsize of +count+ random sets built from +n+ inputs.
+# Any extra keywords are forwarded to seqset.
+def print_avg(n, count: 10, **)
+  print "Average memsize of SequenceSet with %6d inputs: " % [n]
+  sizes = Array.new(count) {
+    print "."
+    memsize seqset(n, **)
+  }
+  puts "%9.1f" % [avg(sizes)]
+end
+
+# pp obj_tree(seqset(200, min: 1_000_000, max: 1_000_999)).to_h { [_1, memsize(_1)] }
+print_avg 1
+print_avg 10
+print_avg 100
+
+print_avg 1_000
+print_avg 10_000
+print_avg 100_000
diff --git a/benchmarks/sequence_set-and.yml b/benchmarks/sequence_set-and.yml
new file mode 100644
index 000000000..406d61e3c
--- /dev/null
+++ b/benchmarks/sequence_set-and.yml
@@ -0,0 +1,76 @@
+---
+prelude: |
+  require "yaml"
+  require "net/imap"
+
+  INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
+  MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
+  WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
+
+  SETS = Array.new(1000) {
+    Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
+  }
+
+  def sets # => [dup of one random set, a second random set]
+    l, r = SETS.sample(2)
+    [l.dup, r]
+  end
+
+  class Net::IMAP
+    class SequenceSet
+      def and0(other) remain_frozen dup.and0! other end
+      def and1(other) remain_frozen dup.and1! other end
+      def and2(other) remain_frozen dup.and2! other end
+
+      # L - ~R
+      def and0!(other)
+        modifying!
+        subtract SequenceSet.new(other).complement!
+      end
+
+      # L - (L - R)
+      def and1!(other)
+        modifying!
+        subtract dup.subtract(SequenceSet.new(other))
+      end
+
+      # TODO: add this as a public method
+      def xor!(other) # :nodoc:
+        modifying!
+        copy = dup
+        other = SequenceSet.new(other)
+        merge(other).subtract(other.subtract(copy.complement!)) # (L | R) - (R - ~L)
+      end
+
+      # L - (L ^ R)
+      def and2!(other)
+        modifying!
+        subtract SequenceSet.new(other).xor! self
+      end
+    end
+  end
+
+  # warmup (esp. for JIT)
+  WARMUP_RUNS.times do
+    lhs, rhs = sets
+    lhs | rhs
+    lhs & rhs
+    lhs - rhs
+    lhs ^ rhs
+    ~lhs
+    lhs.and0 rhs
+    lhs.and1 rhs
+    lhs.and2 rhs
+  end
+
+benchmark:
+  " L & R": l, r = sets; l & r
+  " L - ~R": l, r = sets; l - ~r
+  "and0 L - ~R": l, r = sets; l.and0 r
+  "and0! L - ~R": l, r = sets; l.and0! r
+  " L - (L - R)": l, r = sets; l - (l - r)
+  "and1 L - (L - R)": l, r = sets; l.and1 r
+  "and1! L - (L - R)": l, r = sets; l.and1! r
+  " L - (L ^ R)": l, r = sets; l - (l ^ r)
+  "and2 L - (L ^ R)": l, r = sets; l.and2 r
+  "and2! L - (L ^ R)": l, r = sets; l.and2! r
diff --git a/benchmarks/sequence_set-new.yml b/benchmarks/sequence_set-new.yml
new file mode 100644
index 000000000..3d30b4947
--- /dev/null
+++ b/benchmarks/sequence_set-new.yml
@@ -0,0 +1,97 @@
+---
+prelude: |
+  require "net/imap"
+  SeqSet = Net::IMAP::SequenceSet
+
+  N_RAND = 100
+
+  def rand_nums(n, min: 1, max: (n * 1.25).to_i) = Array.new(n) { rand(min..max) } # honors min: (was hard-coded to 1)
+  def rand_entries(...) = SeqSet[rand_nums(...)].elements.shuffle # shuffled elements of a random set
+  def rand_string(...)
= SeqSet[rand_nums(...)].string.split(?,).shuffle.join(?,)
+
+  def build_string_inputs(n, n_rand, **) # n_rand shuffled sequence-set strings
+    Array.new(n_rand) { rand_string(n, **) }
+  end
+
+  def build_int_inputs(n, n_rand, **) # n_rand shuffled element arrays
+    Array.new(n_rand) { rand_entries(n, **) }
+  end
+
+  inputs = nil # assigned by each benchmark's own prelude
+  i = 0 # rotating index into inputs
+
+  # warm up, especially for YJIT
+  1000.times do
+    ints = rand_nums(1000)
+    seqset = SeqSet[ints]
+    string = seqset.string.split(?,).shuffle.join(?,)
+    SeqSet[string]
+  end
+
+benchmark:
+
+  - name: n=10 ints
+    prelude: inputs = build_int_inputs 10, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=10 string
+    prelude: inputs = build_string_inputs 10, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=100 ints
+    prelude: inputs = build_int_inputs 100, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=100 string
+    prelude: inputs = build_string_inputs 100, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=1000 ints
+    prelude: inputs = build_int_inputs 1000, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=1000 string
+    prelude: inputs = build_string_inputs 1000, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=10,000 ints
+    prelude: inputs = build_int_inputs 10_000, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=10,000 string
+    prelude: inputs = build_string_inputs 10_000, N_RAND
+    script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+  - name: n=100,000 ints
+    prelude: inputs = build_int_inputs 100_000, N_RAND / 2
+    script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]] # only N_RAND / 2 inputs are built above
+
+  - name: n=100,000 string
+    prelude: inputs = build_string_inputs 100_000, N_RAND / 2
+    script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]
+
+# - name: n=1,000,000 ints
+#   prelude: inputs = build_int_inputs 1_000_000
+#   script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+# - name: n=10,000,000 ints
+#   prelude: inputs = build_int_inputs 10_000_000
+#   script: SeqSet[inputs[i = (i+1) % N_RAND]]
+
+contexts:
+  - name: local
+    prelude: |
+
$LOAD_PATH.unshift "./lib"
+    require: false
+  - name: v0.5.9
+    gems:
+      net-imap: 0.5.9
+    require: false
+  - name: v0.5.0
+    gems:
+      net-imap: 0.5.0
+    require: false
+  - name: v0.4.21
+    gems:
+      net-imap: 0.4.21
+    require: false
diff --git a/benchmarks/sequence_set-not.yml b/benchmarks/sequence_set-not.yml
new file mode 100644
index 000000000..1230f6675
--- /dev/null
+++ b/benchmarks/sequence_set-not.yml
@@ -0,0 +1,85 @@
+---
+prelude: |
+  require "yaml"
+  require "net/imap"
+
+  INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
+  MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
+  WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
+
+  SETS = Array.new(1000) {
+    Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
+  }
+
+  class Net::IMAP
+    class SequenceSet
+      def orig_not # non-mutating wrapper around orig_not!
+        remain_frozen dup.orig_not!
+      end
+
+      # 0.5.8 implementation
+      def orig_not!
+        return replace(self.class.full) if empty?
+        return clear if full?
+        flat = @tuples.flat_map { [_1 - 1, _2 + 1] } # [min - 1, max + 1, ...] for every tuple
+        if flat.first < 1 then flat.shift else flat.unshift 1 end # drop or add the leading bound
+        if STAR_INT < flat.last then flat.pop else flat.push STAR_INT end # drop or add the trailing bound
+        @tuples = flat.each_slice(2).to_a # re-pair the bounds as [min, max] gaps
+        normalize!
+      end
+
+      # enumerator based
+      def enum_not # builds a new set from the yielded complement tuples
+        result = SequenceSet.new
+        each_tuple_complement do |min, max| result.tuples << [min, max] end
+        remain_frozen result
+      end
+
+      # enumerator based
+      def enum_not_2 # non-mutating wrapper around enum_not!
+        remain_frozen dup.enum_not!
+      end
+
+      def enum_not! # overwrites @tuples in place with the complement tuples
+        last = -1 # index of the last tuple written; stays -1 when nothing is yielded
+        each_tuple_complement.with_index do |minmax, idx|
+          last = idx
+          @tuples[idx] = minmax # NOTE(review): if `tuples` is @tuples, this writes while each_cons below is still reading it - confirm results
+        end
+        @tuples.delete_at(last + 1) # removes at most one leftover tuple
+        self
+      end
+
+      private
+
+      def each_tuple_complement # yields [min, max] for each gap between tuples
+        return to_enum(__method__) unless block_given?
+        if full? then # no yield
+        elsif empty? then yield 1, STAR_INT
+        else
+          yield 1, min - 1 unless min <= 1
+          tuples.each_cons(2) do |(_, a), (b,_)| yield a+1, b-1 end
+          yield max + 1, STAR_INT unless max == STAR_INT
+        end
+        nil
+      end
+
+    end
+  end
+
+  # warmup (esp.
for JIT)
+  WARMUP_RUNS.times do
+    ~SETS.sample
+    SETS.sample.orig_not
+    SETS.sample.enum_not
+    SETS.sample.dup.orig_not!
+    SETS.sample.dup.enum_not!
+  end
+
+benchmark:
+  " ~set": ~SETS.sample
+  "0.5.8 ~set": SETS.sample.orig_not
+  "enum ~set": SETS.sample.enum_not
+  "enum2 ~set": SETS.sample.enum_not_2
+  "0.5.8 ~dup": SETS.sample.dup.orig_not!
+  "enum ~dup": SETS.sample.dup.enum_not!
diff --git a/benchmarks/sequence_set-ops.yml b/benchmarks/sequence_set-ops.yml
new file mode 100644
index 000000000..0ff6f1465
--- /dev/null
+++ b/benchmarks/sequence_set-ops.yml
@@ -0,0 +1,34 @@
+---
+prelude: |
+  require "yaml"
+  require "net/imap"
+
+  INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
+  MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
+  WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
+
+  SETS = Array.new(1000) {
+    Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
+  }
+
+  def sets # => [dup of one random set, a second random set]
+    l, r = SETS.sample(2)
+    [l.dup, r]
+  end
+
+  # warmup (esp. for JIT)
+  WARMUP_RUNS.times do # honors PROFILE_WARMUP_RUNS like the sibling benchmarks (was a hard-coded 200)
+    lhs, rhs = sets
+    lhs | rhs
+    lhs & rhs
+    lhs - rhs
+    lhs ^ rhs
+    ~lhs
+  end
+
+benchmark:
+  union: l, r = sets; l | r
+  intersection: l, r = sets; l & r
+  difference: l, r = sets; l - r
+  xor: l, r = sets; l ^ r
+  complement: l, _ = sets; ~l
diff --git a/benchmarks/sequence_set-predicates.yml b/benchmarks/sequence_set-predicates.yml
new file mode 100644
index 000000000..3c39677c7
--- /dev/null
+++ b/benchmarks/sequence_set-predicates.yml
@@ -0,0 +1,32 @@
+---
+prelude: |
+  require "yaml"
+  require "net/imap"
+
+  INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
+  MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
+  WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
+
+  SETS = Array.new(1000) {
+    Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
+  }
+
+  def sets # => [dup of one random set, a second random set]
+    l, r = SETS.sample(2)
+    [l.dup, r]
+  end
+
+  # warmup (esp.
for JIT)
+  WARMUP_RUNS.times do
+    lhs, rhs = sets
+    # Warm up the predicates that are actually measured below; the set
+    # operators previously exercised here are not part of this benchmark.
+    lhs.intersect? rhs
+    lhs.disjoint? rhs
+    lhs.cover? rhs
+  end
+
+benchmark:
+  intersect?: l, r = sets; l.intersect? r
+  disjoint?: l, r = sets; l.disjoint? r
+  cover?: l, r = sets; l.cover? r
diff --git a/benchmarks/sequence_set-slice.yml b/benchmarks/sequence_set-slice.yml
new file mode 100644
index 000000000..ed1dffa06
--- /dev/null
+++ b/benchmarks/sequence_set-slice.yml
@@ -0,0 +1,71 @@
+---
+prelude: |
+  require "net/imap"
+
+  def init(n:, d:) # (re)builds the $sets/$idxs/$lens/$ranges fixtures for n inserts over 1..(n*d)
+    n = n.to_int
+    samples = (1e5.to_i / n).ceil.clamp(1..100) # integer division; between 1 and 100 sets
+    domain = 1..(n*d).floor
+    $sets = Array.new(samples) {
+      set = Net::IMAP::SequenceSet.new
+      n.times do set << rand(domain) end
+      set
+    }
+    $idxs = Array.new([10_000, 2 * n].min) { rand(0..n - 1) }
+    $lens = Array.new([10_000, n].min) { rand(1..n) }
+    $ranges = Array.new([10_000, n * n].min) {
+      start = idx
+      stop = start.negative? ? rand(start..-1) : rand(start...n) # idx is never negative ($idxs is 0..n-1), so only the second arm runs
+      start..stop
+    }
+  end
+
+  def set = $sets.sample
+  def idx = $idxs.sample
+  def len = $lens.sample
+  def range = $ranges.sample
+
+  # warmup
+  init n: 100, d: 2
+  2000.times do
+    set[idx]
+    set[range]
+    set[idx, len]
+  end
+
+benchmark:
+
+  - { name: "(N= 10 ) set[idx]", prelude: "init(n: 1e1, d: 4.0)", script: "set[idx]" }
+  - { name: "(N=100 ) set[idx]", prelude: "init(n: 1e2, d: 4.0)", script: "set[idx]" }
+  - { name: "(N= 1K) set[idx]", prelude: "init(n: 1e3, d: 2.0)", script: "set[idx]" }
+  - { name: "(N= 10K) set[idx]", prelude: "init(n: 1e4, d: 2.0)", script: "set[idx]" }
+  - { name: "(N=100K) set[idx]", prelude: "init(n: 1e5, d: 2.0)", script: "set[idx]" }
+  - { name: "(N= 1M) set[idx]", prelude: "init(n: 1e6, d: 1.5)", script: "set[idx]" }
+
+  - { name: "(N= 10 ) set[idx, len]", prelude: "init(n: 1e1, d: 4.0)", script: "set[idx, len]" }
+  - { name: "(N=100 ) set[idx, len]", prelude: "init(n: 1e2, d: 4.0)", script: "set[idx, len]" }
+  - { name: "(N= 1K) set[idx, len]", prelude: "init(n: 1e3, d: 2.0)", script: "set[idx, len]" }
+  - { name: "(N= 10K) set[idx, len]", prelude: "init(n: 1e4, d: 2.0)", script: "set[idx, len]" }
+  - { name: "(N=100K) set[idx, len]", prelude: "init(n: 1e5, d: 2.0)", script: "set[idx, len]" }
+  - { name: "(N= 1M) set[idx, len]", prelude: "init(n: 1e6, d: 1.5)", script: "set[idx, len]" }
+
+  - { name: "(N= 10 ) set[range]", prelude: "init(n: 1e1, d: 4.0)", script: "set[range]" }
+  - { name: "(N=100 ) set[range]", prelude: "init(n: 1e2, d: 4.0)", script: "set[range]" }
+  - { name: "(N= 1K) set[range]", prelude: "init(n: 1e3, d: 2.0)", script: "set[range]" }
+  - { name: "(N= 10K) set[range]", prelude: "init(n: 1e4, d: 2.0)", script: "set[range]" }
+  - { name: "(N=100K) set[range]", prelude: "init(n: 1e5, d: 2.0)", script: "set[range]" }
+  - { name: "(N= 1M) set[range]", prelude: "init(n: 1e6, d: 1.5)", script: "set[range]" }
+
+contexts:
+  - name: local
+    prelude: |
+      $LOAD_PATH.unshift "./lib"
+    require: false
+  - name: v0.5.8 # fixes several bugs
+    gems:
+      net-imap: 0.5.8
+    require: false
+  - name: v0.4.21 # backports 0.5.8 bugfixes
+    gems:
+      net-imap: 0.4.21
+    require: false
diff --git a/benchmarks/sequence_set-xor.yml b/benchmarks/sequence_set-xor.yml
new file mode 100644
index 000000000..2f9808514
--- /dev/null
+++ b/benchmarks/sequence_set-xor.yml
@@ -0,0 +1,82 @@
+---
+prelude: |
+  require "yaml"
+  require "net/imap"
+
+  INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
+  MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
+  WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
+
+  SETS = Array.new(1000) {
+    Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
+  }
+
+  def sets # => [dup of one random set, a second random set]
+    l, r = SETS.sample(2)
+    [l.dup, r]
+  end
+
+  class Net::IMAP
+    class SequenceSet
+      def xor0(other) remain_frozen (dup | other) - (self & other) end
+      def xor1(other) remain_frozen dup.xor1! other end
+      def xor2(other) remain_frozen dup.xor2! other end
+      def xor3(other) remain_frozen dup.xor3! other end
+
+      # (L | R) - (L & R)
+      def xor1!(other)
+        modifying!
+        copy = dup # snapshot of L before merge mutates self
+        other = SequenceSet.new(other)
+        merge(other).subtract(other.subtract(copy.complement!)) # (L | R) - (R - ~L)
+      end
+
+      # TODO: add this as a public method
+      def intersect!(other) # :nodoc:
+        modifying!
+        subtract SequenceSet.new(other).complement! # L - ~R
+      end
+
+      # (L | R) - (L & R)
+      def xor2!(other)
+        modifying!
+        copy = dup # snapshot of L before merge mutates self
+        other = SequenceSet.new(other)
+        merge(other).subtract(copy.intersect!(other))
+      end
+
+      # (L - R) | (R - L)
+      def xor3!(other)
+        modifying!
+        copy = dup # snapshot of L before subtract mutates self
+        other = SequenceSet.new(other)
+        subtract(other).merge(other.subtract(copy))
+      end
+    end
+  end
+
+  # warmup (esp. for JIT)
+  WARMUP_RUNS.times do
+    lhs, rhs = sets
+    lhs | rhs
+    lhs & rhs
+    lhs - rhs
+    lhs ^ rhs
+    ~lhs
+    lhs.xor0 rhs
+    lhs.xor1 rhs
+    lhs.xor2 rhs
+    lhs.xor3 rhs
+  end
+
+benchmark:
+  " L ^ R": l, r = sets; l ^ r
+  " (L | R) - (R & L)": l, r = sets; (l | r) - (r & l)
+  "0.5.8 (L | R) - (R & L)": l, r = sets; l.xor0 r
+  "dup1 (L | R) - (R & L)": l, r = sets; l.xor1 r
+  "mut1 (L | R) - (R & L)": l, r = sets; l.xor1! r
+  "dup2 (L | R) - (R & L)": l, r = sets; l.xor2 r
+  "mut2 (L | R) - (R & L)": l, r = sets; l.xor2! r
+  " (L - R) | (R - L)": l, r = sets; (l - r) | (r - l)
+  "dup3 (L - R) | (R - L)": l, r = sets; l.xor3 r
+  "mut3 (L - R) | (R - L)": l, r = sets; l.xor3! r