Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmark/REQUIRE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DataTables
22 changes: 22 additions & 0 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using PkgBenchmark
using Query
using DataTables

# Benchmark group "Query": builds one two-column DataTable and registers two
# benchmarks that group column A by column B and collect the per-group mean
# (output column `m`) into a DataTable.
@benchgroup "Query" begin
# Number of rows in the benchmark table.
N = 100_000_000;
# Column A: N values from `rand(N)`; column B: N integers drawn from 1:100.
A = rand(N);
B = rand(1:100, N);
dt = DataTable([A, B], [:A, :B]);

# `$dt` interpolates the prebuilt table into the benchmark expression
# (presumably so table construction is not part of the measurement -- confirm
# against the PkgBenchmark/BenchmarkTools documentation).
@bench "group" @from i in $dt begin
@group i.A by i.B into g
@select {m = mean(g)}
@collect DataTable
end

# NOTE(review): the body of "group2" is identical to "group" above -- confirm
# whether a different query was intended here.
@bench "group2" @from i in $dt begin
@group i.A by i.B into g
@select {m = mean(g)}
@collect DataTable
end
end
32 changes: 32 additions & 0 deletions benchmark/perf.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using DataTables, Query

# Number of rows in the test table.
N = 100_000_000;
# Column data: N values from `rand(N)` and N integers drawn from 1:100.
A = rand(N);
B = rand(1:100, N);
# Build the table exactly once, with NullableArray-backed columns; this is the
# table every measurement below runs against. Building a second 100M-row table
# only to overwrite it would waste time and memory.
dt = DataTable(A = NullableArray(A), B = NullableArray(B));

# First-call timings at top level: these include compilation of the code paths.
@time by(dt, :B, d -> mean(d[:A]));

@time x = @from i in dt begin
    @group i.A by i.B into g
    @select {m = mean(g)}
    @collect DataTable
end;

# Mean of column :A for each distinct value of column :B, computed with `by`.
function foo1(dt)
    group_mean = sub -> mean(sub[:A])
    return by(dt, :B, group_mean)
end

# Group column A by column B with a `@from` query and collect the per-group
# mean (output column `m`) into a DataTable.
function foo2(dt)
    return @from row in dt begin
        @group row.A by row.B into grp
        @select {m = mean(grp)}
        @collect DataTable
    end
end

# Time both variants through their function wrappers.
@time foo1(dt);
@time foo2(dt);

# Collect a profile of the query-based variant.
@profile foo2(dt);
39 changes: 26 additions & 13 deletions src/enumerable/enumerable_groupby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ immutable Grouping{TKey,T} <: AbstractArray{T,1}
end

import Base.size
# A Grouping is a one-dimensional AbstractArray over its `elements`, so its
# size is the 1-tuple holding the element count. Defined exactly once: a second
# definition with the same signature would silently replace the first.
size{TKey,T}(A::Grouping{TKey,T}) = (length(A.elements),)
# Declare linear indexing as the native index style for Grouping.
Base.IndexStyle(::Type{<:Grouping}) = IndexLinear()
import Base.getindex
# Indexing a Grouping forwards to its underlying `elements` collection.
function getindex{TKey,T}(A::Grouping{TKey,T}, i)
    return A.elements[i]
end
import Base.length
Expand Down Expand Up @@ -63,6 +64,10 @@ immutable EnumerableGroupBy{T,TKey,TR,SO,ES<:Function,RS<:Function} <: Enumerabl
resultSelector::RS
end

# IterableTables size trait for group-by enumerables: HasLengthAfterStart.
function IterableTables.iteratorsize2(::Type{<:EnumerableGroupBy})
    return IterableTables.HasLengthAfterStart()
end

# Length for a given iteration state: the length of the state's first component.
function Base.length(iter::EnumerableGroupBy, state)
    held = state[1]
    return length(held)
end

# Element type of a group-by enumerable instance: its first type parameter `T`.
Base.eltype{T,TKey,TR,SO,ES}(iter::EnumerableGroupBy{T,TKey,TR,SO,ES}) = T

# Type-level variant. EnumerableGroupBy has a sixth type parameter that is not
# named here, and `Type{...}` matching is invariant, so the signature uses
# `Type{<:...}`: this lets fully parameterized (concrete) EnumerableGroupBy
# types dispatch to this method as well as the partially applied type.
Base.eltype{T,TKey,TR,SO,ES}(iter::Type{<:EnumerableGroupBy{T,TKey,TR,SO,ES}}) = T
Expand All @@ -85,25 +90,33 @@ end

# TODO This should be rewritten as a lazy iterator
# Iteration protocol for EnumerableGroupBy.
#
# `start` consumes the whole source eagerly and buckets it into an ordered
# dictionary of Grouping objects, one per key. The iteration state is the pair
# `(values_iterator, inner_state)` over that dictionary's values; `next` and
# `done` simply forward to the values iterator.
function start{T,TKey,TR,SO,ES}(iter::EnumerableGroupBy{T,TKey,TR,SO,ES})
    result = OrderedDict{TKey,Grouping{TKey,TR}}()
    for i in iter.source
        key = iter.elementSelector(i)
        # `let` gives the do-block closure its own binding of `key`
        # (avoids capturing the reassigned loop-local directly).
        let key=key
            # Look up the group for this key, creating an empty one on first
            # sight; each source item is pushed exactly once.
            g = get!(result, key) do
                return Grouping{TKey, TR}(key,Array{TR,1}(0))
            end
            push!(g.elements,iter.resultSelector(i))
        end
    end
    dict_iterator = values(result)
    return dict_iterator,start(dict_iterator)
end

# Advance one step: returns the next Grouping and the updated state.
function next{T,TKey,TR,SO,ES}(iter::EnumerableGroupBy{T,TKey,TR,SO,ES}, state)
    dict_iterator = state[1]
    dict_iterator_state = state[2]

    x = next(dict_iterator, dict_iterator_state)
    v = x[1]
    dict_iterator_state_new = x[2]

    return v, (dict_iterator, dict_iterator_state_new)
end

# Iteration is finished when the underlying values iterator is exhausted.
function done{T,TKey,TR,SO,ES}(iter::EnumerableGroupBy{T,TKey,TR,SO,ES}, state)
    dict_iterator = state[1]
    dict_iterator_state = state[2]
    return done(dict_iterator, dict_iterator_state)
end
4 changes: 4 additions & 0 deletions src/enumerable/enumerable_select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ immutable EnumerableSelect{T, S, Q<:Function} <: Enumerable
f::Q
end

# The IterableTables size trait of a select is that of its source type `S`.
function IterableTables.iteratorsize2{T,S,Q}(::Type{EnumerableSelect{T,S,Q}})
    return IterableTables.iteratorsize2(S)
end

# The Base size trait of a select is likewise that of its source type `S`.
function Base.iteratorsize{T,S,Q}(::Type{EnumerableSelect{T,S,Q}})
    return Base.iteratorsize(S)
end

# Element type of a select instance: its first type parameter `T`.
function Base.eltype{T,S,Q}(iter::EnumerableSelect{T,S,Q})
    return T
end
Expand All @@ -11,6 +13,8 @@ Base.eltype{T,S,Q}(iter::Type{EnumerableSelect{T,S,Q}}) = T

# A select has as many items as its source.
function Base.length{T,S,Q}(iter::EnumerableSelect{T,S,Q})
    src = iter.source
    return length(src)
end

# State-dependent length: forwarded to the source together with the state.
function Base.length{T,S,Q}(iter::EnumerableSelect{T,S,Q}, state)
    src = iter.source
    return length(src, state)
end

function select(source::Enumerable, f::Function, f_expr::Expr)
TS = eltype(source)
T = Base.return_types(f, (TS,))[1]
Expand Down
5 changes: 2 additions & 3 deletions src/sources/source_iterable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ function start{T,S}(iter::EnumerableIterable{T,S})
return start(iter.source)
end

# Advance the wrapped source by one step. The `(value, next_state)` pair that
# the source returns is forwarded unchanged; `@inline` is a hint so this thin
# wrapper adds no call overhead.
@inline function next{T,S}(iter::EnumerableIterable{T,S}, state)
    return next(iter.source, state)
end

function done{T,S}(iter::EnumerableIterable{T,S}, state)
Expand Down