Skip to content

Commit 7ecf8db

Browse files
committed
separate different streaming join
1 parent e2d9f19 commit 7ecf8db

39 files changed

+1537
-521
lines changed

src/Common/HashMapsTemplate.h

Lines changed: 132 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,123 @@ namespace DB
1212
class WriteBuffer;
1313
class ReadBuffer;
1414

15+
/// Different types of keys for maps.
/// X-macro: applies M to every hash key variant name, in the order listed below.
/// The HashType enum expands it to declare its enumerators, so the order here is the enumerator order.
16+
#define APPLY_FOR_HASH_KEY_VARIANTS(M) \
17+
M(key8) \
18+
M(key16) \
19+
M(key32) \
20+
M(key64) \
21+
M(key_string) \
22+
M(key_fixed_string) \
23+
M(keys128) \
24+
M(keys256) \
25+
M(hashed)
26+
27+
/// One enumerator per name in APPLY_FOR_HASH_KEY_VARIANTS (key8 first, hashed last).
enum class HashType
28+
{
29+
/// M is defined only for the duration of the expansion and removed right after.
#define M(NAME) NAME,
30+
APPLY_FOR_HASH_KEY_VARIANTS(M)
31+
#undef M
32+
};
33+
34+
/// Shorthand for ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>.
/// NOTE(review): the meaning of the second (`true`) template argument is defined in ColumnsHashing
/// and is not visible here - confirm before relying on it.
template <typename Mapped>
35+
using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
36+
37+
/// Dummy key getter used for JOIN ON NULL: findKey ignores all of its arguments and always
/// returns a default-constructed FindResult (presumably meaning "not found" - confirm against FindResultImpl).
38+
template <typename Mapped>
39+
class KeyGetterEmpty
40+
{
41+
public:
42+
/// Stand-in for a hash map type: it only exposes the nested `mapped_type` alias.
struct MappedType
43+
{
44+
using mapped_type = Mapped;
45+
};
46+
47+
/// Uses the ColumnsHashing template directly, with its default trailing template arguments.
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped>;
48+
49+
KeyGetterEmpty() = default;
50+
51+
/// The map, row index and arena parameters are unnamed and unused.
FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); }
52+
};
53+
54+
/// Compile-time table mapping a HashType enumerator to the ColumnsHashing method (`Type`) used as its key getter.
/// The primary template is only declared; every HashType enumerator has its own specialization below:
///   key8 / key16 / key32 / key64   -> HashMethodOneNumber with UInt8 / UInt16 / UInt32 / UInt64
///   key_string                     -> HashMethodString
///   key_fixed_string               -> HashMethodFixedString
///   keys128 / keys256              -> HashMethodKeysFixed with UInt128 / UInt256
///   hashed                         -> HashMethodHashed
/// NOTE(review): the boolean template arguments are positional flags of the ColumnsHashing templates;
/// their meaning is not visible in this file - confirm against ColumnsHashing before changing any of them.
template <HashType type, typename Value, typename Mapped>
55+
struct KeyGetterForTypeImpl;
56+
57+
template <typename Value, typename Mapped>
58+
struct KeyGetterForTypeImpl<HashType::key8, Value, Mapped>
59+
{
60+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false, true>;
61+
};
62+
63+
template <typename Value, typename Mapped>
64+
struct KeyGetterForTypeImpl<HashType::key16, Value, Mapped>
65+
{
66+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false, true>;
67+
};
68+
69+
template <typename Value, typename Mapped>
70+
struct KeyGetterForTypeImpl<HashType::key32, Value, Mapped>
71+
{
72+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false, true>;
73+
};
74+
75+
template <typename Value, typename Mapped>
76+
struct KeyGetterForTypeImpl<HashType::key64, Value, Mapped>
77+
{
78+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false, true>;
79+
};
80+
81+
template <typename Value, typename Mapped>
82+
struct KeyGetterForTypeImpl<HashType::key_string, Value, Mapped>
83+
{
84+
using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false, true>;
85+
};
86+
87+
template <typename Value, typename Mapped>
88+
struct KeyGetterForTypeImpl<HashType::key_fixed_string, Value, Mapped>
89+
{
90+
using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false, true>;
91+
};
92+
93+
template <typename Value, typename Mapped>
94+
struct KeyGetterForTypeImpl<HashType::keys128, Value, Mapped>
95+
{
96+
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false, true>;
97+
};
98+
99+
template <typename Value, typename Mapped>
100+
struct KeyGetterForTypeImpl<HashType::keys256, Value, Mapped>
101+
{
102+
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false, true>;
103+
};
104+
105+
template <typename Value, typename Mapped>
106+
struct KeyGetterForTypeImpl<HashType::hashed, Value, Mapped>
107+
{
108+
using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false, true>;
109+
};
110+
111+
/// Resolves the key getter type for hash table type `Data` and key variant `type`.
/// `Type` is taken from KeyGetterForTypeImpl<type, Value, Mapped>.
template <HashType type, typename Data>
112+
struct KeyGetterForType
113+
{
114+
/// Element type of the hash table.
using Value = typename Data::value_type;
115+
/// Mapped type of the hash table, before const-qualification.
using Mapped_t = typename Data::mapped_type;
116+
/// Mapped type as seen by the key getter: const-qualified when `Data` itself is const.
using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>;
117+
using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
118+
};
119+
120+
template <typename KeyGetter, typename Map, typename MappedHandler>
121+
requires (std::is_invocable_v<MappedHandler, typename Map::mapped_type /*mapped*/, bool /*inserted*/, size_t /*row*/>)
122+
void insertIntoHashMap(
123+
Map & map, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, size_t rows, Arena & pool, MappedHandler && mapped_handler)
124+
{
125+
KeyGetter key_getter(key_columns, key_sizes, nullptr);
126+
for (size_t i = 0; i < rows; ++i)
127+
{
128+
auto emplace_result = key_getter.emplaceKey(map, i, pool);
129+
mapped_handler(emplace_result.getMapped(), emplace_result.isInserted(), i);
130+
}
131+
}
15132

16133
template <typename Map, typename MappedSerializer>
17134
void serializeHashMap(const Map & map, MappedSerializer && mapped_serializer, WriteBuffer & wb)
@@ -83,25 +200,6 @@ void deserializeTwoLevelHashMap(Map & map, MappedDeserializer && mapped_deserial
83200
/// HashMapsTemplate is taken from the HashJoin class and made standalone
84201
/// so that it can be shared among different components
85202

86-
/// Different types of keys for maps.
87-
#define APPLY_FOR_HASH_KEY_VARIANTS(M) \
88-
M(key8) \
89-
M(key16) \
90-
M(key32) \
91-
M(key64) \
92-
M(key_string) \
93-
M(key_fixed_string) \
94-
M(keys128) \
95-
M(keys256) \
96-
M(hashed)
97-
98-
enum class HashType
99-
{
100-
#define M(NAME) NAME,
101-
APPLY_FOR_HASH_KEY_VARIANTS(M)
102-
#undef M
103-
};
104-
105203
template <size_t initial_size_degree = 8>
106204
struct ConservativeHashTableGrowerWithPrecalculation : public HashTableGrowerWithPrecalculation<initial_size_degree>
107205
{
@@ -139,6 +237,21 @@ struct HashMapsTemplate
139237
type = which;
140238
}
141239

240+
template <typename MappedHandler>
241+
void insert(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, size_t rows, Arena & pool, MappedHandler && mapped_handler)
242+
{
243+
switch (type)
244+
{
245+
#define M(NAME) \
246+
case HashType::NAME: \
247+
using KeyGetter = typename KeyGetterForType<HashType::NAME, std::remove_reference_t<decltype(*NAME)>>::Type; \
248+
insertIntoHashMap<KeyGetter>(*NAME, key_columns, key_sizes, rows, pool, std::move(mapped_handler)); \
249+
break;
250+
APPLY_FOR_HASH_KEY_VARIANTS(M)
251+
#undef M
252+
}
253+
}
254+
142255
size_t getTotalRowCount() const
143256
{
144257
switch (type)
@@ -219,89 +332,4 @@ struct HashMapsTemplate
219332
HashType type;
220333
};
221334

222-
template <typename Mapped>
223-
using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
224-
225-
/// Dummy key getter, always find nothing, used for JOIN ON NULL
226-
template <typename Mapped>
227-
class KeyGetterEmpty
228-
{
229-
public:
230-
struct MappedType
231-
{
232-
using mapped_type = Mapped;
233-
};
234-
235-
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped>;
236-
237-
KeyGetterEmpty() = default;
238-
239-
FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); }
240-
};
241-
242-
template <HashType type, typename Value, typename Mapped>
243-
struct KeyGetterForTypeImpl;
244-
245-
template <typename Value, typename Mapped>
246-
struct KeyGetterForTypeImpl<HashType::key8, Value, Mapped>
247-
{
248-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false, true>;
249-
};
250-
251-
template <typename Value, typename Mapped>
252-
struct KeyGetterForTypeImpl<HashType::key16, Value, Mapped>
253-
{
254-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false, true>;
255-
};
256-
257-
template <typename Value, typename Mapped>
258-
struct KeyGetterForTypeImpl<HashType::key32, Value, Mapped>
259-
{
260-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false, true>;
261-
};
262-
263-
template <typename Value, typename Mapped>
264-
struct KeyGetterForTypeImpl<HashType::key64, Value, Mapped>
265-
{
266-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false, true>;
267-
};
268-
269-
template <typename Value, typename Mapped>
270-
struct KeyGetterForTypeImpl<HashType::key_string, Value, Mapped>
271-
{
272-
using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false, true>;
273-
};
274-
275-
template <typename Value, typename Mapped>
276-
struct KeyGetterForTypeImpl<HashType::key_fixed_string, Value, Mapped>
277-
{
278-
using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false, true>;
279-
};
280-
281-
template <typename Value, typename Mapped>
282-
struct KeyGetterForTypeImpl<HashType::keys128, Value, Mapped>
283-
{
284-
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false, true>;
285-
};
286-
287-
template <typename Value, typename Mapped>
288-
struct KeyGetterForTypeImpl<HashType::keys256, Value, Mapped>
289-
{
290-
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false, true>;
291-
};
292-
293-
template <typename Value, typename Mapped>
294-
struct KeyGetterForTypeImpl<HashType::hashed, Value, Mapped>
295-
{
296-
using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false, true>;
297-
};
298-
299-
template <HashType type, typename Data>
300-
struct KeyGetterForType
301-
{
302-
using Value = typename Data::value_type;
303-
using Mapped_t = typename Data::mapped_type;
304-
using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>;
305-
using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
306-
};
307335
}

src/Core/BlockWithShard.h

Lines changed: 0 additions & 17 deletions
This file was deleted.

src/Core/DataBlockWithShard.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#pragma once
2+
3+
#include <Core/Block.h>
4+
#include <Core/LightChunk.h>
5+
6+
namespace DB
7+
{
8+
/// Pairs a data block with a shard number.
/// The block is accepted by rvalue reference only and is moved into the `block` member.
template <typename DataBlock>
struct DataBlockWithShard
{
    DataBlock block;
    int32_t shard;

    DataBlockWithShard(DataBlock && source_block, int32_t shard_id)
        : block(std::move(source_block))
        , shard(shard_id)
    {
    }
};
16+
17+
/// A Block paired with a shard number, and a vector of such pairs.
using BlockWithShard = DataBlockWithShard<Block>;
18+
using BlocksWithShard = std::vector<BlockWithShard>;
19+
20+
/// A LightChunk paired with a shard number, and a vector of such pairs.
using LightChunkWithShard = DataBlockWithShard<LightChunk>;
21+
using LightChunksWithShard = std::vector<LightChunkWithShard>;
22+
}
23+

src/Core/LightChunk.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ struct LightChunk
2727
void concat(const LightChunk & other)
2828
{
2929
auto added_rows = other.rows();
30+
if (added_rows <= 0)
31+
return;
32+
3033
assert(columns() == other.columns());
3134
for (size_t c = 0; auto & col : data)
3235
{
@@ -35,9 +38,21 @@ struct LightChunk
3538
}
3639
}
3740

41+
/// Builds a new LightChunk with the same number of columns as this one, where each column
/// is the result of calling cloneEmpty() on the corresponding column of this chunk.
LightChunk cloneEmpty() const
{
    LightChunk empty_copy;
    auto & cloned_columns = empty_copy.data;

    /// Reserve once up front so appending the clones does not reallocate.
    const size_t column_count = data.size();
    cloned_columns.reserve(column_count);

    for (size_t pos = 0; pos != column_count; ++pos)
        cloned_columns.emplace_back(data[pos]->cloneEmpty());

    return empty_copy;
}
51+
3852
/// Row count: the size of the first column, or 0 when the chunk has no columns.
size_t rows() const noexcept { return data.empty() ? 0 : data[0]->size(); }
3953
/// Number of columns held by the chunk.
size_t columns() const noexcept { return data.size(); }
4054

55+
/// Mutable access to the underlying column container.
Columns & getColumns() noexcept { return data; }
4156
/// Read-only access to the underlying column container.
const Columns & getColumns() const noexcept { return data; }
4257
/// Moves the column container out of the chunk and returns it to the caller.
Columns detachColumns() noexcept { return std::move(data); }
4358

@@ -88,7 +103,9 @@ struct LightChunkWithTimestamp
88103
LightChunkWithTimestamp() = default;
89104
LightChunkWithTimestamp(Columns && data_) : chunk(std::move(data_)) { }
90105
LightChunkWithTimestamp(Chunk && chunk_, Int64 min_ts, Int64 max_ts)
91-
: chunk(std::move(chunk_)), min_timestamp(min_ts), max_timestamp(max_ts) { }
106+
: chunk(std::move(chunk_)), min_timestamp(min_ts), max_timestamp(max_ts)
107+
{
108+
}
92109
LightChunkWithTimestamp(const Block & block)
93110
: chunk(block), min_timestamp(block.minTimestamp()), max_timestamp(block.maxTimestamp()) { }
94111

@@ -122,4 +139,4 @@ struct LightChunkWithTimestamp
122139
Int64 maxTimestamp() const noexcept { return max_timestamp; }
123140
};
124141

125-
}
142+
}

src/Interpreters/ExpressionAnalyzer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2394,7 +2394,7 @@ std::shared_ptr<IJoin> SelectQueryExpressionAnalyzer::chooseJoinAlgorithmStreami
23942394
return std::make_shared<Streaming::ConcurrentHashJoin>(
23952395
analyzed_join, max_threads, std::move(left_join_stream_desc), std::move(right_join_stream_desc));
23962396
else
2397-
return std::make_shared<Streaming::HashJoin>(analyzed_join, std::move(left_join_stream_desc), std::move(right_join_stream_desc));
2397+
return Streaming::HashJoin::create(analyzed_join, std::move(left_join_stream_desc), std::move(right_join_stream_desc));
23982398
}
23992399
/// proton : ends
24002400

0 commit comments

Comments
 (0)