Skip to content

Commit 6a3a870

Browse files
committed
[Wasm] Fix #217: Handle duplicated build keys.
1 parent 517219f commit 6a3a870

File tree

1 file changed

+22
-10
lines changed

1 file changed

+22
-10
lines changed

src/backend/WasmOperator.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,17 @@ decompose_equi_predicate(const cnf::CNF &cnf, const Schema &schema_left)
893893
return { std::move(ids_left), std::move(ids_right) };
894894
}
895895

896+
/** Returns a deduplicated version of \p v, i.e. duplicate identifiers are only contained once. */
897+
std::vector<Schema::Identifier> deduplicate(const std::vector<Schema::Identifier> &v)
898+
{
899+
std::vector<Schema::Identifier> res;
900+
for (auto &id : v) {
901+
if (not contains(res, id))
902+
res.push_back(id);
903+
}
904+
return res;
905+
}
906+
896907

897908
/*======================================================================================================================
898909
* NoOp
@@ -3922,8 +3933,8 @@ void SimpleHashJoin<UniqueBuild, Predicated>::execute(const Match<SimpleHashJoin
39223933
const auto ht_schema = M.build.schema().deduplicate();
39233934

39243935
/*----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
3925-
auto p = decompose_equi_predicate(M.join.predicate(), ht_schema);
3926-
const std::vector<Schema::Identifier> &build_keys = p.first, &probe_keys = p.second;
3936+
const auto decomposed_ids = decompose_equi_predicate(M.join.predicate(), ht_schema);
3937+
const auto build_keys = deduplicate(decomposed_ids.first), probe_keys = deduplicate(decomposed_ids.second);
39273938

39283939
/*----- Compute payload IDs and its total size in bits (ignoring padding). -----*/
39293940
std::vector<Schema::Identifier> payload_ids;
@@ -4059,10 +4070,11 @@ void SimpleHashJoin<UniqueBuild, Predicated>::execute(const Match<SimpleHashJoin
40594070
key.emplace_back(env.get(probe_key));
40604071
if constexpr (UniqueBuild) {
40614072
/*----- Add build key to current environment since `ht->find()` will only return the payload values. -----*/
4062-
for (auto build_it = build_keys.cbegin(), probe_it = probe_keys.cbegin(); build_it != build_keys.cend();
4063-
++build_it, ++probe_it)
4073+
for (auto build_it = decomposed_ids.first.cbegin(), probe_it = decomposed_ids.second.cbegin();
4074+
build_it != decomposed_ids.first.cend();
4075+
++build_it, ++probe_it) // use build and probe keys *with* duplicates to ensure correct indices
40644076
{
4065-
M_insist(probe_it != probe_keys.cend());
4077+
M_insist(probe_it != decomposed_ids.second.cend());
40664078
env.add(*build_it, env.get(*probe_it)); // since build and probe keys match for join partners
40674079
}
40684080

@@ -4444,11 +4456,12 @@ ConditionSet HashBasedGroupJoin::pre_condition(
44444456
if (child_idx == 0) {
44454457
/*----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
44464458
auto &build = *std::get<2>(partial_inner_nodes);
4447-
auto build_keys = decompose_equi_predicate(join.predicate(), build.schema()).first;
4459+
const auto decomposed_ids = decompose_equi_predicate(join.predicate(), build.schema());
4460+
const auto build_keys = deduplicate(decomposed_ids.first);
44484461

44494462
/*----- Hash-based group-join can only be used if grouping and join (i.e. build) key match (ignoring order). -*/
44504463
const auto num_grouping_keys = grouping.group_by().size();
4451-
if (num_grouping_keys != build_keys.size()) { // XXX: duplicated IDs are still a match but rejected here
4464+
if (num_grouping_keys != build_keys.size()) {
44524465
pre_cond.add_condition(Unsatisfiable());
44534466
return pre_cond;
44544467
}
@@ -4526,9 +4539,8 @@ void HashBasedGroupJoin::execute(const Match<HashBasedGroupJoin> &M, setup_t set
45264539
aggregates_size_in_bits += 64;
45274540

45284541
/*----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
4529-
auto decomposed_ids = decompose_equi_predicate(M.join.predicate(), M.build.schema());
4530-
const auto &build_keys = decomposed_ids.first;
4531-
const auto &probe_keys = decomposed_ids.second;
4542+
const auto decomposed_ids = decompose_equi_predicate(M.join.predicate(), M.build.schema());
4543+
const auto build_keys = deduplicate(decomposed_ids.first), probe_keys = deduplicate(decomposed_ids.second);
45324544
M_insist(build_keys.size() == num_keys);
45334545

45344546
/*----- Compute initial capacity of hash table. -----*/

0 commit comments

Comments
 (0)