@@ -893,6 +893,17 @@ decompose_equi_predicate(const cnf::CNF &cnf, const Schema &schema_left)
893
893
return { std::move (ids_left), std::move (ids_right) };
894
894
}
895
895
896
+ /* * Returns a deduplicated version of \p v, i.e. duplicate identifiers are only contained once. */
897
+ std::vector<Schema::Identifier> deduplicate (const std::vector<Schema::Identifier> &v)
898
+ {
899
+ std::vector<Schema::Identifier> res;
900
+ for (auto &id : v) {
901
+ if (not contains (res, id))
902
+ res.push_back (id);
903
+ }
904
+ return res;
905
+ }
906
+
896
907
897
908
/* ======================================================================================================================
898
909
* NoOp
@@ -3922,8 +3933,8 @@ void SimpleHashJoin<UniqueBuild, Predicated>::execute(const Match<SimpleHashJoin
3922
3933
const auto ht_schema = M.build .schema ().deduplicate ();
3923
3934
3924
3935
/* ----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
3925
- auto p = decompose_equi_predicate (M.join .predicate (), ht_schema);
3926
- const std::vector<Schema::Identifier> & build_keys = p .first , & probe_keys = p .second ;
3936
+ const auto decomposed_ids = decompose_equi_predicate (M.join .predicate (), ht_schema);
3937
+ const auto build_keys = deduplicate (decomposed_ids .first ), probe_keys = deduplicate (decomposed_ids .second ) ;
3927
3938
3928
3939
/* ----- Compute payload IDs and its total size in bits (ignoring padding). -----*/
3929
3940
std::vector<Schema::Identifier> payload_ids;
@@ -4059,10 +4070,11 @@ void SimpleHashJoin<UniqueBuild, Predicated>::execute(const Match<SimpleHashJoin
4059
4070
key.emplace_back (env.get (probe_key));
4060
4071
if constexpr (UniqueBuild) {
4061
4072
/* ----- Add build key to current environment since `ht->find()` will only return the payload values. -----*/
4062
- for (auto build_it = build_keys.cbegin (), probe_it = probe_keys.cbegin (); build_it != build_keys.cend ();
4063
- ++build_it, ++probe_it)
4073
+ for (auto build_it = decomposed_ids.first .cbegin (), probe_it = decomposed_ids.second .cbegin ();
4074
+ build_it != decomposed_ids.first .cend ();
4075
+ ++build_it, ++probe_it) // use build and probe keys *with* duplicates to ensure correct indices
4064
4076
{
4065
- M_insist (probe_it != probe_keys .cend ());
4077
+ M_insist (probe_it != decomposed_ids. second .cend ());
4066
4078
env.add (*build_it, env.get (*probe_it)); // since build and probe keys match for join partners
4067
4079
}
4068
4080
@@ -4444,11 +4456,12 @@ ConditionSet HashBasedGroupJoin::pre_condition(
4444
4456
if (child_idx == 0 ) {
4445
4457
/* ----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
4446
4458
auto &build = *std::get<2 >(partial_inner_nodes);
4447
- auto build_keys = decompose_equi_predicate (join.predicate (), build.schema ()).first ;
4459
+ const auto decomposed_ids = decompose_equi_predicate (join.predicate (), build.schema ());
4460
+ const auto build_keys = deduplicate (decomposed_ids.first );
4448
4461
4449
4462
/* ----- Hash-based group-join can only be used if grouping and join (i.e. build) key match (ignoring order). -*/
4450
4463
const auto num_grouping_keys = grouping.group_by ().size ();
4451
- if (num_grouping_keys != build_keys.size ()) { // XXX: duplicated IDs are still a match but rejected here
4464
+ if (num_grouping_keys != build_keys.size ()) {
4452
4465
pre_cond.add_condition (Unsatisfiable ());
4453
4466
return pre_cond;
4454
4467
}
@@ -4526,9 +4539,8 @@ void HashBasedGroupJoin::execute(const Match<HashBasedGroupJoin> &M, setup_t set
4526
4539
aggregates_size_in_bits += 64 ;
4527
4540
4528
4541
/* ----- Decompose each clause of the join predicate of the form `A.x = B.y` into parts `A.x` and `B.y`. -----*/
4529
- auto decomposed_ids = decompose_equi_predicate (M.join .predicate (), M.build .schema ());
4530
- const auto &build_keys = decomposed_ids.first ;
4531
- const auto &probe_keys = decomposed_ids.second ;
4542
+ const auto decomposed_ids = decompose_equi_predicate (M.join .predicate (), M.build .schema ());
4543
+ const auto build_keys = deduplicate (decomposed_ids.first ), probe_keys = deduplicate (decomposed_ids.second );
4532
4544
M_insist (build_keys.size () == num_keys);
4533
4545
4534
4546
/* ----- Compute initial capacity of hash table. -----*/
0 commit comments