Skip to content

Commit 330b018

Browse files
authored
HIVE-29176: Wrong result when HiveAntiJoin is replacing an IS NULL filter on a nullable column (#6062)
1 parent 8d7c9c4 commit 330b018

File tree

8 files changed

+871
-116
lines changed

8 files changed

+871
-116
lines changed

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,43 +1233,17 @@ public FixNullabilityShuttle(RexBuilder rexBuilder,
12331233
}
12341234

12351235
/**
1236-
* Checks if any of the expression given as list expressions are from right side of the join.
1237-
* This is used during anti join conversion.
1238-
*
1239-
* @param joinRel Join node whose right side has to be searched.
1240-
* @param expressions The list of expression to search.
1241-
* @return true if any of the expressions is from right side of join.
1236+
* Given a join, creates a bitset of the joined columns originating from the right-hand side.
1237+
* @param joinRel a join that concatenates all columns from its inputs (so no semi-join)
1238+
* @return a bitset
12421239
*/
1243-
public static boolean hasAnyExpressionFromRightSide(RelNode joinRel, List<RexNode> expressions) {
1244-
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
1245-
int nTotalFields = joinFields.size();
1246-
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
1247-
int nFieldsLeft = leftFields.size();
1248-
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
1249-
1250-
for (RexNode node : expressions) {
1251-
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
1252-
if (rightBitmap.contains(inputBits)) {
1253-
return true;
1254-
}
1255-
}
1256-
return false;
1257-
}
1258-
1259-
public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
1260-
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
1261-
int nTotalFields = joinFields.size();
1262-
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
1263-
int nFieldsLeft = leftFields.size();
1264-
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
1265-
1266-
for (RexNode node : expressions) {
1267-
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
1268-
if (!rightBitmap.contains(inputBits)) {
1269-
return false;
1270-
}
1240+
public static ImmutableBitSet getRightSideBitset(RelNode joinRel) {
1241+
if(joinRel.getInputs().size() != 2) {
1242+
throw new IllegalArgumentException("The relation must have exactly two children:\n" + RelOptUtil.toString(joinRel));
12711243
}
1272-
return true;
1244+
int nTotalFields = joinRel.getRowType().getFieldCount();
1245+
int nFieldsLeft = (joinRel.getInputs().get(0)).getRowType().getFieldCount();
1246+
return ImmutableBitSet.range(nFieldsLeft, nTotalFields);
12731247
}
12741248

12751249
/**

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java

Lines changed: 135 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,40 @@
1717
*/
1818
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
1919

20+
import com.google.common.collect.ImmutableList;
21+
import org.apache.calcite.plan.RelOptCluster;
22+
import org.apache.calcite.plan.RelOptPredicateList;
2023
import org.apache.calcite.plan.RelOptRule;
2124
import org.apache.calcite.plan.RelOptRuleCall;
2225
import org.apache.calcite.plan.RelOptUtil;
26+
import org.apache.calcite.plan.RexImplicationChecker;
2327
import org.apache.calcite.plan.Strong;
2428
import org.apache.calcite.rel.RelNode;
2529
import org.apache.calcite.rel.core.Filter;
2630
import org.apache.calcite.rel.core.Join;
2731
import org.apache.calcite.rel.core.JoinRelType;
2832
import org.apache.calcite.rel.core.Project;
33+
import org.apache.calcite.rel.metadata.RelMetadataQuery;
34+
import org.apache.calcite.rel.type.RelDataTypeField;
35+
import org.apache.calcite.rex.RexBuilder;
2936
import org.apache.calcite.rex.RexCall;
37+
import org.apache.calcite.rex.RexExecutor;
38+
import org.apache.calcite.rex.RexExecutorImpl;
3039
import org.apache.calcite.rex.RexNode;
40+
import org.apache.calcite.rex.RexUtil;
3141
import org.apache.calcite.rex.RexVisitorImpl;
3242
import org.apache.calcite.sql.SqlKind;
3343
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
44+
import org.apache.calcite.util.ImmutableBitSet;
45+
import org.apache.calcite.util.Util;
3446
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
3547
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
3648
import org.slf4j.Logger;
3749
import org.slf4j.LoggerFactory;
3850

3951
import java.util.ArrayList;
40-
import java.util.Collections;
4152
import java.util.List;
42-
import java.util.concurrent.atomic.AtomicBoolean;
53+
import java.util.Optional;
4354

4455
/**
4556
* Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
8697

8798
assert (filter != null);
8899

89-
List<RexNode> filterList = getResidualFilterNodes(filter, join);
90-
if (filterList == null) {
100+
ImmutableBitSet rhsFields = HiveCalciteUtil.getRightSideBitset(join);
101+
Optional<List<RexNode>> optFilterList = getResidualFilterNodes(filter, join, rhsFields);
102+
if (optFilterList.isEmpty()) {
91103
return;
92104
}
105+
List<RexNode> filterList = optFilterList.get();
93106

94107
// If any projection is there from right side, then we can not convert to anti join.
95-
boolean hasProjection = HiveCalciteUtil.hasAnyExpressionFromRightSide(join, project.getProjects());
96-
if (hasProjection) {
108+
ImmutableBitSet projectedFields = RelOptUtil.InputFinder.bits(project.getProjects(), null);
109+
boolean projectionUsesRHS = projectedFields.intersects(rhsFields);
110+
if (projectionUsesRHS) {
97111
return;
98112
}
99113

@@ -124,13 +138,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
124138
/**
125139
* Extracts the non-null filter conditions from given filter node.
126140
*
127-
* @param filter The filter condition to be checked.
128-
* @param join Join node whose right side has to be searched.
141+
* @param filter The filter condition to be checked.
142+
* @param join Join node whose right side has to be searched.
143+
* @param rhsFields
129144
* @return null : Anti join condition is not matched for filter.
130-
* Empty list : No residual filter conditions present.
131-
* Valid list containing the filter to be applied after join.
145+
* Empty list : No residual filter conditions present.
146+
* Valid list containing the filter to be applied after join.
132147
*/
133-
private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
148+
private Optional<List<RexNode>> getResidualFilterNodes(Filter filter, Join join, ImmutableBitSet rhsFields) {
134149
// 1. If null filter is not present from right side then we can not convert to anti join.
135150
// 2. If any non-null filter is present from right side, we can not convert it to anti join.
136151
// 3. Keep other filters which needs to be executed after join.
@@ -140,43 +155,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
140155
List<RexNode> aboveFilters = RelOptUtil.conjunctions(filter.getCondition());
141156
boolean hasNullFilterOnRightSide = false;
142157
List<RexNode> filterList = new ArrayList<>();
158+
final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide(join);
159+
143160
for (RexNode filterNode : aboveFilters) {
144-
if (filterNode.getKind() == SqlKind.IS_NULL) {
145-
// Null filter from right side table can be removed and its a pre-condition for anti join conversion.
146-
if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
147-
&& isStrong(((RexCall) filterNode).getOperands().get(0))) {
148-
hasNullFilterOnRightSide = true;
149-
} else {
150-
filterList.add(filterNode);
151-
}
152-
} else {
153-
if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
154-
// If some non null condition is present from right side, we can not convert the join to anti join as
155-
// anti join does not project the fields from right side.
156-
return null;
157-
} else {
158-
filterList.add(filterNode);
159-
}
161+
final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(filterNode);
162+
boolean usesFieldFromRHS = usedFields.intersects(rhsFields);
163+
164+
if(!usesFieldFromRHS) {
165+
// Only LHS fields or constants, so the filterNode is part of the residual filter
166+
filterList.add(filterNode);
167+
continue;
168+
}
169+
170+
// In the following we check for filter nodes that let us deduce that
171+
// "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"
172+
173+
if(filterNode.getKind() != SqlKind.IS_NULL) {
174+
return Optional.empty();
175+
}
176+
177+
boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
178+
if (!usesRHSFieldsOnly) {
179+
// If there is a mix between LHS and RHS fields, don't convert to anti-join
180+
return Optional.empty();
181+
}
182+
183+
// Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
184+
RexNode arg = ((RexCall) filterNode).getOperands().get(0);
185+
if (isStrong(arg, notNullColumnsFromRightSide)) {
186+
hasNullFilterOnRightSide = true;
187+
} else if(!isStrong(arg, rhsFields)) {
188+
// if all RHS fields are null and the IS NULL is still not fulfilled, bail out
189+
return Optional.empty();
160190
}
161191
}
162192

163193
if (!hasNullFilterOnRightSide) {
164-
return null;
194+
return Optional.empty();
165195
}
166-
return filterList;
196+
return Optional.of(filterList);
167197
}
168198

169-
private boolean isStrong(RexNode rexNode) {
170-
AtomicBoolean hasCast = new AtomicBoolean(false);
171-
rexNode.accept(new RexVisitorImpl<Void>(true) {
172-
@Override
173-
public Void visitCall(RexCall call) {
174-
if (call.getKind() == SqlKind.CAST) {
175-
hasCast.set(true);
176-
}
177-
return super.visitCall(call);
199+
private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
200+
// we need to shift the indices of the second child to the right
201+
int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
202+
ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns(joinRel.getInput(1));
203+
return rhsNotnullColumns.shift(shift);
204+
}
205+
206+
/**
207+
* Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
208+
*/
209+
private ImmutableBitSet deduceNotNullColumns(RelNode relNode) {
210+
// adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
211+
RelOptCluster cluster = relNode.getCluster();
212+
final RexBuilder rexBuilder = cluster.getRexBuilder();
213+
final RelMetadataQuery mq = cluster.getMetadataQuery();
214+
ImmutableBitSet.Builder result = ImmutableBitSet.builder();
215+
ImmutableBitSet.Builder candidatesBuilder = ImmutableBitSet.builder();
216+
List<RelDataTypeField> fieldList = relNode.getRowType().getFieldList();
217+
for (int i=0; i<fieldList.size(); i++) {
218+
if (fieldList.get(i).getType().isNullable()) {
219+
candidatesBuilder.set(i);
220+
}
221+
else {
222+
result.set(i);
223+
}
224+
}
225+
ImmutableBitSet candidates = candidatesBuilder.build();
226+
if (candidates.isEmpty()) {
227+
// All columns are declared NOT NULL, no need to change
228+
return result.build();
229+
}
230+
final RexExecutor executor = cluster.getPlanner().getExecutor();
231+
if (!(executor instanceof RexExecutorImpl)) {
232+
// Cannot proceed without an executor.
233+
return result.build();
234+
}
235+
236+
final RexImplicationChecker checker =
237+
new RexImplicationChecker(rexBuilder, executor, relNode.getRowType());
238+
final RelOptPredicateList predicates = mq.getPulledUpPredicates(relNode);
239+
240+
ImmutableList<RexNode> preds = predicates.pulledUpPredicates;
241+
final List<RexNode> antecedent = new ArrayList<>(preds);
242+
final RexNode first = RexUtil.composeConjunction(rexBuilder, antecedent);
243+
for (int c : candidates) {
244+
RelDataTypeField field = fieldList.get(c);
245+
final RexNode second = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
246+
rexBuilder.makeInputRef(field.getType(), field.getIndex()));
247+
// Suppose we have EMP(empno INT NOT NULL, mgr INT),
248+
// and predicates [empno > 0, mgr > 0].
249+
// We make first: "empno > 0 AND mgr > 0"
250+
// and second: "mgr IS NOT NULL"
251+
// and ask whether first implies second.
252+
// It does, so we have no nullable columns.
253+
if(checker.implies(first, second)) {
254+
result.set(c);
178255
}
179-
});
180-
return !hasCast.get() && Strong.isStrong(rexNode);
256+
}
257+
return result.build();
258+
}
259+
260+
private boolean isStrong(RexNode rexNode, ImmutableBitSet rightSideBitset) {
261+
try {
262+
rexNode.accept(new RexVisitorImpl<Void>(true) {
263+
@Override
264+
public Void visitCall(RexCall call) {
265+
if (call.getKind() == SqlKind.CAST) {
266+
throw Util.FoundOne.NULL;
267+
}
268+
return super.visitCall(call);
269+
}
270+
});
271+
} catch (Util.FoundOne e) {
272+
// Hive's CAST might introduce NULL for NOT NULL fields
273+
return false;
274+
}
275+
return Strong.isNull(rexNode, rightSideBitset);
181276
}
182277
}

0 commit comments

Comments
 (0)