1717 */
1818package org .apache .hadoop .hive .ql .optimizer .calcite .rules ;
1919
20+ import com .google .common .collect .ImmutableList ;
21+ import org .apache .calcite .plan .RelOptCluster ;
22+ import org .apache .calcite .plan .RelOptPredicateList ;
2023import org .apache .calcite .plan .RelOptRule ;
2124import org .apache .calcite .plan .RelOptRuleCall ;
2225import org .apache .calcite .plan .RelOptUtil ;
26+ import org .apache .calcite .plan .RexImplicationChecker ;
2327import org .apache .calcite .plan .Strong ;
2428import org .apache .calcite .rel .RelNode ;
2529import org .apache .calcite .rel .core .Filter ;
2630import org .apache .calcite .rel .core .Join ;
2731import org .apache .calcite .rel .core .JoinRelType ;
2832import org .apache .calcite .rel .core .Project ;
33+ import org .apache .calcite .rel .metadata .RelMetadataQuery ;
34+ import org .apache .calcite .rel .type .RelDataTypeField ;
35+ import org .apache .calcite .rex .RexBuilder ;
2936import org .apache .calcite .rex .RexCall ;
37+ import org .apache .calcite .rex .RexExecutor ;
38+ import org .apache .calcite .rex .RexExecutorImpl ;
3039import org .apache .calcite .rex .RexNode ;
40+ import org .apache .calcite .rex .RexUtil ;
3141import org .apache .calcite .rex .RexVisitorImpl ;
3242import org .apache .calcite .sql .SqlKind ;
3343import org .apache .calcite .sql .fun .SqlStdOperatorTable ;
44+ import org .apache .calcite .util .ImmutableBitSet ;
45+ import org .apache .calcite .util .Util ;
3446import org .apache .hadoop .hive .ql .optimizer .calcite .HiveCalciteUtil ;
3547import org .apache .hadoop .hive .ql .optimizer .calcite .reloperators .HiveAntiJoin ;
3648import org .slf4j .Logger ;
3749import org .slf4j .LoggerFactory ;
3850
3951import java .util .ArrayList ;
40- import java .util .Collections ;
4152import java .util .List ;
42- import java .util .concurrent . atomic . AtomicBoolean ;
53+ import java .util .Optional ;
4354
4455/**
4556 * Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
8697
8798 assert (filter != null );
8899
89- List <RexNode > filterList = getResidualFilterNodes (filter , join );
90- if (filterList == null ) {
100+ ImmutableBitSet rhsFields = HiveCalciteUtil .getRightSideBitset (join );
101+ Optional <List <RexNode >> optFilterList = getResidualFilterNodes (filter , join , rhsFields );
102+ if (optFilterList .isEmpty ()) {
91103 return ;
92104 }
105+ List <RexNode > filterList = optFilterList .get ();
93106
94107 // If any projection is there from right side, then we can not convert to anti join.
95- boolean hasProjection = HiveCalciteUtil .hasAnyExpressionFromRightSide (join , project .getProjects ());
96- if (hasProjection ) {
108+ ImmutableBitSet projectedFields = RelOptUtil .InputFinder .bits (project .getProjects (), null );
109+ boolean projectionUsesRHS = projectedFields .intersects (rhsFields );
110+ if (projectionUsesRHS ) {
97111 return ;
98112 }
99113
@@ -124,13 +138,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
124138 /**
125139 * Extracts the non-null filter conditions from given filter node.
126140 *
127- * @param filter The filter condition to be checked.
128- * @param join Join node whose right side has to be searched.
141+ * @param filter The filter condition to be checked.
142+ * @param join Join node whose right side has to be searched.
143+ * @param rhsFields
129144 * @return null : Anti join condition is not matched for filter.
130- * Empty list : No residual filter conditions present.
131- * Valid list containing the filter to be applied after join.
145+ * Empty list : No residual filter conditions present.
146+ * Valid list containing the filter to be applied after join.
132147 */
133- private List <RexNode > getResidualFilterNodes (Filter filter , Join join ) {
148+ private Optional < List <RexNode >> getResidualFilterNodes (Filter filter , Join join , ImmutableBitSet rhsFields ) {
134149 // 1. If null filter is not present from right side then we can not convert to anti join.
135150 // 2. If any non-null filter is present from right side, we can not convert it to anti join.
136151 // 3. Keep other filters which needs to be executed after join.
@@ -140,43 +155,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
140155 List <RexNode > aboveFilters = RelOptUtil .conjunctions (filter .getCondition ());
141156 boolean hasNullFilterOnRightSide = false ;
142157 List <RexNode > filterList = new ArrayList <>();
158+ final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide (join );
159+
143160 for (RexNode filterNode : aboveFilters ) {
144- if (filterNode .getKind () == SqlKind .IS_NULL ) {
145- // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
146- if (HiveCalciteUtil .hasAllExpressionsFromRightSide (join , Collections .singletonList (filterNode ))
147- && isStrong (((RexCall ) filterNode ).getOperands ().get (0 ))) {
148- hasNullFilterOnRightSide = true ;
149- } else {
150- filterList .add (filterNode );
151- }
152- } else {
153- if (HiveCalciteUtil .hasAnyExpressionFromRightSide (join , Collections .singletonList (filterNode ))) {
154- // If some non null condition is present from right side, we can not convert the join to anti join as
155- // anti join does not project the fields from right side.
156- return null ;
157- } else {
158- filterList .add (filterNode );
159- }
161+ final ImmutableBitSet usedFields = RelOptUtil .InputFinder .bits (filterNode );
162+ boolean usesFieldFromRHS = usedFields .intersects (rhsFields );
163+
164+ if (!usesFieldFromRHS ) {
165+ // Only LHS fields or constants, so the filterNode is part of the residual filter
166+ filterList .add (filterNode );
167+ continue ;
168+ }
169+
170+ // In the following we check for filter nodes that let us deduce that
171+ // "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"
172+
173+ if (filterNode .getKind () != SqlKind .IS_NULL ) {
174+ return Optional .empty ();
175+ }
176+
177+ boolean usesRHSFieldsOnly = rhsFields .contains (usedFields );
178+ if (!usesRHSFieldsOnly ) {
179+ // If there is a mix between LHS and RHS fields, don't convert to anti-join
180+ return Optional .empty ();
181+ }
182+
183+ // Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
184+ RexNode arg = ((RexCall ) filterNode ).getOperands ().get (0 );
185+ if (isStrong (arg , notNullColumnsFromRightSide )) {
186+ hasNullFilterOnRightSide = true ;
187+ } else if (!isStrong (arg , rhsFields )) {
188+ // if all RHS fields are null and the IS NULL is still not fulfilled, bail out
189+ return Optional .empty ();
160190 }
161191 }
162192
163193 if (!hasNullFilterOnRightSide ) {
164- return null ;
194+ return Optional . empty () ;
165195 }
166- return filterList ;
196+ return Optional . of ( filterList ) ;
167197 }
168198
169- private boolean isStrong (RexNode rexNode ) {
170- AtomicBoolean hasCast = new AtomicBoolean (false );
171- rexNode .accept (new RexVisitorImpl <Void >(true ) {
172- @ Override
173- public Void visitCall (RexCall call ) {
174- if (call .getKind () == SqlKind .CAST ) {
175- hasCast .set (true );
176- }
177- return super .visitCall (call );
199+ private ImmutableBitSet getNotNullColumnsFromRightSide (RelNode joinRel ) {
200+ // we need to shift the indices of the second child to the right
201+ int shift = (joinRel .getInput (0 )).getRowType ().getFieldCount ();
202+ ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns (joinRel .getInput (1 ));
203+ return rhsNotnullColumns .shift (shift );
204+ }
205+
206+ /**
207+ * Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
208+ */
209+ private ImmutableBitSet deduceNotNullColumns (RelNode relNode ) {
210+ // adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
211+ RelOptCluster cluster = relNode .getCluster ();
212+ final RexBuilder rexBuilder = cluster .getRexBuilder ();
213+ final RelMetadataQuery mq = cluster .getMetadataQuery ();
214+ ImmutableBitSet .Builder result = ImmutableBitSet .builder ();
215+ ImmutableBitSet .Builder candidatesBuilder = ImmutableBitSet .builder ();
216+ List <RelDataTypeField > fieldList = relNode .getRowType ().getFieldList ();
217+ for (int i =0 ; i <fieldList .size (); i ++) {
218+ if (fieldList .get (i ).getType ().isNullable ()) {
219+ candidatesBuilder .set (i );
220+ }
221+ else {
222+ result .set (i );
223+ }
224+ }
225+ ImmutableBitSet candidates = candidatesBuilder .build ();
226+ if (candidates .isEmpty ()) {
227+ // All columns are declared NOT NULL, no need to change
228+ return result .build ();
229+ }
230+ final RexExecutor executor = cluster .getPlanner ().getExecutor ();
231+ if (!(executor instanceof RexExecutorImpl )) {
232+ // Cannot proceed without an executor.
233+ return result .build ();
234+ }
235+
236+ final RexImplicationChecker checker =
237+ new RexImplicationChecker (rexBuilder , executor , relNode .getRowType ());
238+ final RelOptPredicateList predicates = mq .getPulledUpPredicates (relNode );
239+
240+ ImmutableList <RexNode > preds = predicates .pulledUpPredicates ;
241+ final List <RexNode > antecedent = new ArrayList <>(preds );
242+ final RexNode first = RexUtil .composeConjunction (rexBuilder , antecedent );
243+ for (int c : candidates ) {
244+ RelDataTypeField field = fieldList .get (c );
245+ final RexNode second = rexBuilder .makeCall (SqlStdOperatorTable .IS_NOT_NULL ,
246+ rexBuilder .makeInputRef (field .getType (), field .getIndex ()));
247+ // Suppose we have EMP(empno INT NOT NULL, mgr INT),
248+ // and predicates [empno > 0, mgr > 0].
249+ // We make first: "empno > 0 AND mgr > 0"
250+ // and second: "mgr IS NOT NULL"
251+ // and ask whether first implies second.
252+ // It does, so we have no nullable columns.
253+ if (checker .implies (first , second )) {
254+ result .set (c );
178255 }
179- });
180- return !hasCast .get () && Strong .isStrong (rexNode );
256+ }
257+ return result .build ();
258+ }
259+
260+ private boolean isStrong (RexNode rexNode , ImmutableBitSet rightSideBitset ) {
261+ try {
262+ rexNode .accept (new RexVisitorImpl <Void >(true ) {
263+ @ Override
264+ public Void visitCall (RexCall call ) {
265+ if (call .getKind () == SqlKind .CAST ) {
266+ throw Util .FoundOne .NULL ;
267+ }
268+ return super .visitCall (call );
269+ }
270+ });
271+ } catch (Util .FoundOne e ) {
272+ // Hive's CAST might introduce NULL for NOT NULL fields
273+ return false ;
274+ }
275+ return Strong .isNull (rexNode , rightSideBitset );
181276 }
182277}
0 commit comments