Skip to content

Commit df03566

Browse files
committed
Fix PartialOrd for Window
Before the changes, `PartialOrd` could return `Some(Equal)` for two `Window` nodes that are not equal in `PartialEq` sense. This is violation of `PartialOrd` contract. The fix is to consult eq inside ord implementation. If ord thinks two instances are equal, but they are not equal in Eq sense, they are considered incomparable.
1 parent 338a96e commit df03566

File tree

1 file changed

+75
-7
lines changed
  • datafusion/expr/src/logical_plan

1 file changed

+75
-7
lines changed

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2503,9 +2503,22 @@ impl Window {
25032503
// Manual implementation needed because of `schema` field. Comparison excludes this field.
25042504
impl PartialOrd for Window {
25052505
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2506-
match self.input.partial_cmp(&other.input) {
2507-
Some(Ordering::Equal) => self.window_expr.partial_cmp(&other.window_expr),
2508-
cmp => cmp,
2506+
match self.input.partial_cmp(&other.input)? {
2507+
Ordering::Equal => {} // continue
2508+
not_equal => return Some(not_equal),
2509+
}
2510+
2511+
match self.window_expr.partial_cmp(&other.window_expr)? {
2512+
Ordering::Equal => {} // continue
2513+
not_equal => return Some(not_equal),
2514+
}
2515+
2516+
// Contract for PartialOrd and PartialEq consistency requires that
2517+
// a == b if and only if partial_cmp(a, b) == Some(Equal).
2518+
if self == other {
2519+
Some(Ordering::Equal)
2520+
} else {
2521+
None
25092522
}
25102523
}
25112524
}
@@ -4268,22 +4281,20 @@ fn get_unnested_list_datatype_recursive(
42684281

42694282
#[cfg(test)]
42704283
mod tests {
4271-
42724284
use super::*;
42734285
use crate::builder::LogicalTableSource;
42744286
use crate::logical_plan::table_scan;
4287+
use crate::test::function_stub::{count, count_udaf};
42754288
use crate::{
42764289
binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery,
42774290
GroupingSet,
42784291
};
4279-
42804292
use datafusion_common::tree_node::{
42814293
TransformedResult, TreeNodeRewriter, TreeNodeVisitor,
42824294
};
42834295
use datafusion_common::{not_impl_err, Constraint, ScalarValue};
42844296
use insta::{assert_debug_snapshot, assert_snapshot};
4285-
4286-
use crate::test::function_stub::count;
4297+
use std::hash::DefaultHasher;
42874298

42884299
fn employee_schema() -> Schema {
42894300
Schema::new(vec![
@@ -4687,6 +4698,63 @@ mod tests {
46874698
);
46884699
}
46894700

4701+
#[test]
4702+
fn test_partial_eq_hash_and_partial_ord() {
4703+
let empty_values = Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
4704+
produce_one_row: true,
4705+
schema: Arc::new(DFSchema::empty()),
4706+
}));
4707+
4708+
let count_window_function = |schema| {
4709+
Window::try_new_with_schema(
4710+
vec![Expr::WindowFunction(Box::new(WindowFunction::new(
4711+
WindowFunctionDefinition::AggregateUDF(count_udaf()),
4712+
vec![],
4713+
)))],
4714+
Arc::clone(&empty_values),
4715+
Arc::new(schema),
4716+
)
4717+
.unwrap()
4718+
};
4719+
4720+
let schema_without_metadata = || {
4721+
DFSchema::from_unqualified_fields(
4722+
vec![Field::new("count", DataType::Int64, false)].into(),
4723+
HashMap::new(),
4724+
)
4725+
.unwrap()
4726+
};
4727+
4728+
let schema_with_metadata = || {
4729+
DFSchema::from_unqualified_fields(
4730+
vec![Field::new("count", DataType::Int64, false)].into(),
4731+
[("key".to_string(), "value".to_string())].into(),
4732+
)
4733+
.unwrap()
4734+
};
4735+
4736+
// A Window
4737+
let f = count_window_function(schema_without_metadata());
4738+
4739+
// Same like `f`, different instance
4740+
let f2 = count_window_function(schema_without_metadata());
4741+
assert_eq!(f, f2);
4742+
assert_eq!(hash(&f), hash(&f2));
4743+
assert_eq!(f.partial_cmp(&f2), Some(Ordering::Equal));
4744+
4745+
// Same like `f`, except for schema metadata
4746+
let o = count_window_function(schema_with_metadata());
4747+
assert_ne!(f, o);
4748+
assert_ne!(hash(&f), hash(&o)); // hash can collide for different values but does not collide in this test
4749+
assert_eq!(f.partial_cmp(&o), None);
4750+
}
4751+
4752+
fn hash<T: Hash>(value: &T) -> u64 {
4753+
let hasher = &mut DefaultHasher::new();
4754+
value.hash(hasher);
4755+
hasher.finish()
4756+
}
4757+
46904758
#[test]
46914759
fn projection_expr_schema_mismatch() -> Result<()> {
46924760
let empty_schema = Arc::new(DFSchema::empty());

0 commit comments

Comments
 (0)