Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ccae8ff
Implement st_haszm using WKBBytesExecutor instead (missing one last e…
petern48 Oct 1, 2025
07aa206
Add note about handling last edge case
petern48 Oct 1, 2025
cf031fb
Minor fix to the comments
petern48 Oct 1, 2025
526fc3b
Fix pre-commit
petern48 Oct 1, 2025
a491d91
Fix cargo clippy
petern48 Oct 2, 2025
cf90bcf
Save progress
petern48 Oct 3, 2025
22a6087
Pull out dimension calculation logic into new wkb_header.rs
petern48 Oct 4, 2025
5c616af
Add MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB fixture
petern48 Oct 4, 2025
207ecb1
Fix dimension calculation to support all collection types and add fix…
petern48 Oct 4, 2025
43009f8
Fix clippy and clean up
petern48 Oct 4, 2025
1078bdd
Remove public byte_order method since it's not needed atm
petern48 Oct 4, 2025
4d4e7e0
Perform all wkb_header operations lazily and cache the values as Opti…
petern48 Oct 4, 2025
dfd6c1a
Add python integration test benches
petern48 Oct 4, 2025
0ef812d
Add tests for wkb_header
petern48 Oct 4, 2025
075d6e6
Apply suggestion from @paleolimbot
petern48 Oct 5, 2025
491b3c7
Remaining clean up
petern48 Oct 5, 2025
7efccc0
Update to method to dimensions plural
petern48 Oct 5, 2025
06501e5
Rename method to try_new
petern48 Oct 5, 2025
1b397fd
Update fixture to be multipoint ((1 2 3)) instead of all zeros
petern48 Oct 5, 2025
9ce9f08
Implement refactor
petern48 Oct 7, 2025
617f9b8
Remove inferred dimension case
petern48 Oct 7, 2025
96311fa
Move logic to st_haszm
petern48 Oct 7, 2025
d1d4fc8
Add empty_geometry_first_coord_dimensions test
petern48 Oct 7, 2025
573f7c8
Add test for size
petern48 Oct 7, 2025
b5984dc
Add tests
petern48 Oct 7, 2025
8c1d2f0
Implement fix for first_xy POLYGON logic
petern48 Oct 7, 2025
dc49aac
clean up
petern48 Oct 7, 2025
89ebb4c
Rename from first_coord_dimensions to first_geom_dimensions and adjus…
petern48 Oct 7, 2025
bdc0fae
Update name of method in st_haszm and update some sedona_internal_err…
petern48 Oct 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions benchmarks/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,38 @@ def queries():

benchmark(queries)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasm(self, benchmark, eng, table):
    """Benchmark ``ST_HasM`` applied to the ``geom1`` column of ``table``.

    Runs once per combination of the parametrized engine class and table
    name. The engine class is resolved through ``self._get_eng`` before
    the timed callable is handed to the ``benchmark`` fixture.
    """
    engine = self._get_eng(eng)

    def run_query():
        # The SQL text is built inside the timed callable on every call.
        engine.execute_and_collect(f"SELECT ST_HasM(geom1) from {table}")

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasz(self, benchmark, eng, table):
    """Benchmark ``ST_HasZ`` applied to the ``geom1`` column of ``table``.

    Runs once per combination of the parametrized engine class and table
    name. The engine class is resolved through ``self._get_eng`` before
    the timed callable is handed to the ``benchmark`` fixture.
    """
    engine = self._get_eng(eng)

    def run_query():
        # The SQL text is built inside the timed callable on every call.
        engine.execute_and_collect(f"SELECT ST_HasZ(geom1) from {table}")

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
Expand Down
5 changes: 5 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,13 @@ def test_st_geomfromwkb(eng, geom):
("LINESTRING Z (0 0 0, 1 1 1)", True),
("POLYGON EMPTY", False),
("MULTIPOINT ((0 0), (1 1))", False),
("MULTIPOINT Z ((0 0 0))", True),
("MULTIPOINT ZM ((0 0 0 0))", True),
("GEOMETRYCOLLECTION EMPTY", False),
# Z-dim specified only in the nested geometry
("GEOMETRYCOLLECTION (POINT Z (0 0 0))", True),
# Z-dim specified on both levels
("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", True),
("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", True),
],
)
Expand Down
97 changes: 69 additions & 28 deletions rust/sedona-functions/src/st_haszm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,17 @@
// under the License.
use std::sync::Arc;

use crate::executor::WkbExecutor;
use crate::executor::WkbBytesExecutor;
use arrow_array::builder::BooleanBuilder;
use arrow_schema::DataType;
use datafusion_common::error::Result;
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility,
};
use geo_traits::{Dimensions, GeometryTrait};
use sedona_common::sedona_internal_err;
use geo_traits::Dimensions;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::wkb_header::WkbHeader;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use wkb::reader::Wkb;

pub fn st_hasz_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new(
Expand Down Expand Up @@ -90,13 +89,13 @@ impl SedonaScalarKernel for STHasZm {
_ => unreachable!(),
};

let executor = WkbExecutor::new(arg_types, args);
let executor = WkbBytesExecutor::new(arg_types, args);
let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());

executor.execute_wkb_void(|maybe_item| {
match maybe_item {
Some(item) => {
builder.append_option(invoke_scalar(&item, dim_index)?);
builder.append_option(invoke_scalar(item, dim_index)?);
}
None => builder.append_null(),
}
Expand All @@ -107,28 +106,33 @@ impl SedonaScalarKernel for STHasZm {
}
}

fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result<Option<bool>> {
match item.as_type() {
geo_traits::GeometryType::GeometryCollection(collection) => {
use geo_traits::GeometryCollectionTrait;
if collection.num_geometries() == 0 {
Ok(Some(false))
} else {
// PostGIS doesn't allow creating a GeometryCollection with geometries of different dimensions
// so we can just check the dimension of the first one
let first_geom = unsafe { collection.geometry_unchecked(0) };
invoke_scalar(first_geom, dim_index)
}
}
_ => {
let geom_dim = item.dim();
match dim_index {
2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz | Dimensions::Xyzm))),
3 => Ok(Some(matches!(geom_dim, Dimensions::Xym | Dimensions::Xyzm))),
_ => sedona_internal_err!("unexpected dim_index"),
}
}
/// Reports whether the geometry encoded in `buf` carries the ordinate selected by `dim_index`.
///
/// `dim_index` is `2` to test for a Z ordinate (`Xyz` or `Xyzm`) and `3` to test for an
/// M ordinate (`Xym` or `Xyzm`). Any other value yields `Ok(Some(false))`.
///
/// The answer is derived from a [`WkbHeader`] built over the raw bytes. When the header
/// reports dimensions for the first nested geometry, those take precedence over the
/// dimensions declared at the top level.
///
/// # Errors
///
/// Propagates any error returned by `WkbHeader::try_new` or `WkbHeader::dimensions`.
fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
    let header = WkbHeader::try_new(buf)?;
    // Resolved up front so a failure to read the top-level dimensions is always reported,
    // even when the first nested geometry ends up deciding the result.
    let top_level_dimensions = header.dimensions()?;

    // Prefer the dimensions of the first nested geometry, which can differ from the
    // top-level declaration, e.g. GEOMETRYCOLLECTION (POINT Z (1 2 3)).
    let dimensions = header
        .first_geom_dimensions()
        .unwrap_or(top_level_dimensions);

    let has_dimension = match dim_index {
        2 => matches!(dimensions, Dimensions::Xyz | Dimensions::Xyzm),
        3 => matches!(dimensions, Dimensions::Xym | Dimensions::Xyzm),
        _ => false,
    };
    Ok(Some(has_dimension))
}

#[cfg(test)]
Expand All @@ -137,7 +141,9 @@ mod tests {
use datafusion_expr::ScalarUDF;
use rstest::rstest;
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
use sedona_testing::testers::ScalarUdfTester;
use sedona_testing::{
fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB, testers::ScalarUdfTester,
};

use super::*;

Expand Down Expand Up @@ -184,11 +190,19 @@ mod tests {
let result = m_tester.invoke_wkb_scalar(None).unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Null);

// Z-dimension specified only in the nested geometry, but not the geom collection level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

// Z-dimension specified on both the geom collection and nested geometry level
// Geometry collection with Z dimension both on the geom collection and nested geometry level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT M (1 2 3))"))
.unwrap();
Expand All @@ -203,5 +217,32 @@ mod tests {
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(false)));

// Empty geometry collections with Z or M dimensions
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z EMPTY"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION M EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));
}

/// A MULTIPOINT fixture whose Z dimension must be inferred reports Z but not M.
#[test]
fn multipoint_with_inferred_z_dimension() {
    let wkb_value = ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));

    // Each UDF is paired with the boolean it is expected to return for the fixture.
    let expectations = [(st_hasz_udf(), true), (st_hasm_udf(), false)];
    for (udf, expected) in expectations {
        let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]);
        let actual = tester.invoke_scalar(wkb_value.clone()).unwrap();
        assert_eq!(actual, ScalarValue::Boolean(Some(expected)));
    }
}
}
2 changes: 2 additions & 0 deletions rust/sedona-geometry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ serde_json = { workspace = true }
wkt = { workspace = true }

[dependencies]
datafusion-common = { workspace = true }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should not depend on datafusion-common or sedona-common here (this is otherwise a pretty lightweight crate).

geo-traits = { workspace = true }
lru = { workspace = true }
sedona-common = { path = "../sedona-common" }
serde = { workspace = true }
serde_with = { workspace = true }
thiserror = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-geometry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ pub mod point_count;
pub mod transform;
pub mod types;
pub mod wkb_factory;
pub mod wkb_header;
Loading