Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ Install pytest-benchmark:
pip install pytest-benchmark
```

Also remember to install sedonadb in release mode rather than debug mode (that is, do not use the `-e` flag mentioned in the development docs). Currently, the test dependencies must also be installed:

```bash
pip install "python/sedonadb[test]"
```

### Running benchmarks

The below commands assume your working directory is in `benchmarks`.
Expand Down
50 changes: 37 additions & 13 deletions benchmarks/test_bench_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
from sedonadb.testing import DuckDB, PostGIS, SedonaDB


Expand All @@ -29,13 +28,28 @@ def setup_class(self):
# Setup tables
for name, options in [
(
"segments_large",
"points_simple",
{
"geom_type": "Point",
"target_rows": num_geoms,
},
),
(
"linestrings_simple",
{
"geom_type": "LineString",
"target_rows": num_geoms,
"vertices_per_linestring_range": [2, 2],
},
),
(
"linestrings_complex",
{
"geom_type": "LineString",
"target_rows": num_geoms,
"vertices_per_linestring_range": [500, 500],
},
),
(
"polygons_simple",
{
Expand Down Expand Up @@ -70,18 +84,28 @@ def setup_class(self):
),
]:
# Generate synthetic data
query = f"""
SELECT
geometry as geom1,
geometry as geom2,
round(random() * 100) as integer
FROM sd_random_geometry('{json.dumps(options)}')
"""
tab = self.sedonadb.execute_and_collect(query)
# query = f"""
# SELECT
# geometry as geom1,
# geometry as geom2,
# round(random() * 100) as integer
# FROM sd_random_geometry('{json.dumps(options)}')
# """

data_path = f"data/{name}.parquet"

# pd_df = self.sedonadb.con.sql(query).to_pandas(geometry="geom1")
# pd_df.to_parquet(data_path)

# read the parquet data into tables
self.sedonadb.create_table_parquet(name, data_path)
self.postgis.create_table_parquet(name, data_path)
self.duckdb.create_table_parquet(name, data_path)

self.sedonadb.create_table_arrow(name, tab)
self.postgis.create_table_arrow(name, tab)
self.duckdb.create_table_arrow(name, tab)
# tab = self.sedonadb.execute_and_collect(query)
# self.sedonadb.create_table_arrow(name, tab)
# self.postgis.create_table_arrow(name, tab)
# self.duckdb.create_table_arrow(name, tab)

def _get_eng(self, eng):
if eng == SedonaDB:
Expand Down
187 changes: 185 additions & 2 deletions benchmarks/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ def queries():
"collections_complex",
],
)
def test_st_astext(self, benchmark, eng, table):
    """Benchmark ``ST_AsText`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_AsText(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"polygons_simple",
"polygons_complex",
"points_simple",
],
)
def test_st_buffer(self, benchmark, eng, table):
eng = self._get_eng(eng)

Expand Down Expand Up @@ -88,8 +105,8 @@ def queries():
@pytest.mark.parametrize(
"table",
[
"collections_simple",
"collections_complex",
"polygons_simple",
"polygons_complex",
],
)
def test_st_envelope(self, benchmark, eng, table):
Expand All @@ -115,3 +132,169 @@ def queries():
eng.execute_and_collect(f"SELECT ST_GeometryType(geom1) from {table}")

benchmark(queries)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"wkt",
[
"POINT (1.2 2.3)",
"GEOMETRYCOLLECTION (POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)), LINESTRING (0 0, 5 5))",
],
)
def test_st_geomfromtext(self, benchmark, eng, wkt):
    """Benchmark ``ST_GeomFromText`` on a single WKT literal (no table scan)."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_GeomFromText('{wkt}')"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"collections_simple",
"collections_complex",
],
)
def test_st_hasz(self, benchmark, eng, table):
    """Benchmark ``ST_HasZ`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_HasZ(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"collections_simple",
"collections_complex",
],
)
def test_st_isempty(self, benchmark, eng, table):
    """Benchmark ``ST_IsEmpty`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_IsEmpty(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"linestrings_simple",
"linestrings_complex",
"collections_simple",
],
)
def test_st_length(self, benchmark, eng, table):
    """Benchmark ``ST_Length`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_Length(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"polygons_simple",
"polygons_complex",
"collections_simple",
],
)
def test_st_perimeter(self, benchmark, eng, table):
    """Benchmark ``ST_Perimeter`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_Perimeter(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
("x", "y"),
[
(1, 2),
(1.99993, -2007.9),
],
)
def test_st_point(self, benchmark, eng, x, y):
    """Benchmark constructing a 2D point with ``ST_Point`` from literal coordinates."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_Point({x}, {y})"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
("x", "y", "z"),
[
(1, 2, 3),
(1.99993, -2007.9, 20.5),
],
)
def test_st_pointz(self, benchmark, eng, x, y, z):
    """Benchmark constructing an XYZ point from literal coordinates."""
    engine = self._get_eng(eng)

    # DuckDB exposes this constructor under a different function name.
    if isinstance(engine, DuckDB):
        func = "ST_Point3D"
    else:
        func = "ST_PointZ"
    sql = f"SELECT {func}({x}, {y}, {z})"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
("x", "y", "z", "m"),
[
(1, 2, 3, 4),
(1.99993, -2007.9, 20.5, 10.5),
],
)
def test_st_pointzm(self, benchmark, eng, x, y, z, m):
    """Benchmark constructing an XYZM point from literal coordinates."""
    engine = self._get_eng(eng)

    # DuckDB exposes this constructor under a different function name.
    if isinstance(engine, DuckDB):
        func = "ST_Point4D"
    else:
        func = "ST_PointZM"
    sql = f"SELECT {func}({x}, {y}, {z}, {m})"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"points_simple",
],
)
def test_st_x(self, benchmark, eng, table):
    """Benchmark ``ST_X`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_X(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"polygons_simple",
"polygons_complex",
"collections_complex",
],
)
def test_st_xmin(self, benchmark, eng, table):
    """Benchmark ``ST_XMin`` applied to the ``geom1`` column of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_XMin(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)
18 changes: 18 additions & 0 deletions benchmarks/test_overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,21 @@ def queries():
eng.execute_and_collect(f"SELECT ST_Difference(geom1, geom2) from {table}")

benchmark(queries)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"polygons_simple",
"polygons_complex",
],
)
def test_st_intersection(self, benchmark, eng, table):
    """Benchmark ``ST_Intersection`` between the ``geom1`` and ``geom2`` columns of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_Intersection(geom1, geom2) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)
16 changes: 16 additions & 0 deletions benchmarks/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,19 @@ def queries():
)

benchmark(queries)

@pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
@pytest.mark.parametrize(
"table",
[
"polygons_simple",
"polygons_complex",
],
)
def test_st_intersects(self, benchmark, eng, table):
    """Benchmark ``ST_Intersects`` between the ``geom1`` and ``geom2`` columns of ``table``."""
    engine = self._get_eng(eng)
    sql = f"SELECT ST_Intersects(geom1, geom2) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)