From 738baa59a6982cf582e1cebd98792c3fd24a900b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 11 Nov 2025 23:03:02 +0100 Subject: [PATCH 1/2] gdal vector partition: also accept geometry fields, to partition on geometry type --- apps/gdalalg_vector_partition.cpp | 185 +++++++++++++----- .../test_gdalalg_vector_partition.py | 147 +++++++++++++- doc/source/programs/gdal_vector_partition.rst | 18 +- 3 files changed, 295 insertions(+), 55 deletions(-) diff --git a/apps/gdalalg_vector_partition.cpp b/apps/gdalalg_vector_partition.cpp index 7d7d7b3cebc3..9f7e616975a3 100644 --- a/apps/gdalalg_vector_partition.cpp +++ b/apps/gdalalg_vector_partition.cpp @@ -82,7 +82,8 @@ GDALVectorPartitionAlgorithm::GDALVectorPartitionAlgorithm(bool standaloneStep) AddCreationOptionsArg(&m_creationOptions); AddLayerCreationOptionsArg(&m_layerCreationOptions); - AddArg("field", 0, _("Field(s) on which to partition"), &m_fields) + AddArg("field", 0, _("Attribute or geomery field(s) on which to partition"), + &m_fields) .SetRequired(); AddArg("scheme", 0, _("Partitioning scheme"), &m_scheme) .SetChoices(SCHEME_HIVE, SCHEME_FLAT) @@ -363,11 +364,13 @@ struct Layer static bool GetCurrentOutputLayer( GDALAlgorithm *const alg, const OGRFeatureDefn *const poSrcFeatureDefn, OGRLayer *const poSrcLayer, const std::string &osKey, + const std::vector &aeGeomTypes, const std::string &osLayerDir, const std::string &osScheme, const std::string &osPatternIn, bool partDigitLeadingZeroes, size_t partDigitCount, const int featureLimit, const GIntBig maxFileSize, const bool omitPartitionedFields, - const std::vector &abPartitionedFields, const char *pszExtension, + const std::vector &abPartitionedFields, + const std::vector &abPartitionedGeomFields, const char *pszExtension, GDALDriver *const poOutDriver, const CPLStringList &datasetCreationOptions, const CPLStringList &layerCreationOptions, const OGRFeatureDefn *const poFeatureDefnWithoutPartitionedFields, @@ -662,11 +665,22 @@ static bool GetCurrentOutputLayer( pszSrcFIDColumn); } + std::unique_ptr poFirstGeomFieldDefn; + if (poSrcFeatureDefn->GetGeomFieldCount()) + { + poFirstGeomFieldDefn = std::make_unique( + *poSrcFeatureDefn->GetGeomFieldDefn(0)); + if (abPartitionedGeomFields[0]) + { + if (aeGeomTypes[0] == wkbNone) + poFirstGeomFieldDefn.reset(); + else + whileUnsealing(poFirstGeomFieldDefn.get()) + ->SetType(aeGeomTypes[0]); + } + } auto poLayer = outputLayer->poDS->CreateLayer( - poSrcLayer->GetDescription(), - poSrcFeatureDefn->GetGeomFieldCount() - ? poSrcFeatureDefn->GetGeomFieldDefn(0) - : nullptr, + poSrcLayer->GetDescription(), poFirstGeomFieldDefn.get(), modLayerCreationOptions.List()); if (!poLayer) { @@ -687,14 +701,22 @@ static bool GetCurrentOutputLayer( return false; } } - bool bFirst = true; + int iGeomField = -1; for (const auto *poGeomFieldDefn : poSrcFeatureDefn->GetGeomFields()) { - if (!bFirst) + ++iGeomField; + if (iGeomField > 0) { - if (poLayer->CreateGeomField(poGeomFieldDefn) != - OGRERR_NONE) + OGRGeomFieldDefn oClone(poGeomFieldDefn); + if (abPartitionedGeomFields[iGeomField]) + { + if (aeGeomTypes[iGeomField] == wkbNone) + continue; + whileUnsealing(&oClone)->SetType( + aeGeomTypes[iGeomField]); + } + if (poLayer->CreateGeomField(&oClone) != OGRERR_NONE) { alg->ReportError(CE_Failure, CPLE_AppDefined, "Cannot create geometry field '%s'", @@ -702,7 +724,6 @@ static bool GetCurrentOutputLayer( return false; } } - bFirst = false; } if (bUseTransactions) @@ -949,6 +970,7 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) struct Field { int nIdx{}; + bool bIsGeom = false; std::string encodedFieldName{}; OGRFieldType eType{}; }; @@ -956,33 +978,63 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) std::vector asFields; std::vector abPartitionedFields(poSrcFeatureDefn->GetFieldCount(), false); + std::vector abPartitionedGeomFields( + poSrcFeatureDefn->GetGeomFieldCount(), false); for (const std::string &fieldName : m_fields) { - const int nIdx = poSrcFeatureDefn->GetFieldIndex(fieldName.c_str()); + int nIdx = poSrcFeatureDefn->GetFieldIndex(fieldName.c_str()); if (nIdx < 0) { - ReportError(CE_Failure, CPLE_AppDefined, - "Cannot find field '%s' in layer '%s'", - fieldName.c_str(), poSrcLayer->GetDescription()); - return false; + if (fieldName == "OGR_GEOMETRY" && + poSrcFeatureDefn->GetGeomFieldCount() > 0) + nIdx = 0; + else + nIdx = + poSrcFeatureDefn->GetGeomFieldIndex(fieldName.c_str()); + if (nIdx < 0) + { + ReportError(CE_Failure, CPLE_AppDefined, + "Cannot find field '%s' in layer '%s'", + fieldName.c_str(), + poSrcLayer->GetDescription()); + return false; + } + else + { + abPartitionedGeomFields[nIdx] = true; + Field f; + f.nIdx = nIdx; + f.bIsGeom = true; + if (fieldName.empty()) + f.encodedFieldName = "OGR_GEOMETRY"; + else + f.encodedFieldName = PercentEncode(fieldName); + asFields.push_back(std::move(f)); + } } - const auto eType = poSrcFeatureDefn->GetFieldDefn(nIdx)->GetType(); - if (eType != OFTString && eType != OFTInteger && - eType != OFTInteger64) + else { - ReportError( - CE_Failure, CPLE_NotSupported, - "Field '%s' not valid for partitioning. Only fields of " - "type String, Integer or Integer64 are accepted", - fieldName.c_str()); - return false; + const auto eType = + poSrcFeatureDefn->GetFieldDefn(nIdx)->GetType(); + if (eType != OFTString && eType != OFTInteger && + eType != OFTInteger64) + { + ReportError( + CE_Failure, CPLE_NotSupported, + "Field '%s' not valid for partitioning. Only fields of " + "type String, Integer or Integer64, or geometry fields," + " are accepted", + fieldName.c_str()); + return false; + } + abPartitionedFields[nIdx] = true; + Field f; + f.nIdx = nIdx; + f.bIsGeom = false; + f.encodedFieldName = PercentEncode(fieldName); + f.eType = eType; + asFields.push_back(std::move(f)); } - abPartitionedFields[nIdx] = true; - Field f; - f.nIdx = nIdx; - f.encodedFieldName = PercentEncode(fieldName); - f.eType = eType; - asFields.push_back(std::move(f)); } std::vector aeSrcFieldTypes; @@ -996,12 +1048,16 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) std::vector anMapForSetFrom; if (m_omitPartitionedFields) { - for (const std::string &fieldName : m_fields) + // Sort fields by descending index (so we can delete them easily) + std::vector sortedFields(asFields); + std::sort(sortedFields.begin(), sortedFields.end(), + [](const Field &a, const Field &b) + { return a.nIdx > b.nIdx; }); + for (const auto &field : sortedFields) { - const int nIdx = - poFeatureDefnWithoutPartitionedFields->GetFieldIndex( - fieldName.c_str()); - poFeatureDefnWithoutPartitionedFields->DeleteFieldDefn(nIdx); + if (!field.bIsGeom) + poFeatureDefnWithoutPartitionedFields->DeleteFieldDefn( + field.nIdx); } anMapForSetFrom = poFeatureDefnWithoutPartitionedFields->ComputeMapForSetFrom( @@ -1025,18 +1081,40 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) osAttrQueryString = pszAttrQueryString; std::string osKeyTmp; + std::vector aeGeomTypesTmp; const auto BuildKey = - [&osKeyTmp](const std::vector &fields, - const OGRFeature *poFeature) -> const std::string & + [&osKeyTmp, &aeGeomTypesTmp](const std::vector &fields, + const OGRFeature *poFeature) + -> std::pair &> { osKeyTmp.clear(); + aeGeomTypesTmp.resize(poFeature->GetDefnRef()->GetGeomFieldCount()); for (const auto &field : fields) { if (!osKeyTmp.empty()) osKeyTmp += '/'; osKeyTmp += field.encodedFieldName; osKeyTmp += '='; - if (poFeature->IsFieldSetAndNotNull(field.nIdx)) + if (field.bIsGeom) + { + const auto poGeom = poFeature->GetGeomFieldRef(field.nIdx); + if (poGeom) + { + aeGeomTypesTmp[field.nIdx] = poGeom->getGeometryType(); + osKeyTmp += poGeom->getGeometryName(); + if (poGeom->Is3D()) + osKeyTmp += 'Z'; + if (poGeom->IsMeasured()) + osKeyTmp += 'M'; + } + else + { + aeGeomTypesTmp[field.nIdx] = wkbNone; + osKeyTmp += NULL_MARKER; + } + } + else if (poFeature->IsFieldSetAndNotNull(field.nIdx)) { if (field.eType == OFTString) { @@ -1062,7 +1140,7 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) osKeyTmp += NULL_MARKER; } } - return osKeyTmp; + return {osKeyTmp, aeGeomTypesTmp}; }; std::set oSetKeys; @@ -1072,7 +1150,7 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) "GDAL", "First pass to determine all distinct partitioned values..."); - if (asFields.size() == 1) + if (asFields.size() == 1 && !asFields[0].bIsGeom) { std::string osSQL = "SELECT DISTINCT \""; osSQL += CPLString(m_fields[0]).replaceAll('"', "\"\""); @@ -1093,8 +1171,8 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) asSingleField[0].nIdx = 0; for (auto &poFeature : *poSQLLayer) { - const std::string &osKey = - BuildKey(asSingleField, poFeature.get()); + const auto sPair = BuildKey(asFields, poFeature.get()); + const std::string &osKey = sPair.first; oSetKeys.insert(osKey); #ifdef DEBUG_VERBOSE CPLDebug("GDAL", "Found %s", osKey.c_str()); @@ -1111,8 +1189,8 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) { for (auto &poFeature : *poSrcLayer) { - const std::string &osKey = - BuildKey(asFields, poFeature.get()); + const auto sPair = BuildKey(asFields, poFeature.get()); + const std::string &osKey = sPair.first; if (oSetKeys.insert(osKey).second) { #ifdef DEBUG_VERBOSE @@ -1154,7 +1232,9 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) for (auto &poFeature : *poSrcLayer) { - const std::string &osKey = BuildKey(asFields, poFeature.get()); + const auto sPair = BuildKey(asFields, poFeature.get()); + const std::string &osKey = sPair.first; + const auto &aeGeomTypes = sPair.second; if (!oSetKeysAllowedInThisPass.empty() && !cpl::contains(oSetKeysAllowedInThisPass, osKey)) @@ -1163,10 +1243,11 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) } if (!GetCurrentOutputLayer( - this, poSrcFeatureDefn, poSrcLayer, osKey, osLayerDir, - m_scheme, m_pattern, m_partDigitLeadingZeroes, - m_partDigitCount, m_featureLimit, m_maxFileSize, - m_omitPartitionedFields, abPartitionedFields, + this, poSrcFeatureDefn, poSrcLayer, osKey, aeGeomTypes, + osLayerDir, m_scheme, m_pattern, + m_partDigitLeadingZeroes, m_partDigitCount, + m_featureLimit, m_maxFileSize, m_omitPartitionedFields, + abPartitionedFields, abPartitionedGeomFields, pszExtension, poOutDriver, datasetCreationOptions, layerCreationOptions, poFeatureDefnWithoutPartitionedFields.get(), @@ -1183,7 +1264,9 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) poFeature->SetFID(OGRNullFID); OGRErr eErr; - if (m_omitPartitionedFields) + if (m_omitPartitionedFields || + std::find(aeGeomTypes.begin(), aeGeomTypes.end(), + wkbNone) != aeGeomTypes.end()) { OGRFeature oFeat(outputLayer->poLayer->GetLayerDefn()); oFeat.SetFrom(poFeature.get(), anMapForSetFrom.data()); diff --git a/autotest/utilities/test_gdalalg_vector_partition.py b/autotest/utilities/test_gdalalg_vector_partition.py index 15781accbeef..78d64fff8a5e 100755 --- a/autotest/utilities/test_gdalalg_vector_partition.py +++ b/autotest/utilities/test_gdalalg_vector_partition.py @@ -395,7 +395,7 @@ def test_gdalalg_vector_partition_errors(tmp_vsimem): with pytest.raises( Exception, - match="Field 'real_field' not valid for partitioning. Only fields of type String, Integer or Integer64 are accepted", + match="Field 'real_field' not valid for partitioning", ): gdal.Run( "vector", @@ -1046,3 +1046,148 @@ def test_gdalalg_vector_partition_pattern_error(tmp_vsimem): match=r"Number of digits in part number specifiation should be in \[1,10\] range", ): alg["pattern"] = "%11d" + + +@pytest.mark.require_driver("GPKG") +def test_gdalalg_vector_partition_geometry(tmp_vsimem): + + src_ds = gdal.GetDriverByName("MEM").CreateVector("") + lyr = src_ds.CreateLayer("test") + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POLYGON ((0 0,0 1,1 1,0 0))")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (3 4)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT Z (1 2 3)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT M (1 2 4)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT ZM (1 2 3 4)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + lyr.CreateFeature(f) + + gdal.alg.vector.partition( + input=src_ds, + output=tmp_vsimem / "out", + output_format="GPKG", + field="OGR_GEOMETRY", + ) + + with ogr.Open( + tmp_vsimem / "out/test/OGR_GEOMETRY=POLYGON/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPolygon + assert lyr.GetFeatureCount() == 1 + assert ( + lyr.GetFeature(0).GetGeometryRef().ExportToIsoWkt() + == "POLYGON ((0 0,0 1,1 1,0 0))" + ) + + with ogr.Open( + tmp_vsimem / "out/test/OGR_GEOMETRY=POINT/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPoint + assert lyr.GetFeatureCount() == 2 + assert lyr.GetFeature(1).GetGeometryRef().ExportToIsoWkt() == "POINT (1 2)" + assert lyr.GetFeature(2).GetGeometryRef().ExportToIsoWkt() == "POINT (3 4)" + + with ogr.Open( + tmp_vsimem / "out/test/OGR_GEOMETRY=POINTZ/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPoint25D + assert lyr.GetFeatureCount() == 1 + assert lyr.GetFeature(3).GetGeometryRef().ExportToIsoWkt() == "POINT Z (1 2 3)" + + with ogr.Open( + tmp_vsimem / "out/test/OGR_GEOMETRY=POINTM/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPointM + assert lyr.GetFeatureCount() == 1 + assert lyr.GetFeature(4).GetGeometryRef().ExportToIsoWkt() == "POINT M (1 2 4)" + + with ogr.Open( + tmp_vsimem / "out/test/OGR_GEOMETRY=POINTZM/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbPointZM + assert lyr.GetFeatureCount() == 1 + assert ( + lyr.GetFeature(5).GetGeometryRef().ExportToIsoWkt() == "POINT ZM (1 2 3 4)" + ) + + with ogr.Open( + tmp_vsimem + / "out/test/OGR_GEOMETRY=__HIVE_DEFAULT_PARTITION__/part_0000000001.gpkg" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetGeomType() == ogr.wkbNone + assert lyr.GetFeatureCount() == 1 + assert lyr.GetFeature(6).GetGeometryRef() is None + + +@pytest.mark.require_driver("SQLite") +def test_gdalalg_vector_partition_geometry_multi_geom_fields(tmp_vsimem): + + src_ds = gdal.GetDriverByName("MEM").CreateVector("") + lyr = src_ds.CreateLayer("test") + lyr.CreateGeomField(ogr.GeomFieldDefn("aux_geom", ogr.wkbUnknown)) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeomField(0, ogr.CreateGeometryFromWkt("POLYGON ((0 0,0 1,1 1,0 0))")) + f.SetGeomField(1, ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeomField(0, ogr.CreateGeometryFromWkt("POINT (3 4)")) + f.SetGeomField(1, ogr.CreateGeometryFromWkt("LINESTRING (5 6,7 8)")) + lyr.CreateFeature(f) + + gdal.alg.vector.partition( + input=src_ds, + output=tmp_vsimem / "out", + output_format="SQLite", + field="aux_geom", + ) + + with ogr.Open(tmp_vsimem / "out/test/aux_geom=POINT/part_0000000001.sqlite") as ds: + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetGeomFieldDefn(0).GetType() == ogr.wkbUnknown + assert lyr.GetLayerDefn().GetGeomFieldDefn(1).GetType() == ogr.wkbPoint + assert lyr.GetFeatureCount() == 1 + assert ( + lyr.GetFeature(0).GetGeomFieldRef(0).ExportToIsoWkt() + == "POLYGON ((0 0,0 1,1 1,0 0))" + ) + assert lyr.GetFeature(0).GetGeomFieldRef(1).ExportToIsoWkt() == "POINT (1 2)" + + with ogr.Open( + tmp_vsimem / "out/test/aux_geom=LINESTRING/part_0000000001.sqlite" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetGeomFieldDefn(0).GetType() == ogr.wkbUnknown + assert lyr.GetLayerDefn().GetGeomFieldDefn(1).GetType() == ogr.wkbLineString + assert lyr.GetFeatureCount() == 1 + assert lyr.GetFeature(1).GetGeomFieldRef(0).ExportToIsoWkt() == "POINT (3 4)" + assert ( + lyr.GetFeature(1).GetGeomFieldRef(1).ExportToIsoWkt() + == "LINESTRING (5 6,7 8)" + ) diff --git a/doc/source/programs/gdal_vector_partition.rst b/doc/source/programs/gdal_vector_partition.rst index cc8dbbe1f273..8ac999e5bf25 100644 --- a/doc/source/programs/gdal_vector_partition.rst +++ b/doc/source/programs/gdal_vector_partition.rst @@ -21,8 +21,8 @@ Description ----------- :program:`gdal vector partition` dispatches features into different -files, depending on the values the feature take on a subset of fields specified -by the user. +files, depending on the values the feature take on a subset of attribute or +geometry fields specified by the user. Two partitioning schemes are available: @@ -63,10 +63,15 @@ Standard options Fields(s) on which to partition. [required] - Only fields of type String, Integer and Integer64 are allowed. + Only attribute fields of type String, Integer and Integer64 are allowed. The order into which fields are specified matter to determine the directory hierarchy. + Starting with GDAL 3.13, geometry field names can be specified (``OGR_GEOMETRY`` + being the generic name for the first geometry field). Partitioning on + geometry fields is done on the geometry type. This can be useful for file + formats where a single geometry type per layer is allowed. + .. include:: gdal_options/of_vector.rst .. include:: gdal_options/co_vector.rst @@ -157,3 +162,10 @@ Examples .. code-block:: bash $ gdal pipeline ! read world_cities.gpkg ! filter --where "pop > 1e6" ! partition out_directory --field country --format GPKG --scheme flat + +.. example:: + :title: Create multiple Shapefiles, one for each geometry type, from a GeoPackage file, by grouping together POLYGON/MULTIPOLYGON or LINESTRING/MULTILINESTRING. + + .. code-block:: bash + + $ gdal vector pipeline ! read input.gpkg ! set-geom-type --multi ! partition out_directory --scheme flat --field OGR_GEOMETRY --format "ESRI Shapefile" From a5497b46655cbfddd686cfbc6a99c19a2907716c Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 11 Nov 2025 23:04:52 +0100 Subject: [PATCH 2/2] Remove swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py, replaced by gdal vector partition --- doc/source/api/python/python_samples.rst | 2 +- .../osgeo_utils/samples/ogr_dispatch.py | 397 ------------------ 2 files changed, 1 insertion(+), 398 deletions(-) delete mode 100644 swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py diff --git a/doc/source/api/python/python_samples.rst b/doc/source/api/python/python_samples.rst index 45b8dfd7f6ac..8e74e9d520db 100644 --- a/doc/source/api/python/python_samples.rst +++ b/doc/source/api/python/python_samples.rst @@ -65,7 +65,6 @@ The scripts provide examples of both raster and vector usage of the GDAL Python this script tries to match features between the datasources, to decide whether to create a new feature, or to update an existing one. - :source_file:`swig/python/gdal-utils/osgeo_utils/samples/ogr_build_junction_table.py`: Create junction tables for layers coming from GML datasources that reference other objects in _href fields - - :source_file:`swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py`: Dispatch features into layers according to the value of some fields or the geometry type. - :source_file:`swig/python/gdal-utils/osgeo_utils/samples/rel.py`: Script to produce a shaded relief image from the elevation data. (similar functionality in gdaldem now) - :source_file:`swig/python/gdal-utils/osgeo_utils/samples/tigerpoly.py`: Script demonstrating how to assemble polygons from arcs in TIGER/Line datasource, writing results to a newly created shapefile. @@ -111,6 +110,7 @@ The following scripts have been replaced by equivalent functionality in GDAL, an - `gdal_cp.py`: Copy a virtual file. Equivalent functionality available in :ref:`gdal_dataset_copy`. - `gdal_vrtmerge.py`: Similar to gdal_merge, but produces a VRT file. Equivalent functionality available in :ref:`gdal_raster_mosaic`. - `gdal2grd.py`: Script to write out ASCII GRD rasters (used in Golden Software Surfer). from any source supported by GDAL. Equivalent functionality available in the :ref:`raster.gsag` driver. + - `ogr_dispatch.py`: Dispatch features into layers according to the value of some fields or the geometry type. Equivalent functionality available in :ref:`gdal_vector_partition`. .. note:: diff --git a/swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py b/swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py deleted file mode 100644 index ee0550182cb9..000000000000 --- a/swig/python/gdal-utils/osgeo_utils/samples/ogr_dispatch.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -############################################################################### -# -# Project: GDAL/OGR samples -# Purpose: Dispatch features into layers according to the value of some fields -# or the geometry type. -# Author: Even Rouault -# -############################################################################### -# Copyright (c) 2013, Even Rouault -# -# SPDX-License-Identifier: MIT -############################################################################### - -import sys - -from osgeo import ogr, osr - - -def Usage(): - print("Usage: ogr_dispatch.py [-f format] -src name -dst name [-field field]+") - print(" [-25D_as_2D] [-multi_as_single]") - print(" [-remove_dispatch_fields] [-prefix_with_layer_name]") - print(" [-dsco KEY=VALUE]... [-lco KEY=VALUE]... [-a_srs srs_def]") - print( - " [-style_as_field] [-where restricted_where] [-gt n] [-quiet]" - ) - print("") - print("Dispatch features into layers according to the value of some fields or the") - print("geometry type.") - print("") - print("Arguments:") - print(" -f format: name of the driver to use to create the destination dataset") - print(" (default 'ESRI Shapefile').") - print(" -src name: name of the source dataset.") - print(" -dst name: name of the destination dataset (existing or to be created).") - print(" -field field: name of a field to use to dispatch features. May also") - print(" be 'OGR_GEOMETRY'. At least, one occurrence needed.") - print(" -25D_as_2D: for dispatching, consider 2.5D geometries as 2D.") - print(" -multi_as_single: for dispatching, consider MULTIPOLYGON as POLYGON and") - print(" MULTILINESTRING as LINESTRING.") - print( - " -remove_dispatch_fields: remove the dispatch fields from the target layer definitions." - ) - print( - " -prefix_with_layer_name: prefix the target layer name with the source layer name." - ) - print(" -dsco KEY=VALUE: dataset creation option. May be repeated.") - print(" -lco KEY=VALUE: layer creation option. May be repeated.") - print( - " -a_srs srs_def: assign a SRS to the target layers. Source layer SRS is otherwise used." - ) - print( - " -style_as_field: add a OGR_STYLE field with the content of the feature style string." - ) - print(" -where restricted_where: where clause to filter source features.") - print(" -gt n: group n features per transaction (default 200).") - print("") - print("Example :") - print(" ogr_dispatch.py -src in.dxf -dst out -field Layer -field OGR_GEOMETRY") - print("") - - return 2 - - -############################################################################### - - -def EQUAL(a, b): - return a.lower() == b.lower() - - -############################################################### -# wkbFlatten() - - -def wkbFlatten(x): - return x & (~ogr.wkb25DBit) - - -############################################################### - - -class Options: - def __init__(self): - self.lco = [] - self.dispatch_fields = [] - self.b25DAs2D = False - self.bMultiAsSingle = False - self.bRemoveDispatchFields = False - self.poOutputSRS = None - self.bNullifyOutputSRS = False - self.bPrefixWithLayerName = False - self.bStyleAsField = False - self.nGroupTransactions = 200 - self.bQuiet = False - - -############################################################### -# GeometryTypeToName() - - -def GeometryTypeToName(eGeomType, options): - - if options.b25DAs2D: - eGeomType = wkbFlatten(eGeomType) - - if eGeomType == ogr.wkbPoint: - return "POINT" - if eGeomType == ogr.wkbLineString: - return "LINESTRING" - if eGeomType == ogr.wkbPolygon: - return "POLYGON" - if eGeomType == ogr.wkbMultiPoint: - return "MULTIPOINT" - if eGeomType == ogr.wkbMultiLineString: - return "LINESTRING" if options.bMultiAsSingle else "MULTILINESTRING" - if eGeomType == ogr.wkbMultiPolygon: - return "POLYGON" if options.bMultiAsSingle else "MULTIPOLYGON" - if eGeomType == ogr.wkbGeometryCollection: - return "GEOMETRYCOLLECTION" - if eGeomType == ogr.wkbPoint25D: - return "POINT25D" - if eGeomType == ogr.wkbLineString25D: - return "LINESTRING25D" - if eGeomType == ogr.wkbPolygon25D: - return "POLYGON25D" - if eGeomType == ogr.wkbMultiPoint25D: - return "MULTIPOINT25D" - if eGeomType == ogr.wkbMultiLineString25D: - return "LINESTRING25D" if options.bMultiAsSingle else "MULTILINESTRING25D" - if eGeomType == ogr.wkbMultiPolygon25D: - return "POLYGON25D" if options.bMultiAsSingle else "MULTIPOLYGON25D" - if eGeomType == ogr.wkbGeometryCollection25D: - return "GEOMETRYCOLLECTION25D" - # Shouldn't happen - return "UNKNOWN" - - -############################################################### -# get_out_lyr_name() - - -def get_out_lyr_name(src_lyr, feat, options): - if options.bPrefixWithLayerName: - out_lyr_name = src_lyr.GetName() - else: - out_lyr_name = "" - - for dispatch_field in options.dispatch_fields: - if EQUAL(dispatch_field, "OGR_GEOMETRY"): - geom = feat.GetGeometryRef() - if geom is None: - val = "NONE" - else: - val = GeometryTypeToName(geom.GetGeometryType(), options) - else: - if feat.IsFieldSet(dispatch_field): - val = feat.GetFieldAsString(dispatch_field) - else: - val = "null" - - if out_lyr_name == "": - out_lyr_name = val - else: - out_lyr_name = out_lyr_name + "_" + val - - return out_lyr_name - - -############################################################### -# get_layer_and_map() - - -def get_layer_and_map(out_lyr_name, src_lyr, dst_ds, layerMap, geom_type, options): - - if out_lyr_name not in layerMap: - if options.poOutputSRS is not None or options.bNullifyOutputSRS: - srs = options.poOutputSRS - else: - srs = src_lyr.GetSpatialRef() - out_lyr = dst_ds.GetLayerByName(out_lyr_name) - if out_lyr is None: - if not options.bQuiet: - print("Creating layer %s" % out_lyr_name) - out_lyr = dst_ds.CreateLayer( - out_lyr_name, srs=srs, geom_type=geom_type, options=options.lco - ) - if out_lyr is None: - return 1 - src_field_count = src_lyr.GetLayerDefn().GetFieldCount() - panMap = [-1 for i in range(src_field_count)] - for i in range(src_field_count): - field_defn = src_lyr.GetLayerDefn().GetFieldDefn(i) - if options.bRemoveDispatchFields: - found = False - for dispatch_field in options.dispatch_fields: - if EQUAL(dispatch_field, field_defn.GetName()): - found = True - break - if found: - continue - idx = out_lyr.GetLayerDefn().GetFieldIndex(field_defn.GetName()) - if idx >= 0: - panMap[i] = idx - elif out_lyr.CreateField(field_defn) == 0: - panMap[i] = out_lyr.GetLayerDefn().GetFieldCount() - 1 - if options.bStyleAsField: - out_lyr.CreateField(ogr.FieldDefn("OGR_STYLE", ogr.OFTString)) - else: - panMap = None - layerMap[out_lyr_name] = [out_lyr, panMap] - else: - out_lyr = layerMap[out_lyr_name][0] - panMap = layerMap[out_lyr_name][1] - - return (out_lyr, panMap) - - -############################################################### -# convert_layer() - - -def convert_layer(src_lyr, dst_ds, layerMap, options): - - current_out_lyr = None - nFeaturesInTransaction = 0 - - for feat in src_lyr: - - out_lyr_name = get_out_lyr_name(src_lyr, feat, options) - - geom = feat.GetGeometryRef() - if geom is not None: - geom_type = geom.GetGeometryType() - else: - geom_type = ogr.wkbUnknown - - (out_lyr, panMap) = get_layer_and_map( - out_lyr_name, src_lyr, dst_ds, layerMap, geom_type, options - ) - - if options.nGroupTransactions > 0: - if out_lyr != current_out_lyr: - if current_out_lyr is not None: - current_out_lyr.CommitTransaction() - current_out_lyr = out_lyr - current_out_lyr.StartTransaction() - nFeaturesInTransaction = 0 - - if nFeaturesInTransaction == options.nGroupTransactions: - current_out_lyr.CommitTransaction() - current_out_lyr.StartTransaction() - else: - current_out_lyr = out_lyr - - out_feat = ogr.Feature(out_lyr.GetLayerDefn()) - if panMap is not None: - out_feat.SetFromWithMap(feat, 1, panMap) - else: - out_feat.SetFrom(feat) - if options.bStyleAsField: - style = feat.GetStyleString() - if style is not None: - out_feat.SetField("OGR_STYLE", style) - out_lyr.CreateFeature(out_feat) - - nFeaturesInTransaction = nFeaturesInTransaction + 1 - - if options.nGroupTransactions > 0 and current_out_lyr is not None: - current_out_lyr.CommitTransaction() - - return 1 - - -############################################################### -# ogr_dispatch() - - -def ogr_dispatch(argv, progress=None, progress_arg=None): - # pylint: disable=unused-argument - src_filename = None - dst_filename = None - driver_name = "ESRI Shapefile" - options = Options() - lco = [] - dsco = [] - pszWHERE = None - - if not argv: - return Usage() - - i = 0 - while i < len(argv): - arg = argv[i] - if EQUAL(arg, "-src") and i + 1 < len(argv): - i = i + 1 - src_filename = argv[i] - elif EQUAL(arg, "-dst") and i + 1 < len(argv): - i = i + 1 - dst_filename = argv[i] - elif EQUAL(arg, "-f") and i + 1 < len(argv): - i = i + 1 - driver_name = argv[i] - - elif EQUAL(arg, "-a_srs") and i + 1 < len(argv): - i = i + 1 - pszOutputSRSDef = argv[i] - if EQUAL(pszOutputSRSDef, "NULL") or EQUAL(pszOutputSRSDef, "NONE"): - options.bNullifyOutputSRS = True - else: - options.poOutputSRS = osr.SpatialReference() - if options.poOutputSRS.SetFromUserInput(pszOutputSRSDef) != 0: - print("Failed to process SRS definition: %s" % pszOutputSRSDef) - return 1 - elif EQUAL(arg, "-dsco") and i + 1 < len(argv): - i = i + 1 - dsco.append(argv[i]) - elif EQUAL(arg, "-lco") and i + 1 < len(argv): - i = i + 1 - lco.append(argv[i]) - elif EQUAL(arg, "-field") and i + 1 < len(argv): - i = i + 1 - options.dispatch_fields.append(argv[i]) - elif EQUAL(arg, "-25D_as_2D"): - options.b25DAs2D = True - elif EQUAL(arg, "-multi_as_single"): - options.bMultiAsSingle = True - elif EQUAL(arg, "-remove_dispatch_fields"): - options.bRemoveDispatchFields = True - elif EQUAL(arg, "-prefix_with_layer_name"): - options.bPrefixWithLayerName = True - elif EQUAL(arg, "-style_as_field"): - options.bStyleAsField = True - elif (EQUAL(arg, "-tg") or EQUAL(arg, "-gt")) and i + 1 < len(argv): - i = i + 1 - options.nGroupTransactions = int(argv[i]) - elif EQUAL(arg, "-where") and i + 1 < len(argv): - i = i + 1 - pszWHERE = argv[i] - elif EQUAL(arg, "-quiet"): - options.bQuiet = True - else: - print("Unrecognized argument : %s" % arg) - return Usage() - i = i + 1 - - if src_filename is None: - print("Missing -src") - return 1 - - if dst_filename is None: - print("Missing -dst") - return 1 - - if not options.dispatch_fields: - print("Missing -dispatch_field") - return 1 - - src_ds = ogr.Open(src_filename) - if src_ds is None: - print("Cannot open source datasource %s" % src_filename) - return 1 - - dst_ds = ogr.Open(dst_filename, update=1) - if dst_ds is not None: - if dsco: - print("-dsco should not be specified for an existing datasource") - return 1 - else: - dst_ds = ogr.GetDriverByName(driver_name).CreateDataSource( - dst_filename, options=dsco - ) - if dst_ds is None: - print("Cannot open or create target datasource %s" % dst_filename) - return 1 - - layerMap = {} - - for src_lyr in src_ds: - if pszWHERE is not None: - src_lyr.SetAttributeFilter(pszWHERE) - ret = convert_layer(src_lyr, dst_ds, layerMap, options) - if ret != 0: - return ret - - return 0 - - -def main(argv=sys.argv): - argv = ogr.GeneralCmdLineProcessor(argv) - return ogr_dispatch(argv[1:]) - - -if __name__ == "__main__": - sys.exit(main(sys.argv))