Skip to content

Commit e416ffb

Browse files
authored
Merge pull request #13388 from rouault/gdal_vector_partition_geometry
gdal vector partition: also accept geometry fields, to partition on geometry type
2 parents d122f15 + a5497b4 commit e416ffb

File tree

5 files changed

+296
-453
lines changed

5 files changed

+296
-453
lines changed

apps/gdalalg_vector_partition.cpp

Lines changed: 134 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ GDALVectorPartitionAlgorithm::GDALVectorPartitionAlgorithm(bool standaloneStep)
8282
AddCreationOptionsArg(&m_creationOptions);
8383
AddLayerCreationOptionsArg(&m_layerCreationOptions);
8484

85-
AddArg("field", 0, _("Field(s) on which to partition"), &m_fields)
85+
AddArg("field", 0, _("Attribute or geometry field(s) on which to partition"),
86+
&m_fields)
8687
.SetRequired();
8788
AddArg("scheme", 0, _("Partitioning scheme"), &m_scheme)
8889
.SetChoices(SCHEME_HIVE, SCHEME_FLAT)
@@ -363,11 +364,13 @@ struct Layer
363364
static bool GetCurrentOutputLayer(
364365
GDALAlgorithm *const alg, const OGRFeatureDefn *const poSrcFeatureDefn,
365366
OGRLayer *const poSrcLayer, const std::string &osKey,
367+
const std::vector<OGRwkbGeometryType> &aeGeomTypes,
366368
const std::string &osLayerDir, const std::string &osScheme,
367369
const std::string &osPatternIn, bool partDigitLeadingZeroes,
368370
size_t partDigitCount, const int featureLimit, const GIntBig maxFileSize,
369371
const bool omitPartitionedFields,
370-
const std::vector<bool> &abPartitionedFields, const char *pszExtension,
372+
const std::vector<bool> &abPartitionedFields,
373+
const std::vector<bool> &abPartitionedGeomFields, const char *pszExtension,
371374
GDALDriver *const poOutDriver, const CPLStringList &datasetCreationOptions,
372375
const CPLStringList &layerCreationOptions,
373376
const OGRFeatureDefn *const poFeatureDefnWithoutPartitionedFields,
@@ -662,11 +665,22 @@ static bool GetCurrentOutputLayer(
662665
pszSrcFIDColumn);
663666
}
664667

668+
std::unique_ptr<OGRGeomFieldDefn> poFirstGeomFieldDefn;
669+
if (poSrcFeatureDefn->GetGeomFieldCount())
670+
{
671+
poFirstGeomFieldDefn = std::make_unique<OGRGeomFieldDefn>(
672+
*poSrcFeatureDefn->GetGeomFieldDefn(0));
673+
if (abPartitionedGeomFields[0])
674+
{
675+
if (aeGeomTypes[0] == wkbNone)
676+
poFirstGeomFieldDefn.reset();
677+
else
678+
whileUnsealing(poFirstGeomFieldDefn.get())
679+
->SetType(aeGeomTypes[0]);
680+
}
681+
}
665682
auto poLayer = outputLayer->poDS->CreateLayer(
666-
poSrcLayer->GetDescription(),
667-
poSrcFeatureDefn->GetGeomFieldCount()
668-
? poSrcFeatureDefn->GetGeomFieldDefn(0)
669-
: nullptr,
683+
poSrcLayer->GetDescription(), poFirstGeomFieldDefn.get(),
670684
modLayerCreationOptions.List());
671685
if (!poLayer)
672686
{
@@ -687,22 +701,29 @@ static bool GetCurrentOutputLayer(
687701
return false;
688702
}
689703
}
690-
bool bFirst = true;
704+
int iGeomField = -1;
691705
for (const auto *poGeomFieldDefn :
692706
poSrcFeatureDefn->GetGeomFields())
693707
{
694-
if (!bFirst)
708+
++iGeomField;
709+
if (iGeomField > 0)
695710
{
696-
if (poLayer->CreateGeomField(poGeomFieldDefn) !=
697-
OGRERR_NONE)
711+
OGRGeomFieldDefn oClone(poGeomFieldDefn);
712+
if (abPartitionedGeomFields[iGeomField])
713+
{
714+
if (aeGeomTypes[iGeomField] == wkbNone)
715+
continue;
716+
whileUnsealing(&oClone)->SetType(
717+
aeGeomTypes[iGeomField]);
718+
}
719+
if (poLayer->CreateGeomField(&oClone) != OGRERR_NONE)
698720
{
699721
alg->ReportError(CE_Failure, CPLE_AppDefined,
700722
"Cannot create geometry field '%s'",
701723
poGeomFieldDefn->GetNameRef());
702724
return false;
703725
}
704726
}
705-
bFirst = false;
706727
}
707728

708729
if (bUseTransactions)
@@ -949,40 +970,71 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
949970
struct Field
950971
{
951972
int nIdx{};
973+
bool bIsGeom = false;
952974
std::string encodedFieldName{};
953975
OGRFieldType eType{};
954976
};
955977

956978
std::vector<Field> asFields;
957979
std::vector<bool> abPartitionedFields(poSrcFeatureDefn->GetFieldCount(),
958980
false);
981+
std::vector<bool> abPartitionedGeomFields(
982+
poSrcFeatureDefn->GetGeomFieldCount(), false);
959983
for (const std::string &fieldName : m_fields)
960984
{
961-
const int nIdx = poSrcFeatureDefn->GetFieldIndex(fieldName.c_str());
985+
int nIdx = poSrcFeatureDefn->GetFieldIndex(fieldName.c_str());
962986
if (nIdx < 0)
963987
{
964-
ReportError(CE_Failure, CPLE_AppDefined,
965-
"Cannot find field '%s' in layer '%s'",
966-
fieldName.c_str(), poSrcLayer->GetDescription());
967-
return false;
988+
if (fieldName == "OGR_GEOMETRY" &&
989+
poSrcFeatureDefn->GetGeomFieldCount() > 0)
990+
nIdx = 0;
991+
else
992+
nIdx =
993+
poSrcFeatureDefn->GetGeomFieldIndex(fieldName.c_str());
994+
if (nIdx < 0)
995+
{
996+
ReportError(CE_Failure, CPLE_AppDefined,
997+
"Cannot find field '%s' in layer '%s'",
998+
fieldName.c_str(),
999+
poSrcLayer->GetDescription());
1000+
return false;
1001+
}
1002+
else
1003+
{
1004+
abPartitionedGeomFields[nIdx] = true;
1005+
Field f;
1006+
f.nIdx = nIdx;
1007+
f.bIsGeom = true;
1008+
if (fieldName.empty())
1009+
f.encodedFieldName = "OGR_GEOMETRY";
1010+
else
1011+
f.encodedFieldName = PercentEncode(fieldName);
1012+
asFields.push_back(std::move(f));
1013+
}
9681014
}
969-
const auto eType = poSrcFeatureDefn->GetFieldDefn(nIdx)->GetType();
970-
if (eType != OFTString && eType != OFTInteger &&
971-
eType != OFTInteger64)
1015+
else
9721016
{
973-
ReportError(
974-
CE_Failure, CPLE_NotSupported,
975-
"Field '%s' not valid for partitioning. Only fields of "
976-
"type String, Integer or Integer64 are accepted",
977-
fieldName.c_str());
978-
return false;
1017+
const auto eType =
1018+
poSrcFeatureDefn->GetFieldDefn(nIdx)->GetType();
1019+
if (eType != OFTString && eType != OFTInteger &&
1020+
eType != OFTInteger64)
1021+
{
1022+
ReportError(
1023+
CE_Failure, CPLE_NotSupported,
1024+
"Field '%s' not valid for partitioning. Only fields of "
1025+
"type String, Integer or Integer64, or geometry fields,"
1026+
" are accepted",
1027+
fieldName.c_str());
1028+
return false;
1029+
}
1030+
abPartitionedFields[nIdx] = true;
1031+
Field f;
1032+
f.nIdx = nIdx;
1033+
f.bIsGeom = false;
1034+
f.encodedFieldName = PercentEncode(fieldName);
1035+
f.eType = eType;
1036+
asFields.push_back(std::move(f));
9791037
}
980-
abPartitionedFields[nIdx] = true;
981-
Field f;
982-
f.nIdx = nIdx;
983-
f.encodedFieldName = PercentEncode(fieldName);
984-
f.eType = eType;
985-
asFields.push_back(std::move(f));
9861038
}
9871039

9881040
std::vector<OGRFieldType> aeSrcFieldTypes;
@@ -996,12 +1048,16 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
9961048
std::vector<int> anMapForSetFrom;
9971049
if (m_omitPartitionedFields)
9981050
{
999-
for (const std::string &fieldName : m_fields)
1051+
// Sort fields by descending index (so we can delete them easily)
1052+
std::vector<Field> sortedFields(asFields);
1053+
std::sort(sortedFields.begin(), sortedFields.end(),
1054+
[](const Field &a, const Field &b)
1055+
{ return a.nIdx > b.nIdx; });
1056+
for (const auto &field : sortedFields)
10001057
{
1001-
const int nIdx =
1002-
poFeatureDefnWithoutPartitionedFields->GetFieldIndex(
1003-
fieldName.c_str());
1004-
poFeatureDefnWithoutPartitionedFields->DeleteFieldDefn(nIdx);
1058+
if (!field.bIsGeom)
1059+
poFeatureDefnWithoutPartitionedFields->DeleteFieldDefn(
1060+
field.nIdx);
10051061
}
10061062
anMapForSetFrom =
10071063
poFeatureDefnWithoutPartitionedFields->ComputeMapForSetFrom(
@@ -1025,18 +1081,40 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
10251081
osAttrQueryString = pszAttrQueryString;
10261082

10271083
std::string osKeyTmp;
1084+
std::vector<OGRwkbGeometryType> aeGeomTypesTmp;
10281085
const auto BuildKey =
1029-
[&osKeyTmp](const std::vector<Field> &fields,
1030-
const OGRFeature *poFeature) -> const std::string &
1086+
[&osKeyTmp, &aeGeomTypesTmp](const std::vector<Field> &fields,
1087+
const OGRFeature *poFeature)
1088+
-> std::pair<const std::string &,
1089+
const std::vector<OGRwkbGeometryType> &>
10311090
{
10321091
osKeyTmp.clear();
1092+
aeGeomTypesTmp.resize(poFeature->GetDefnRef()->GetGeomFieldCount());
10331093
for (const auto &field : fields)
10341094
{
10351095
if (!osKeyTmp.empty())
10361096
osKeyTmp += '/';
10371097
osKeyTmp += field.encodedFieldName;
10381098
osKeyTmp += '=';
1039-
if (poFeature->IsFieldSetAndNotNull(field.nIdx))
1099+
if (field.bIsGeom)
1100+
{
1101+
const auto poGeom = poFeature->GetGeomFieldRef(field.nIdx);
1102+
if (poGeom)
1103+
{
1104+
aeGeomTypesTmp[field.nIdx] = poGeom->getGeometryType();
1105+
osKeyTmp += poGeom->getGeometryName();
1106+
if (poGeom->Is3D())
1107+
osKeyTmp += 'Z';
1108+
if (poGeom->IsMeasured())
1109+
osKeyTmp += 'M';
1110+
}
1111+
else
1112+
{
1113+
aeGeomTypesTmp[field.nIdx] = wkbNone;
1114+
osKeyTmp += NULL_MARKER;
1115+
}
1116+
}
1117+
else if (poFeature->IsFieldSetAndNotNull(field.nIdx))
10401118
{
10411119
if (field.eType == OFTString)
10421120
{
@@ -1062,7 +1140,7 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
10621140
osKeyTmp += NULL_MARKER;
10631141
}
10641142
}
1065-
return osKeyTmp;
1143+
return {osKeyTmp, aeGeomTypesTmp};
10661144
};
10671145

10681146
std::set<std::string> oSetKeys;
@@ -1072,7 +1150,7 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
10721150
"GDAL",
10731151
"First pass to determine all distinct partitioned values...");
10741152

1075-
if (asFields.size() == 1)
1153+
if (asFields.size() == 1 && !asFields[0].bIsGeom)
10761154
{
10771155
std::string osSQL = "SELECT DISTINCT \"";
10781156
osSQL += CPLString(m_fields[0]).replaceAll('"', "\"\"");
@@ -1093,8 +1171,8 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
10931171
asSingleField[0].nIdx = 0;
10941172
for (auto &poFeature : *poSQLLayer)
10951173
{
1096-
const std::string &osKey =
1097-
BuildKey(asSingleField, poFeature.get());
1174+
const auto sPair = BuildKey(asFields, poFeature.get());
1175+
const std::string &osKey = sPair.first;
10981176
oSetKeys.insert(osKey);
10991177
#ifdef DEBUG_VERBOSE
11001178
CPLDebug("GDAL", "Found %s", osKey.c_str());
@@ -1111,8 +1189,8 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
11111189
{
11121190
for (auto &poFeature : *poSrcLayer)
11131191
{
1114-
const std::string &osKey =
1115-
BuildKey(asFields, poFeature.get());
1192+
const auto sPair = BuildKey(asFields, poFeature.get());
1193+
const std::string &osKey = sPair.first;
11161194
if (oSetKeys.insert(osKey).second)
11171195
{
11181196
#ifdef DEBUG_VERBOSE
@@ -1154,7 +1232,9 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
11541232

11551233
for (auto &poFeature : *poSrcLayer)
11561234
{
1157-
const std::string &osKey = BuildKey(asFields, poFeature.get());
1235+
const auto sPair = BuildKey(asFields, poFeature.get());
1236+
const std::string &osKey = sPair.first;
1237+
const auto &aeGeomTypes = sPair.second;
11581238

11591239
if (!oSetKeysAllowedInThisPass.empty() &&
11601240
!cpl::contains(oSetKeysAllowedInThisPass, osKey))
@@ -1163,10 +1243,11 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
11631243
}
11641244

11651245
if (!GetCurrentOutputLayer(
1166-
this, poSrcFeatureDefn, poSrcLayer, osKey, osLayerDir,
1167-
m_scheme, m_pattern, m_partDigitLeadingZeroes,
1168-
m_partDigitCount, m_featureLimit, m_maxFileSize,
1169-
m_omitPartitionedFields, abPartitionedFields,
1246+
this, poSrcFeatureDefn, poSrcLayer, osKey, aeGeomTypes,
1247+
osLayerDir, m_scheme, m_pattern,
1248+
m_partDigitLeadingZeroes, m_partDigitCount,
1249+
m_featureLimit, m_maxFileSize, m_omitPartitionedFields,
1250+
abPartitionedFields, abPartitionedGeomFields,
11701251
pszExtension, poOutDriver, datasetCreationOptions,
11711252
layerCreationOptions,
11721253
poFeatureDefnWithoutPartitionedFields.get(),
@@ -1183,7 +1264,9 @@ bool GDALVectorPartitionAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt)
11831264
poFeature->SetFID(OGRNullFID);
11841265

11851266
OGRErr eErr;
1186-
if (m_omitPartitionedFields)
1267+
if (m_omitPartitionedFields ||
1268+
std::find(aeGeomTypes.begin(), aeGeomTypes.end(),
1269+
wkbNone) != aeGeomTypes.end())
11871270
{
11881271
OGRFeature oFeat(outputLayer->poLayer->GetLayerDefn());
11891272
oFeat.SetFrom(poFeature.get(), anMapForSetFrom.data());

0 commit comments

Comments
 (0)