Skip to content

Commit 027d80b

Browse files
authored
Remove redundant locations when constructing access policies (#2149)
Iceberg tables can technically store data across any number of paths, but Polaris currently uses 3 different locations for credential vending: 1. The table's base location 2. The table's `write.data.path`, if set 3. The table's `write.metadata.path`, if set This was intended to capture scenarios where e.g. (2) is not a child path of (1), so that the vended credentials can still be valid for reading the entire table. However, there are systems that seem to always set (2) and (3), such as: 1. `s3:/my-bucket/base/iceberg` 2. `s3:/my-bucket/base/iceberg/data` 3. `s3:/my-bucket/base/iceberg/metadata` In such cases the extra paths (e.g. extra resources in the AWS Policy) are redundant. In one such case, these redundant paths caused the policy to exceed the maximum allowable 2048 characters. This PR removes redundant paths -- those that are the child of another path -- from the list of accessible locations tracked for a given table and does some slight refactoring to consolidate the logic for extracting these paths from a TableMetadata.
1 parent 1277eff commit 027d80b

File tree

5 files changed

+117
-83
lines changed

5 files changed

+117
-83
lines changed

polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,14 @@
3232
import java.util.Collections;
3333
import java.util.List;
3434
import java.util.Locale;
35-
import java.util.Map;
36-
import java.util.Objects;
3735
import java.util.Optional;
38-
import java.util.stream.Collectors;
39-
import java.util.stream.Stream;
36+
import java.util.Set;
4037
import org.apache.polaris.core.PolarisCallContext;
4138
import org.apache.polaris.core.admin.model.Catalog;
4239
import org.apache.polaris.core.config.FeatureConfiguration;
4340
import org.apache.polaris.core.entity.CatalogEntity;
4441
import org.apache.polaris.core.entity.PolarisEntity;
4542
import org.apache.polaris.core.entity.PolarisEntityConstants;
46-
import org.apache.polaris.core.entity.table.IcebergTableLikeEntity;
4743
import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo;
4844
import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo;
4945
import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo;
@@ -162,30 +158,19 @@ public static Optional<PolarisStorageConfigurationInfo> forEntityPath(
162158
entityPathReversed.get(0).getName());
163159

164160
// TODO: figure out the purpose of adding `userSpecifiedWriteLocations`
165-
List<String> locs =
166-
userSpecifiedWriteLocations(entityPathReversed.get(0).getPropertiesAsMap());
161+
Set<String> locations =
162+
StorageUtil.getLocationsAllowedToBeAccessed(
163+
null, entityPathReversed.get(0).getPropertiesAsMap());
167164
return new StorageConfigurationOverride(
168165
configInfo,
169166
ImmutableList.<String>builder()
170167
.addAll(configInfo.getAllowedLocations())
171-
.addAll(locs)
168+
.addAll(locations)
172169
.build());
173170
}
174171
});
175172
}
176173

177-
private static List<String> userSpecifiedWriteLocations(Map<String, String> properties) {
178-
return Optional.ofNullable(properties)
179-
.map(
180-
p ->
181-
Stream.of(
182-
p.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY),
183-
p.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY))
184-
.filter(Objects::nonNull)
185-
.collect(Collectors.toList()))
186-
.orElse(List.of());
187-
}
188-
189174
private static @Nonnull Optional<PolarisEntity> findStorageInfoFromHierarchy(
190175
List<PolarisEntity> entityPath) {
191176
for (int i = entityPath.size() - 1; i >= 0; i--) {

polaris-core/src/main/java/org/apache/polaris/core/storage/StorageLocation.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@
2727
import org.slf4j.Logger;
2828
import org.slf4j.LoggerFactory;
2929

30-
/** An abstraction over a storage location */
30+
/**
31+
* An abstraction over a storage location. This type may be used for a generic string-based
32+
* location, but child classes might be used where behavior specific to a storage provider is
33+
* needed.
34+
*/
3135
public class StorageLocation {
3236
private static final Logger LOGGER = LoggerFactory.getLogger(StorageLocation.class);
3337
private static final Pattern SCHEME_PATTERN = Pattern.compile("^(.+?):(.+)");
@@ -36,7 +40,7 @@ public class StorageLocation {
3640

3741
private final String location;
3842

39-
/** Create a StorageLocation from a String path */
43+
/** Create a StorageLocation of the appropriate type from a String path */
4044
public static StorageLocation of(String location) {
4145
// TODO implement StorageLocation for all supported file systems and add isValidLocation
4246
if (AzureLocation.isAzureLocation(location)) {
@@ -61,7 +65,7 @@ protected StorageLocation(@Nonnull String location) {
6165
}
6266

6367
/** If a path doesn't end in `/`, this will add one */
64-
protected final String ensureTrailingSlash(String location) {
68+
protected static String ensureTrailingSlash(String location) {
6569
if (location == null || location.endsWith("/")) {
6670
return location;
6771
} else {
@@ -70,7 +74,7 @@ protected final String ensureTrailingSlash(String location) {
7074
}
7175

7276
/** If a path doesn't start with `/`, this will add one */
73-
protected final @Nonnull String ensureLeadingSlash(@Nonnull String location) {
77+
protected static @Nonnull String ensureLeadingSlash(@Nonnull String location) {
7478
if (location.startsWith("/")) {
7579
return location;
7680
} else {
@@ -83,6 +87,12 @@ public int hashCode() {
8387
return location.hashCode();
8488
}
8589

90+
/**
91+
* Checks if two StorageLocations represent the same physical location.
92+
*
93+
* <p>Child classes should override this behavior if a check other than basic string-matching
94+
* should be done.
95+
*/
8696
@Override
8797
public boolean equals(Object obj) {
8898
if (obj instanceof StorageLocation) {
@@ -99,7 +109,10 @@ public String toString() {
99109

100110
/**
101111
* Returns true if this StorageLocation's location string starts with the other StorageLocation's
102-
* location string
112+
* location string.
113+
*
114+
* <p>Child classes should override this behavior if a check other than basic string-matching
115+
* should be done.
103116
*/
104117
public boolean isChildOf(StorageLocation potentialParent) {
105118
if (this.location == null || potentialParent.location == null) {

polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020

2121
import jakarta.annotation.Nonnull;
2222
import java.net.URI;
23+
import java.util.HashSet;
24+
import java.util.Map;
25+
import java.util.Set;
26+
import org.apache.iceberg.TableMetadata;
27+
import org.apache.iceberg.view.ViewMetadata;
28+
import org.apache.polaris.core.entity.table.IcebergTableLikeEntity;
2329

2430
public class StorageUtil {
2531
/**
@@ -62,4 +68,44 @@ public class StorageUtil {
6268
public static @Nonnull String getBucket(URI uri) {
6369
return uri.getAuthority();
6470
}
71+
72+
/** Given a TableMetadata, extracts the locations where the table's [meta]data might be found. */
73+
public static @Nonnull Set<String> getLocationsAllowedToBeAccessed(TableMetadata tableMetadata) {
74+
return getLocationsAllowedToBeAccessed(tableMetadata.location(), tableMetadata.properties());
75+
}
76+
77+
/** Given a baseLocation and entity (table?) properties, extracts the relevant locations */
78+
public static @Nonnull Set<String> getLocationsAllowedToBeAccessed(
79+
String baseLocation, Map<String, String> properties) {
80+
Set<String> locations = new HashSet<>();
81+
locations.add(baseLocation);
82+
locations.add(properties.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY));
83+
locations.add(
84+
properties.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY));
85+
locations.remove(null);
86+
return removeRedundantLocations(locations);
87+
}
88+
89+
/** Given a ViewMetadata, extracts the locations where the view's [meta]data might be found. */
90+
public static @Nonnull Set<String> getLocationsAllowedToBeAccessed(ViewMetadata viewMetadata) {
91+
return Set.of(viewMetadata.location());
92+
}
93+
94+
/** Removes "redundant" locations, so {/a/b/, /a/b/c, /a/b/d} will be reduced to just {/a/b/} */
95+
private static @Nonnull Set<String> removeRedundantLocations(Set<String> locationStrings) {
96+
HashSet<String> result = new HashSet<>(locationStrings);
97+
98+
for (String potentialParent : locationStrings) {
99+
StorageLocation potentialParentLocation = StorageLocation.of(potentialParent);
100+
for (String potentialChild : locationStrings) {
101+
if (!potentialParent.equals(potentialChild)) {
102+
StorageLocation potentialChildLocation = StorageLocation.of(potentialChild);
103+
if (potentialChildLocation.isChildOf(potentialParentLocation)) {
104+
result.remove(potentialChild);
105+
}
106+
}
107+
}
108+
}
109+
return result;
110+
}
65111
}

polaris-core/src/test/java/org/apache/polaris/core/storage/StorageUtilTest.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
*/
1919
package org.apache.polaris.core.storage;
2020

21+
import java.util.Map;
22+
import org.apache.polaris.core.entity.table.IcebergTableLikeEntity;
2123
import org.assertj.core.api.Assertions;
2224
import org.junit.jupiter.api.Test;
2325
import org.junit.jupiter.params.ParameterizedTest;
@@ -66,4 +68,44 @@ public void testAuthorityWithPort() {
6668
Assertions.assertThat(StorageUtil.getBucket("s3://bucket:8080/path/file.txt"))
6769
.isEqualTo("bucket:8080");
6870
}
71+
72+
@Test
73+
public void getLocationsAllowedToBeAccessed() {
74+
Assertions.assertThat(StorageUtil.getLocationsAllowedToBeAccessed(null, Map.of())).isEmpty();
75+
Assertions.assertThat(StorageUtil.getLocationsAllowedToBeAccessed("", Map.of())).isNotEmpty();
76+
Assertions.assertThat(StorageUtil.getLocationsAllowedToBeAccessed("/foo/", Map.of()))
77+
.contains("/foo/");
78+
Assertions.assertThat(
79+
StorageUtil.getLocationsAllowedToBeAccessed(
80+
"/foo/",
81+
Map.of(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY, "/foo/")))
82+
.contains("/foo/");
83+
Assertions.assertThat(
84+
StorageUtil.getLocationsAllowedToBeAccessed(
85+
"/foo/",
86+
Map.of(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY, "/bar/")))
87+
.contains("/foo/", "/bar/");
88+
Assertions.assertThat(
89+
StorageUtil.getLocationsAllowedToBeAccessed(
90+
"/foo/",
91+
Map.of(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY, "/foo/bar/")))
92+
.contains("/foo/");
93+
Assertions.assertThat(
94+
StorageUtil.getLocationsAllowedToBeAccessed(
95+
"/foo/bar/",
96+
Map.of(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY, "/foo/")))
97+
.contains("/foo/");
98+
Assertions.assertThat(
99+
StorageUtil.getLocationsAllowedToBeAccessed(
100+
"/foo/bar/",
101+
Map.of(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY, "/foo/")))
102+
.contains("/foo/");
103+
Assertions.assertThat(
104+
StorageUtil.getLocationsAllowedToBeAccessed(
105+
"/1/",
106+
Map.of(
107+
IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY, "/2/",
108+
IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY, "/3/")))
109+
.contains("/1/", "/2/", "/3/");
110+
}
69111
}

runtime/service/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalog.java

Lines changed: 6 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import java.util.Arrays;
3636
import java.util.Collections;
3737
import java.util.HashMap;
38-
import java.util.HashSet;
3938
import java.util.List;
4039
import java.util.Locale;
4140
import java.util.Map;
@@ -126,6 +125,7 @@
126125
import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo;
127126
import org.apache.polaris.core.storage.PolarisStorageIntegration;
128127
import org.apache.polaris.core.storage.StorageLocation;
128+
import org.apache.polaris.core.storage.StorageUtil;
129129
import org.apache.polaris.core.storage.cache.StorageCredentialCache;
130130
import org.apache.polaris.service.catalog.SupportsNotifications;
131131
import org.apache.polaris.service.catalog.common.LocationUtils;
@@ -379,32 +379,6 @@ private String defaultNamespaceLocation(Namespace namespace) {
379379
}
380380
}
381381

382-
private Set<String> getLocationsAllowedToBeAccessed(TableMetadata tableMetadata) {
383-
Set<String> locations = new HashSet<>();
384-
locations.add(tableMetadata.location());
385-
if (tableMetadata
386-
.properties()
387-
.containsKey(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY)) {
388-
locations.add(
389-
tableMetadata
390-
.properties()
391-
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY));
392-
}
393-
if (tableMetadata
394-
.properties()
395-
.containsKey(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY)) {
396-
locations.add(
397-
tableMetadata
398-
.properties()
399-
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY));
400-
}
401-
return locations;
402-
}
403-
404-
private Set<String> getLocationsAllowedToBeAccessed(ViewMetadata viewMetadata) {
405-
return Set.of(viewMetadata.location());
406-
}
407-
408382
@Override
409383
public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) {
410384
TableOperations ops = newTableOps(tableIdentifier);
@@ -870,7 +844,7 @@ public AccessConfig getAccessConfig(
870844
storageCredentialCache,
871845
getCredentialVendor(),
872846
tableIdentifier,
873-
getLocationsAllowedToBeAccessed(tableMetadata),
847+
StorageUtil.getLocationsAllowedToBeAccessed(tableMetadata),
874848
storageActions,
875849
storageInfo.get());
876850
}
@@ -1473,7 +1447,7 @@ public void doCommit(TableMetadata base, TableMetadata metadata) {
14731447
tableFileIO =
14741448
loadFileIOForTableLike(
14751449
tableIdentifier,
1476-
getLocationsAllowedToBeAccessed(metadata),
1450+
StorageUtil.getLocationsAllowedToBeAccessed(metadata),
14771451
resolvedStorageEntity,
14781452
new HashMap<>(metadata.properties()),
14791453
Set.of(
@@ -1495,24 +1469,8 @@ public void doCommit(TableMetadata base, TableMetadata metadata) {
14951469
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY))) {
14961470
// If location is changing then we must validate that the requested location is valid
14971471
// for the storage configuration inherited under this entity's path.
1498-
Set<String> dataLocations = new HashSet<>();
1499-
dataLocations.add(metadata.location());
1500-
if (metadata.properties().get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY)
1501-
!= null) {
1502-
dataLocations.add(
1503-
metadata
1504-
.properties()
1505-
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY));
1506-
}
1507-
if (metadata
1508-
.properties()
1509-
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY)
1510-
!= null) {
1511-
dataLocations.add(
1512-
metadata
1513-
.properties()
1514-
.get(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY));
1515-
}
1472+
Set<String> dataLocations =
1473+
StorageUtil.getLocationsAllowedToBeAccessed(metadata.location(), metadata.properties());
15161474
validateLocationsForTableLike(tableIdentifier, dataLocations, resolvedStorageEntity);
15171475
// also validate that the table location doesn't overlap an existing table
15181476
dataLocations.forEach(
@@ -1873,7 +1831,7 @@ public void doCommit(ViewMetadata base, ViewMetadata metadata) {
18731831
viewFileIO =
18741832
loadFileIOForTableLike(
18751833
identifier,
1876-
getLocationsAllowedToBeAccessed(metadata),
1834+
StorageUtil.getLocationsAllowedToBeAccessed(metadata),
18771835
resolvedStorageEntity,
18781836
tableProperties,
18791837
Set.of(PolarisStorageActions.READ, PolarisStorageActions.WRITE));
@@ -2616,16 +2574,6 @@ protected FileIO loadFileIO(String ioImpl, Map<String, String> properties) {
26162574
callContext, ioImpl, properties, identifier, locations, storageActions, resolvedPath);
26172575
}
26182576

2619-
private void blockedUserSpecifiedWriteLocation(Map<String, String> properties) {
2620-
if (properties != null
2621-
&& (properties.containsKey(IcebergTableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY)
2622-
|| properties.containsKey(
2623-
IcebergTableLikeEntity.USER_SPECIFIED_WRITE_METADATA_LOCATION_KEY))) {
2624-
throw new ForbiddenException(
2625-
"Delegate access to table with user-specified write location is temporarily not supported.");
2626-
}
2627-
}
2628-
26292577
private int getMaxMetadataRefreshRetries() {
26302578
return callContext
26312579
.getRealmConfig()

0 commit comments

Comments
 (0)