Skip to content

Commit 0c1e13f

Browse files
Enmk authored and zvonand committed
Merge pull request #688 from Altinity/s3-roles
Role-based S3 access
1 parent efd42b3 commit 0c1e13f

File tree

9 files changed

+197
-7
lines changed

9 files changed

+197
-7
lines changed

contrib/aws-cmake/CMakeLists.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
5757
SET(AWS_SDK_KMS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-kms")
5858
SET(AWS_SDK_GLUE_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-glue")
5959

60+
SET(AWS_SDK_IDENTITY_MANAGEMENT_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-identity-management")
61+
SET(AWS_SDK_STS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-sts")
62+
SET(AWS_SDK_COGNITO_IDENTITY_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-cognito-identity")
63+
6064
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
6165
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
6266
SET(AWS_CHECKSUMS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums")
@@ -145,6 +149,34 @@ list(APPEND AWS_SOURCES ${AWS_SDK_S3_SRC})
145149
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_S3_DIR}/include/")
146150

147151

152+
# aws-cpp-sdk-identity-management
153+
file(GLOB AWS_SDK_IDENTITY_MANAGEMENT_SRC
154+
"${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/source/auth/*.cpp"
155+
)
156+
157+
list(APPEND AWS_SOURCES ${AWS_SDK_IDENTITY_MANAGEMENT_SRC})
158+
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/include/")
159+
160+
161+
# aws-cpp-sdk-sts
162+
file(GLOB AWS_SDK_STS_SRC
163+
"${AWS_SDK_STS_DIR}/source/*.cpp"
164+
"${AWS_SDK_STS_DIR}/source/model/*.cpp"
165+
)
166+
167+
list(APPEND AWS_SOURCES ${AWS_SDK_STS_SRC})
168+
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_STS_DIR}/include/")
169+
170+
# aws-cpp-sdk-cognito-identity
171+
file(GLOB AWS_SDK_COGNITO_IDENTITY_SRC
172+
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/*.cpp"
173+
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/model/*.cpp"
174+
)
175+
176+
list(APPEND AWS_SOURCES ${AWS_SDK_COGNITO_IDENTITY_SRC})
177+
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_SDK_COGNITO_IDENTITY_DIR}/include/")
178+
179+
148180
if(CLICKHOUSE_CLOUD)
149181
# aws-cpp-sdk-kms
150182
file(GLOB AWS_SDK_KMS_SRC

contrib/sparse-checkout/update-aws.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
66
echo '/*' > $FILES_TO_CHECKOUT
77
echo '!/*/*' >> $FILES_TO_CHECKOUT
88
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
9+
echo '/src/aws-cpp-sdk-identity-management/*' >> $FILES_TO_CHECKOUT
910
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
1011
echo '/generated/src/aws-cpp-sdk-aws/*' >> $FILES_TO_CHECKOUT
1112
echo '/generated/src/aws-cpp-sdk-glue/*' >> $FILES_TO_CHECKOUT
1213
echo '/generated/src/aws-cpp-sdk-kms/*' >> $FILES_TO_CHECKOUT
14+
echo '/generated/src/aws-cpp-sdk-sts/*' >> $FILES_TO_CHECKOUT
15+
echo '/generated/src/aws-cpp-sdk-cognito-identity/*' >> $FILES_TO_CHECKOUT
1316

1417
git config core.sparsecheckout true
1518
git checkout $1

docs/en/sql-reference/table-functions/s3.md

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
2222

2323
## Syntax {#syntax}
2424

25-
```sql
26-
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method],[,headers])
25+
```sql
26+
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method] [,headers] [,extra_credentials])
2727
s3(named_collection[, option=value [,..]])
2828
```
2929

@@ -47,6 +47,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id`
4747
| `structure` | Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. |
4848
| `compression_method` | Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension. |
4949
| `headers` | Parameter is optional. Allows headers to be passed in the S3 request. Pass in the format `headers(key=value)` e.g. `headers('x-amz-request-payer' = 'requester')`. |
50+
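| `extra_credentials` | Parameter is optional. Specifies the role ARN and, optionally, the role session name and STS endpoint override used for AssumeRole (see the Role Assumption section below). Pass in the format `extra_credentials(key=value)`, e.g. `extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001')`. |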
5051

5152
:::note GCS
5253
The GCS url is in this format as the endpoint for the Google XML API is different than the JSON API:
@@ -280,7 +281,35 @@ Once configured, a `roleARN` can be passed to the s3 function via an `extra_cred
280281
SELECT count() FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/mta/*.tsv','CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/ClickHouseAccessRole-001'))
281282
```
282283

283-
Further examples can be found [here](/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role)
284+
## Role Assumption
285+
286+
ClickHouse supports assuming an AWS IAM role using a set of AWS credentials (`access_key_id`, `secret_access_key`, `session_token`).
287+
This allows ClickHouse to obtain temporary credentials for accessing an S3 bucket, even if the original credentials do not have direct access.
288+
289+
For example, if the provided credentials have permission to assume a role but lack direct access to the S3 bucket, ClickHouse will first request temporary credentials from AWS STS and then use those credentials to access S3.
290+
291+
For more details on role assumption, see the [AWS AssumeRole documentation](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).
292+
293+
To enable role assumption, pass parameters via the `extra_credentials` argument of the `s3` function. The following keys are supported:
294+
295+
* `role_arn` (required for role assumption) — ARN of the IAM role to assume. **If this key is not provided, ClickHouse will not attempt to assume a role and will use the original credentials as-is.**
296+
* `role_session_name` (optional) — Custom session name to include in the AssumeRole request.
297+
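* `sts_endpoint_override` (optional) — Overrides the default AWS STS endpoint (`https://sts.amazonaws.com`). Specify the full URL including the scheme, e.g. `http://mock-sts:8080`; any scheme other than `https` is treated as plain HTTP. Useful for testing with a mock or when using another STS-compatible service.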
298+
299+
```sql
300+
SELECT count() FROM s3(
301+
'<s3_bucket_uri>/*.csv',
302+
'<access_key_id>',
303+
'<secret_access_key>',
304+
'CSVWithNames',
305+
extra_credentials(
306+
role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001',
307+
role_session_name = 'ClickHouseSession',
308+
sts_endpoint_override = 'http://mock-sts:8080'
309+
)
310+
)
311+
```
312+
Further examples can be found [here](/docs/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role)
284313

285314
## Working with archives {#working-with-archives}
286315

src/Disks/ObjectStorages/S3/diskSettings.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ namespace S3AuthSetting
5050
extern const S3AuthSettingsString secret_access_key;
5151
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
5252
extern const S3AuthSettingsString session_token;
53+
extern const S3AuthSettingsString role_arn;
54+
extern const S3AuthSettingsString role_session_name;
55+
extern const S3AuthSettingsString sts_endpoint_override;
5356
extern const S3AuthSettingsBool use_adaptive_timeouts;
5457
extern const S3AuthSettingsBool use_environment_credentials;
5558
extern const S3AuthSettingsBool use_insecure_imds_request;
@@ -175,6 +178,9 @@ std::unique_ptr<S3::Client> getClient(
175178
auth_settings[S3AuthSetting::use_insecure_imds_request],
176179
auth_settings[S3AuthSetting::expiration_window_seconds],
177180
auth_settings[S3AuthSetting::no_sign_request],
181+
auth_settings[S3AuthSetting::role_arn],
182+
auth_settings[S3AuthSetting::role_session_name],
183+
auth_settings[S3AuthSetting::sts_endpoint_override]
178184
};
179185

180186
return S3::ClientFactory::instance().create(

src/IO/S3/Credentials.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ namespace S3
3939
# include <aws/core/utils/UUID.h>
4040
# include <aws/core/http/HttpClientFactory.h>
4141

42+
# include <aws/sts/STSClient.h>
43+
# include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
44+
4245
# include <aws/core/utils/HashingUtils.h>
4346
# include <aws/core/platform/FileSystem.h>
4447

@@ -708,7 +711,35 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
708711
/// because it's manually defined by the user
709712
if (!credentials.IsEmpty())
710713
{
711-
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
714+
if (credentials_configuration.role_arn.empty())
715+
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
716+
else
717+
{
718+
auto sts_client_config = Aws::STS::STSClientConfiguration();
719+
720+
if (!credentials_configuration.sts_endpoint_override.empty())
721+
{
722+
auto endpoint_uri = Poco::URI(credentials_configuration.sts_endpoint_override);
723+
724+
String url_without_scheme = endpoint_uri.getHost();
725+
if (endpoint_uri.getPort() != 0)
726+
url_without_scheme += ":" + std::to_string(endpoint_uri.getPort());
727+
728+
sts_client_config.endpointOverride = url_without_scheme;
729+
sts_client_config.scheme = endpoint_uri.getScheme() == "https" ? Aws::Http::Scheme::HTTPS : Aws::Http::Scheme::HTTP;
730+
}
731+
732+
AddProvider(std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
733+
credentials_configuration.role_arn,
734+
/* sessionName */ credentials_configuration.role_session_name,
735+
/* externalId */ Aws::String(),
736+
/* loadFrequency */ Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS,
737+
std::make_shared<Aws::STS::STSClient>(credentials,
738+
/* endpointProvider */ Aws::MakeShared<Aws::STS::STSEndpointProvider>(Aws::STS::STSClient::ALLOCATION_TAG),
739+
/* clientConfiguration */ sts_client_config)
740+
)
741+
);
742+
}
712743
return;
713744
}
714745

src/IO/S3/Credentials.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ struct CredentialsConfiguration
174174
bool use_insecure_imds_request = false;
175175
uint64_t expiration_window_seconds = DEFAULT_EXPIRATION_WINDOW_SECONDS;
176176
bool no_sign_request = false;
177+
String role_arn = ""; // NOLINT(*-redundant-string-init)
178+
String role_session_name = ""; // NOLINT(*-redundant-string-init)
179+
String sts_endpoint_override = ""; // NOLINT(*-redundant-string-init)
177180
};
178181

179182
class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain

src/IO/S3AuthSettings.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ namespace ErrorCodes
3636
DECLARE(String, secret_access_key, "", "", 0) \
3737
DECLARE(String, session_token, "", "", 0) \
3838
DECLARE(String, region, "", "", 0) \
39-
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0)
39+
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0) \
40+
DECLARE(String, role_arn, "", "", 0) \
41+
DECLARE(String, role_session_name, "", "", 0) \
42+
DECLARE(String, sts_endpoint_override, "", "", 0)
4043

4144
#define CLIENT_SETTINGS_LIST(M, ALIAS) \
4245
CLIENT_SETTINGS(M, ALIAS) \

src/Storages/ObjectStorage/S3/Configuration.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
1515
#include <Disks/ObjectStorages/S3/diskSettings.h>
1616

17+
#include <Interpreters/evaluateConstantExpression.h>
18+
1719
#include <Parsers/ASTFunction.h>
1820
#include <Parsers/ASTIdentifier.h>
1921
#include <Parsers/ASTLiteral.h>
@@ -46,6 +48,9 @@ namespace S3AuthSetting
4648
extern const S3AuthSettingsString secret_access_key;
4749
extern const S3AuthSettingsString session_token;
4850
extern const S3AuthSettingsBool use_environment_credentials;
51+
extern const S3AuthSettingsString role_arn;
52+
extern const S3AuthSettingsString role_session_name;
53+
extern const S3AuthSettingsString sts_endpoint_override;
4954
}
5055

5156
namespace ErrorCodes
@@ -182,8 +187,66 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect
182187
keys = {url.key};
183188
}
184189

190+
/// Extracts the optional `extra_credentials(key = value, ...)` argument from `args`.
///
/// Scans `args` for an ASTFunction named "extra_credentials". Each of its children must be an
/// `equals` function with exactly two arguments; both sides are passed through
/// evaluateConstantExpressionOrIdentifierAsLiteral and must yield String values. Every pair is
/// appended to `extra_credentials_from_ast`, and the matched argument is then erased from `args`.
///
/// Throws BAD_ARGUMENTS if `extra_credentials` occurs more than once, if an entry is not of the
/// form key=value, or if a key or value is not a String.
///
/// NOTE(review): key names are not validated here and duplicate keys are appended as-is.
void StorageS3Configuration::extractExtraCreds(ASTs & args, ContextPtr context)
191+
{
192+
/// Position of the extra_credentials argument; args.end() means "not found".
ASTs::iterator extra_creds_it = args.end();
193+
194+
for (auto * arg_it = args.begin(); arg_it != args.end(); ++arg_it)
195+
{
196+
const auto * extra_creds_ast_function = (*arg_it)->as<ASTFunction>();
197+
if (extra_creds_ast_function && extra_creds_ast_function->name == "extra_credentials")
198+
{
199+
/// A second occurrence is rejected rather than merged with the first one.
if (extra_creds_it != args.end())
200+
throw Exception(
201+
ErrorCodes::BAD_ARGUMENTS,
202+
"S3 table function can have only one extra_credentials argument");
203+
204+
const auto * extra_creds_function_args_expr = assert_cast<const ASTExpressionList *>(extra_creds_ast_function->arguments.get());
205+
/// NOTE(review): plain `auto` copies the children container here; a const reference would likely suffice.
auto extra_creds_function_args = extra_creds_function_args_expr->children;
206+
207+
for (auto & extra_cred_arg : extra_creds_function_args)
208+
{
209+
/// Each entry has to be parsed as the `equals` function, i.e. written as key = value.
const auto * extra_cred_ast = extra_cred_arg->as<ASTFunction>();
210+
if (!extra_cred_ast || extra_cred_ast->name != "equals")
211+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "extra_credentials argument is incorrect: shall be key=value");
212+
213+
const auto * extra_cred_args_expr = assert_cast<const ASTExpressionList *>(extra_cred_ast->arguments.get());
214+
auto extra_cred_args = extra_cred_args_expr->children;
215+
if (extra_cred_args.size() != 2)
216+
throw Exception(
217+
ErrorCodes::BAD_ARGUMENTS,
218+
"extra_credentials argument is incorrect: expected 2 arguments, got {}",
219+
extra_cred_args.size());
220+
221+
/// Left-hand side of `=`: the credential name.
auto ast_literal = evaluateConstantExpressionOrIdentifierAsLiteral(extra_cred_args[0], context);
222+
auto arg_name_value = ast_literal->as<ASTLiteral>()->value;
223+
if (arg_name_value.getType() != Field::Types::Which::String)
224+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials name");
225+
auto arg_name = arg_name_value.safeGet<String>();
226+
227+
/// Right-hand side of `=`: the credential value.
ast_literal = evaluateConstantExpressionOrIdentifierAsLiteral(extra_cred_args[1], context);
228+
auto arg_value = ast_literal->as<ASTLiteral>()->value;
229+
if (arg_value.getType() != Field::Types::Which::String)
230+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials value");
231+
232+
extra_credentials_from_ast.emplace_back(arg_name, arg_value.safeGet<String>());
233+
}
234+
235+
/// Only record the position here; the argument is erased after the loop has finished.
extra_creds_it = arg_it;
236+
/// NOTE(review): this `continue` is the last statement of the loop body, so it has no effect.
continue;
237+
}
238+
}
239+
240+
/// To avoid making unnecessary changes and avoid potential conflicts in future,
241+
/// simply remove the "extra" argument after processing if it exists.
242+
if (extra_creds_it != args.end())
243+
args.erase(extra_creds_it);
244+
}
245+
185246
void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
186247
{
248+
extractExtraCreds(args, context);
249+
187250
size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context);
188251

189252
if (count == 0 || count > getMaxNumberOfArguments(with_structure))
@@ -381,6 +444,23 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_
381444
if (no_sign_request)
382445
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;
383446

447+
if (!extra_credentials_from_ast.empty())
448+
{
449+
auto extract_extra_cred_value = [&extra_creds = this->extra_credentials_from_ast](const String & cred_name) -> String
450+
{
451+
auto role_arn_it = std::find_if(extra_creds.begin(), extra_creds.end(),
452+
[&cred_name](const HTTPHeaderEntry & entry) { return entry.name == cred_name; });
453+
if (role_arn_it != extra_creds.end())
454+
return role_arn_it->value;
455+
456+
return {};
457+
};
458+
459+
auth_settings[S3AuthSetting::role_arn] = extract_extra_cred_value("role_arn");
460+
auth_settings[S3AuthSetting::role_session_name] = extract_extra_cred_value("role_session_name");
461+
auth_settings[S3AuthSetting::sts_endpoint_override] = extract_extra_cred_value("sts_endpoint_override");
462+
}
463+
384464
static_configuration = !auth_settings[S3AuthSetting::access_key_id].value.empty() || auth_settings[S3AuthSetting::no_sign_request].changed;
385465
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;
386466

src/Storages/ObjectStorage/S3/Configuration.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class StorageS3Configuration : public StorageObjectStorage::Configuration
3737
" - url, access_key_id, secret_access_key, session_token, format, structure\n"
3838
" - url, access_key_id, secret_access_key, format, structure, compression_method\n"
3939
" - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n"
40-
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
40+
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`) and extra credentials for role assumption (`extra_credentials(role_arn=value, role_session_name=value)`)";
4141

4242
/// All possible signatures for S3 storage without structure argument (for example for S3 table engine).
4343
static constexpr auto max_number_of_arguments_without_structure = 6;
@@ -54,7 +54,7 @@ class StorageS3Configuration : public StorageObjectStorage::Configuration
5454
" - url, access_key_id, secret_access_key, session_token, format\n"
5555
" - url, access_key_id, secret_access_key, format, compression_method\n"
5656
" - url, access_key_id, secret_access_key, session_token, format, compression_method\n"
57-
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
57+
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`) and extra credentials for role assumption (`extra_credentials(role_arn=value, role_session_name=value)`)";
5858

5959
StorageS3Configuration() = default;
6060

@@ -96,6 +96,7 @@ class StorageS3Configuration : public StorageObjectStorage::Configuration
9696
bool with_structure) override;
9797

9898
private:
99+
void extractExtraCreds(ASTs & args, ContextPtr context);
99100
void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override;
100101
void fromAST(ASTs & args, ContextPtr context, bool with_structure) override;
101102

@@ -105,6 +106,8 @@ class StorageS3Configuration : public StorageObjectStorage::Configuration
105106
S3::S3AuthSettings auth_settings;
106107
S3::S3RequestSettings request_settings;
107108
HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration.
109+
HTTPHeaderEntries extra_credentials_from_ast; /// Avoid duplicated entities: HTTPHeaderEntry structure matches our needs here, use it.
110+
108111
/// If s3 configuration was passed from ast, then it is static.
109112
/// If from config - it can be changed with config reload.
110113
bool static_configuration = true;

0 commit comments

Comments
 (0)