Skip to content

Commit db183de

Browse files
Enmkzvonand
authored andcommitted
Merge pull request #688 from Altinity/s3-roles
Role-based S3 access
1 parent c4d9b13 commit db183de

File tree

10 files changed

+259
-5
lines changed

10 files changed

+259
-5
lines changed

contrib/aws-cmake/CMakeLists.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
6060
SET(AWS_SDK_KMS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-kms")
6161
SET(AWS_SDK_GLUE_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-glue")
6262

63+
SET(AWS_SDK_IDENTITY_MANAGEMENT_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-identity-management")
64+
SET(AWS_SDK_STS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-sts")
65+
SET(AWS_SDK_COGNITO_IDENTITY_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-cognito-identity")
66+
6367
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
6468
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
6569
SET(AWS_CHECKSUMS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums")
@@ -146,6 +150,34 @@ list(APPEND AWS_SOURCES ${AWS_SDK_S3_SRC})
146150
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_S3_DIR}/include/")
147151

148152

153+
# aws-cpp-sdk-identity-management
154+
file(GLOB AWS_SDK_IDENTITY_MANAGEMENT_SRC
155+
"${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/source/auth/*.cpp"
156+
)
157+
158+
list(APPEND AWS_SOURCES ${AWS_SDK_IDENTITY_MANAGEMENT_SRC})
159+
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/include/")
160+
161+
162+
# aws-cpp-sdk-sts
163+
file(GLOB AWS_SDK_STS_SRC
164+
"${AWS_SDK_STS_DIR}/source/*.cpp"
165+
"${AWS_SDK_STS_DIR}/source/model/*.cpp"
166+
)
167+
168+
list(APPEND AWS_SOURCES ${AWS_SDK_STS_SRC})
169+
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_STS_DIR}/include/")
170+
171+
# aws-cpp-sdk-cognito-identity
172+
file(GLOB AWS_SDK_COGNITO_IDENTITY_SRC
173+
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/*.cpp"
174+
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/model/*.cpp"
175+
)
176+
177+
list(APPEND AWS_SOURCES ${AWS_SDK_COGNITO_IDENTITY_SRC})
178+
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_SDK_COGNITO_IDENTITY_DIR}/include/")
179+
180+
149181
if(CLICKHOUSE_CLOUD)
150182
# aws-cpp-sdk-kms
151183
file(GLOB AWS_SDK_KMS_SRC

contrib/sparse-checkout/update-aws.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
66
echo '/*' > $FILES_TO_CHECKOUT
77
echo '!/*/*' >> $FILES_TO_CHECKOUT
88
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
9+
echo '/src/aws-cpp-sdk-identity-management/*' >> $FILES_TO_CHECKOUT
910
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
1011
echo '/generated/src/aws-cpp-sdk-aws/*' >> $FILES_TO_CHECKOUT
1112
echo '/generated/src/aws-cpp-sdk-glue/*' >> $FILES_TO_CHECKOUT
13+
echo '/generated/src/aws-cpp-sdk-sts/*' >> $FILES_TO_CHECKOUT
14+
echo '/generated/src/aws-cpp-sdk-cognito-identity/*' >> $FILES_TO_CHECKOUT
1215

1316
git config core.sparsecheckout true
1417
git checkout $1

docs/en/sql-reference/table-functions/s3.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
2323
**Syntax**
2424

2525
``` sql
26-
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method],[,headers])
26+
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method] [,headers] [,extra_credentials])
2727
s3(named_collection[, option=value [,..]])
2828
```
2929

@@ -52,6 +52,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id`
5252
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
5353
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension.
5454
- `headers` - Parameter is optional. Allows headers to be passed in the S3 request. Pass in the format `headers(key=value)` e.g. `headers('x-amz-request-payer' = 'requester')`.
55+
- `extra_credentials` - Parameter is optional. Allows specifying the role ARN, the role session name and an STS endpoint override for AssumeRole (see below). Pass in the format `extra_credentials(key=value)`.
5556

5657
Arguments can also be passed using [named collections](operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
5758

@@ -273,6 +274,35 @@ Once configured, a `roleARN` can be passed to the s3 function via an `extra_cred
273274
SELECT count() FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/mta/*.tsv','CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/ClickHouseAccessRole-001'))
274275
```
275276

277+
278+
## Role Assumption
279+
280+
ClickHouse supports assuming an AWS IAM role using a set of AWS credentials (`access_key_id`, `secret_access_key`, `session_token`).
281+
This allows ClickHouse to obtain temporary credentials for accessing an S3 bucket, even if the original credentials do not have direct access.
282+
283+
For example, if the provided credentials have permission to assume a role but lack direct access to the S3 bucket, ClickHouse will first request temporary credentials from AWS STS and then use those credentials to access S3.
284+
285+
For more details on role assumption, read [AWS AssumeRole documentation](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).
286+
287+
To enable role assumption, pass parameters via the `extra_credentials` argument of the `s3` function. The following keys are supported:
288+
289+
* `role_arn` (required) — ARN of the IAM role to assume. **If this key is not provided, ClickHouse will not attempt to assume a role and will use the original credentials as-is.**
290+
* `role_session_name` (optional) — Custom session name to include in the AssumeRole request.
291+
* `sts_endpoint_override` (optional) — Overrides the default AWS STS endpoint (https://sts.amazonaws.com). Useful for testing with a mock or when using another STS-compatible service.
292+
293+
```sql
294+
SELECT count() FROM s3(
295+
'<s3_bucket_uri>/*.csv',
296+
access_key_id,
297+
secret_access_key,
298+
'CSVWithNames',
299+
extra_credentials(
300+
role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001',
301+
role_session_name = 'ClickHouseSession',
302+
sts_endpoint_override = 'http://mock-sts:8080'
303+
)
304+
)
305+
```
276306
Further examples can be found [here](/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role)
277307

278308
## Working with archives {#working-with-archives}

docs/ru/sql-reference/table-functions/s3.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,22 @@ LIMIT 2;
9898
└───────┴───────┘
9999
```
100100

101+
## Role Assumption
102+
103+
ClickHouse может использовать `AssumeRole` для получения временных учетных данных AWS. Это позволяет выполнять запросы к S3 из-под IAM-ролей без передачи постоянных ключей, которым выданы широкие права (`access_key_id`, `secret_access_key`).
104+
105+
Так, если у исходных учетных данных нет прямого доступа к S3, но они имеют право выполнять `AssumeRole`, ClickHouse сначала запрашивает временные учетные данные через AWS STS, а затем использует их для работы с S3.
106+
107+
Подробнее про Assume Role можно прочитать в [документации AWS](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).
108+
109+
Чтобы указать роль, которую нужно использовать, необходимо передать `role_arn` через параметр `extra_credentials`:
110+
111+
```sql
112+
SELECT count() FROM s3('<s3_bucket_uri>/*.tsv',access_key_id,secret_access_key,'CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001'))
113+
```
114+
115+
Там же можно указать `role_session_name`, если необходимо.
116+
101117
## Примеры использования {#usage-examples}
102118

103119
Предположим, у нас есть несколько файлов со следующими URI на S3:

src/Disks/ObjectStorages/S3/diskSettings.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ namespace S3AuthSetting
4949
extern const S3AuthSettingsString secret_access_key;
5050
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
5151
extern const S3AuthSettingsString session_token;
52+
extern const S3AuthSettingsString role_arn;
53+
extern const S3AuthSettingsString role_session_name;
54+
extern const S3AuthSettingsString sts_endpoint_override;
5255
extern const S3AuthSettingsBool use_adaptive_timeouts;
5356
extern const S3AuthSettingsBool use_environment_credentials;
5457
extern const S3AuthSettingsBool use_insecure_imds_request;
@@ -169,6 +172,9 @@ std::unique_ptr<S3::Client> getClient(
169172
auth_settings[S3AuthSetting::use_insecure_imds_request],
170173
auth_settings[S3AuthSetting::expiration_window_seconds],
171174
auth_settings[S3AuthSetting::no_sign_request],
175+
auth_settings[S3AuthSetting::role_arn],
176+
auth_settings[S3AuthSetting::role_session_name],
177+
auth_settings[S3AuthSetting::sts_endpoint_override]
172178
};
173179

174180
return S3::ClientFactory::instance().create(

src/IO/S3/Credentials.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ namespace S3
3939
# include <aws/core/utils/UUID.h>
4040
# include <aws/core/http/HttpClientFactory.h>
4141

42+
# include <aws/sts/STSClient.h>
43+
# include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
44+
4245
# include <aws/core/utils/HashingUtils.h>
4346
# include <aws/core/platform/FileSystem.h>
4447

@@ -689,7 +692,35 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
689692
/// because it's manually defined by the user
690693
if (!credentials.IsEmpty())
691694
{
692-
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
695+
if (credentials_configuration.role_arn.empty())
696+
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
697+
else
698+
{
699+
auto sts_client_config = Aws::STS::STSClientConfiguration();
700+
701+
if (!credentials_configuration.sts_endpoint_override.empty())
702+
{
703+
auto endpoint_uri = Poco::URI(credentials_configuration.sts_endpoint_override);
704+
705+
String url_without_scheme = endpoint_uri.getHost();
706+
if (endpoint_uri.getPort() != 0)
707+
url_without_scheme += ":" + std::to_string(endpoint_uri.getPort());
708+
709+
sts_client_config.endpointOverride = url_without_scheme;
710+
sts_client_config.scheme = endpoint_uri.getScheme() == "https" ? Aws::Http::Scheme::HTTPS : Aws::Http::Scheme::HTTP;
711+
}
712+
713+
AddProvider(std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
714+
credentials_configuration.role_arn,
715+
/* sessionName */ credentials_configuration.role_session_name,
716+
/* externalId */ Aws::String(),
717+
/* loadFrequency */ Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS,
718+
std::make_shared<Aws::STS::STSClient>(credentials,
719+
/* endpointProvider */ Aws::MakeShared<Aws::STS::STSEndpointProvider>(Aws::STS::STSClient::ALLOCATION_TAG),
720+
/* clientConfiguration */ sts_client_config)
721+
)
722+
);
723+
}
693724
return;
694725
}
695726

src/IO/S3/Credentials.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ struct CredentialsConfiguration
173173
bool use_insecure_imds_request = false;
174174
uint64_t expiration_window_seconds = DEFAULT_EXPIRATION_WINDOW_SECONDS;
175175
bool no_sign_request = false;
176+
String role_arn = ""; // NOLINT(*-redundant-string-init)
177+
String role_session_name = ""; // NOLINT(*-redundant-string-init)
178+
String sts_endpoint_override = ""; // NOLINT(*-redundant-string-init)
176179
};
177180

178181
class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain

src/IO/S3AuthSettings.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ namespace ErrorCodes
3636
DECLARE(String, secret_access_key, "", "", 0) \
3737
DECLARE(String, session_token, "", "", 0) \
3838
DECLARE(String, region, "", "", 0) \
39-
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0)
39+
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0) \
40+
DECLARE(String, role_arn, "", "", 0) \
41+
DECLARE(String, role_session_name, "", "", 0) \
42+
DECLARE(String, sts_endpoint_override, "", "", 0)
4043

4144
#define CLIENT_SETTINGS_LIST(M, ALIAS) \
4245
CLIENT_SETTINGS(M, ALIAS) \

src/Storages/ObjectStorage/S3/Configuration.cpp

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
1414
#include <Disks/ObjectStorages/S3/diskSettings.h>
1515

16+
#include <Interpreters/evaluateConstantExpression.h>
17+
1618
#include <Parsers/ASTFunction.h>
1719
#include <Parsers/ASTIdentifier.h>
1820
#include <Parsers/ASTLiteral.h>
@@ -45,6 +47,9 @@ namespace S3AuthSetting
4547
extern const S3AuthSettingsString secret_access_key;
4648
extern const S3AuthSettingsString session_token;
4749
extern const S3AuthSettingsBool use_environment_credentials;
50+
extern const S3AuthSettingsString role_arn;
51+
extern const S3AuthSettingsString role_session_name;
52+
extern const S3AuthSettingsString sts_endpoint_override;
4853
}
4954

5055
namespace ErrorCodes
@@ -109,6 +114,7 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot
109114
url = other.url;
110115
static_configuration = other.static_configuration;
111116
headers_from_ast = other.headers_from_ast;
117+
extra_credentials_from_ast = other.extra_credentials_from_ast;
112118
keys = other.keys;
113119
}
114120

@@ -190,8 +196,66 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect
190196
keys = {url.key};
191197
}
192198

199+
/// Looks for an `extra_credentials(key = value, ...)` pseudo-function among the table function
/// arguments, appends every key/value pair to `extra_credentials_from_ast` and then removes that
/// argument from `args`, so the positional argument parsing that follows never sees it.
///
/// Throws BAD_ARGUMENTS when:
///  - more than one `extra_credentials(...)` argument is present;
///  - an element of `extra_credentials` is not a `key = value` (i.e. `equals`) expression;
///  - `equals` does not have exactly two operands;
///  - the key or the value does not evaluate to a String literal.
///
/// Key names are not validated here; unknown keys are stored as-is.
void StorageS3Configuration::extractExtraCreds(ASTs & args, ContextPtr context)
{
    ASTs::iterator extra_creds_it = args.end();

    for (auto * arg_it = args.begin(); arg_it != args.end(); ++arg_it)
    {
        const auto * extra_creds_ast_function = (*arg_it)->as<ASTFunction>();
        if (!extra_creds_ast_function || extra_creds_ast_function->name != "extra_credentials")
            continue;

        if (extra_creds_it != args.end())
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "S3 table function can have only one extra_credentials argument");

        const auto * extra_creds_function_args_expr = assert_cast<const ASTExpressionList *>(extra_creds_ast_function->arguments.get());

        /// Iterate the children in place: the list is only read, so no copy of the vector is needed.
        for (const auto & extra_cred_arg : extra_creds_function_args_expr->children)
        {
            const auto * extra_cred_ast = extra_cred_arg->as<ASTFunction>();
            if (!extra_cred_ast || extra_cred_ast->name != "equals")
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "extra_credentials argument is incorrect: shall be key=value");

            const auto * extra_cred_args_expr = assert_cast<const ASTExpressionList *>(extra_cred_ast->arguments.get());
            const auto & extra_cred_args = extra_cred_args_expr->children;
            if (extra_cred_args.size() != 2)
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "extra_credentials argument is incorrect: expected 2 arguments, got {}",
                    extra_cred_args.size());

            /// Each side gets its own literal holder so that the references to `value` below
            /// stay valid for as long as they are used.
            auto name_literal = evaluateConstantExpressionOrIdentifierAsLiteral(extra_cred_args[0], context);
            const auto & arg_name_value = name_literal->as<ASTLiteral>()->value;
            if (arg_name_value.getType() != Field::Types::Which::String)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials name");
            auto arg_name = arg_name_value.safeGet<String>();

            auto value_literal = evaluateConstantExpressionOrIdentifierAsLiteral(extra_cred_args[1], context);
            const auto & arg_value = value_literal->as<ASTLiteral>()->value;
            if (arg_value.getType() != Field::Types::Which::String)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials value");

            extra_credentials_from_ast.emplace_back(arg_name, arg_value.safeGet<String>());
        }

        /// Remember the position; erasing is deferred until the loop is over
        /// so that iteration over `args` is not disturbed.
        extra_creds_it = arg_it;
    }

    /// To avoid making unnecessary changes and avoid potential conflicts in future,
    /// simply remove the "extra" argument after processing if it exists.
    if (extra_creds_it != args.end())
        args.erase(extra_creds_it);
}
254+
193255
void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
194256
{
257+
extractExtraCreds(args, context);
258+
195259
size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context);
196260

197261
if (count == 0 || count > getMaxNumberOfArguments(with_structure))
@@ -389,6 +453,23 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_
389453
if (no_sign_request)
390454
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;
391455

456+
if (!extra_credentials_from_ast.empty())
457+
{
458+
auto extract_extra_cred_value = [&extra_creds = this->extra_credentials_from_ast](const String & cred_name) -> String
459+
{
460+
auto role_arn_it = std::find_if(extra_creds.begin(), extra_creds.end(),
461+
[&cred_name](const HTTPHeaderEntry & entry) { return entry.name == cred_name; });
462+
if (role_arn_it != extra_creds.end())
463+
return role_arn_it->value;
464+
465+
return {};
466+
};
467+
468+
auth_settings[S3AuthSetting::role_arn] = extract_extra_cred_value("role_arn");
469+
auth_settings[S3AuthSetting::role_session_name] = extract_extra_cred_value("role_session_name");
470+
auth_settings[S3AuthSetting::sts_endpoint_override] = extract_extra_cred_value("sts_endpoint_override");
471+
}
472+
392473
static_configuration = !auth_settings[S3AuthSetting::access_key_id].value.empty() || auth_settings[S3AuthSetting::no_sign_request].changed;
393474
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;
394475

@@ -586,6 +667,52 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded(
586667
}
587668
}
588669

670+
/// Builds an argument list for the `s3` table function from the current configuration:
/// the URL, followed either by `NOSIGN` or by the access key id / secret access key
/// (plus the session token, format, compression method and `extra_credentials(...)`
/// when they are set).
///
/// The `extra_credentials` and `key = value` nodes are created with their `arguments`
/// expression list populated (and registered in `children`). The parsing side reads the
/// operands through `ASTFunction::arguments`, so a function node that only had raw
/// `children` would not round-trip.
///
/// NOTE(review): format and compression_method are emitted only in the signed (non-NOSIGN)
/// branch, and compression_method is emitted positionally even when format was skipped —
/// this is kept as in the original; confirm it is intended.
ASTPtr StorageS3Configuration::createArgsWithAccessData() const
{
    /// Creates `equals(key, value)`, i.e. the AST of `key = value`, with proper `arguments`.
    auto make_key_value = [](const String & key, const String & value)
    {
        auto equals_function = std::make_shared<ASTFunction>();
        equals_function->name = "equals";
        equals_function->arguments = std::make_shared<ASTExpressionList>();
        equals_function->children.push_back(equals_function->arguments);
        equals_function->arguments->children.push_back(std::make_shared<ASTLiteral>(key));
        equals_function->arguments->children.push_back(std::make_shared<ASTLiteral>(value));
        return equals_function;
    };

    auto arguments = std::make_shared<ASTExpressionList>();

    arguments->children.push_back(std::make_shared<ASTLiteral>(url.uri_str));
    if (auth_settings[S3AuthSetting::no_sign_request])
    {
        arguments->children.push_back(std::make_shared<ASTLiteral>("NOSIGN"));
    }
    else
    {
        arguments->children.push_back(std::make_shared<ASTLiteral>(auth_settings[S3AuthSetting::access_key_id].value));
        arguments->children.push_back(std::make_shared<ASTLiteral>(auth_settings[S3AuthSetting::secret_access_key].value));
        if (!auth_settings[S3AuthSetting::session_token].value.empty())
            arguments->children.push_back(std::make_shared<ASTLiteral>(auth_settings[S3AuthSetting::session_token].value));
        if (format != "auto")
            arguments->children.push_back(std::make_shared<ASTLiteral>(format));
        if (!compression_method.empty())
            arguments->children.push_back(std::make_shared<ASTLiteral>(compression_method));

        /// Role assumption is only requested when a role ARN is configured.
        if (!auth_settings[S3AuthSetting::role_arn].value.empty())
        {
            auto extra_creds_ast_function = std::make_shared<ASTFunction>();
            extra_creds_ast_function->name = "extra_credentials";
            extra_creds_ast_function->arguments = std::make_shared<ASTExpressionList>();
            extra_creds_ast_function->children.push_back(extra_creds_ast_function->arguments);

            auto & extra_creds = extra_creds_ast_function->arguments->children;

            extra_creds.push_back(make_key_value("role_arn", auth_settings[S3AuthSetting::role_arn].value));
            extra_creds.push_back(make_key_value("role_session_name", auth_settings[S3AuthSetting::role_session_name].value));

            /// Keep the STS endpoint override together with the role, otherwise a consumer of
            /// these arguments would fall back to the default STS endpoint.
            if (!auth_settings[S3AuthSetting::sts_endpoint_override].value.empty())
                extra_creds.push_back(make_key_value("sts_endpoint_override", auth_settings[S3AuthSetting::sts_endpoint_override].value));

            arguments->children.push_back(extra_creds_ast_function);
        }
    }

    return arguments;
}
715+
589716
}
590717

591718
#endif

0 commit comments

Comments
 (0)