Skip to content

Deprecated: Role-based S3 access #688

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions contrib/aws-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
SET(AWS_SDK_KMS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-kms")

SET(AWS_SDK_IDENTITY_MANAGEMENT_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-identity-management")
SET(AWS_SDK_STS_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-sts")
SET(AWS_SDK_COGNITO_IDENTITY_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-cognito-identity")

SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
SET(AWS_CHECKSUMS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums")
Expand Down Expand Up @@ -146,6 +150,34 @@ list(APPEND AWS_SOURCES ${AWS_SDK_S3_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_S3_DIR}/include/")


# aws-cpp-sdk-identity-management
# Only the sources under source/auth are globbed; they are added to the common
# AWS_SOURCES list and the module's headers are exported through AWS_PUBLIC_INCLUDES.
file(GLOB AWS_SDK_IDENTITY_MANAGEMENT_SRC
"${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/source/auth/*.cpp"
)

list(APPEND AWS_SOURCES ${AWS_SDK_IDENTITY_MANAGEMENT_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_IDENTITY_MANAGEMENT_DIR}/include/")


# aws-cpp-sdk-sts
# Globs the top-level sources and the model/ sources of the generated STS client,
# appends them to AWS_SOURCES and exports the headers through AWS_PUBLIC_INCLUDES.
file(GLOB AWS_SDK_STS_SRC
"${AWS_SDK_STS_DIR}/source/*.cpp"
"${AWS_SDK_STS_DIR}/source/model/*.cpp"
)

list(APPEND AWS_SOURCES ${AWS_SDK_STS_SRC})
list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_STS_DIR}/include/")

# aws-cpp-sdk-cognito-identity
# Globs the top-level sources and the model/ sources of the generated Cognito Identity client.
# NOTE(review): unlike the two modules above, the headers go to AWS_PRIVATE_INCLUDES -
# presumably because Cognito is only needed to build identity-management; confirm.
file(GLOB AWS_SDK_COGNITO_IDENTITY_SRC
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/*.cpp"
"${AWS_SDK_COGNITO_IDENTITY_DIR}/source/model/*.cpp"
)

list(APPEND AWS_SOURCES ${AWS_SDK_COGNITO_IDENTITY_SRC})
list(APPEND AWS_PRIVATE_INCLUDES "${AWS_SDK_COGNITO_IDENTITY_DIR}/include/")


if(CLICKHOUSE_CLOUD)
# aws-cpp-sdk-kms
file(GLOB AWS_SDK_KMS_SRC
Expand Down
3 changes: 3 additions & 0 deletions contrib/sparse-checkout/update-aws.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/src/aws-cpp-sdk-identity-management/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-aws/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-sts/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-cognito-identity/*' >> $FILES_TO_CHECKOUT

git config core.sparsecheckout true
git checkout $1
Expand Down
31 changes: 30 additions & 1 deletion docs/en/sql-reference/table-functions/s3.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
**Syntax**

``` sql
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method],[,headers])
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method] [,headers] [,extra_credentials])
s3(named_collection[, option=value [,..]])
```

Expand Down Expand Up @@ -48,6 +48,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id`
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension.
- `headers` - Parameter is optional. Allows headers to be passed in the S3 request. Pass in the format `headers(key=value)` e.g. `headers('x-amz-request-payer' = 'requester')`.
- `extra_credentials` - Parameter is optional. Allows specifying the role ARN, the role session name and an STS endpoint override for AssumeRole (see below). Pass in the format `extra_credentials(key=value)`.

Arguments can also be passed using [named collections](/docs/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:

Expand Down Expand Up @@ -269,6 +270,34 @@ Once configured, a `roleARN` can be passed to the s3 function via an `extra_cred
SELECT count() FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/mta/*.tsv','CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/ClickHouseAccessRole-001'))
```

## Role Assumption

ClickHouse supports assuming an AWS IAM role using a set of AWS credentials (`access_key_id`, `secret_access_key`, `session_token`).
This allows ClickHouse to obtain temporary credentials for accessing an S3 bucket, even if the original credentials do not have direct access.

For example, if the provided credentials have permission to assume a role but lack direct access to the S3 bucket, ClickHouse will first request temporary credentials from AWS STS and then use those credentials to access S3.

For more details on role assumption, read [AWS AssumeRole documentation](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).

To enable role assumption, pass parameters via the `extra_credentials` argument of the `s3` function. The following keys are supported:

* `role_arn` (required) — ARN of the IAM role to assume. **If this key is not provided, ClickHouse will not attempt to assume a role and will use the original credentials as-is.**
* `role_session_name` (optional) — Custom session name to include in the AssumeRole request.
* `sts_endpoint_override` (optional) — Overrides the default AWS STS endpoint (https://sts.amazonaws.com). Useful for testing with a mock or when using another STS-compatible service.

```sql
SELECT count() FROM s3(
'<s3_bucket_uri>/*.csv',
access_key_id,
secret_access_key,
'CSVWithNames',
extra_credentials(
role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001',
role_session_name = 'ClickHouseSession',
sts_endpoint_override = 'http://mock-sts:8080'
)
)
```
Further examples can be found [here](/docs/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role)

## Working with archives
Expand Down
17 changes: 17 additions & 0 deletions docs/ru/sql-reference/table-functions/s3.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,23 @@ LIMIT 2;
└─────────┴─────────┴─────────┘
```

## Role Assumption

ClickHouse может использовать `AssumeRole` для получения временных учетных данных AWS. Это позволяет выполнять запросы к S3 из-под IAM-ролей без передачи постоянных ключей, которым выданы широкие права (`access_key_id`, `secret_access_key`).

Так, если у исходных учетных данных нет прямого доступа к S3, но они имеют право выполнять `AssumeRole`, ClickHouse сначала запрашивает временные учетные данные через AWS STS, а затем использует их для работы с S3.

Подробнее про Assume Role можно прочитать в [документации AWS](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).

Чтобы указать роль, которую нужно использовать, необходимо передать её ARN в ключе `role_arn` через параметр `extra_credentials`:

```sql
SELECT count() FROM s3('<s3_bucket_uri>/*.tsv',access_key_id,secret_access_key,'CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001'))
```

Там же можно указать `role_session_name`, если необходимо.


## Примеры использования {#usage-examples}

Предположим, у нас есть несколько файлов со следующими URI на S3:
Expand Down
6 changes: 6 additions & 0 deletions src/Disks/ObjectStorages/S3/diskSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ namespace S3AuthSetting
extern const S3AuthSettingsString secret_access_key;
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
extern const S3AuthSettingsString session_token;
extern const S3AuthSettingsString role_arn;
extern const S3AuthSettingsString role_session_name;
extern const S3AuthSettingsString sts_endpoint_override;
extern const S3AuthSettingsBool use_adaptive_timeouts;
extern const S3AuthSettingsBool use_environment_credentials;
extern const S3AuthSettingsBool use_insecure_imds_request;
Expand Down Expand Up @@ -169,6 +172,9 @@ std::unique_ptr<S3::Client> getClient(
auth_settings[S3AuthSetting::use_insecure_imds_request],
auth_settings[S3AuthSetting::expiration_window_seconds],
auth_settings[S3AuthSetting::no_sign_request],
auth_settings[S3AuthSetting::role_arn],
auth_settings[S3AuthSetting::role_session_name],
auth_settings[S3AuthSetting::sts_endpoint_override]
};

return S3::ClientFactory::instance().create(
Expand Down
33 changes: 32 additions & 1 deletion src/IO/S3/Credentials.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ namespace S3
# include <aws/core/utils/UUID.h>
# include <aws/core/http/HttpClientFactory.h>

# include <aws/sts/STSClient.h>
# include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>

# include <aws/core/utils/HashingUtils.h>
# include <aws/core/platform/FileSystem.h>

Expand Down Expand Up @@ -689,7 +692,35 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
/// because it's manually defined by the user
if (!credentials.IsEmpty())
{
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
if (credentials_configuration.role_arn.empty())
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
else
{
auto sts_client_config = Aws::STS::STSClientConfiguration();

if (!credentials_configuration.sts_endpoint_override.empty())
{
auto endpoint_uri = Poco::URI(credentials_configuration.sts_endpoint_override);

String url_without_scheme = endpoint_uri.getHost();
if (endpoint_uri.getPort() != 0)
url_without_scheme += ":" + std::to_string(endpoint_uri.getPort());

sts_client_config.endpointOverride = url_without_scheme;
sts_client_config.scheme = endpoint_uri.getScheme() == "https" ? Aws::Http::Scheme::HTTPS : Aws::Http::Scheme::HTTP;
}

AddProvider(std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
credentials_configuration.role_arn,
/* sessionName */ credentials_configuration.role_session_name,
/* externalId */ Aws::String(),
/* loadFrequency */ Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS,
std::make_shared<Aws::STS::STSClient>(credentials,
/* endpointProvider */ Aws::MakeShared<Aws::STS::STSEndpointProvider>(Aws::STS::STSClient::ALLOCATION_TAG),
/* clientConfiguration */ sts_client_config)
)
);
}
return;
}

Expand Down
3 changes: 3 additions & 0 deletions src/IO/S3/Credentials.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ struct CredentialsConfiguration
bool use_insecure_imds_request = false;
uint64_t expiration_window_seconds = DEFAULT_EXPIRATION_WINDOW_SECONDS;
bool no_sign_request = false;
String role_arn = ""; // NOLINT(*-redundant-string-init)
String role_session_name = ""; // NOLINT(*-redundant-string-init)
String sts_endpoint_override = ""; // NOLINT(*-redundant-string-init)
};

class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain
Expand Down
5 changes: 4 additions & 1 deletion src/IO/S3AuthSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ namespace ErrorCodes
DECLARE(String, secret_access_key, "", "", 0) \
DECLARE(String, session_token, "", "", 0) \
DECLARE(String, region, "", "", 0) \
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0)
DECLARE(String, server_side_encryption_customer_key_base64, "", "", 0) \
DECLARE(String, role_arn, "", "", 0) \
DECLARE(String, role_session_name, "", "", 0) \
DECLARE(String, sts_endpoint_override, "", "", 0)

#define CLIENT_SETTINGS_LIST(M, ALIAS) \
CLIENT_SETTINGS(M, ALIAS) \
Expand Down
103 changes: 103 additions & 0 deletions src/Storages/ObjectStorage/S3/Configuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#include <Disks/ObjectStorages/S3/diskSettings.h>

#include <Interpreters/evaluateConstantExpression.h>

#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
Expand Down Expand Up @@ -45,6 +47,9 @@ namespace S3AuthSetting
extern const S3AuthSettingsString secret_access_key;
extern const S3AuthSettingsString session_token;
extern const S3AuthSettingsBool use_environment_credentials;
extern const S3AuthSettingsString role_arn;
extern const S3AuthSettingsString role_session_name;
extern const S3AuthSettingsString sts_endpoint_override;
}

namespace ErrorCodes
Expand Down Expand Up @@ -109,6 +114,7 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot
url = other.url;
static_configuration = other.static_configuration;
headers_from_ast = other.headers_from_ast;
extra_credentials_from_ast = other.extra_credentials_from_ast;
keys = other.keys;
}

Expand Down Expand Up @@ -190,8 +196,66 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect
keys = {url.key};
}

/// Extracts the optional `extra_credentials(key = value, ...)` argument from the S3 table function arguments.
///
/// Every key/value pair is appended to `extra_credentials_from_ast`, and the `extra_credentials` node itself
/// is erased from `args`, so the remaining arguments can be parsed as if it had never been there.
///
/// Throws BAD_ARGUMENTS if:
///  - `extra_credentials` is specified more than once;
///  - an element is not of the form `key = value`;
///  - a key or a value does not evaluate to a String constant.
void StorageS3Configuration::extractExtraCreds(ASTs & args, ContextPtr context)
{
    /// Returns the argument list of `function`. A function node that has no ASTExpressionList attached
    /// is reported as a malformed argument instead of dereferencing a null pointer.
    /// The list is returned by reference to avoid copying the vector of children.
    auto arguments_of = [](const ASTFunction & function) -> const ASTs &
    {
        const auto * expression_list = function.arguments ? function.arguments->as<ASTExpressionList>() : nullptr;
        if (!expression_list)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "extra_credentials argument is incorrect: shall be key=value");
        return expression_list->children;
    };

    /// Position of the (single) `extra_credentials` argument; `args.end()` while none has been seen.
    auto extra_creds_it = args.end();

    for (auto arg_it = args.begin(); arg_it != args.end(); ++arg_it)
    {
        const auto * extra_creds_function = (*arg_it)->as<ASTFunction>();
        if (!extra_creds_function || extra_creds_function->name != "extra_credentials")
            continue;

        if (extra_creds_it != args.end())
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "S3 table function can have only one extra_credentials argument");

        for (const auto & extra_cred : arguments_of(*extra_creds_function))
        {
            /// `key = value` is represented in the AST as the function `equals(key, value)`.
            const auto * equals_function = extra_cred->as<ASTFunction>();
            if (!equals_function || equals_function->name != "equals")
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "extra_credentials argument is incorrect: shall be key=value");

            const auto & key_and_value = arguments_of(*equals_function);
            if (key_and_value.size() != 2)
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "extra_credentials argument is incorrect: expected 2 arguments, got {}",
                    key_and_value.size());

            /// The evaluated literals are kept in named variables so that the fields referenced below stay alive.
            const auto name_literal = evaluateConstantExpressionOrIdentifierAsLiteral(key_and_value[0], context);
            const auto & name_field = name_literal->as<ASTLiteral>()->value;
            if (name_field.getType() != Field::Types::Which::String)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials name");

            const auto value_literal = evaluateConstantExpressionOrIdentifierAsLiteral(key_and_value[1], context);
            const auto & value_field = value_literal->as<ASTLiteral>()->value;
            if (value_field.getType() != Field::Types::Which::String)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected string as extra_credentials value");

            extra_credentials_from_ast.emplace_back(name_field.safeGet<String>(), value_field.safeGet<String>());
        }

        extra_creds_it = arg_it;
    }

    /// To avoid making unnecessary changes and avoid potential conflicts in future,
    /// simply remove the "extra" argument after processing if it exists.
    if (extra_creds_it != args.end())
        args.erase(extra_creds_it);
}

void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
{
extractExtraCreds(args, context);

size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context);

if (count == 0 || count > getMaxNumberOfArguments(with_structure))
Expand Down Expand Up @@ -389,6 +453,23 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_
if (no_sign_request)
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;

if (!extra_credentials_from_ast.empty())
{
auto extract_extra_cred_value = [&extra_creds = this->extra_credentials_from_ast](const String & cred_name) -> String
{
auto role_arn_it = std::find_if(extra_creds.begin(), extra_creds.end(),
[&cred_name](const HTTPHeaderEntry & entry) { return entry.name == cred_name; });
if (role_arn_it != extra_creds.end())
return role_arn_it->value;

return {};
};

auth_settings[S3AuthSetting::role_arn] = extract_extra_cred_value("role_arn");
auth_settings[S3AuthSetting::role_session_name] = extract_extra_cred_value("role_session_name");
auth_settings[S3AuthSetting::sts_endpoint_override] = extract_extra_cred_value("sts_endpoint_override");
}

static_configuration = !auth_settings[S3AuthSetting::access_key_id].value.empty() || auth_settings[S3AuthSetting::no_sign_request].changed;
auth_settings[S3AuthSetting::no_sign_request] = no_sign_request;

Expand Down Expand Up @@ -605,6 +686,28 @@ ASTPtr StorageS3Configuration::createArgsWithAccessData() const
arguments->children.push_back(std::make_shared<ASTLiteral>(format));
if (!compression_method.empty())
arguments->children.push_back(std::make_shared<ASTLiteral>(compression_method));

if (!auth_settings[S3AuthSetting::role_arn].value.empty())
{
auto extra_creds_ast_function = std::make_shared<ASTFunction>();
extra_creds_ast_function->name = "extra_credentials";

auto role_arn_ast = std::make_shared<ASTFunction>();
role_arn_ast->name = "equals";
role_arn_ast->children.push_back(std::make_shared<ASTLiteral>("role_arn"));
role_arn_ast->children.push_back(std::make_shared<ASTLiteral>(auth_settings[S3AuthSetting::role_arn].value));

extra_creds_ast_function->children.push_back(role_arn_ast);

auto role_session_name_ast = std::make_shared<ASTFunction>();
role_session_name_ast->name = "equals";
role_session_name_ast->children.push_back(std::make_shared<ASTLiteral>("role_session_name"));
role_session_name_ast->children.push_back(std::make_shared<ASTLiteral>(auth_settings[S3AuthSetting::role_session_name].value));

extra_creds_ast_function->children.push_back(role_session_name_ast);

arguments->children.push_back(extra_creds_ast_function);
}
}

return arguments;
Expand Down
Loading
Loading