Skip to content

Commit 3d9e365

Browse files
authored
Merge branch 'antalya-25.6.5' into s3_hive_style_reads_and_writes_25_6_5
2 parents 98f8cfe + f0b3cd9 commit 3d9e365

21 files changed

+311
-10
lines changed

programs/server/Server.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@
156156
# include <azure/core/diagnostics/logger.hpp>
157157
#endif
158158

159+
#if USE_PARQUET
160+
# include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
161+
#endif
162+
159163

160164
#include <incbin.h>
161165
/// A minimal file used when the server is run without installation
@@ -326,6 +330,7 @@ namespace ServerSetting
326330
extern const ServerSettingsUInt64 os_cpu_busy_time_threshold;
327331
extern const ServerSettingsFloat min_os_cpu_wait_time_ratio_to_drop_connection;
328332
extern const ServerSettingsFloat max_os_cpu_wait_time_ratio_to_drop_connection;
333+
extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
329334
}
330335

331336
namespace ErrorCodes
@@ -2423,6 +2428,10 @@ try
24232428

24242429
auto replicas_reconnector = ReplicasReconnector::init(global_context);
24252430

2431+
#if USE_PARQUET
2432+
ParquetFileMetaDataCache::instance()->setMaxSizeInBytes(server_settings[ServerSetting::input_format_parquet_metadata_cache_max_size]);
2433+
#endif
2434+
24262435
/// Set current database name before loading tables and databases because
24272436
/// system logs may copy global context.
24282437
std::string default_database = server_settings[ServerSetting::default_database];

src/Access/Common/AccessType.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ enum class AccessType : uint8_t
184184
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
185185
M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
186186
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
187+
M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
187188
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
188189
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
189190
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \

src/Common/ProfileEvents.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,8 @@ The server successfully detected this situation and will download merged part fr
10411041
M(IndexGenericExclusionSearchAlgorithm, "Number of times the generic exclusion search algorithm is used over the index marks", ValueType::Number) \
10421042
M(ParallelReplicasQueryCount, "Number of (sub)queries executed using parallel replicas during a query execution", ValueType::Number) \
10431043
M(DistributedConnectionReconnectCount, "Number of reconnects to other servers done during distributed query execution. It can happen when a stale connection has been acquired from connection pool", ValueType::Number) \
1044-
1044+
M(ParquetMetaDataCacheHits, "Number of times Parquet file metadata was found in the Parquet metadata cache.", ValueType::Number) \
1045+
M(ParquetMetaDataCacheMisses, "Number of times Parquet file metadata was not found in the Parquet metadata cache.", ValueType::Number) \
10451046

10461047
#ifdef APPLY_FOR_EXTERNAL_EVENTS
10471048
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

src/Core/FormatFactorySettings.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,8 +1348,7 @@ Limits the size of the blocks formed during data parsing in input formats in byt
13481348
DECLARE(Bool, input_format_parquet_allow_geoparquet_parser, true, R"(
13491349
Use geo column parser to convert Array(UInt8) into Point/Linestring/Polygon/MultiLineString/MultiPolygon types
13501350
)", 0) \
1351-
1352-
1351+
DECLARE(Bool, input_format_parquet_use_metadata_cache, true, R"(Enable parquet file metadata caching)", 0) \
13531352
// End of FORMAT_FACTORY_SETTINGS
13541353

13551354
#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \

src/Core/ServerSettings.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,8 +1064,7 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
10641064
See [Controlling behavior on server CPU overload](/operations/settings/server-overload) for more details.
10651065
)", 0) \
10661066
DECLARE(Float, distributed_cache_keep_up_free_connections_ratio, 0.1f, "Soft limit for number of active connection distributed cache will try to keep free. After the number of free connections goes below distributed_cache_keep_up_free_connections_ratio * max_connections, connections with oldest activity will be closed until the number goes above the limit.", 0) \
1067-
1068-
1067+
DECLARE(UInt64, input_format_parquet_metadata_cache_max_size, 500000000, "Maximum size of parquet file metadata cache", 0) \
10691068
// clang-format on
10701069

10711070
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below

src/Core/SettingsChangesHistory.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,11 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
182182
{"parallel_hash_join_threshold", 0, 0, "New setting"},
183183
/// Release closed. Please use 25.4
184184
});
185+
addSettingsChanges(settings_changes_history, "24.12.2.20000",
186+
{
187+
// Altinity Antalya modifications on top of 24.12
188+
{"input_format_parquet_use_metadata_cache", true, true, "New setting, turned ON by default"}, // https://github.com/Altinity/ClickHouse/pull/586
189+
});
185190
addSettingsChanges(settings_changes_history, "25.2",
186191
{
187192
/// Release closed. Please use 25.3

src/Interpreters/InterpreterSystemQuery.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@
7979
#include <Formats/ProtobufSchemas.h>
8080
#endif
8181

82+
#if USE_PARQUET
83+
#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
84+
#endif
85+
8286
#if USE_AWS_S3
8387
#include <IO/S3/Client.h>
8488
#endif
@@ -433,6 +437,16 @@ BlockIO InterpreterSystemQuery::execute()
433437
getContext()->clearQueryResultCache(query.query_result_cache_tag);
434438
break;
435439
}
440+
case Type::DROP_PARQUET_METADATA_CACHE:
441+
{
442+
#if USE_PARQUET
443+
getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
444+
ParquetFileMetaDataCache::instance()->clear();
445+
break;
446+
#else
447+
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for Parquet");
448+
#endif
449+
}
436450
case Type::DROP_COMPILED_EXPRESSION_CACHE:
437451
#if USE_EMBEDDED_COMPILER
438452
getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
@@ -1533,6 +1547,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
15331547
case Type::DROP_PAGE_CACHE:
15341548
case Type::DROP_SCHEMA_CACHE:
15351549
case Type::DROP_FORMAT_SCHEMA_CACHE:
1550+
case Type::DROP_PARQUET_METADATA_CACHE:
15361551
case Type::DROP_S3_CLIENT_CACHE:
15371552
{
15381553
required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE);

src/Parsers/ASTSystemQuery.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
467467
case Type::DROP_COMPILED_EXPRESSION_CACHE:
468468
case Type::DROP_S3_CLIENT_CACHE:
469469
case Type::DROP_ICEBERG_METADATA_CACHE:
470+
case Type::DROP_PARQUET_METADATA_CACHE:
470471
case Type::RESET_COVERAGE:
471472
case Type::RESTART_REPLICAS:
472473
case Type::JEMALLOC_PURGE:

src/Parsers/ASTSystemQuery.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
4242
DROP_SCHEMA_CACHE,
4343
DROP_FORMAT_SCHEMA_CACHE,
4444
DROP_S3_CLIENT_CACHE,
45+
DROP_PARQUET_METADATA_CACHE,
4546
STOP_LISTEN,
4647
START_LISTEN,
4748
RESTART_REPLICAS,

src/Processors/Formats/IInputFormat.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class IInputFormat : public SourceWithKeyCondition
7070

7171
void needOnlyCount() { need_only_count = true; }
7272

73+
/// Set additional info/key/id related to underlying storage of the ReadBuffer.
/// This base implementation ignores both arguments and does nothing; being virtual, it can be overridden by a format that needs the key.
74+
virtual void setStorageRelatedUniqueKey(const Settings & /*settings*/, const String & /*key*/) {}
75+
7376
protected:
7477
ReadBuffer & getReadBuffer() const { chassert(in); return *in; }
7578

0 commit comments

Comments
 (0)