diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py
index 646b952a7e00..737a137d863a 100755
--- a/tests/ci/integration_tests_runner.py
+++ b/tests/ci/integration_tests_runner.py
@@ -230,7 +230,7 @@ def _pre_pull_images(self):
cmd = (
f"cd {self.repo_path}/tests/integration && "
- f"timeout --verbose --signal=KILL 2h ./runner {self._get_runner_opts()} {image_cmd} "
+ f"timeout --verbose --signal=KILL 3h ./runner {self._get_runner_opts()} {image_cmd} "
"--pre-pull --command ' echo Pre Pull finished ' "
)
@@ -364,7 +364,7 @@ def all_tests(self) -> List[str]:
report_file = "runner_get_all_tests.jsonl"
cmd = (
f"cd {self.repo_path}/tests/integration && "
- f"timeout --verbose --signal=KILL 2h ./runner {runner_opts} {image_cmd} -- "
+ f"timeout --verbose --signal=KILL 3h ./runner {runner_opts} {image_cmd} -- "
f"--setup-plan --report-log={report_file}"
)
diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py
index 0a7061b0c1a5..4802e86ed17b 100644
--- a/tests/integration/test_mask_sensitive_info/test.py
+++ b/tests/integration/test_mask_sensitive_info/test.py
@@ -273,11 +273,6 @@ def test_create_table():
f"IcebergS3('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
"DNS_ERROR",
),
- (
- f"Iceberg(storage_type='s3', 'http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
- "DNS_ERROR",
- ),
-
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '{azure_account_key}')",
diff --git a/tests/integration/test_parquet_drop_metadata_cache/__init__.py b/tests/integration/test_parquet_drop_metadata_cache/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml b/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml
deleted file mode 100644
index 1388c0788fc0..000000000000
--- a/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-
- node1
- 9000
-
-
- node2
- 9000
-
-
- node3
- 9000
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py
deleted file mode 100644
index 5e0f2b816fce..000000000000
--- a/tests/integration/test_parquet_drop_metadata_cache/test.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-from helpers.config_cluster import minio_access_key
-from helpers.config_cluster import minio_secret_key
-import time
-
-cluster = ClickHouseCluster(__file__)
-node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True, with_minio=True)
-node2 = cluster.add_instance("node2", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
-node3 = cluster.add_instance("node3", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True)
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
- try:
- cluster.start()
- yield cluster
- finally:
- cluster.shutdown()
-
-
-def test_clear_cache_on_cluster(started_cluster):
- node1.query(f"INSERT INTO TABLE FUNCTION s3('http://minio1:9001/root/data/test_clear_cache/{{_partition_id}}.parquet', 'minio', '{minio_secret_key}', 'Parquet') PARTITION BY number SELECT number FROM numbers(1, 3)")
-
- node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'")
- node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'")
- node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'")
-
- node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
-
- cold_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- cold_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- cold_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-
- assert(cold_cache_result_n1 == cold_cache_result_n2 == cold_cache_result_n3)
- assert(cold_cache_result_n1 == '0\n')
-
- node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'")
- node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'")
- node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'")
-
- node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
-
- warm_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- warm_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- warm_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-
- assert(warm_cache_result_n1 == warm_cache_result_n2 == warm_cache_result_n3)
- assert(warm_cache_result_n1 == '1\n')
-
- node1.query("SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster")
-
- node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'")
- node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'")
- node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'")
-
- node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster")
-
- cache_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- cache_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- cache_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-
- assert(cache_after_drop_result_n1 == cache_after_drop_result_n2 == cache_after_drop_result_n3)
- assert(cache_after_drop_result_n1 == '0\n')
-
- misses_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- misses_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
- misses_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;")
-
- assert(misses_after_drop_result_n1 == misses_after_drop_result_n2 == misses_after_drop_result_n3)
- assert(misses_after_drop_result_n1 == '1\n')
diff --git a/tests/integration/test_s3_assume_role/__init__.py b/tests/integration/test_s3_assume_role/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tests/integration/test_s3_assume_role/mocks/mock_sts.py b/tests/integration/test_s3_assume_role/mocks/mock_sts.py
deleted file mode 100644
index 7329713cdcef..000000000000
--- a/tests/integration/test_s3_assume_role/mocks/mock_sts.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import sys
-
-from bottle import response, route, run
-
-
-MOCK_XML_RESPONSE = """
-
-
- minio
- ClickHouse_Minio_P@ssw0rd
- 2055-12-31T23:59:59Z
-
-
-"""
-
-# MOCK_SESSION_TOKEN is not being returned -- it is not required by minio
-# When "real" STS returns it -- it is also used to read from s3
-
-@route("/", method="POST")
-def return_creds():
- response.status = 200
- response.content_type = "application/xml"
- return MOCK_XML_RESPONSE
-
-
-@route("/", method="GET")
-def ping():
- return "OK"
-
-
-run(host="0.0.0.0", port=int(sys.argv[1]))
\ No newline at end of file
diff --git a/tests/integration/test_s3_assume_role/test.py b/tests/integration/test_s3_assume_role/test.py
deleted file mode 100644
index e46c86af61d6..000000000000
--- a/tests/integration/test_s3_assume_role/test.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import logging
-import os
-
-import pytest
-
-from helpers.cluster import ClickHouseCluster
-from helpers.mock_servers import start_mock_servers
-
-SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
-
-
-def run_sts_mock(started_cluster):
- script_dir = os.path.join(os.path.dirname(__file__), "mocks")
- start_mock_servers(
- started_cluster,
- script_dir,
- [
- ("mock_sts.py", "resolver", "8081"),
- ],
- )
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
- cluster = ClickHouseCluster(__file__)
- try:
- cluster.add_instance(
- "node1",
- with_minio=True,
- )
-
- logging.info("Starting cluster...")
- cluster.start()
-
- run_sts_mock(cluster)
- yield cluster
-
- finally:
- cluster.shutdown()
-
-
-def test_using_assumed_creds(started_cluster):
- instance = started_cluster.instances["node1"]
-
- # Create some file in non public-accessible minio
- instance.query(
- """
- INSERT INTO FUNCTION s3
- (
- 'http://minio1:9001/root/test_assume.csv', 'minio', 'ClickHouse_Minio_P@ssw0rd', 'CSVWithNames'
- )
- SELECT number as num, toString(number) as strnum FROM numbers(5);
- """
- )
-
- # Read them using credentials received from our fake STS
- r = instance.query(
- """
- SELECT count() FROM s3
- ('http://minio1:9001/root/test_assume.csv',
- SOME_FAKE_ID, SOME_FAKE_SECRET, 'CSVWithNames',
- extra_credentials(
- role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001',
- sts_endpoint_override = 'http://resolver:8081'
- )
- )
- """
- )
-
- assert r == "5\n"
-
-
diff --git a/tests/integration/test_s3_cache_locality/__init__.py b/tests/integration/test_s3_cache_locality/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tests/integration/test_s3_cache_locality/configs/cluster.xml b/tests/integration/test_s3_cache_locality/configs/cluster.xml
deleted file mode 100644
index db54c35374b9..000000000000
--- a/tests/integration/test_s3_cache_locality/configs/cluster.xml
+++ /dev/null
@@ -1,126 +0,0 @@
-
-
-
-
-
-
-
- clickhouse1
- 9000
-
-
- clickhouse2
- 9000
-
-
- clickhouse3
- 9000
-
-
- clickhouse4
- 9000
-
-
- clickhouse5
- 9000
-
-
-
-
-
-
-
- clickhouse1
- 9000
-
-
- clickhouse2
- 9000
-
-
- clickhouse3
- 9000
-
-
- clickhouse4
- 9000
-
-
-
-
-
-
-
- clickhouse2
- 9000
-
-
- clickhouse3
- 9000
-
-
- clickhouse4
- 9000
-
-
- clickhouse5
- 9000
-
-
-
-
-
-
-
- clickhouse3
- 9000
-
-
- clickhouse4
- 9000
-
-
- clickhouse5
- 9000
-
-
- clickhouse1
- 9000
-
-
- clickhouse2
- 9000
-
-
-
-
-
-
-
- clickhouse4
- 9000
-
-
- clickhouse5
- 9000
-
-
- clickhouse2
- 9000
-
-
- clickhouse3
- 9000
-
-
-
-
-
-
-
-
- /var/lib/clickhouse/raw_s3_cache
- 10Gi
-
-
-
diff --git a/tests/integration/test_s3_cache_locality/configs/named_collections.xml b/tests/integration/test_s3_cache_locality/configs/named_collections.xml
deleted file mode 100644
index 511078d6f0d9..000000000000
--- a/tests/integration/test_s3_cache_locality/configs/named_collections.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
- http://minio1:9001/root/data/*
- minio
- minio123
- CSV>
-
-
-
diff --git a/tests/integration/test_s3_cache_locality/configs/users.xml b/tests/integration/test_s3_cache_locality/configs/users.xml
deleted file mode 100644
index 4b6ba057ecb1..000000000000
--- a/tests/integration/test_s3_cache_locality/configs/users.xml
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
- default
- 1
-
-
-
diff --git a/tests/integration/test_s3_cache_locality/test.py b/tests/integration/test_s3_cache_locality/test.py
deleted file mode 100644
index a2020d7e0568..000000000000
--- a/tests/integration/test_s3_cache_locality/test.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import csv
-import logging
-import os
-import shutil
-import uuid
-
-import pytest
-
-from helpers.cluster import ClickHouseCluster
-
-logging.getLogger().setLevel(logging.INFO)
-logging.getLogger().addHandler(logging.StreamHandler())
-
-SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
-
-
-def create_buckets_s3(cluster):
- minio = cluster.minio_client
-
- s3_data = []
-
- for file_number in range(1000):
- file_name = f"data/generated/file_{file_number}.csv"
- os.makedirs(os.path.join(SCRIPT_DIR, "data/generated/"), exist_ok=True)
- s3_data.append(file_name)
- with open(os.path.join(SCRIPT_DIR, file_name), "w+", encoding="utf-8") as f:
- # a String, b UInt64
- data = []
-
- # Make all files a bit different
- data.append(
- ["str_" + str(file_number), file_number]
- )
-
- writer = csv.writer(f)
- writer.writerows(data)
-
- for file in s3_data:
- minio.fput_object(
- bucket_name=cluster.minio_bucket,
- object_name=file,
- file_path=os.path.join(SCRIPT_DIR, file),
- )
-
- for obj in minio.list_objects(cluster.minio_bucket, recursive=True):
- print(obj.object_name)
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
- try:
- cluster = ClickHouseCluster(__file__)
- # clickhouse0 not a member of cluster_XXX
- for i in range(6):
- cluster.add_instance(
- f"clickhouse{i}",
- main_configs=["configs/cluster.xml", "configs/named_collections.xml"],
- user_configs=["configs/users.xml"],
- macros={"replica": f"clickhouse{i}"},
- with_minio=True,
- with_zookeeper=True,
- )
-
- logging.info("Starting cluster...")
- cluster.start()
- logging.info("Cluster started")
-
- create_buckets_s3(cluster)
-
- yield cluster
- finally:
- shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated/"))
- cluster.shutdown()
-
-
-def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache):
- for host in list(cluster.instances.values()):
- host.query("SYSTEM DROP FILESYSTEM CACHE 'raw_s3_cache'", timeout=30)
-
- query_id_first = str(uuid.uuid4())
- result_first = node.query(
- f"""
- SELECT count(*)
- FROM s3Cluster('{cluster_first}', 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64')
- WHERE b=42
- SETTINGS
- enable_filesystem_cache={enable_filesystem_cache},
- filesystem_cache_name='raw_s3_cache'
- """,
- query_id=query_id_first,
- timeout=30,
- )
- assert result_first == expected_result
- query_id_second = str(uuid.uuid4())
- result_second = node.query(
- f"""
- SELECT count(*)
- FROM s3Cluster('{cluster_second}', 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64')
- WHERE b=42
- SETTINGS
- enable_filesystem_cache={enable_filesystem_cache},
- filesystem_cache_name='raw_s3_cache'
- """,
- query_id=query_id_second,
- timeout=30,
- )
- assert result_second == expected_result
-
- node.query("SYSTEM FLUSH LOGS", timeout=30)
- node.query(f"SYSTEM FLUSH LOGS ON CLUSTER {cluster_first}", timeout=30)
- node.query(f"SYSTEM FLUSH LOGS ON CLUSTER {cluster_second}", timeout=30)
-
- s3_get_first = node.query(
- f"""
- SELECT sum(ProfileEvents['S3GetObject'])
- FROM clusterAllReplicas('{cluster_first}', system.query_log)
- WHERE type='QueryFinish'
- AND initial_query_id='{query_id_first}'
- """,
- timeout=30,
- )
- s3_get_second = node.query(
- f"""
- SELECT sum(ProfileEvents['S3GetObject'])
- FROM clusterAllReplicas('{cluster_second}', system.query_log)
- WHERE type='QueryFinish'
- AND initial_query_id='{query_id_second}'
- """,
- timeout=30,
- )
-
- return int(s3_get_first), int(s3_get_second)
-
-
-def check_s3_gets_repeat(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache):
- # Repeat test several times to get average result
- iterations = 10
- s3_get_first_sum = 0
- s3_get_second_sum = 0
- for _ in range(iterations):
- (s3_get_first, s3_get_second) = check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache)
- s3_get_first_sum += s3_get_first
- s3_get_second_sum += s3_get_second
- return s3_get_first_sum, s3_get_second_sum
-
-
-def test_cache_locality(started_cluster):
- node = started_cluster.instances["clickhouse0"]
-
- expected_result = node.query(
- """
- SELECT count(*)
- FROM s3('http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64')
- WHERE b=42
- """
- )
-
- # Algorithm does not give 100% guarantee, so add 10% on dispersion
- dispersion = 0.1
-
- # No cache
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_12345', 0)
- assert s3_get_second == s3_get_first
-
- # With cache
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_12345', 1)
- assert s3_get_second <= s3_get_first * dispersion
-
- # Different nodes order
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_34512', 1)
- assert s3_get_second <= s3_get_first * dispersion
-
- # No last node
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_1234', 1)
- assert s3_get_second <= s3_get_first * (0.2 + dispersion)
-
- # No first node
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_2345', 1)
- assert s3_get_second <= s3_get_first * (0.2 + dispersion)
-
- # No first node, different nodes order
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_4523', 1)
- assert s3_get_second <= s3_get_first * (0.2 + dispersion)
-
- # Add new node, different nodes order
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_4523', 'cluster_12345', 1)
- assert s3_get_second <= s3_get_first * (0.2 + dispersion)
-
- # New node and old node, different nodes order
- (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_1234', 'cluster_4523', 1)
- assert s3_get_second <= s3_get_first * (0.4375 + dispersion)