diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 646b952a7e00..737a137d863a 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -230,7 +230,7 @@ def _pre_pull_images(self): cmd = ( f"cd {self.repo_path}/tests/integration && " - f"timeout --verbose --signal=KILL 2h ./runner {self._get_runner_opts()} {image_cmd} " + f"timeout --verbose --signal=KILL 3h ./runner {self._get_runner_opts()} {image_cmd} " "--pre-pull --command ' echo Pre Pull finished ' " ) @@ -364,7 +364,7 @@ def all_tests(self) -> List[str]: report_file = "runner_get_all_tests.jsonl" cmd = ( f"cd {self.repo_path}/tests/integration && " - f"timeout --verbose --signal=KILL 2h ./runner {runner_opts} {image_cmd} -- " + f"timeout --verbose --signal=KILL 3h ./runner {runner_opts} {image_cmd} -- " f"--setup-plan --report-log={report_file}" ) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 0a7061b0c1a5..4802e86ed17b 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -273,11 +273,6 @@ def test_create_table(): f"IcebergS3('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", "DNS_ERROR", ), - ( - f"Iceberg(storage_type='s3', 'http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", - "DNS_ERROR", - ), - f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", f"AzureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", f"AzureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_2.csv', '{azure_account_name}', '{azure_account_key}')", diff --git a/tests/integration/test_parquet_drop_metadata_cache/__init__.py b/tests/integration/test_parquet_drop_metadata_cache/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml b/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml deleted file mode 100644 index 1388c0788fc0..000000000000 --- a/tests/integration/test_parquet_drop_metadata_cache/configs/config.d/cluster.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - node1 - 9000 - - - node2 - 9000 - - - node3 - 9000 - - - - - \ No newline at end of file diff --git a/tests/integration/test_parquet_drop_metadata_cache/test.py b/tests/integration/test_parquet_drop_metadata_cache/test.py deleted file mode 100644 index 5e0f2b816fce..000000000000 --- a/tests/integration/test_parquet_drop_metadata_cache/test.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.config_cluster import minio_access_key -from helpers.config_cluster import minio_secret_key -import time - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True, with_minio=True) -node2 = cluster.add_instance("node2", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True) -node3 = cluster.add_instance("node3", main_configs=["configs/config.d/cluster.xml"], with_zookeeper=True) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_clear_cache_on_cluster(started_cluster): - node1.query(f"INSERT INTO TABLE FUNCTION s3('http://minio1:9001/root/data/test_clear_cache/{{_partition_id}}.parquet', 'minio', '{minio_secret_key}', 'Parquet') PARTITION BY number SELECT number FROM numbers(1, 3)") - - node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'") - node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'") - node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cold_cache'") - - node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster") - - cold_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - cold_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - cold_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cold_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - - assert(cold_cache_result_n1 == cold_cache_result_n2 == cold_cache_result_n3) - assert(cold_cache_result_n1 == '0\n') - - node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'") - node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'") - node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='hot_cache'") - - node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster") - - warm_cache_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - warm_cache_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - warm_cache_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'hot_cache' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - - assert(warm_cache_result_n1 == warm_cache_result_n2 == warm_cache_result_n3) - assert(warm_cache_result_n1 == '1\n') - - node1.query("SYSTEM DROP PARQUET METADATA CACHE ON CLUSTER parquet_clear_cache_cluster") - - node1.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/1.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'") - node2.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/2.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'") - node3.query(f"SELECT * FROM s3('http://minio1:9001/root/data/test_clear_cache/3.parquet', 'minio', '{minio_secret_key}', 'Parquet') SETTINGS log_comment='cache_after_drop'") - - node1.query("SYSTEM FLUSH LOGS ON CLUSTER parquet_clear_cache_cluster") - - cache_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - cache_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - cache_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheHits'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - - assert(cache_after_drop_result_n1 == cache_after_drop_result_n2 == cache_after_drop_result_n3) - assert(cache_after_drop_result_n1 == '0\n') - - misses_after_drop_result_n1 = node1.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - misses_after_drop_result_n2 = node2.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - misses_after_drop_result_n3 = node3.query("SELECT ProfileEvents['ParquetMetaDataCacheMisses'] FROM system.query_log where log_comment = 'cache_after_drop' AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1;") - - assert(misses_after_drop_result_n1 == misses_after_drop_result_n2 == misses_after_drop_result_n3) - assert(misses_after_drop_result_n1 == '1\n') diff --git a/tests/integration/test_s3_assume_role/__init__.py b/tests/integration/test_s3_assume_role/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_s3_assume_role/mocks/mock_sts.py b/tests/integration/test_s3_assume_role/mocks/mock_sts.py deleted file mode 100644 index 7329713cdcef..000000000000 --- a/tests/integration/test_s3_assume_role/mocks/mock_sts.py +++ /dev/null @@ -1,31 +0,0 @@ -import sys - -from bottle import response, route, run - - -MOCK_XML_RESPONSE = """ - - - minio - ClickHouse_Minio_P@ssw0rd - 2055-12-31T23:59:59Z - - -""" - -# MOCK_SESSION_TOKEN is not being returned -- it is not required by minio -# When "real" STS returns it -- it is also used to read from s3 - -@route("/", method="POST") -def return_creds(): - response.status = 200 - response.content_type = "application/xml" - return MOCK_XML_RESPONSE - - -@route("/", method="GET") -def ping(): - return "OK" - - -run(host="0.0.0.0", port=int(sys.argv[1])) \ No newline at end of file diff --git a/tests/integration/test_s3_assume_role/test.py b/tests/integration/test_s3_assume_role/test.py deleted file mode 100644 index e46c86af61d6..000000000000 --- a/tests/integration/test_s3_assume_role/test.py +++ /dev/null @@ -1,72 +0,0 @@ -import logging -import os - -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.mock_servers import start_mock_servers - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - - -def run_sts_mock(started_cluster): - script_dir = os.path.join(os.path.dirname(__file__), "mocks") - start_mock_servers( - started_cluster, - script_dir, - [ - ("mock_sts.py", "resolver", "8081"), - ], - ) - - -@pytest.fixture(scope="module") -def started_cluster(): - cluster = ClickHouseCluster(__file__) - try: - cluster.add_instance( - "node1", - with_minio=True, - ) - - logging.info("Starting cluster...") - cluster.start() - - run_sts_mock(cluster) - yield cluster - - finally: - cluster.shutdown() - - -def test_using_assumed_creds(started_cluster): - instance = started_cluster.instances["node1"] - - # Create some file in non public-accessible minio - instance.query( - """ - INSERT INTO FUNCTION s3 - ( - 'http://minio1:9001/root/test_assume.csv', 'minio', 'ClickHouse_Minio_P@ssw0rd', 'CSVWithNames' - ) - SELECT number as num, toString(number) as strnum FROM numbers(5); - """ - ) - - # Read them using credentials received from our fake STS - r = instance.query( - """ - SELECT count() FROM s3 - ('http://minio1:9001/root/test_assume.csv', - SOME_FAKE_ID, SOME_FAKE_SECRET, 'CSVWithNames', - extra_credentials( - role_arn = 'arn:aws:iam::111111111111:role/BucketAccessRole-001', - sts_endpoint_override = 'http://resolver:8081' - ) - ) - """ - ) - - assert r == "5\n" - - diff --git a/tests/integration/test_s3_cache_locality/__init__.py b/tests/integration/test_s3_cache_locality/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_s3_cache_locality/configs/cluster.xml b/tests/integration/test_s3_cache_locality/configs/cluster.xml deleted file mode 100644 index db54c35374b9..000000000000 --- a/tests/integration/test_s3_cache_locality/configs/cluster.xml +++ /dev/null @@ -1,126 +0,0 @@ - - - - - - - - clickhouse1 - 9000 - - - clickhouse2 - 9000 - - - clickhouse3 - 9000 - - - clickhouse4 - 9000 - - - clickhouse5 - 9000 - - - - - - - - clickhouse1 - 9000 - - - clickhouse2 - 9000 - - - clickhouse3 - 9000 - - - clickhouse4 - 9000 - - - - - - - - clickhouse2 - 9000 - - - clickhouse3 - 9000 - - - clickhouse4 - 9000 - - - clickhouse5 - 9000 - - - - - - - - clickhouse3 - 9000 - - - clickhouse4 - 9000 - - - clickhouse5 - 9000 - - - clickhouse1 - 9000 - - - clickhouse2 - 9000 - - - - - - - - clickhouse4 - 9000 - - - clickhouse5 - 9000 - - - clickhouse2 - 9000 - - - clickhouse3 - 9000 - - - - - - - - - /var/lib/clickhouse/raw_s3_cache - 10Gi - - - diff --git a/tests/integration/test_s3_cache_locality/configs/named_collections.xml b/tests/integration/test_s3_cache_locality/configs/named_collections.xml deleted file mode 100644 index 511078d6f0d9..000000000000 --- a/tests/integration/test_s3_cache_locality/configs/named_collections.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - http://minio1:9001/root/data/* - minio - minio123 - CSV> - - - diff --git a/tests/integration/test_s3_cache_locality/configs/users.xml b/tests/integration/test_s3_cache_locality/configs/users.xml deleted file mode 100644 index 4b6ba057ecb1..000000000000 --- a/tests/integration/test_s3_cache_locality/configs/users.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - default - 1 - - - diff --git a/tests/integration/test_s3_cache_locality/test.py b/tests/integration/test_s3_cache_locality/test.py deleted file mode 100644 index a2020d7e0568..000000000000 --- a/tests/integration/test_s3_cache_locality/test.py +++ /dev/null @@ -1,191 +0,0 @@ -import csv -import logging -import os -import shutil -import uuid - -import pytest - -from helpers.cluster import ClickHouseCluster - -logging.getLogger().setLevel(logging.INFO) -logging.getLogger().addHandler(logging.StreamHandler()) - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - - -def create_buckets_s3(cluster): - minio = cluster.minio_client - - s3_data = [] - - for file_number in range(1000): - file_name = f"data/generated/file_{file_number}.csv" - os.makedirs(os.path.join(SCRIPT_DIR, "data/generated/"), exist_ok=True) - s3_data.append(file_name) - with open(os.path.join(SCRIPT_DIR, file_name), "w+", encoding="utf-8") as f: - # a String, b UInt64 - data = [] - - # Make all files a bit different - data.append( - ["str_" + str(file_number), file_number] - ) - - writer = csv.writer(f) - writer.writerows(data) - - for file in s3_data: - minio.fput_object( - bucket_name=cluster.minio_bucket, - object_name=file, - file_path=os.path.join(SCRIPT_DIR, file), - ) - - for obj in minio.list_objects(cluster.minio_bucket, recursive=True): - print(obj.object_name) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster = ClickHouseCluster(__file__) - # clickhouse0 not a member of cluster_XXX - for i in range(6): - cluster.add_instance( - f"clickhouse{i}", - main_configs=["configs/cluster.xml", "configs/named_collections.xml"], - user_configs=["configs/users.xml"], - macros={"replica": f"clickhouse{i}"}, - with_minio=True, - with_zookeeper=True, - ) - - logging.info("Starting cluster...") - cluster.start() - logging.info("Cluster started") - - create_buckets_s3(cluster) - - yield cluster - finally: - shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated/")) - cluster.shutdown() - - -def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache): - for host in list(cluster.instances.values()): - host.query("SYSTEM DROP FILESYSTEM CACHE 'raw_s3_cache'", timeout=30) - - query_id_first = str(uuid.uuid4()) - result_first = node.query( - f""" - SELECT count(*) - FROM s3Cluster('{cluster_first}', 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64') - WHERE b=42 - SETTINGS - enable_filesystem_cache={enable_filesystem_cache}, - filesystem_cache_name='raw_s3_cache' - """, - query_id=query_id_first, - timeout=30, - ) - assert result_first == expected_result - query_id_second = str(uuid.uuid4()) - result_second = node.query( - f""" - SELECT count(*) - FROM s3Cluster('{cluster_second}', 'http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64') - WHERE b=42 - SETTINGS - enable_filesystem_cache={enable_filesystem_cache}, - filesystem_cache_name='raw_s3_cache' - """, - query_id=query_id_second, - timeout=30, - ) - assert result_second == expected_result - - node.query("SYSTEM FLUSH LOGS", timeout=30) - node.query(f"SYSTEM FLUSH LOGS ON CLUSTER {cluster_first}", timeout=30) - node.query(f"SYSTEM FLUSH LOGS ON CLUSTER {cluster_second}", timeout=30) - - s3_get_first = node.query( - f""" - SELECT sum(ProfileEvents['S3GetObject']) - FROM clusterAllReplicas('{cluster_first}', system.query_log) - WHERE type='QueryFinish' - AND initial_query_id='{query_id_first}' - """, - timeout=30, - ) - s3_get_second = node.query( - f""" - SELECT sum(ProfileEvents['S3GetObject']) - FROM clusterAllReplicas('{cluster_second}', system.query_log) - WHERE type='QueryFinish' - AND initial_query_id='{query_id_second}' - """, - timeout=30, - ) - - return int(s3_get_first), int(s3_get_second) - - -def check_s3_gets_repeat(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache): - # Repeat test several times to get average result - iterations = 10 - s3_get_first_sum = 0 - s3_get_second_sum = 0 - for _ in range(iterations): - (s3_get_first, s3_get_second) = check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache) - s3_get_first_sum += s3_get_first - s3_get_second_sum += s3_get_second - return s3_get_first_sum, s3_get_second_sum - - -def test_cache_locality(started_cluster): - node = started_cluster.instances["clickhouse0"] - - expected_result = node.query( - """ - SELECT count(*) - FROM s3('http://minio1:9001/root/data/generated/*', 'minio', 'minio123', 'CSV', 'a String, b UInt64') - WHERE b=42 - """ - ) - - # Algorithm does not give 100% guarantee, so add 10% on dispersion - dispersion = 0.1 - - # No cache - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_12345', 0) - assert s3_get_second == s3_get_first - - # With cache - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_12345', 1) - assert s3_get_second <= s3_get_first * dispersion - - # Different nodes order - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_34512', 1) - assert s3_get_second <= s3_get_first * dispersion - - # No last node - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_1234', 1) - assert s3_get_second <= s3_get_first * (0.2 + dispersion) - - # No first node - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_2345', 1) - assert s3_get_second <= s3_get_first * (0.2 + dispersion) - - # No first node, different nodes order - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_4523', 1) - assert s3_get_second <= s3_get_first * (0.2 + dispersion) - - # Add new node, different nodes order - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_4523', 'cluster_12345', 1) - assert s3_get_second <= s3_get_first * (0.2 + dispersion) - - # New node and old node, different nodes order - (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_1234', 'cluster_4523', 1) - assert s3_get_second <= s3_get_first * (0.4375 + dispersion)