From 4e255d4527c7cc817bc07316ca8a1bea730f7b78 Mon Sep 17 00:00:00 2001 From: Stanley Law Date: Wed, 16 Jul 2025 05:39:41 +0800 Subject: [PATCH 1/8] Adds Docker Compose setup for Trino Adds a Docker Compose configuration for setting up Trino with Iceberg, including support for Hive Metastore and REST catalog types. This allows for easier testing and development with Trino and Iceberg. closes #2219 --- dev/docker-compose-integration.yml | 15 ++++ dev/docker-compose-trino.yml | 79 +++++++++++++++++++++ dev/trino/catalog/warehouse_hive.properties | 13 ++++ dev/trino/catalog/warehouse_rest.properties | 15 ++++ 4 files changed, 122 insertions(+) create mode 100644 dev/docker-compose-trino.yml create mode 100644 dev/trino/catalog/warehouse_hive.properties create mode 100644 dev/trino/catalog/warehouse_rest.properties diff --git a/dev/docker-compose-integration.yml b/dev/docker-compose-integration.yml index c901b2ee23..07e9cc9984 100644 --- a/dev/docker-compose-integration.yml +++ b/dev/docker-compose-integration.yml @@ -54,6 +54,21 @@ services: - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO - CATALOG_S3_ENDPOINT=http://minio:9000 - CATALOG_JDBC_STRICT__MODE=true + + trino: + image: trinodb/trino:476 + container_name: pyiceberg-trino + networks: + iceberg_net: + ports: + - 8082:8080 + environment: + - CATALOG_MANAGEMENT=dynamic + depends_on: + - rest + - hive + volumes: + - ./trino/catalog:/etc/trino/catalog minio: image: minio/minio container_name: pyiceberg-minio diff --git a/dev/docker-compose-trino.yml b/dev/docker-compose-trino.yml new file mode 100644 index 0000000000..3970d667b5 --- /dev/null +++ b/dev/docker-compose-trino.yml @@ -0,0 +1,79 @@ +services: + rest: + image: apache/iceberg-rest-fixture + container_name: pyiceberg-rest + networks: + iceberg_net: + ports: + - 8181:8181 + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + - CATALOG_WAREHOUSE=s3://warehouse/ + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio:9000 + + trino: + image: trinodb/trino:476 + container_name: pyiceberg-trino + networks: + iceberg_net: + ports: + - 8082:8080 + environment: + - CATALOG_MANAGEMENT=dynamic + depends_on: + - rest + - hive + volumes: + - ./trino/catalog:/etc/trino/catalog + + minio: + image: minio/minio + container_name: pyiceberg-minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + networks: + iceberg_net: + aliases: + - warehouse.minio + ports: + - 9001:9001 + - 9000:9000 + command: ["server", "/data", "--console-address", ":9001"] + mc: + depends_on: + - minio + image: minio/mc + container_name: pyiceberg-mc + networks: + iceberg_net: + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/warehouse; + /usr/bin/mc policy set public minio/warehouse; + tail -f /dev/null + " + + hive: + build: hive/ + container_name: hive + hostname: hive + networks: + iceberg_net: + ports: + - 9083:9083 + environment: + SERVICE_NAME: "metastore" + SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/" + +networks: + iceberg_net: diff --git a/dev/trino/catalog/warehouse_hive.properties b/dev/trino/catalog/warehouse_hive.properties new file mode 100644 index 0000000000..26e67489a5 --- /dev/null +++ b/dev/trino/catalog/warehouse_hive.properties @@ -0,0 +1,13 @@ +connector.name=iceberg +iceberg.catalog.type=hive_metastore +iceberg.expire-snapshots.min-retention=0d +iceberg.remove-orphan-files.min-retention=0d +iceberg.register-table-procedure.enabled=true +hive.metastore.uri=thrift://hive:9083 +iceberg.hive-catalog-name=hive +fs.native-s3.enabled=true +s3.region=us-east-1 +s3.aws-access-key=admin +s3.aws-secret-key=password +s3.endpoint=http://minio:9000 +s3.path-style-access=false diff --git a/dev/trino/catalog/warehouse_rest.properties b/dev/trino/catalog/warehouse_rest.properties new file mode 100644 index 0000000000..dec0f3761e --- /dev/null +++ b/dev/trino/catalog/warehouse_rest.properties @@ -0,0 +1,15 @@ +connector.name=iceberg +iceberg.catalog.type=rest +iceberg.rest-catalog.uri=http://rest:8181 +iceberg.rest-catalog.warehouse=s3://warehouse/default +iceberg.rest-catalog.nested-namespace-enabled=false +iceberg.rest-catalog.case-insensitive-name-matching=true +iceberg.expire-snapshots.min-retention=0d +iceberg.remove-orphan-files.min-retention=0d +iceberg.register-table-procedure.enabled=true +fs.native-s3.enabled=true +s3.region=us-east-1 +s3.aws-access-key=admin +s3.aws-secret-key=password +s3.endpoint=http://minio:9000 +s3.path-style-access=false From 3589ef3124baab0b6f2aef0f0cbbb4ab8e44ba9b Mon Sep 17 00:00:00 2001 From: Stanley Law Date: Sat, 19 Jul 2025 22:05:55 +0800 Subject: [PATCH 2/8] Add Trino integration tests and update dependencies - Introduced new tests for registering tables and schema existence in Trino. - Added Trino connection fixtures to support integration testing. - Updated `pyproject.toml` to include the Trino dependency. - Enhanced the Makefile with a new command to run Trino integration tests. --- Makefile | 4 + dev/run-trino.sh | 33 ++++++ poetry.lock | 143 ++++++++++++++++++++--- pyproject.toml | 2 + tests/conftest.py | 44 +++++-- tests/integration/test_register_table.py | 41 +++++-- tests/integration/test_rest_catalog.py | 20 ++++ 7 files changed, 253 insertions(+), 34 deletions(-) create mode 100644 dev/run-trino.sh diff --git a/Makefile b/Makefile index 859d8dfa23..aca922e86d 100644 --- a/Makefile +++ b/Makefile @@ -103,6 +103,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache +test-integration-trino: + sh ./dev/run-trino.sh + $(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS) + test-s3: ## Run tests marked with @pytest.mark.s3 sh ./dev/run-minio.sh $(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS) diff --git a/dev/run-trino.sh b/dev/run-trino.sh new file mode 100644 index 0000000000..e212067293 --- /dev/null +++ b/dev/run-trino.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex + +if [ $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ]; then + echo "Trino service running" +else + docker compose -f dev/docker-compose-trino.yml kill + docker compose -f dev/docker-compose-trino.yml up -d + while [ -z $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ] + do + echo "Waiting for Trino" + sleep 1 + done +fi diff --git a/poetry.lock b/poetry.lock index 9360f9e79c..192fff1a9b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -706,7 +706,7 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] -markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\"", dev = "platform_python_implementation != \"PyPy\""} +markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\""} [package.dependencies] pycparser = "*" @@ -1526,7 +1526,7 @@ files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, ] -markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} +markers = {main = "extra == \"ray\" or extra == \"hf\""} [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] @@ -2001,10 +2001,9 @@ typing-extensions = {version = ">=4,<5", markers = "python_version < \"3.10\""} name = "greenlet" version = "3.2.3" description = "Lightweight in-process concurrent programming" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\")" +groups = ["main", "dev"] files = [ {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, @@ -2061,6 +2060,7 @@ files = [ {file = "greenlet-3.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:aaa7aae1e7f75eaa3ae400ad98f8644bb81e1dc6ba47ce8a93d3f17274e08322"}, {file = 
"greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, ] +markers = {main = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (extra == \"sql-postgres\" or extra == \"sql-sqlite\")", dev = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] docs = ["Sphinx", "furo"] @@ -2553,6 +2553,62 @@ files = [ {file = "llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4"}, ] +[[package]] +name = "lz4" +version = "4.4.4" +description = "LZ4 Bindings for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "lz4-4.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f170abb8416c4efca48e76cac2c86c3185efdf841aecbe5c190121c42828ced0"}, + {file = "lz4-4.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d33a5105cd96ebd32c3e78d7ece6123a9d2fb7c18b84dec61f27837d9e0c496c"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ebbc5b76b4f0018988825a7e9ce153be4f0d4eba34e6c1f2fcded120573e88"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc64d6dfa7a89397529b22638939e70d85eaedc1bd68e30a29c78bfb65d4f715"}, + {file = "lz4-4.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a355223a284f42a723c120ce68827de66d5cb872a38732b3d5abbf544fa2fe26"}, + {file = "lz4-4.4.4-cp310-cp310-win32.whl", hash = "sha256:b28228197775b7b5096898851d59ef43ccaf151136f81d9c436bc9ba560bc2ba"}, + {file = "lz4-4.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:45e7c954546de4f85d895aa735989d77f87dd649f503ce1c8a71a151b092ed36"}, + {file = "lz4-4.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:e3fc90f766401684740978cd781d73b9685bd81b5dbf7257542ef9de4612e4d2"}, + {file = "lz4-4.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ddfc7194cd206496c445e9e5b0c47f970ce982c725c87bd22de028884125b68f"}, + {file = "lz4-4.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:714f9298c86f8e7278f1c6af23e509044782fa8220eb0260f8f8f1632f820550"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8474c91de47733856c6686df3c4aca33753741da7e757979369c2c0d32918ba"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80dd27d7d680ea02c261c226acf1d41de2fd77af4fb2da62b278a9376e380de0"}, + {file = "lz4-4.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b7d6dddfd01b49aedb940fdcaf32f41dc58c926ba35f4e31866aeec2f32f4f4"}, + {file = "lz4-4.4.4-cp311-cp311-win32.whl", hash = "sha256:4134b9fd70ac41954c080b772816bb1afe0c8354ee993015a83430031d686a4c"}, + {file = "lz4-4.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:f5024d3ca2383470f7c4ef4d0ed8eabad0b22b23eeefde1c192cf1a38d5e9f78"}, + {file = "lz4-4.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:6ea715bb3357ea1665f77874cf8f55385ff112553db06f3742d3cdcec08633f7"}, + {file = "lz4-4.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:23ae267494fdd80f0d2a131beff890cf857f1b812ee72dbb96c3204aab725553"}, + {file = "lz4-4.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fff9f3a1ed63d45cb6514bfb8293005dc4141341ce3500abdfeb76124c0b9b2e"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea7f07329f85a8eda4d8cf937b87f27f0ac392c6400f18bea2c667c8b7f8ecc"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ccab8f7f7b82f9fa9fc3b0ba584d353bd5aa818d5821d77d5b9447faad2aaad"}, + {file = "lz4-4.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43e9d48b2daf80e486213128b0763deed35bbb7a59b66d1681e205e1702d735"}, + {file = "lz4-4.4.4-cp312-cp312-win32.whl", hash = "sha256:33e01e18e4561b0381b2c33d58e77ceee850a5067f0ece945064cbaac2176962"}, + {file = "lz4-4.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d21d1a2892a2dcc193163dd13eaadabb2c1b803807a5117d8f8588b22eaf9f12"}, + {file = "lz4-4.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:2f4f2965c98ab254feddf6b5072854a6935adab7bc81412ec4fe238f07b85f62"}, + {file = "lz4-4.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed6eb9f8deaf25ee4f6fad9625d0955183fdc90c52b6f79a76b7f209af1b6e54"}, + {file = "lz4-4.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:18ae4fe3bafb344dbd09f976d45cbf49c05c34416f2462828f9572c1fa6d5af7"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57fd20c5fc1a49d1bbd170836fccf9a338847e73664f8e313dce6ac91b8c1e02"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9cb387c33f014dae4db8cb4ba789c8d2a0a6d045ddff6be13f6c8d9def1d2a6"}, + {file = "lz4-4.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0be9f68240231e1e44118a4ebfecd8a5d4184f0bdf5c591c98dd6ade9720afd"}, + {file = "lz4-4.4.4-cp313-cp313-win32.whl", hash = "sha256:e9ec5d45ea43684f87c316542af061ef5febc6a6b322928f059ce1fb289c298a"}, + {file = "lz4-4.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:a760a175b46325b2bb33b1f2bbfb8aa21b48e1b9653e29c10b6834f9bb44ead4"}, + {file = "lz4-4.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f4c21648d81e0dda38b4720dccc9006ae33b0e9e7ffe88af6bf7d4ec124e2fba"}, + {file = "lz4-4.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd1add57b6fe1f96bed2d529de085e9378a3ac04b86f116d10506f85b68e97fc"}, + {file = "lz4-4.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:900912e8a7cf74b4a2bea18a3594ae0bf1138f99919c20017167b6e05f760aa4"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:017f8d269a739405a59d68a4d63d23a8df23e3bb2c70aa069b7563af08dfdffb"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac522788296a9a02a39f620970dea86c38e141e21e51238f1b5e9fa629f8e69"}, + {file = "lz4-4.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b56aa9eef830bf6443acd8c4e18b208a8993dc32e0d6ef4263ecfa6afb3f599"}, + {file = "lz4-4.4.4-cp39-cp39-win32.whl", hash = "sha256:585b42eb37ab16a278c3a917ec23b2beef175aa669f4120142b97aebf90ef775"}, + {file = "lz4-4.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:4ab1537bd3b3bfbafd3c8847e06827129794488304f21945fc2f5b669649d94f"}, + {file = "lz4-4.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:38730927ad51beb42ab8dbc5555270bfbe86167ba734265f88bbd799fced1004"}, + {file = "lz4-4.4.4.tar.gz", 
hash = "sha256:070fd0627ec4393011251a094e08ed9fdcc78cb4e7ab28f507638eee4e39abda"}, +] + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx_bootstrap_theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "markdown" version = "3.8.2" @@ -3680,7 +3736,7 @@ files = [ {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] -markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} +markers = {main = "extra == \"ray\" or extra == \"hf\""} [[package]] name = "paginate" @@ -4054,7 +4110,7 @@ description = "" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"gcsfs\") or extra == \"gcsfs\" or extra == \"ray\"" +markers = "extra == \"ray\" or extra == \"gcsfs\"" files = [ {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"}, {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"}, @@ -4310,7 +4366,7 @@ files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\"", dev = "platform_python_implementation != \"PyPy\""} +markers = {main = "(extra == \"zstandard\" or extra == \"adlfs\") and platform_python_implementation == \"PyPy\" or extra == \"adlfs\""} [[package]] name = "pydantic" @@ -4772,14 +4828,15 @@ cramjam = "*" name = "pytz" version = "2025.2" description = "World timezone definitions, modern and historical" -optional = true +optional = false python-versions = "*" -groups = ["main"] +groups = ["main", "dev"] markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\""} [[package]] name = "pywin32" @@ -4874,7 +4931,7 @@ files = [ {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] -markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} +markers = {main = "extra == \"ray\" or extra == \"hf\""} [[package]] name = "pyyaml-env-tag" @@ -5706,10 +5763,9 @@ test = ["pytest"] name = "sqlalchemy" version = "2.0.41" description = "Database Abstraction Library" -optional = true +optional = false python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"sql-postgres\" or extra == \"sql-sqlite\"" +groups = ["main", "dev"] files = [ {file = "SQLAlchemy-2.0.41-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:6854175807af57bdb6425e47adbce7d20a4d79bbfd6f6d6519cd10bb7109a7f8"}, {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05132c906066142103b83d9c250b60508af556982a385d96c4eaa9fb9720ac2b"}, @@ -5769,6 +5825,7 @@ files = [ {file = "sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576"}, {file = "sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9"}, ] +markers = {main = "extra == \"sql-postgres\" or extra == \"sql-sqlite\""} [package.dependencies] greenlet = {version = ">=1", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} @@ -5949,6 +6006,35 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "trino" +version = "0.335.0" +description = "Client for the Trino distributed SQL Engine" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "trino-0.335.0-py3-none-any.whl", hash = "sha256:5c96d89d610ab7712ede532d2eb41beb8627339571bceff6134370a8a496f685"}, + {file = "trino-0.335.0.tar.gz", hash = "sha256:b5e6c928953689be8446cbf7dbb87894cbfe54cf099a85cf461c4206c252cd67"}, +] + +[package.dependencies] +lz4 = "*" +python-dateutil = "*" +pytz = "*" +requests = ">=2.31.0" +sqlalchemy = {version = ">=1.3", optional = true, markers = "extra == \"sqlalchemy\""} +tzlocal = "*" +zstandard = "*" + +[package.extras] +all = ["requests_kerberos", "sqlalchemy (>=1.3)"] +external-authentication-token-cache = ["keyring"] +gssapi = ["krb5 (==0.5.1)", "requests_gssapi"] +kerberos = ["requests_kerberos"] +sqlalchemy = ["sqlalchemy (>=1.3)"] +tests = ["black", "boto3", "httpretty (<1.1)", "isort", "keyring", "krb5 (==0.5.1)", "pre-commit", "pytest", "pytest-runner", "requests_gssapi", "requests_kerberos", "sqlalchemy (>=1.3)", "testcontainers"] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -5981,14 +6067,33 @@ typing-extensions = ">=4.12.0" name = "tzdata" version = "2025.2" description = "Provider of IANA time zone data" -optional = true +optional = false python-versions = ">=2" -groups = ["main"] +groups = ["main", "dev"] markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] +markers = {main = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"", dev = "platform_system == \"Windows\""} + +[[package]] +name = "tzlocal" +version = "5.3.1" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d"}, + {file = "tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] [[package]] name = 
"urllib3" @@ -6358,8 +6463,7 @@ version = "0.23.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"zstandard\"" +groups = ["main", "dev"] files = [ {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, @@ -6459,6 +6563,7 @@ files = [ {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] +markers = {main = "extra == \"zstandard\""} [package.dependencies] cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} @@ -6493,4 +6598,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.1" python-versions = "^3.9.2, !=3.9.7" -content-hash = "5cef3b70e2b74aaa1ba9d9b718c6374dfc137b87d9b4eeaab572604cbf616bd5" +content-hash = "16572e7c065a3d0cd52d9a2b5e069cddcf7f16c77d4d9e77094873d35402b131" diff --git a/pyproject.toml b/pyproject.toml index 6f1b7c779b..0c06a8f8e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,6 +104,7 @@ deptry = ">=0.14,<0.24" docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 mypy-boto3-glue = ">=1.28.18" mypy-boto3-dynamodb = ">=1.28.18" +trino = {extras = ["sqlalchemy"], version = "^0.335.0"} [tool.poetry.group.docs.dependencies] # for mkdocs @@ -326,6 +327,7 @@ markers = [ "s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)", "adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)", "integration: marks integration tests against Apache Spark", + "integration_trino: marks integration tests against Trino", "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)", "benchmark: collection of tests to validate read/write performance before and after a change" ] diff --git a/tests/conftest.py b/tests/conftest.py index 584b6c633a..d2662ec05d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,18 +35,12 @@ from pathlib import Path from random import choice, randint from tempfile import TemporaryDirectory -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Generator, - List, - Optional, -) +from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional import boto3 import pytest from moto import mock_aws +from sqlalchemy import Connection from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.catalog.noop import NoopCatalog @@ -141,6 +135,18 @@ def pytest_addoption(parser: pytest.Parser) -> None: "--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs" ) parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs") + parser.addoption( + "--trino.rest.endpoint", + action="store", + default="trino://test@localhost:8082/warehouse_rest", + help="The Trino REST endpoint URL for tests marked as integration_trino", + ) + parser.addoption( + "--trino.hive.endpoint", + action="store", + 
default="trino://test@localhost:8082/warehouse_hive", + help="The Trino Hive endpoint URL for tests marked as integration_trino", + ) @pytest.fixture(scope="session") @@ -2436,6 +2442,28 @@ def bound_reference_uuid() -> BoundReference[str]: return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None)) +@pytest.fixture(scope="session") +def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]: + from sqlalchemy import create_engine + + trino_endpoint = request.config.getoption("--trino.hive.endpoint") + engine = create_engine(trino_endpoint) + connection = engine.connect() + yield connection + connection.close() + + +@pytest.fixture(scope="session") +def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]: + from sqlalchemy import create_engine + + trino_endpoint = request.config.getoption("--trino.rest.endpoint") + engine = create_engine(trino_endpoint) + connection = engine.connect() + yield connection + connection.close() + + @pytest.fixture(scope="session") def session_catalog() -> Catalog: return load_catalog( diff --git a/tests/integration/test_register_table.py b/tests/integration/test_register_table.py index c0db2014af..d376e9e75f 100644 --- a/tests/integration/test_register_table.py +++ b/tests/integration/test_register_table.py @@ -15,19 +15,14 @@ # specific language governing permissions and limitations # under the License. import pytest +from sqlalchemy import Connection, inspect from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError, TableAlreadyExistsError from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table -from pyiceberg.types import ( - BooleanType, - DateType, - IntegerType, - NestedField, - StringType, -) +from pyiceberg.types import BooleanType, DateType, IntegerType, NestedField, StringType TABLE_SCHEMA = Schema( NestedField(field_id=1, name="foo", field_type=BooleanType(), required=False), @@ -86,3 +81,35 @@ def test_register_table_existing( # Assert that registering the table again raises TableAlreadyExistsError with pytest.raises(TableAlreadyExistsError): catalog.register_table(("default", "register_table_existing"), metadata_location=tbl.metadata_location) + + +@pytest.mark.integration_trino +@pytest.mark.integration +@pytest.mark.parametrize( + "catalog, trino_conn", + [ + (pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("trino_hive_conn")), + (pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("trino_rest_conn")), + ], +) +def test_register_table_existing_in_trino( + catalog: Catalog, + trino_conn: Connection, +) -> None: + """Test the registration of a table in the catalog that already exists in Trino. + This test verifies that a table can be registered in the catalog with an existing + metadata location and properly reflected in Trino. 
+    """
+    namespace = "default"
+    table_name = "register_table_trino"
+    identifier = f"{namespace}.{table_name}"
+    location = f"s3a://warehouse/{namespace}/{table_name}"
+    tbl = _create_table(catalog, identifier, 2, location)
+    assert catalog.table_exists(identifier=identifier)
+    assert table_name in inspect(trino_conn).get_table_names(schema=namespace)
+    catalog.drop_table(identifier=identifier)
+    assert not catalog.table_exists(identifier=identifier)
+    assert table_name not in inspect(trino_conn).get_table_names(schema=namespace)
+    catalog.register_table((namespace, table_name), metadata_location=tbl.metadata_location)
+    assert catalog.table_exists(identifier=identifier)
+    assert table_name in inspect(trino_conn).get_table_names(schema=namespace)

diff --git a/tests/integration/test_rest_catalog.py b/tests/integration/test_rest_catalog.py
index 24a8d9f6ef..607259bf30 100644
--- a/tests/integration/test_rest_catalog.py
+++ b/tests/integration/test_rest_catalog.py
@@ -17,6 +17,7 @@
 # pylint:disable=redefined-outer-name
 
 import pytest
+from sqlalchemy import Connection, inspect
 
 from pyiceberg.catalog.rest import RestCatalog
 
@@ -61,3 +62,22 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None:
     catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
 
     assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
+
+
+@pytest.mark.integration
+@pytest.mark.integration_trino
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")])
+def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalog) -> None:
+    """Verifies that an Iceberg namespace correctly appears as a schema in Trino.
+
+    This test ensures the synchronization between Iceberg's namespace concept and
+    Trino's schema concept, confirming that after creating a namespace in the Iceberg
+    catalog, it becomes visible as a schema in the Trino environment.
+    """
+
+    if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+        catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
+    catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
+
+    assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
+    assert TEST_NAMESPACE_IDENTIFIER.lower() in inspect(trino_rest_conn).get_schema_names()

From 5d43addb3eab390b0bfadfd85d52d9dd134b290e Mon Sep 17 00:00:00 2001
From: Stanley Law
Date: Tue, 22 Jul 2025 02:16:09 +0800
Subject: [PATCH 3/8] Enable multipart namespaces

---
 dev/trino/catalog/warehouse_rest.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/trino/catalog/warehouse_rest.properties b/dev/trino/catalog/warehouse_rest.properties
index dec0f3761e..7a3515d37e 100644
--- a/dev/trino/catalog/warehouse_rest.properties
+++ b/dev/trino/catalog/warehouse_rest.properties
@@ -2,7 +2,7 @@ connector.name=iceberg
 iceberg.catalog.type=rest
 iceberg.rest-catalog.uri=http://rest:8181
 iceberg.rest-catalog.warehouse=s3://warehouse/default
-iceberg.rest-catalog.nested-namespace-enabled=false
+iceberg.rest-catalog.nested-namespace-enabled=true
 iceberg.rest-catalog.case-insensitive-name-matching=true
 iceberg.expire-snapshots.min-retention=0d
 iceberg.remove-orphan-files.min-retention=0d

From a0e1a110127717675d4d03ebb19a05f2a6009750 Mon Sep 17 00:00:00 2001
From: Stanley Law
Date: Wed, 23 Jul 2025 02:57:56 +0800
Subject: [PATCH 4/8] Add UUID partitions test with Trino

Add Trino as an alternative engine for testing UUID partitions, since
the UUID bucket transform is not yet supported by Java Iceberg 1.9.2 in
Spark.
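For reference, the round-trip check the new test performs reduces to the
sketch below. The connection URL is the conftest default, and the table
and column names are the ones the test generates for the identity
transform; treat this as an illustrative sketch rather than part of the
diff itself.

    from sqlalchemy import create_engine, text

    # Connect to the Trino coordinator started by dev/docker-compose-trino.yml.
    engine = create_engine("trino://test@localhost:8082/warehouse_rest")
    with engine.connect() as conn:
        # Read the UUID-partitioned table back through Trino.
        rows = conn.execute(text("SELECT uuid FROM default.test_uuid_partitioning_identity")).fetchall()
    # These values are then compared against what a PyIceberg table scan returns.
    trino_values = sorted(str(r[0]) for r in rows)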
--- tests/integration/test_writes/test_writes.py | 75 ++++++++++++++++---- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index e63883c1db..e7d80ccba2 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -38,6 +38,8 @@ from pydantic_core import ValidationError from pyspark.sql import SparkSession from pytest_mock.plugin import MockerFixture +from sqlalchemy import Connection +from sqlalchemy.sql.expression import text from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.catalog.hive import HiveCatalog @@ -50,18 +52,8 @@ from pyiceberg.table import TableProperties from pyiceberg.table.refs import MAIN_BRANCH from pyiceberg.table.sorting import SortDirection, SortField, SortOrder -from pyiceberg.transforms import DayTransform, HourTransform, IdentityTransform, Transform -from pyiceberg.types import ( - DateType, - DecimalType, - DoubleType, - IntegerType, - ListType, - LongType, - NestedField, - StringType, - UUIDType, -) +from pyiceberg.transforms import BucketTransform, DayTransform, HourTransform, IdentityTransform, Transform +from pyiceberg.types import DateType, DecimalType, DoubleType, IntegerType, ListType, LongType, NestedField, StringType, UUIDType from utils import _create_table @@ -1865,6 +1857,7 @@ def test_read_write_decimals(session_catalog: Catalog) -> None: assert tbl.scan().to_arrow() == arrow_table +@pytest.mark.skip("UUID BucketTransform is not supported in Spark Iceberg 1.9.2 yet") @pytest.mark.integration @pytest.mark.parametrize( "transform", @@ -1918,6 +1911,64 @@ def test_uuid_partitioning(session_catalog: Catalog, spark: SparkSession, transf assert lhs == rhs +@pytest.mark.integration_trino +@pytest.mark.integration +@pytest.mark.parametrize( + "transform", + [IdentityTransform(), BucketTransform(32)], +) +@pytest.mark.parametrize( + "catalog, trino_conn", + [ + (pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("trino_hive_conn")), + (pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("trino_rest_conn")), + ], +) +def test_uuid_partitioning_with_trino(catalog: Catalog, trino_conn: Connection, transform: Transform) -> None: # type: ignore + identifier = f"default.test_uuid_partitioning_{str(transform).replace('[32]', '')}" + + schema = Schema(NestedField(field_id=1, name="uuid", field_type=UUIDType(), required=True)) + + try: + catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + partition_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=transform, name=f"uuid_{str(transform).replace('[32]', '')}") + ) + + import pyarrow as pa + + arr_table = pa.Table.from_pydict( + { + "uuid": [ + uuid.UUID("00000000-0000-0000-0000-000000000000").bytes, + uuid.UUID("11111111-1111-1111-1111-111111111111").bytes, + ], + }, + schema=pa.schema( + [ + # Uuid not yet supported, so we have to stick with `binary(16)` + # https://github.com/apache/arrow/issues/46468 + pa.field("uuid", pa.binary(16), nullable=False), + ] + ), + ) + + tbl = catalog.create_table( + identifier=identifier, + schema=schema, + partition_spec=partition_spec, + ) + + tbl.append(arr_table) + rows = trino_conn.execute(text(f"SELECT * FROM {identifier}")).fetchall() + lhs = sorted([r[0] for r in rows]) + rhs = sorted([u.as_py() for u in tbl.scan().to_arrow()["uuid"].combine_chunks()]) + assert lhs == rhs + + @pytest.mark.integration def 
test_avro_compression_codecs(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
     identifier = "default.test_avro_compression_codecs"

From dc1532dad512052a3e127d50619bfce700d7826f Mon Sep 17 00:00:00 2001
From: Stanley Law
Date: Wed, 23 Jul 2025 03:05:40 +0800
Subject: [PATCH 5/8] fix: correct namespace existence condition in test

---
 tests/integration/test_rest_catalog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_rest_catalog.py b/tests/integration/test_rest_catalog.py
index 607259bf30..092428e641 100644
--- a/tests/integration/test_rest_catalog.py
+++ b/tests/integration/test_rest_catalog.py
@@ -75,7 +75,7 @@ def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalo
     catalog, it becomes visible as a schema in the Trino environment.
     """
 
-    if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+    if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
         catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
     catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)

From a2f16a9f8e37f9a25d8904b48812f4005c6639da Mon Sep 17 00:00:00 2001
From: Stanley Law
Date: Wed, 23 Jul 2025 09:23:43 +0000
Subject: [PATCH 6/8] Add license headers and update poetry.lock

---
 Makefile                                    |   4 +-
 dev/docker-compose-trino.yml                |  16 ++
 dev/trino/catalog/warehouse_hive.properties |  16 ++
 dev/trino/catalog/warehouse_rest.properties |  16 ++
 poetry.lock                                 | 153 +++++++++++---------
 5 files changed, 134 insertions(+), 71 deletions(-)

diff --git a/Makefile b/Makefile
index aca922e86d..2ab9ce6344 100644
--- a/Makefile
+++ b/Makefile
@@ -103,7 +103,7 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch
 	docker compose -f dev/docker-compose-integration.yml rm -f
 	docker compose -f dev/docker-compose-integration.yml build --no-cache
 
-test-integration-trino:
+test-integration-trino: ## Run tests marked with @pytest.mark.integration_trino
 	sh ./dev/run-trino.sh
 	$(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS)
 
@@ -120,7 +120,7 @@ test-gcs: ## Run tests marked with @pytest.mark.gcs
 	$(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS)
 
 test-coverage: COVERAGE=1
-test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
+test-coverage: test test-integration test-integration-trino test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
 
 coverage-report: ## Combine and report coverage
 	poetry run coverage combine

diff --git a/dev/docker-compose-trino.yml b/dev/docker-compose-trino.yml
index 3970d667b5..e00e51b1b3 100644
--- a/dev/docker-compose-trino.yml
+++ b/dev/docker-compose-trino.yml
@@ -1,3 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
services: rest: image: apache/iceberg-rest-fixture diff --git a/dev/trino/catalog/warehouse_hive.properties b/dev/trino/catalog/warehouse_hive.properties index 26e67489a5..54b69d966b 100644 --- a/dev/trino/catalog/warehouse_hive.properties +++ b/dev/trino/catalog/warehouse_hive.properties @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. connector.name=iceberg iceberg.catalog.type=hive_metastore iceberg.expire-snapshots.min-retention=0d diff --git a/dev/trino/catalog/warehouse_rest.properties b/dev/trino/catalog/warehouse_rest.properties index 7a3515d37e..c80d9cff04 100644 --- a/dev/trino/catalog/warehouse_rest.properties +++ b/dev/trino/catalog/warehouse_rest.properties @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
connector.name=iceberg iceberg.catalog.type=rest iceberg.rest-catalog.uri=http://rest:8181 diff --git a/poetry.lock b/poetry.lock index 192fff1a9b..3368da71f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -59,7 +59,7 @@ description = "Happy Eyeballs for asyncio" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -72,7 +72,7 @@ description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:906d5075b5ba0dd1c66fcaaf60eb09926a9fef3ca92d912d2a0bbdbecf8b1248"}, {file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c875bf6fc2fd1a572aba0e02ef4e7a63694778c5646cdbda346ee24e630d30fb"}, @@ -202,7 +202,7 @@ description = "aiosignal: a list of registered asynchronous callbacks" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")" +markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")" files = [ {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, @@ -269,7 +269,7 @@ description = "Timeout context manager for asyncio programs" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version <= \"3.10\"" +markers = "(extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version <= \"3.10\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -286,7 +286,7 @@ files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] -markers = {main = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")"} +markers = {main = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or 
extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")"} [package.extras] benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] @@ -375,15 +375,15 @@ requests = ">=2.20.0" [[package]] name = "azure-identity" -version = "1.23.0" +version = "1.23.1" description = "Microsoft Azure Identity Library for Python" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "azure_identity-1.23.0-py3-none-any.whl", hash = "sha256:dbbeb64b8e5eaa81c44c565f264b519ff2de7ff0e02271c49f3cb492762a50b0"}, - {file = "azure_identity-1.23.0.tar.gz", hash = "sha256:d9cdcad39adb49d4bb2953a217f62aec1f65bbb3c63c9076da2be2a47e53dde4"}, + {file = "azure_identity-1.23.1-py3-none-any.whl", hash = "sha256:7eed28baa0097a47e3fb53bd35a63b769e6b085bb3cb616dfce2b67f28a004a1"}, + {file = "azure_identity-1.23.1.tar.gz", hash = "sha256:226c1ef982a9f8d5dcf6e0f9ed35eaef2a4d971e7dd86317e9b9d52e70a035e4"}, ] [package.dependencies] @@ -395,15 +395,15 @@ typing-extensions = ">=4.0.0" [[package]] name = "azure-storage-blob" -version = "12.25.1" +version = "12.26.0" description = "Microsoft Azure Blob Storage Client Library for Python" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "azure_storage_blob-12.25.1-py3-none-any.whl", hash = "sha256:1f337aab12e918ec3f1b638baada97550673911c4ceed892acc8e4e891b74167"}, - {file = "azure_storage_blob-12.25.1.tar.gz", hash = "sha256:4f294ddc9bc47909ac66b8934bd26b50d2000278b10ad82cc109764fdc6e0e3b"}, + {file = "azure_storage_blob-12.26.0-py3-none-any.whl", hash = "sha256:8c5631b8b22b4f53ec5fff2f3bededf34cfef111e2af613ad42c9e6de00a77fe"}, + {file = "azure_storage_blob-12.26.0.tar.gz", hash = "sha256:5dd7d7824224f7de00bfeb032753601c982655173061e242f13be6e26d78d71f"}, ] [package.dependencies] @@ -545,7 +545,7 @@ files = [ {file = "boto3-1.38.46-py3-none-any.whl", hash = "sha256:9c8e88a32a6465e5905308708cff5b17547117f06982908bdfdb0108b4a65079"}, {file = "boto3-1.38.46.tar.gz", hash = "sha256:d1ca2b53138afd0341e1962bd52be6071ab7a63c5b4f89228c5ef8942c40c852"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.38.46,<1.39.0" @@ -566,7 +566,7 @@ files = [ {file = "botocore-1.38.46-py3-none-any.whl", hash = "sha256:89ca782ffbf2e8769ca9c89234cfa5ca577f1987d07d913ee3c68c4776b1eb5b"}, {file = "botocore-1.38.46.tar.gz", hash = "sha256:8798e5a418c27cf93195b077153644aea44cb171fcd56edc1ecebaa1e49e226e"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [package.dependencies] jmespath = ">=0.7.1,<2.0.0" @@ -859,6 +859,7 @@ description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" groups = ["main", "dev", "docs"] +markers = "python_version < \"3.10\"" files = [ {file = 
"click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -867,6 +868,22 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click" +version = "8.2.1" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.10" +groups = ["main", "dev", "docs"] +markers = "python_version >= \"3.10\"" +files = [ + {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, + {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "cloudpickle" version = "3.1.1" @@ -1228,19 +1245,19 @@ files = [ [[package]] name = "daft" -version = "0.5.11" +version = "0.5.12" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"daft\"" files = [ - {file = "daft-0.5.11-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:661978a311c9a75c071c1be5d6576e47b1d5a698d4472851d10f82f451970780"}, - {file = "daft-0.5.11-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa21103bdeffc7a4810d79ed21822fa279409e53f6eb4d9dd711fb44aa162c30"}, - {file = "daft-0.5.11-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:aa01effca4e6d23df2e767bee126c853a6e6e2dc5c27357d081b1973423e4f87"}, - {file = "daft-0.5.11-cp39-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:640cbd53bfba15184d942206d1a6a0b3baf00c9c98c689dbe432e3728b125b94"}, - {file = "daft-0.5.11-cp39-abi3-win_amd64.whl", hash = "sha256:771ebd208247600391459e5b45b8895d64071550aef407f5185cf9a81ea70d67"}, - {file = "daft-0.5.11.tar.gz", hash = "sha256:b2495828b27e1658505b1c088eeda7eaa02aae51715528c738fe4b2e367eed28"}, + {file = "daft-0.5.12-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dd1c745612ca8e346a9d150ea65866e0b5ff896e5d74baa7e1f9a6dd3f55bab0"}, + {file = "daft-0.5.12-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ca0a1ffb9fdd4ecabab5bd433856a6bca85d92df2a3aae7fe3b6c3d6327fd24b"}, + {file = "daft-0.5.12-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:977a16b0ced05980ecda27d73572f874de466d5eb9e32509ad84b9e4b508dbe5"}, + {file = "daft-0.5.12-cp39-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:b998a4f7ef2be7956aabbccc72bd66bfb343528a8aa856a82045bd3692aa9ec5"}, + {file = "daft-0.5.12-cp39-abi3-win_amd64.whl", hash = "sha256:a656897d6ad76134148b15185b4b94fbad769eb977a60d83fa27bcacbd8549e5"}, + {file = "daft-0.5.12.tar.gz", hash = "sha256:5eba32a6b25a21cb53357394e22e3d2cf959a78897a88716bfc5ed375abd113e"}, ] [package.dependencies] @@ -1332,14 +1349,14 @@ tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} [[package]] name = "distlib" -version = "0.3.9" +version = "0.4.0" description = "Distribution utilities" optional = false python-versions = "*" groups = ["dev"] files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + 
{file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] [[package]] @@ -1526,7 +1543,7 @@ files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, ] -markers = {main = "extra == \"ray\" or extra == \"hf\""} +markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] @@ -1581,7 +1598,7 @@ description = "A list-like structure which implements collections.abc.MutableSeq optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\")" +markers = "python_version == \"3.9\" and extra == \"ray\" or (extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\") and python_version < \"3.10\" or python_version >= \"3.10\" and (extra == \"ray\" or extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\")" files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1691,14 +1708,14 @@ files = [ [[package]] name = "fsspec" -version = "2025.5.1" +version = "2025.7.0" description = "File-system specification" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462"}, - {file = "fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475"}, + {file = "fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21"}, + {file = "fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58"}, ] [package.extras] @@ -1706,7 +1723,7 @@ abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] -dev = ["pre-commit", "ruff"] +dev = ["pre-commit", "ruff (>=0.5)"] doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] @@ -1726,26 +1743,26 @@ smb = ["smbprotocol"] ssh = ["paramiko"] test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] -test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", 
"libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""] tqdm = ["tqdm"] [[package]] name = "gcsfs" -version = "2025.5.1" +version = "2025.7.0" description = "Convenient Filesystem interface over GCS" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"gcsfs\"" files = [ - {file = "gcsfs-2025.5.1-py2.py3-none-any.whl", hash = "sha256:48712471ff71ac83d3e2152ba4dc232874698466e344d5e700feba06b0a0de7b"}, - {file = "gcsfs-2025.5.1.tar.gz", hash = "sha256:ba945530cf4857cd9d599ccb3ae729c65c39088880b11c4df1fecac30df5f3e3"}, + {file = "gcsfs-2025.7.0-py2.py3-none-any.whl", hash = "sha256:653503331d58cb02bb34e725d4595d166e93f7f2f3ff88e4c66ef535ae66eae5"}, + {file = "gcsfs-2025.7.0.tar.gz", hash = "sha256:ad3ff66cf189ae8fc375ac8a2af409003dbca02357621cb94a66e457e02ba420"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2025.5.1" +fsspec = "2025.7.0" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -2330,7 +2347,7 @@ files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\" or extra == \"s3fs\""} [[package]] name = "joserfc" @@ -2395,14 +2412,14 @@ files = [ [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, - {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, + {file = "jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716"}, + {file = "jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f"}, ] markers = {main = "extra == \"ray\""} @@ -2414,7 +2431,7 @@ rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors 
(>=24.6.0)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] [[package]] name = "jsonschema-path" @@ -3125,24 +3142,24 @@ tests = ["pytest (>=4.6)"] [[package]] name = "msal" -version = "1.32.3" +version = "1.33.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = true python-versions = ">=3.7" groups = ["main"] markers = "extra == \"adlfs\"" files = [ - {file = "msal-1.32.3-py3-none-any.whl", hash = "sha256:b2798db57760b1961b142f027ffb7c8169536bf77316e99a0df5c4aaebb11569"}, - {file = "msal-1.32.3.tar.gz", hash = "sha256:5eea038689c78a5a70ca8ecbe1245458b55a857bd096efb6989c69ba15985d35"}, + {file = "msal-1.33.0-py3-none-any.whl", hash = "sha256:c0cd41cecf8eaed733ee7e3be9e040291eba53b0f262d3ae9c58f38b04244273"}, + {file = "msal-1.33.0.tar.gz", hash = "sha256:836ad80faa3e25a7d71015c990ce61f704a87328b1e73bcbb0623a18cbf17510"}, ] [package.dependencies] -cryptography = ">=2.5,<47" +cryptography = ">=2.5,<48" PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.14,<0.18) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.18) ; python_version >= \"3.8\" and platform_system == \"Darwin\""] +broker = ["pymsalruntime (>=0.14,<0.19) ; python_version >= \"3.6\" and platform_system == \"Windows\"", "pymsalruntime (>=0.17,<0.19) ; python_version >= \"3.8\" and platform_system == \"Darwin\"", "pymsalruntime (>=0.18,<0.19) ; python_version >= \"3.8\" and platform_system == \"Linux\""] [[package]] name = "msal-extensions" @@ -3240,7 +3257,7 @@ description = "multidict implementation" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2be5b7b35271f7fff1397204ba6708365e3d773579fe2a30625e16c4b4ce817"}, {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12f4581d2930840295c461764b9a65732ec01250b46c6b2c510d7ee68872b140"}, @@ -3557,7 +3574,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version < \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version < \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -3613,7 +3630,7 @@ description = "Fundamental package for array computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.10\" and (extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\")" +markers = "python_version >= \"3.10\" and (extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\")" files = [ {file = 
"numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -3736,7 +3753,7 @@ files = [ {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] -markers = {main = "extra == \"ray\" or extra == \"hf\""} +markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} [[package]] name = "paginate" @@ -3761,7 +3778,7 @@ description = "Powerful data structures for data analysis, time series, and stat optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" +markers = "extra == \"pandas\" or extra == \"ray\" or extra == \"bodo\"" files = [ {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, @@ -3810,8 +3827,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3982,7 +3999,7 @@ description = "Accelerated property cache" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -4110,7 +4127,7 @@ description = "" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"ray\" or extra == \"gcsfs\"" +markers = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"gcsfs\") or extra == \"gcsfs\" or extra == \"ray\"" files = [ {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"}, {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"}, @@ -4277,7 +4294,7 @@ description = "Python library for Apache Arrow" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\" or extra == \"duckdb\" or extra == \"pandas\" or extra == \"pyarrow\" or extra == \"ray\"" +markers = "extra == \"pyarrow\" or extra == \"pandas\" or extra == \"duckdb\" or extra == \"ray\" or extra == \"bodo\" or extra == \"daft\" or extra == \"datafusion\"" files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = 
"pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -4831,7 +4848,6 @@ description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main", "dev"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -4931,7 +4947,7 @@ files = [ {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] -markers = {main = "extra == \"ray\" or extra == \"hf\""} +markers = {main = "python_version == \"3.9\" and (extra == \"ray\" or extra == \"hf\") or extra == \"hf\" or extra == \"ray\""} [[package]] name = "pyyaml-env-tag" @@ -5451,21 +5467,21 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3fs" -version = "2025.5.1" +version = "2025.7.0" description = "Convenient Filesystem interface over S3" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"s3fs\"" files = [ - {file = "s3fs-2025.5.1-py3-none-any.whl", hash = "sha256:7475e7c40a3a112f17144907ffae50782ab6c03487fe0b45a9c3942bb7a5c606"}, - {file = "s3fs-2025.5.1.tar.gz", hash = "sha256:84beffa231b8ed94f8d667e93387b38351e1c4447aedea5c2c19dd88b7fcb658"}, + {file = "s3fs-2025.7.0-py3-none-any.whl", hash = "sha256:b6b2d3f84b6aa1c2ba5e62e39dd9410cf54f10a2cce1ea6db1ba0d1a6bcce685"}, + {file = "s3fs-2025.7.0.tar.gz", hash = "sha256:5e7f9ec0cad7745155e3eb86fae15b1481fa29946bf5b3a4ce3a60701ce6022d"}, ] [package.dependencies] aiobotocore = ">=2.5.4,<3.0.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2025.5.1" +fsspec = "2025.7.0" [package.extras] awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] @@ -5473,16 +5489,16 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] [[package]] name = "s3transfer" -version = "0.13.0" +version = "0.13.1" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:0148ef34d6dd964d0d8cf4311b2b21c474693e57c2e069ec708ce043d2b527be"}, - {file = "s3transfer-0.13.0.tar.gz", hash = "sha256:f5e6db74eb7776a37208001113ea7aa97695368242b364d73e91c981ac522177"}, + {file = "s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"}, + {file = "s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"}, ] -markers = {main = "extra == \"dynamodb\" or extra == \"glue\" or extra == \"rest-sigv4\""} +markers = {main = "extra == \"glue\" or extra == \"dynamodb\" or extra == \"rest-sigv4\""} [package.dependencies] botocore = ">=1.37.4,<2.0a.0" @@ -5990,7 +6006,7 @@ description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"daft\" or extra == \"hf\"" +markers = "extra == \"hf\" or extra == \"daft\"" files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = 
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -6070,7 +6086,6 @@ description = "Provider of IANA time zone data" optional = false python-versions = ">=2" groups = ["main", "dev"] -markers = "extra == \"bodo\" or extra == \"pandas\" or extra == \"ray\"" files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -6134,14 +6149,14 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.32.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, - {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, + {file = "virtualenv-20.32.0-py3-none-any.whl", hash = "sha256:2c310aecb62e5aa1b06103ed7c2977b81e042695de2697d01017ff0f1034af56"}, + {file = "virtualenv-20.32.0.tar.gz", hash = "sha256:886bf75cadfdc964674e6e33eb74d787dff31ca314ceace03ca5810620f4ecf0"}, ] [package.dependencies] @@ -6323,7 +6338,7 @@ description = "Yet another URL library" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"adlfs\" or extra == \"gcsfs\" or extra == \"s3fs\"" +markers = "extra == \"s3fs\" or extra == \"adlfs\" or extra == \"gcsfs\"" files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, From cee8671045073ed5d101b976b517d744546966a8 Mon Sep 17 00:00:00 2001 From: Stanley Law <19900516+dingo4dev@users.noreply.github.com> Date: Tue, 29 Jul 2025 06:26:01 +0000 Subject: [PATCH 7/8] add create namespace if not exists --- tests/integration/test_register_table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_register_table.py b/tests/integration/test_register_table.py index d376e9e75f..2d2c07d107 100644 --- a/tests/integration/test_register_table.py +++ b/tests/integration/test_register_table.py @@ -104,6 +104,7 @@ def test_register_table_existing_in_trino( table_name = "register_table_trino" identifier = f"{namespace}.{table_name}" location = f"s3a://warehouse/{namespace}/{table_name}" + catalog.create_namespace_if_not_exists(namespace) tbl = _create_table(catalog, identifier, 2, location) assert catalog.table_exists(identifier=identifier) assert table_name in inspect(trino_conn).get_table_names(schema=namespace) From df48ce0ff93d1ec74e3605c71e8013c89094c022 Mon Sep 17 00:00:00 2001 From: Stanley Law <19900516+dingo4dev@users.noreply.github.com> Date: Tue, 29 Jul 2025 08:44:54 +0000 Subject: [PATCH 8/8] add custom trino config --- dev/docker-compose-trino.yml | 1 + dev/trino/config.properties | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 dev/trino/config.properties diff --git a/dev/docker-compose-trino.yml b/dev/docker-compose-trino.yml index e00e51b1b3..24a3ecf974 100644 --- a/dev/docker-compose-trino.yml +++ b/dev/docker-compose-trino.yml @@ -44,6 +44,7 @@ services: - hive volumes: - ./trino/catalog:/etc/trino/catalog + - ./trino/config.properties:/etc/trino/config.properties 
minio: image: minio/minio diff --git a/dev/trino/config.properties b/dev/trino/config.properties new file mode 100644 index 0000000000..733e937032 --- /dev/null +++ b/dev/trino/config.properties @@ -0,0 +1,8 @@ +#single node install config +coordinator=true +node-scheduler.include-coordinator=true +http-server.http.port=8080 +discovery.uri=http://localhost:8080 +http-server.process-forwarded=true +http-server.https.enabled=false +catalog.management=${ENV:CATALOG_MANAGEMENT} \ No newline at end of file
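
Not part of the patch series itself, but a quick way to smoke-test the stack it assembles: with `docker compose -f dev/docker-compose-trino.yml up` running, the coordinator can be queried through the `trino` Python client that the series adds as a test dependency. The sketch below is illustrative only and rests on a few assumptions: host port 8082 (the compose mapping for the container's 8080), an arbitrary user name (no authentication is configured in this dev setup), and the catalog name `warehouse_rest`, which Trino derives from the mounted properties file name; whether the mounted catalogs are registered at startup depends on the `catalog.management` setting above.

    from trino.dbapi import connect

    # Connect to the coordinator started by dev/docker-compose-trino.yml.
    # Host port 8082 maps to http-server.http.port=8080 inside the container.
    conn = connect(host="localhost", port=8082, user="admin", catalog="warehouse_rest")
    cur = conn.cursor()

    # List the catalogs visible to the server.
    cur.execute("SHOW CATALOGS")
    print(cur.fetchall())

    # Namespaces created through PyIceberg (e.g. by create_namespace_if_not_exists
    # in PATCH 7/8) appear as schemas of the Iceberg catalog here.
    cur.execute("SHOW SCHEMAS FROM warehouse_rest")
    print(cur.fetchall())

If the connection is refused right after `up`, give the coordinator a few seconds to finish startup before retrying.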