diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 24a9ef51ea2ef..b99ab2889980d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -41,14 +41,14 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{"PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11"}' + default: '{"PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined in this file, e.g., build. See precondition job below. required: false type: string - default: '' + default: '' secrets: codecov_token: description: The upload token of codecov. diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml index ccd47826ff099..7840a31246c8a 100644 --- a/.github/workflows/build_infra_images_cache.yml +++ b/.github/workflows/build_infra_images_cache.yml @@ -41,6 +41,7 @@ on: - 'dev/spark-test-image/python-313/Dockerfile' - 'dev/spark-test-image/python-313-nogil/Dockerfile' - 'dev/spark-test-image/numpy-213/Dockerfile' + - 'dev/spark-test-image/python-314/Dockerfile' - '.github/workflows/build_infra_images_cache.yml' # Create infra image when cutting down branches/tags create: @@ -257,3 +258,16 @@ jobs: - name: Image digest (PySpark with Numpy 2.1.3) if: hashFiles('dev/spark-test-image/numpy-213/Dockerfile') != '' run: echo ${{ steps.docker_build_pyspark_numpy_213.outputs.digest }} + - name: Build and push (PySpark with Python 3.14) + if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != '' + id: docker_build_pyspark_python_314 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-314/ + push: true + tags: 
ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.14) + if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_314.outputs.digest }} diff --git a/.github/workflows/build_python_3.14.yml b/.github/workflows/build_python_3.14.yml new file mode 100644 index 0000000000000..c3e054b47beab --- /dev/null +++ b/.github/workflows/build_python_3.14.yml @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: "Build / Python-only (master, Python 3.14)" + +on: + schedule: + - cron: '0 20 * * *' + workflow_dispatch: + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + branch: master + hadoop: hadoop3 + envs: >- + { + "PYSPARK_IMAGE_TO_TEST": "python-314", + "PYTHON_TO_TEST": "python3.14" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/dev/spark-test-image/python-314/Dockerfile b/dev/spark-test-image/python-314/Dockerfile new file mode 100644 index 0000000000000..6b79b00677d29 --- /dev/null +++ b/dev/spark-test-image/python-314/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project <dev@spark.apache.org>" +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.14" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20250704 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + +# Install Python 3.14 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.14 \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + +# Install Python 3.14 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.14 +RUN python3.14 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this +RUN python3.14 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \ + python3.14 -m pip install 'torch<2.6.0' torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.14 -m pip 
install torcheval && \ + python3.14 -m pip cache purge