diff --git a/README.md b/README.md index 1c467c6e471..fc883ffcbd8 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,9 @@ Linux OS (including RHEL/Rocky Linux, and Ubuntu) and macOS. ### Try out quickly Welcome to try out Cloudberry via building [one Docker-based -Sandbox](https://github.com/apache/cloudberry-bootcamp), which is tailored to -help you gain a basic understanding of Cloudberry's capabilities and features -a range of materials, including tutorials, sample code, and crash courses. +Sandbox](./devops/sandbox), which is tailored to help you gain a basic +understanding of Cloudberry's capabilities and features a range of +materials, including tutorials, sample code, and crash courses. ## Repositories @@ -79,10 +79,8 @@ this, there are several ecosystem repositories for Cloudberry, including the website, extensions, connectors, adapters, and other utilities. * [apache/cloudberry-site](https://github.com/apache/cloudberry-site): website and documentation sources. -* [apache/cloudberry-bootcamp](https://github.com/apache/cloudberry-bootcamp): help you quickly try out Cloudberry via one Docker-based Sandbox. -* [apache/cloudberry-gpbackup](https://github.com/apache/cloudberry-gpbackup): backup utility for Cloudberry. +* [apache/cloudberry-backup](https://github.com/apache/cloudberry-backup): backup utility for Cloudberry. * [apache/cloudberry-go-libs](https://github.com/apache/cloudberry-go-libs): go-libs for Cloudberry. -* [apache/cloudberry-gpbackup-s3-plugin](https://github.com/apache/cloudberry-gpbackup-s3-plugin): S3 plugin for use with Cloudberry backup utility. * [apache/cloudberry-pxf](https://github.com/apache/cloudberry-pxf): Platform Extension Framework (PXF) for Cloudberry. 
## Community & Support diff --git a/devops/sandbox/.env b/devops/sandbox/.env new file mode 100644 index 00000000000..233d7c5b1b5 --- /dev/null +++ b/devops/sandbox/.env @@ -0,0 +1,21 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +CODEBASE_VERSION=2.0.0 +OS_VERSION=rockylinux9 diff --git a/devops/sandbox/Dockerfile.RELEASE.rockylinux9 b/devops/sandbox/Dockerfile.RELEASE.rockylinux9 new file mode 100644 index 00000000000..697303c7c38 --- /dev/null +++ b/devops/sandbox/Dockerfile.RELEASE.rockylinux9 @@ -0,0 +1,266 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# Multi-stage Dockerfile for Apache Cloudberry Sandbox Environment (Release) +# -------------------------------------------------------------------- +# This Dockerfile compiles and installs a specific release version of +# Cloudberry, then creates a runtime environment for testing and development. +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Build stage: Rocky Linux 9 builder to compile Cloudberry (release tarball) +# -------------------------------------------------------------------- +FROM rockylinux/rockylinux:9.6 AS builder + +# Install build toolchains and development headers (avoid coreutils/curl conflicts on arm64) +RUN dnf makecache && \ + dnf install -y \ + epel-release \ + git && \ + dnf config-manager --disable epel-cisco-openh264 && \ + dnf makecache && \ + dnf config-manager --disable epel && \ + dnf install -y --enablerepo=epel \ + the_silver_searcher \ + bat \ + htop && \ + dnf install -y \ + bison \ + cmake3 \ + ed \ + file \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + glibc-langpack-en \ + glibc-locale-source \ + initscripts \ + iproute \ + less \ + lsof \ + m4 \ + net-tools \ + openssh-clients \ + openssh-server \ + perl \ + rpm-build \ + rpmdevtools \ + rsync \ + sudo \ + tar \ + unzip \ + util-linux-ng \ + wget \ + sshpass \ + which && \ + dnf install -y \ + apr-devel \ + bzip2-devel \ + java-11-openjdk \ + java-11-openjdk-devel \ + krb5-devel \ + libcurl-devel \ + 
libevent-devel \ + libxml2-devel \ + libuuid-devel \ + libzstd-devel \ + lz4 \ + lz4-devel \ + openldap-devel \ + openssl-devel \ + pam-devel \ + perl-ExtUtils-Embed \ + perl-Test-Simple \ + perl-core \ + python3-devel \ + python3-pytest \ + readline-devel \ + zlib-devel && \ + dnf install -y --enablerepo=crb \ + libuv-devel \ + libyaml-devel \ + perl-IPC-Run \ + protobuf-devel && \ + dnf clean all && \ + cd && XERCES_LATEST_RELEASE=3.3.0 && \ + wget -nv "https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + echo "$(curl -sL https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz.sha256)" | sha256sum -c - && \ + tar xf "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && rm "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + cd xerces-c-${XERCES_LATEST_RELEASE} && \ + ./configure --prefix=/usr/local/xerces-c && \ + make -j$(nproc) && \ + make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ + rm -rf ~/xerces-c* + +# Create gpadmin user and grant passwordless sudo in builder +RUN groupadd -r gpadmin && \ + useradd -m -r -g gpadmin -s /bin/bash gpadmin && \ + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin && \ + chmod 440 /etc/sudoers.d/gpadmin + +# Switch to gpadmin user +USER gpadmin +WORKDIR /home/gpadmin + +# Release version to build (Apache official tarball) +ARG CB_RELEASE_VERSION=2.0.0-incubating + +# Download and extract the specified release version from Apache +RUN wget -nv "https://downloads.apache.org/incubator/cloudberry/${CB_RELEASE_VERSION}/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz" -O /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz && \ + tar -xzf /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz -C /home/gpadmin && \ + rm -f /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz && \ + mv /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION} /home/gpadmin/cloudberry + +# Build Cloudberry using the official
build scripts +RUN cd /home/gpadmin/cloudberry && \ + export SRC_DIR=/home/gpadmin/cloudberry && \ + mkdir -p "${SRC_DIR}/build-logs" && \ + # Ensure Cloudberry lib dir exists and has Xerces libs available + sudo rm -rf /usr/local/cloudberry-db && \ + sudo mkdir -p /usr/local/cloudberry-db/lib && \ + sudo cp -v /usr/local/xerces-c/lib/libxerces-c.so \ + /usr/local/xerces-c/lib/libxerces-c-3.*.so \ + /usr/local/cloudberry-db/lib/ && \ + sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db && \ + # Configure with required features and paths + export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:$LD_LIBRARY_PATH && \ + ./configure --prefix=/usr/local/cloudberry-db \ + --disable-external-fts \ + --enable-debug \ + --enable-cassert \ + --enable-debug-extensions \ + --enable-gpcloud \ + --enable-ic-proxy \ + --enable-mapreduce \ + --enable-orafce \ + --enable-orca \ + --enable-pax \ + --enable-pxf \ + --enable-tap-tests \ + --with-gssapi \ + --with-ldap \ + --with-libxml \ + --with-lz4 \ + --with-pam \ + --with-perl \ + --with-pgport=5432 \ + --with-python \ + --with-pythonsrc-ext \ + --with-ssl=openssl \ + --with-uuid=e2fs \ + --with-includes=/usr/local/xerces-c/include \ + --with-libraries=/usr/local/cloudberry-db/lib && \ + # Build and install + make -j$(nproc) --directory ${SRC_DIR} && \ + make -j$(nproc) --directory ${SRC_DIR}/contrib && \ + make install --directory ${SRC_DIR} && \ + make install --directory "${SRC_DIR}/contrib" + +# -------------------------------------------------------------------- +# Runtime stage: Rocky Linux 9 runtime with required dependencies +# -------------------------------------------------------------------- +FROM rockylinux/rockylinux:9.6 + +# Install required runtime dependencies, SSH server, sudo, and tools +# Note: Use dnf on Rocky Linux 9 +RUN dnf -y update && \ + dnf -y install \ + openssh-server openssh-clients \ + sudo shadow-utils \ + bash procps-ng \ + ca-certificates \ + python3 \ + apr \ + bzip2-libs \ + krb5-libs \ + 
libevent \ + libicu \ + libuuid \ + libxml2 \ + libyaml \ + libzstd \ + lz4 \ + ncurses \ + openldap \ + openssl \ + pam \ + pcre2 \ + perl \ + protobuf \ + readline \ + zlib \ + glibc-langpack-en \ + libuv \ + iproute \ + net-tools \ + which \ + rsync \ + keyutils \ + libstdc++ && \ + dnf clean all && rm -rf /var/cache/dnf + +# Create gpadmin user and group, grant passwordless sudo +RUN groupadd -r gpadmin && \ + useradd -m -r -g gpadmin -s /bin/bash gpadmin && \ + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin && \ + chmod 440 /etc/sudoers.d/gpadmin + +# Prepare SSH daemon: generate host keys and ensure runtime dir +RUN ssh-keygen -A && mkdir -p /run/sshd + +# Copy built Cloudberry from builder stage +COPY --from=builder /usr/local/cloudberry-db /usr/local/cloudberry-db + +# Bring Xerces libs into Cloudberry lib dir +COPY --from=builder /usr/local/xerces-c/lib/libxerces-c.so /usr/local/cloudberry-db/lib/ +COPY --from=builder /usr/local/xerces-c/lib/libxerces-c-3.*.so /usr/local/cloudberry-db/lib/ + +# Copy configuration files to their final destinations +COPY ./configs/90-cbdb-limits.conf /etc/security/limits.d/90-cbdb-limits.conf +COPY ./configs/90-cbdb-sysctl.conf /etc/sysctl.d/90-cbdb-sysctl.conf +COPY ./configs/gpinitsystem_singlenode /tmp/gpinitsystem_singlenode +COPY ./configs/gpinitsystem_multinode /tmp/gpinitsystem_multinode +COPY ./configs/multinode-gpinit-hosts /tmp/multinode-gpinit-hosts +COPY ./configs/init_system.sh /tmp/init_system.sh + +# Runtime configuration +RUN sudo sysctl -p /etc/sysctl.d/90-cbdb-sysctl.conf || true && \ + echo "cdw" > /tmp/gpdb-hosts && \ + sudo chmod 755 /tmp/gpinitsystem_singlenode && \ + sudo chmod 755 /tmp/gpinitsystem_multinode && \ + sudo chmod 755 /tmp/init_system.sh && \ + mkdir -p /opt/cbdb/cluster-ssh && \ + chown gpadmin:gpadmin /opt/cbdb/cluster-ssh && \ + mkdir -p /data0/database/coordinator /data0/database/primary /data0/database/mirror && \ + chown -R gpadmin:gpadmin /data0 && \ + echo "export 
COORDINATOR_DATA_DIRECTORY=/data0/database/coordinator/gpseg-1" >> /home/gpadmin/.bashrc && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry-db/cloudberry-env.sh ]; then\n source /usr/local/cloudberry-db/cloudberry-env.sh\nfi\n# Add Greenplum compatibility entries\nif [ -f /usr/local/cloudberry-db/greenplum_path.sh ]; then\n source /usr/local/cloudberry-db/greenplum_path.sh\nfi' >> /home/gpadmin/.bashrc + +# Set default user and working directory +USER gpadmin +WORKDIR /home/gpadmin + +EXPOSE 5432 22 + +# cgroup mount (provided by compose/run) +VOLUME [ "/sys/fs/cgroup" ] + +# Start the container by running the initialization script +CMD ["bash","-c","/tmp/init_system.sh"] diff --git a/devops/sandbox/Dockerfile.main.rockylinux9 b/devops/sandbox/Dockerfile.main.rockylinux9 new file mode 100644 index 00000000000..51efd8db87b --- /dev/null +++ b/devops/sandbox/Dockerfile.main.rockylinux9 @@ -0,0 +1,221 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- +# Multi-stage Dockerfile for Apache Cloudberry Sandbox Environment +# -------------------------------------------------------------------- +# This Dockerfile starts from a plain Rocky Linux 9 base image to +# compile and install Cloudberry from the main branch, then creates +# a runtime environment for testing and development. +# -------------------------------------------------------------------- + +# Build stage: Rocky Linux 9 builder to compile Cloudberry (main branch) +FROM rockylinux/rockylinux:9.6 AS builder + +# Install build toolchains and development headers (avoid coreutils/curl conflicts on arm64) +RUN dnf makecache && \ + dnf install -y \ + epel-release \ + git && \ + dnf config-manager --disable epel-cisco-openh264 && \ + dnf makecache && \ + dnf config-manager --disable epel && \ + dnf install -y --enablerepo=epel \ + the_silver_searcher \ + bat \ + htop && \ + dnf install -y \ + bison \ + cmake3 \ + ed \ + file \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + glibc-langpack-en \ + glibc-locale-source \ + initscripts \ + iproute \ + less \ + lsof \ + m4 \ + net-tools \ + openssh-clients \ + openssh-server \ + perl \ + rpm-build \ + rpmdevtools \ + rsync \ + sudo \ + tar \ + unzip \ + util-linux-ng \ + wget \ + sshpass \ + which && \ + dnf install -y \ + apr-devel \ + bzip2-devel \ + java-11-openjdk \ + java-11-openjdk-devel \ + krb5-devel \ + libcurl-devel \ + libevent-devel \ + libxml2-devel \ + libuuid-devel \ + libzstd-devel \ + lz4 \ + lz4-devel \ + openldap-devel \ + openssl-devel \ + pam-devel \ + perl-ExtUtils-Embed \ + perl-Test-Simple \ + perl-core \ + python3-devel \ + python3-pytest \ + readline-devel \ + zlib-devel && \ + dnf install -y --enablerepo=crb \ + libuv-devel \ + libyaml-devel \ + perl-IPC-Run \ + protobuf-devel && \ + dnf clean all && \ + cd && XERCES_LATEST_RELEASE=3.3.0 && \ + wget -nv
"https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + echo "$(curl -sL https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz.sha256)" | sha256sum -c - && \ + tar xf "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && rm "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + cd xerces-c-${XERCES_LATEST_RELEASE} && \ + ./configure --prefix=/usr/local/xerces-c && \ + make -j$(nproc) && \ + make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ + rm -rf ~/xerces-c* + +# Create gpadmin user and grant passwordless sudo in builder +RUN groupadd -r gpadmin && \ + useradd -m -r -g gpadmin -s /bin/bash gpadmin && \ + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin && \ + chmod 440 /etc/sudoers.d/gpadmin + +# Switch to gpadmin user +USER gpadmin +WORKDIR /home/gpadmin + +# Clone the latest Cloudberry source code +RUN git clone --recurse-submodules --branch main --single-branch --depth=1 https://github.com/apache/cloudberry.git + +# Build Cloudberry using the official build scripts +RUN cd /home/gpadmin/cloudberry && \ + export SRC_DIR=/home/gpadmin/cloudberry && \ + mkdir -p ${SRC_DIR}/build-logs && \ + ./devops/build/automation/cloudberry/scripts/configure-cloudberry.sh && \ + ./devops/build/automation/cloudberry/scripts/build-cloudberry.sh + +# -------------------------------------------------------------------- +# Runtime stage: Rocky Linux 9 runtime with required dependencies +# -------------------------------------------------------------------- +FROM rockylinux/rockylinux:9.6 + +# Install required runtime dependencies, SSH server, sudo, and tools +# Note: Use dnf on Rocky Linux 9 +RUN dnf -y update && \ + dnf -y install \ + openssh-server openssh-clients \ + sudo shadow-utils \ + bash procps-ng \ + ca-certificates \ + python3 \ + apr \ + bzip2-libs \ + krb5-libs \ + libevent \ + libicu \ + libuuid \ + libxml2 \ + libyaml \ + libzstd \ + lz4 \ + ncurses \ + openldap \ + openssl \ +
pam \ + pcre2 \ + perl \ + protobuf \ + readline \ + zlib \ + glibc-langpack-en \ + libuv \ + iproute \ + net-tools \ + which \ + rsync \ + keyutils \ + libstdc++ && \ + dnf clean all && rm -rf /var/cache/dnf + +# Create gpadmin user and group, grant passwordless sudo +RUN groupadd -r gpadmin && \ + useradd -m -r -g gpadmin -s /bin/bash gpadmin && \ + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin && \ + chmod 440 /etc/sudoers.d/gpadmin + +# Prepare SSH daemon: generate host keys and ensure runtime dir +RUN ssh-keygen -A && mkdir -p /run/sshd + +# Copy built Cloudberry from builder stage +COPY --from=builder /usr/local/cloudberry-db /usr/local/cloudberry-db + +# Bring Xerces libs into Cloudberry lib dir and normalize SONAME via builder-installed versioned prefix +COPY --from=builder /usr/local/xerces-c/lib/libxerces-c.so /usr/local/cloudberry-db/lib/ +COPY --from=builder /usr/local/xerces-c/lib/libxerces-c-3.*.so /usr/local/cloudberry-db/lib/ + +# Copy configuration files to their final destinations +COPY ./configs/90-cbdb-limits.conf /etc/security/limits.d/90-cbdb-limits.conf +COPY ./configs/90-cbdb-sysctl.conf /etc/sysctl.d/90-cbdb-sysctl.conf +COPY ./configs/gpinitsystem_singlenode /tmp/gpinitsystem_singlenode +COPY ./configs/gpinitsystem_multinode /tmp/gpinitsystem_multinode +COPY ./configs/multinode-gpinit-hosts /tmp/multinode-gpinit-hosts +COPY ./configs/init_system.sh /tmp/init_system.sh + +# Runtime configuration +RUN sudo sysctl -p /etc/sysctl.d/90-cbdb-sysctl.conf || true && \ + echo "cdw" > /tmp/gpdb-hosts && \ + sudo chmod 755 /tmp/gpinitsystem_singlenode && \ + sudo chmod 755 /tmp/gpinitsystem_multinode && \ + sudo chmod 755 /tmp/init_system.sh && \ + mkdir -p /opt/cbdb/cluster-ssh && \ + chown gpadmin:gpadmin /opt/cbdb/cluster-ssh && \ + mkdir -p /data0/database/coordinator /data0/database/primary /data0/database/mirror && \ + chown -R gpadmin:gpadmin /data0 && \ + echo "export 
COORDINATOR_DATA_DIRECTORY=/data0/database/coordinator/gpseg-1" >> /home/gpadmin/.bashrc && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry-db/cloudberry-env.sh ]; then\n source /usr/local/cloudberry-db/cloudberry-env.sh\nfi' >> /home/gpadmin/.bashrc + +# Set default user and working directory +USER gpadmin +WORKDIR /home/gpadmin + +EXPOSE 5432 22 + +# cgroup mount (provided by compose/run) +VOLUME [ "/sys/fs/cgroup" ] + +# Start the container by running the initialization script +CMD ["bash","-c","/tmp/init_system.sh"] diff --git a/devops/sandbox/README.md b/devops/sandbox/README.md new file mode 100644 index 00000000000..e3ca6cb2d09 --- /dev/null +++ b/devops/sandbox/README.md @@ -0,0 +1,190 @@ + +--- +title: Sandbox of Apache Cloudberry +--- + +# Install Apache Cloudberry With Docker + +This document guides you on how to quickly set up and connect to Apache Cloudberry in a Docker environment. You can try out Apache Cloudberry by performing some basic operations and running SQL commands. + +> [!WARNING] +> This guide is intended for testing or development. DO NOT use it for production. + + +## Prerequisites + +Make sure that your environment meets the following requirements: + +- Platform requirement: Any platform with Docker runtime. For details, refer to [Get Started with Docker](https://www.docker.com/get-started/). +- Other dependencies: Git, SSH, and internet connection + +## Build the Sandbox + +When building and deploying Apache Cloudberry in Docker, you will have 2 different deployment options as well as different build options. + +**Deployment Options** +1. **Single Container** (Default) - With the single container option, you will have the coordinator as well as the Apache Cloudberry segments all running on a single container. This is the default behavior when deploying using the `run.sh` script provided. +2. 
**Multi-Container** - Deploying with the multi-container option will give you a more realistic deployment of what actual production Apache Cloudberry clusters look like. With multi-node, you will have the coordinator, the standby coordinator, and 2 segment hosts all on their own respective containers. This is to both highlight the distributed nature of Apache Cloudberry as well as highlight how high availability (HA) features work in the event of a server (or in this case a container) failing. This is enabled by passing the `-m` flag to the `run.sh` script, as shown below. + +![cloudberry Sandbox Deployments](./sandbox-deployment.jpg) + +**Build Options** + +1. Compile with the source code of the latest Apache Cloudberry release (see the [Apache Cloudberry Release Page](https://github.com/apache/cloudberry/releases)). The base OS will be the Rocky Linux 9 Docker image. +2. Compile with the latest Apache Cloudberry [main](https://github.com/apache/cloudberry/tree/main) branch. The base OS will be the Rocky Linux 9 Docker image. + +Build and deploy steps: + +1. Start Docker Desktop and make sure it is running properly on your host platform. + +2. Clone the Apache Cloudberry repository to the target machine. + + ```shell + git clone https://github.com/apache/cloudberry.git + ``` + +3. Enter the repository and run the `run.sh` script to start the Docker container. This will start the automatic installation process. Depending on your environment, you may need to run this with the `sudo` command.
+ + - For latest Apache Cloudberry release running on a single container + + ```shell + cd cloudberry/devops/sandbox + ./run.sh -c 2.0.0 + ``` + - For latest Apache Cloudberry release running across multiple containers + + ```shell + cd cloudberry/devops/sandbox + ./run.sh -c 2.0.0 -m + ``` + - For latest main branch running on a single container + + ```shell + cd cloudberry/devops/sandbox + ./run.sh -c main + ``` + + - For latest main branch running across multiple containers + + ```shell + cd cloudberry/devops/sandbox + ./run.sh -c main -m + ``` + + Once the script finishes without error, the sandbox is built and running successfully. The `docker run` and `docker compose` commands use the --detach option allowing you to ssh or access the running Apache Cloudberry instance remotely. + + Please review run.sh script for additional options (e.g. setting Timezone in running container, only building container). You can also execute `./run.sh -h` to see the usage. + +## Connect to the database + +> [!NOTE] +> When deploying the multi-container Apache Cloudberry environment it may take extra time for the database to initialize, so you may need to wait a few minutes before you can execute the psql prompt successfully. You can run `docker logs cbdb-cdw -f` to see the current state of the database initialization process, you'll know the process is finished when you see the "Deployment Successful" output. + +You can now connect to the database and try some basic operations. + +1. Connect to the Docker container from the host machine: + + ```shell + docker exec -it cbdb-cdw /bin/bash + ``` + + If it is successful, you will see the following prompt: + + ```shell + [gpadmin@cdw /]$ + ``` + +2. Log into Apache Cloudberry in Docker. See the following commands and example outputs: + + ```shell + [gpadmin@cdw ~]$ psql # Connects to the database with the default database name "gpadmin". + + # psql (14.4, server 14.4) + # Type "help" for help. 
+ # Note: No password is required for the gpadmin user in this sandbox environment. + ``` + + ```sql + gpadmin=# SELECT VERSION(); -- Checks the database version. + + PostgreSQL 14.4 (Apache Cloudberry 1.0.0 build dev) on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 10.2.1 20210130 (Red Hat 10.2.1-11), 64-bit compiled on Oct 24 2023 10:24:28 + (1 row) + ``` + +Now you have an Apache Cloudberry and can continue with [Apache Cloudberry Tutorials](https://cloudberry.apache.org/docs/)! Enjoy! + +## Working with your Apache Cloudberry Docker environment + +When working with the Apache Cloudberry Docker environment there are a few commands that will be useful to you. + +**Stopping Your Single Container Deployment With Docker** + +To stop the **single container** deployment while _keeping the data and state_ within the container, you can run the command below. This means that you can later start the container again and any changes you made to the containers will be persisted between runs. + +```shell +docker stop cbdb-cdw +``` + +To stop the **single container** deployment and also remove the volume that belongs to the container, you can run the following command. Keep in mind this will remove the volume as well as the container associated which means any changes you've made inside of the container or any database state will be wiped and unrecoverable. + +```shell +docker rm -f cbdb-cdw +``` + +**Stopping Your Multi-Container Deployment With Docker** + +To stop the **multi-container** deployment while _keeping the data and state_ within the container, you can run the command below. This means that you can later start the container again and any changes you made to the containers will be persisted between runs. + +```shell +docker compose -f docker-compose-rockylinux9.yml stop +``` + +To stop the **multi-container** deployment and also remove the network and volumes that belong to the containers, you can run the command below. 
Running this command means it will delete the containers as well as remove the volumes that the containers are associated with. This means any changes you've made inside of the containers or any database state will be wiped and unrecoverable. + +```shell +docker compose -f docker-compose-rockylinux9.yml down -v +``` + +**Starting A Stopped Single Container Apache Cloudberry Docker Deployment** + +If you've run any of the commands above that keep the Docker volumes persisted between shutting the containers down, you can use the following commands to bring that same deployment back up with its previous state. + +To start a **single container** deployment after it was shut down, you can simply run the following command: + +```shell +docker start cbdb-cdw +``` + +**Starting A Stopped Multi-Container Apache Cloudberry Docker Deployment** + +To start a **multi-container** deployment after it was shut down, you can run the following command. + +```shell +docker compose -f docker-compose-rockylinux9.yml start +``` + +> [!NOTE] +> When starting a previously stopped Apache Cloudberry Docker environment, you'll need to manually start the database back up. To do this, just run the following commands once the container(s) are back up and running. The `gpstart` command is used for starting the database, and `-a` is a flag that starts the database without prompting (non-interactive). + +```shell +docker exec -it cbdb-cdw /bin/bash + +[gpadmin@cdw /]$ gpstart -a +``` \ No newline at end of file diff --git a/devops/sandbox/configs/90-cbdb-limits.conf b/devops/sandbox/configs/90-cbdb-limits.conf new file mode 100644 index 00000000000..33088f92278 --- /dev/null +++ b/devops/sandbox/configs/90-cbdb-limits.conf @@ -0,0 +1,29 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.
See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +###################### +# CBDB CONFIG PARAMS # +###################### + + * soft core unlimited + * hard core unlimited + * soft nofile 524288 + * hard nofile 524288 + * soft nproc 131072 + * hard nproc 131072 diff --git a/devops/sandbox/configs/90-cbdb-sysctl.conf b/devops/sandbox/configs/90-cbdb-sysctl.conf new file mode 100644 index 00000000000..9f0b7c576e4 --- /dev/null +++ b/devops/sandbox/configs/90-cbdb-sysctl.conf @@ -0,0 +1,54 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. 
See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +###################### +# CBDB CONFIG PARAMS # +###################### + +kernel.shmmax = 1000000000 +kernel.shmall = 4000000000 +kernel.shmmni = 4096 +vm.overcommit_memory = 2 +vm.overcommit_ratio = 95 +net.ipv4.ip_local_port_range = 10000 65535 +kernel.sem = 250 2048000 200 8192 +kernel.sysrq = 1 +kernel.core_uses_pid = 1 +kernel.msgmnb = 65536 +kernel.msgmax = 65536 +kernel.msgmni = 2048 +net.ipv4.tcp_syncookies = 1 +net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.tcp_max_syn_backlog = 4096 +net.ipv4.conf.all.arp_filter = 1 +net.ipv4.ipfrag_high_thresh = 41943040 +net.ipv4.ipfrag_low_thresh = 31457280 +net.ipv4.ipfrag_time = 60 +net.core.netdev_max_backlog = 10000 +net.core.rmem_max = 2097152 +net.core.wmem_max = 2097152 +vm.swappiness = 10 +vm.zone_reclaim_mode = 0 +vm.dirty_expire_centisecs = 500 +vm.dirty_writeback_centisecs = 100 +vm.dirty_background_ratio = 0 +vm.dirty_ratio = 0 +vm.dirty_background_bytes = 1610612736 +vm.dirty_bytes = 4294967296 +kernel.core_pattern=/var/core/core.%h.%t diff --git a/devops/sandbox/configs/gpinitsystem_multinode b/devops/sandbox/configs/gpinitsystem_multinode new file mode 100644 index 00000000000..d6a46d4d410 --- /dev/null +++ b/devops/sandbox/configs/gpinitsystem_multinode @@ -0,0 +1,138 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# FILE NAME: gpinitsystem_multinode + +# A configuration file is needed by the gpinitsystem utility. +# This sample file initializes an Apache Cloudberry multi-node cluster +# with one coordinator, one standby coordinator, and segment instances +# across multiple hosts. This file is referenced when you run gpinitsystem. + +################################################ +# REQUIRED PARAMETERS +################################################ + +# A name for the array you are configuring. You can use any name you +# like. Enclose the name in quotes if the name contains spaces. + +ARRAY_NAME="Sandbox: Apache Cloudberry Cluster" + +# This specifies the file that contains the list of segment host names +# that comprise the Greenplum system. For a single-node system, this +# file contains the local OS-configured hostname (as output by the +# hostname command). If the file does not reside in the same +# directory where the gpinitsystem utility is executed, specify +# the absolute path to the file. + +MACHINE_LIST_FILE=/tmp/gpdb-hosts + +# This specifies a prefix that will be used to name the data directories +# of the coordinator and segment instances. The naming convention for data +# directories in an Apache Cloudberry system is SEG_PREFIX<number>, +# where <number> starts with 0 for segment instances and the coordinator +# is always -1.
So for example, if you choose the prefix gpsne, your +# coordinator instance data directory would be named gpsne-1, and the segment +# instances would be named gpsne0, gpsne1, gpsne2, gpsne3, and so on. + +SEG_PREFIX=gpseg + +# Base port number on which primary segment instances will be +# started on a segment host. The base port number will be +# incremented by one for each segment instance started on a host. + +PORT_BASE=40000 + +# This specifies the data storage location(s) where the script will +# create the primary segment data directories. The script creates a +# unique data directory for each segment instance. If you want multiple +# segment instances per host, list a data storage area for each primary +# segment you want created. The recommended number is one primary segment +# per CPU. It is OK to list the same data storage area multiple times +# if you want your data directories created in the same location. The +# number of data directory locations specified will determine the number +# of primary segment instances created per host. +# You must make sure that the user who runs gpinitsystem (for example, +# the gpadmin user) has permissions to write to these directories. You +# may want to create these directories on the segment hosts before running +# gpinitsystem and chown them to the appropriate user. + +declare -a DATA_DIRECTORY=(/data0/database/primary \ + /data0/database/primary) + +# The OS-configured hostname of the Apache Cloudberry coordinator instance. + +COORDINATOR_HOSTNAME=cdw + +# The location where the data directory will be created on the +# Greenplum coordinator host. +# You must make sure that the user who runs gpinitsystem +# has permissions to write to this directory. You may want to +# create this directory on the coordinator host before running +# gpinitsystem and chown it to the appropriate user. + +COORDINATOR_DIRECTORY=/data0/database/coordinator + +# The port number for the coordinator instance. 
This is the port number +# that users and client connections will use when accessing the +# Apache Cloudberry system. + +COORDINATOR_PORT=5432 + +# The shell the gpinitsystem script uses to execute +# commands on remote hosts. Allowed value is ssh. You must set up +# your trusted host environment before running the gpinitsystem +# script. You can use gpssh-exkeys to do this. + +TRUSTED_SHELL=ssh + +# Maximum distance between automatic write ahead log (WAL) +# checkpoints, in log file segments (each segment is normally 16 +# megabytes). This will set the checkpoint_segments parameter +# in the postgresql.conf file for each segment instance in the +# Apache Cloudberry system. + +CHECK_POINT_SEGMENTS=8 + +# The character set encoding to use. Greenplum supports the +# same character sets as PostgreSQL. See 'Character Set Support' +# in the PostgreSQL documentation for allowed character sets. +# Should correspond to the OS locale specified with the +# gpinitsystem -n option. + +ENCODING=UNICODE + +################################################ +# OPTIONAL PARAMETERS +################################################ + +# Optional. Uncomment to create a database of this name after the +# system is initialized. You can always create a database later using +# the CREATE DATABASE command or the createdb script. + +DATABASE_NAME=gpadmin + +# Mirror configuration + +MIRROR_PORT_BASE=50000 + +declare -a MIRROR_DATA_DIRECTORY=(/data0/database/mirror \ + /data0/database/mirror) + +# REPLICATION_PORT_BASE=41000 +# MIRROR_REPLICATION_PORT_BASE=51000 diff --git a/devops/sandbox/configs/gpinitsystem_singlenode b/devops/sandbox/configs/gpinitsystem_singlenode new file mode 100644 index 00000000000..f221d81938c --- /dev/null +++ b/devops/sandbox/configs/gpinitsystem_singlenode @@ -0,0 +1,140 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# FILE NAME: gpinitsystem_singlenode + +# A configuration file is needed by the gpinitsystem utility. +# This sample file initializes an Apache Cloudberry Single Node +# Edition (SNE) system with one coordinator and three segment instances +# on the local host. This file is referenced when you run gpinitsystem. + +################################################ +# REQUIRED PARAMETERS +################################################ + +# A name for the array you are configuring. You can use any name you +# like. Enclose the name in quotes if the name contains spaces. + +ARRAY_NAME="Sandbox: Apache Cloudberry Cluster" + +# This specifies the file that contains the list of segment host names +# that comprise the Greenplum system. For a single-node system, this +# file contains the local OS-configured hostname (as output by the +# hostname command). If the file does not reside in the same +# directory where the gpinitsystem utility is executed, specify +# the absolute path to the file. + +MACHINE_LIST_FILE=/tmp/gpdb-hosts + +# This specifies a prefix that will be used to name the data directories +# of the coordinator and segment instances.
The naming convention for data +# directories in an Apache Cloudberry system is SEG_PREFIX<number>, +# where <number> starts with 0 for segment instances and the coordinator +# is always -1. So for example, if you choose the prefix gpsne, your +# coordinator instance data directory would be named gpsne-1, and the segment +# instances would be named gpsne0, gpsne1, gpsne2, gpsne3, and so on. + +SEG_PREFIX=gpseg + +# Base port number on which primary segment instances will be +# started on a segment host. The base port number will be +# incremented by one for each segment instance started on a host. + +PORT_BASE=40000 + +# This specifies the data storage location(s) where the script will +# create the primary segment data directories. The script creates a +# unique data directory for each segment instance. If you want multiple +# segment instances per host, list a data storage area for each primary +# segment you want created. The recommended number is one primary segment +# per CPU. It is OK to list the same data storage area multiple times +# if you want your data directories created in the same location. The +# number of data directory locations specified will determine the number +# of primary segment instances created per host. +# You must make sure that the user who runs gpinitsystem (for example, +# the gpadmin user) has permissions to write to these directories. You +# may want to create these directories on the segment hosts before running +# gpinitsystem and chown them to the appropriate user. + +declare -a DATA_DIRECTORY=(/data0/database/primary \ + /data0/database/primary \ + /data0/database/primary) + +# The OS-configured hostname of the Apache Cloudberry coordinator instance. + +COORDINATOR_HOSTNAME=cdw + +# The location where the data directory will be created on the +# Greenplum coordinator host. +# You must make sure that the user who runs gpinitsystem +# has permissions to write to this directory.
You may want to +# create this directory on the coordinator host before running +# gpinitsystem and chown it to the appropriate user. + +COORDINATOR_DIRECTORY=/data0/database/coordinator + +# The port number for the coordinator instance. This is the port number +# that users and client connections will use when accessing the +# Apache Cloudberry system. + +COORDINATOR_PORT=5432 + +# The shell the gpinitsystem script uses to execute +# commands on remote hosts. Allowed value is ssh. You must set up +# your trusted host environment before running the gpinitsystem +# script. You can use gpssh-exkeys to do this. + +TRUSTED_SHELL=ssh + +# Maximum distance between automatic write ahead log (WAL) +# checkpoints, in log file segments (each segment is normally 16 +# megabytes). This will set the checkpoint_segments parameter +# in the postgresql.conf file for each segment instance in the +# Apache Cloudberry system. + +CHECK_POINT_SEGMENTS=8 + +# The character set encoding to use. Greenplum supports the +# same character sets as PostgreSQL. See 'Character Set Support' +# in the PostgreSQL documentation for allowed character sets. +# Should correspond to the OS locale specified with the +# gpinitsystem -n option. + +ENCODING=UNICODE + +################################################ +# OPTIONAL PARAMETERS +################################################ + +# Optional. Uncomment to create a database of this name after the +# system is initialized. You can always create a database later using +# the CREATE DATABASE command or the createdb script. 
+ +DATABASE_NAME=gpadmin + +# Mirror configuration + +MIRROR_PORT_BASE=50000 + +declare -a MIRROR_DATA_DIRECTORY=(/data0/database/mirror \ + /data0/database/mirror \ + /data0/database/mirror) + +# REPLICATION_PORT_BASE=41000 +# MIRROR_REPLICATION_PORT_BASE=51000 diff --git a/devops/sandbox/configs/init_system.sh b/devops/sandbox/configs/init_system.sh new file mode 100755 index 00000000000..da40be893af --- /dev/null +++ b/devops/sandbox/configs/init_system.sh @@ -0,0 +1,247 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +## ====================================================================== +## Container initialization script for Apache Cloudberry Sandbox +## ====================================================================== + +# ---------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# ---------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. 
+# ----------------------------------------------------------------------
+
+# Ensure SSH directory exists
+sudo mkdir -p /run/sshd
+
+# Start SSH daemon (base image already handles most SSH setup)
+sudo /usr/sbin/sshd
+
+# Give SSH daemon time to start
+sleep 5
+
+# ----------------------------------------------------------------------
+# Remove /run/nologin to allow logins
+# ----------------------------------------------------------------------
+# The /run/nologin file, if present, prevents users from logging into
+# the system. This file is removed to ensure that users can log in via SSH.
+# ----------------------------------------------------------------------
+sudo rm -rf /run/nologin
+
+# ## Set gpadmin ownership - Cloudberry install directory and supporting
+# ## cluster creation files.
+# The "owner:group" separator is used; the "owner.group" form is a
+# deprecated GNU extension.
+sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db \
+           /tmp/gpinitsystem_singlenode \
+           /tmp/gpinitsystem_multinode \
+           /tmp/gpdb-hosts \
+           /tmp/multinode-gpinit-hosts
+
+# ----------------------------------------------------------------------
+# Configure passwordless SSH access for 'gpadmin' user
+# ----------------------------------------------------------------------
+# The script sets up SSH key-based authentication for the 'gpadmin' user,
+# allowing passwordless SSH access. It generates a new SSH key pair if one
+# does not already exist, and configures the necessary permissions.
+# ----------------------------------------------------------------------
+
+#######################################
+# Append a public key to gpadmin's authorized_keys exactly once.
+# Creates authorized_keys if needed, enforces mode 600 and gpadmin
+# ownership, and skips the append when an identical line already exists,
+# so re-running this script (container restart) does not add duplicates.
+# Arguments: $1 - path to the public key file to authorize
+#######################################
+append_pubkey_once() {
+    local pubkey_file="$1"
+    local auth_keys="/home/gpadmin/.ssh/authorized_keys"
+    local tmp_pub
+
+    touch "${auth_keys}"
+    chmod 600 "${auth_keys}"
+    chown gpadmin:gpadmin "${auth_keys}"
+
+    tmp_pub="$(mktemp)"
+    # Strip carriage returns so the whole-line comparison below still
+    # matches when the key file was written with CRLF line endings.
+    tr -d "\r" < "${pubkey_file}" > "${tmp_pub}"
+    if ! grep -F -x -f "${tmp_pub}" "${auth_keys}" >/dev/null 2>&1; then
+        cat "${tmp_pub}" >> "${auth_keys}"
+    fi
+    rm -f "${tmp_pub}"
+}
+
+mkdir -p /home/gpadmin/.ssh
+chmod 700 /home/gpadmin/.ssh
+
+# Prefer build-time cluster keypair on cdw; only generate if absent
+if [[ "${HOSTNAME}" == "cdw" && -f "/opt/cbdb/cluster-ssh/id_rsa" && -f "/opt/cbdb/cluster-ssh/id_rsa.pub" ]]; then
+    cp /opt/cbdb/cluster-ssh/id_rsa /home/gpadmin/.ssh/id_rsa
+    cp /opt/cbdb/cluster-ssh/id_rsa.pub /home/gpadmin/.ssh/id_rsa.pub
+elif [[ ! -f /home/gpadmin/.ssh/id_rsa ]]; then
+    ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1
+fi
+
+# Enforce strict permissions on key files
+chmod 600 /home/gpadmin/.ssh/id_rsa
+chmod 644 /home/gpadmin/.ssh/id_rsa.pub
+chown gpadmin:gpadmin /home/gpadmin/.ssh/id_rsa /home/gpadmin/.ssh/id_rsa.pub
+
+# Multi-node key distribution (HTTP removed):
+# - Require embedded coordinator pubkey in segment/standby images (/opt/cbdb/cluster-ssh/coordinator.pub)
+# - No network-based key distribution is performed.
+if [[ "${MULTINODE:-false}" == "true" ]]; then
+    if [[ "${HOSTNAME}" == "cdw" ]]; then
+        # Coordinator: authorize its own public key
+        append_pubkey_once /home/gpadmin/.ssh/id_rsa.pub
+    elif [[ -f "/opt/cbdb/cluster-ssh/coordinator.pub" ]]; then
+        # Segments/standby: authorize the embedded coordinator public key
+        append_pubkey_once /opt/cbdb/cluster-ssh/coordinator.pub
+    else
+        echo "Error: Missing embedded coordinator pubkey: /opt/cbdb/cluster-ssh/coordinator.pub"
+        echo "Hint: Use main build with role-differentiated images (coord/seg) or provide the pubkey via a mounted file."
+        exit 1
+    fi
+else
+    # Single node: use local key
+    append_pubkey_once /home/gpadmin/.ssh/id_rsa.pub
+fi
+
+# Add container hostnames to the known_hosts file to avoid SSH warnings.
+# Best effort: hosts that are not reachable yet are silently skipped.
+if [[ "${MULTINODE:-false}" == "true" ]]; then
+    ssh-keyscan -t rsa cdw scdw sdw1 sdw2 > /home/gpadmin/.ssh/known_hosts 2>/dev/null || true
+else
+    ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null || true
+fi
+chmod 600 /home/gpadmin/.ssh/known_hosts
+chown gpadmin:gpadmin /home/gpadmin/.ssh/known_hosts
+
+# Load Cloudberry/Greenplum environment with fallback, then ensure PATH
+if [ -f "/usr/local/cloudberry-db/cloudberry-env.sh" ]; then
+    # shellcheck disable=SC1091
+    . /usr/local/cloudberry-db/cloudberry-env.sh
+elif [ -f "/usr/local/cloudberry-db/greenplum_path.sh" ]; then
+    # shellcheck disable=SC1091
+    . /usr/local/cloudberry-db/greenplum_path.sh
+else
+    # Fallback: minimal env to find gp* tools
+    export GPHOME="/usr/local/cloudberry-db"
+fi
+# Ensure coordinator data dir variable is set
+export COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-/data0/database/coordinator/gpseg-1}"
+# Ensure PATH includes Cloudberry bin
+if [ -d "/usr/local/cloudberry-db/bin" ]; then
+    case ":$PATH:" in
+        *":/usr/local/cloudberry-db/bin:"*) : ;;
+        *) export PATH="/usr/local/cloudberry-db/bin:$PATH" ;;
+    esac
+fi
+
+# Initialize single node Cloudberry cluster
+if [[ "${MULTINODE:-false}" == "false" && "${HOSTNAME}" == "cdw" ]]; then
+    gpinitsystem -a \
+                 -c /tmp/gpinitsystem_singlenode \
+                 -h /tmp/gpdb-hosts \
+                 --max_connections=100
+# Initialize multi node Cloudberry cluster
+elif [[ "${MULTINODE:-false}" == "true" && "${HOSTNAME}" == "cdw" ]]; then
+    # Wait for other containers' SSH to become reachable (max 300s per host)
+    for host in sdw1 sdw2 scdw; do
+        MAX_WAIT=300
+        WAITED=0
+        until ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o ConnectTimeout=5 "gpadmin@${host}" "echo Connected to ${host}" 2>/dev/null; do
+            if [ "${WAITED}" -ge "${MAX_WAIT}" ]; then
+                echo "Timeout waiting for SSH on ${host}"
+                exit 1
+            fi
+            sleep 5
+            WAITED=$((WAITED+5))
+        done
+    done
+
+    # Clean up any existing data directories to avoid conflicts
+    sudo rm -rf /data0/database/coordinator/* /data0/database/primary/* /data0/database/mirror/* 2>/dev/null || true
+
+    # Ensure database directories exist with proper permissions
+    sudo mkdir -p /data0/database/coordinator /data0/database/primary /data0/database/mirror
+    sudo chown -R gpadmin:gpadmin /data0/database
+    sudo chmod -R 700 /data0/database
+
+    gpinitsystem -a \
+                 -c /tmp/gpinitsystem_multinode \
+                 -h /tmp/multinode-gpinit-hosts \
+                 --max_connections=100
+    gpinitstandby -s scdw -a
+    printf "sdw1\nsdw2\n" >> /tmp/gpdb-hosts
+
+    # NOTE(review): this post-init block is nested inside the multinode
+    # branch, so the single-node path never adds the pg_hba.conf entry or
+    # prints the cluster details below -- confirm that this is intended.
+    if [[ "${HOSTNAME}" == "cdw" ]]; then
+        ## Allow any host access the Cloudberry Cluster
+        echo 'host all all 0.0.0.0/0 trust' >> /data0/database/coordinator/gpseg-1/pg_hba.conf
+        # Reload the configuration so the new pg_hba.conf entry is picked up
+        gpstop -u
+
+        # Remove password requirement for gpadmin user
+        psql -d template1 \
+             -c "ALTER USER gpadmin PASSWORD NULL"
+
+        cat <<-'EOF'
+
+======================================================================
+  ____ _                 _ _
+ / ___| | ___  _   _  __| | |__   ___ _ __ _ __ _   _
+| |   | |/ _ \| | | |/ _` | '_ \ / _ \ '__| '__| | | |
+| |___| | (_) | |_| | (_| | |_) |  __/ |  | |  | |_| |
+ \____|_|\___/ \__,_|\__,_|_.__/ \___|_|  |_|   \__, |
+                                                |___/
+======================================================================
+EOF
+
+        cat <<-'EOF'
+
+======================================================================
+Sandbox: Apache Cloudberry Cluster details
+======================================================================
+
+EOF
+
+        echo "Current time: $(date)"
+        source /etc/os-release
+        echo "OS Version: ${NAME} ${VERSION}"
+
+        ## Display version, cluster configuration and optimizer setting
+        psql -P pager=off -d template1 -c "SELECT VERSION()"
+        psql -P pager=off -d template1 -c "SELECT * FROM gp_segment_configuration ORDER BY dbid"
+        psql -P pager=off -d template1 -c "SHOW optimizer"
+    fi
+
+fi # Close the main if/elif block
+
+echo """
+===========================
+=  DEPLOYMENT SUCCESSFUL  =
+===========================
+"""
+
+# ----------------------------------------------------------------------
+# Start an interactive bash shell
+# ----------------------------------------------------------------------
+# Finally, the script starts an interactive bash shell to keep the
+# container running and allow the user to interact with the environment.
+# ---------------------------------------------------------------------- +/bin/bash diff --git a/devops/sandbox/configs/multinode-gpinit-hosts b/devops/sandbox/configs/multinode-gpinit-hosts new file mode 100644 index 00000000000..6da00621266 --- /dev/null +++ b/devops/sandbox/configs/multinode-gpinit-hosts @@ -0,0 +1,2 @@ +sdw1 +sdw2 diff --git a/devops/sandbox/docker-compose-rockylinux9.yml b/devops/sandbox/docker-compose-rockylinux9.yml new file mode 100644 index 00000000000..c6e2c22432d --- /dev/null +++ b/devops/sandbox/docker-compose-rockylinux9.yml @@ -0,0 +1,126 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + + +services: + # One-shot init service to populate the shared cluster SSH volume + cluster-ssh-init: + container_name: cbdb-cluster-ssh-init + image: cbdb-${CODEBASE_VERSION}:${OS_VERSION} + user: "0" + command: > + bash -lc 'set -euxo pipefail; + mkdir -p /opt/cbdb/cluster-ssh; + if [ ! 
-f /opt/cbdb/cluster-ssh/id_rsa ]; then + TMP_DIR="$$(mktemp -d)"; + ssh-keygen -t rsa -b 4096 -N "" -f "$$TMP_DIR/id_rsa" >/dev/null 2>&1; + cp "$$TMP_DIR/id_rsa" /opt/cbdb/cluster-ssh/id_rsa; + cp "$$TMP_DIR/id_rsa.pub" /opt/cbdb/cluster-ssh/id_rsa.pub; + rm -rf "$$TMP_DIR"; + fi; + cp /opt/cbdb/cluster-ssh/id_rsa.pub /opt/cbdb/cluster-ssh/coordinator.pub; + chmod 700 /opt/cbdb/cluster-ssh; + chmod 600 /opt/cbdb/cluster-ssh/id_rsa; + chmod 644 /opt/cbdb/cluster-ssh/id_rsa.pub /opt/cbdb/cluster-ssh/coordinator.pub; + chown -R gpadmin:gpadmin /opt/cbdb/cluster-ssh' + volumes: + - cbdb-cluster-ssh:/opt/cbdb/cluster-ssh + restart: "no" + + cbdb-coordinator: + container_name: cbdb-cdw + image: cbdb-${CODEBASE_VERSION}:${OS_VERSION} + ports: + - "15432:5432" + hostname: cdw + tty: true + networks: + interconnect: + ipv4_address: 10.5.0.10 + environment: + MULTINODE: "true" + volumes: + - cbdb-cluster-ssh:/opt/cbdb/cluster-ssh + - /sys/fs/cgroup:/sys/fs/cgroup:ro + depends_on: + cluster-ssh-init: + condition: service_completed_successfully + + + cbdb-standby-coordinator: + container_name: cbdb-scdw + image: cbdb-${CODEBASE_VERSION}:${OS_VERSION} + hostname: scdw + tty: true + networks: + interconnect: + ipv4_address: 10.5.0.11 + environment: + MULTINODE: "true" + volumes: + - cbdb-cluster-ssh:/opt/cbdb/cluster-ssh + - /sys/fs/cgroup:/sys/fs/cgroup:ro + depends_on: + cluster-ssh-init: + condition: service_completed_successfully + cbdb-segment-host-1: + container_name: cbdb-sdw1 + image: cbdb-${CODEBASE_VERSION}:${OS_VERSION} + hostname: sdw1 + tty: true + networks: + interconnect: + ipv4_address: 10.5.0.12 + environment: + MULTINODE: "true" + volumes: + - cbdb-cluster-ssh:/opt/cbdb/cluster-ssh + - /sys/fs/cgroup:/sys/fs/cgroup:ro + depends_on: + cluster-ssh-init: + condition: service_completed_successfully + cbdb-segment-host-2: + container_name: cbdb-sdw2 + image: cbdb-${CODEBASE_VERSION}:${OS_VERSION} + hostname: sdw2 + tty: true + networks: + interconnect: + 
ipv4_address: 10.5.0.13 + environment: + MULTINODE: "true" + volumes: + - cbdb-cluster-ssh:/opt/cbdb/cluster-ssh + - /sys/fs/cgroup:/sys/fs/cgroup:ro + depends_on: + cluster-ssh-init: + condition: service_completed_successfully + +networks: + interconnect: + name: cbdb-interconnect + driver: bridge + ipam: + config: + - subnet: 10.5.0.0/16 + gateway: 10.5.0.1 + +volumes: + cbdb-cluster-ssh: diff --git a/devops/sandbox/run.sh b/devops/sandbox/run.sh new file mode 100755 index 00000000000..1e1bdc94931 --- /dev/null +++ b/devops/sandbox/run.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+#
+# --------------------------------------------------------------------
+set -euo pipefail
+
+# The Dockerfiles, the compose file and the build context (".") are all
+# referenced relative to this script, so run from its own directory no
+# matter where it was invoked from.
+cd -- "$(dirname -- "${BASH_SOURCE[0]}")"
+
+# Default values
+DEFAULT_OS_VERSION="rockylinux9"
+DEFAULT_TIMEZONE_VAR="America/Los_Angeles"
+DEFAULT_PIP_INDEX_URL_VAR="https://pypi.org/simple"
+MULTINODE="false"
+
+# Use environment variables if set, otherwise use default values.
+# OS_VERSION and CODEBASE_VERSION are exported because the docker compose
+# file interpolates them into the image name.
+# BUILD_ONLY is deliberately not pre-assigned, so a value supplied through
+# the environment is honoured as the usage text promises.
+export OS_VERSION="${OS_VERSION:-$DEFAULT_OS_VERSION}"
+BUILD_ONLY="${BUILD_ONLY:-false}"
+export CODEBASE_VERSION="${CODEBASE_VERSION:-}"
+TIMEZONE_VAR="${TIMEZONE_VAR:-$DEFAULT_TIMEZONE_VAR}"
+# NOTE(review): PIP_INDEX_URL_VAR is accepted (-p / environment) but is not
+# forwarded to `docker build` anywhere below -- confirm whether the
+# Dockerfiles expect it as a build argument.
+PIP_INDEX_URL_VAR="${PIP_INDEX_URL_VAR:-$DEFAULT_PIP_INDEX_URL_VAR}"
+
+#######################################
+# Print the help message and exit.
+# Arguments: $1 - exit status (default: 1, i.e. invalid invocation)
+#######################################
+usage() {
+    local exit_code="${1:-1}"
+    echo "Usage: $0 [-c <codebase_version>] [-t <timezone>] [-p <pip_index_url>] [-b] [-m] [-h]"
+    echo "  -c  Codebase version (valid values: main, or other available version like 2.0.0)"
+    echo "  -t  Timezone (default: America/Los_Angeles, or set via TIMEZONE_VAR environment variable)"
+    echo "  -p  Python Package Index (PyPI) (default: https://pypi.org/simple, or set via PIP_INDEX_URL_VAR environment variable)"
+    echo "  -b  Build only, do not run the container (default: false, or set via BUILD_ONLY environment variable)"
+    echo "  -m  Multinode, this creates a multinode (multi-container) Cloudberry cluster using docker compose (requires compose to be installed)"
+    echo "  -h  Show this help message"
+    echo "The OS version is taken from the OS_VERSION environment variable (default: ${DEFAULT_OS_VERSION})"
+    exit "${exit_code}"
+}
+
+#######################################
+# Create the "cbdb-cluster-ssh" docker volume and seed it with a freshly
+# generated RSA key pair (id_rsa, id_rsa.pub and a coordinator.pub copy),
+# copied in through a one-off container of the image that was just built.
+# The body runs in a subshell (note the parentheses) so that the EXIT trap
+# removes the temporary private key even when a step fails under `set -e`.
+# Arguments: $1 - image to run for the copy
+# NOTE(review): the compose file declares this volume without an explicit
+# `name:`; Compose normally prefixes volume names with the project name,
+# so the volume seeded here may not be the one the services mount (the
+# compose file also has its own cluster-ssh-init service) -- confirm.
+#######################################
+prepare_cluster_ssh_volume() (
+    image="$1"
+    tmp_dir="$(mktemp -d)"
+    trap 'rm -rf -- "${tmp_dir}"' EXIT
+
+    ssh-keygen -t rsa -b 4096 -N "" -f "${tmp_dir}/id_rsa" >/dev/null 2>&1
+    # Best effort: the volume may already exist from a previous run.
+    docker volume create cbdb-cluster-ssh >/dev/null 2>&1 || true
+    # Populate the volume using a one-off container
+    docker run --rm \
+        -v cbdb-cluster-ssh:/opt/cbdb/cluster-ssh \
+        -v "${tmp_dir}":/tmp/keys:ro \
+        "${image}" bash -lc 'set -e; sudo mkdir -p /opt/cbdb/cluster-ssh; sudo cp /tmp/keys/id_rsa /opt/cbdb/cluster-ssh/id_rsa; sudo cp /tmp/keys/id_rsa.pub /opt/cbdb/cluster-ssh/id_rsa.pub; sudo cp /tmp/keys/id_rsa.pub /opt/cbdb/cluster-ssh/coordinator.pub; sudo chmod 700 /opt/cbdb/cluster-ssh; sudo chmod 600 /opt/cbdb/cluster-ssh/id_rsa; sudo chmod 644 /opt/cbdb/cluster-ssh/id_rsa.pub /opt/cbdb/cluster-ssh/coordinator.pub'
+)
+
+# Parse command-line options
+while getopts "c:t:p:bmh" opt; do
+    case "${opt}" in
+        c)
+            CODEBASE_VERSION=${OPTARG}
+            ;;
+        t)
+            TIMEZONE_VAR=${OPTARG}
+            ;;
+        p)
+            PIP_INDEX_URL_VAR=${OPTARG}
+            ;;
+        b)
+            BUILD_ONLY="true"
+            ;;
+        m)
+            MULTINODE="true"
+            ;;
+        h)
+            # An explicit help request is not an error.
+            usage 0
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+
+if [[ "${MULTINODE}" == "true" ]] && ! docker compose version; then
+    echo "Error: Multinode -m flag found in run arguments but calling docker compose failed. Please install Docker Compose by following the instructions at https://docs.docker.com/compose/install/. Exiting"
+    exit 1
+fi
+
+if [[ "${MULTINODE}" == "true" && "${BUILD_ONLY}" == "true" ]]; then
+    echo "Error: Cannot pass both multinode deployment [m] and build only [b] flags together"
+    exit 1
+fi
+
+# CODEBASE_VERSION must be specified via -c argument or CODEBASE_VERSION environment variable
+if [[ -z "${CODEBASE_VERSION}" ]]; then
+    echo "Error: CODEBASE_VERSION must be specified via environment variable or '-c' command line parameter."
+    usage
+fi
+
+# Validate OS_VERSION (only the OS versions listed here are supported)
+case "${OS_VERSION}" in
+    rockylinux9)
+        ;;
+    *)
+        echo "Invalid OS version: ${OS_VERSION}"
+        usage
+        ;;
+esac
+
+# Validate CODEBASE_VERSION: either "main" or a MAJOR.MINOR.PATCH release
+if [[ "${CODEBASE_VERSION}" != "main" && ! "${CODEBASE_VERSION}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+    echo "Invalid codebase version: ${CODEBASE_VERSION}"
+    usage
+fi
+
+# Build image: "main" has its own Dockerfile, every release version shares
+# Dockerfile.RELEASE.* and receives the version as a build argument.
+IMAGE_TAG="cbdb-${CODEBASE_VERSION}:${OS_VERSION}"
+BUILD_ARGS=(--build-arg TIMEZONE_VAR="${TIMEZONE_VAR}")
+if [[ "${CODEBASE_VERSION}" == "main" ]]; then
+    DOCKERFILE="Dockerfile.${CODEBASE_VERSION}.${OS_VERSION}"
+else
+    DOCKERFILE="Dockerfile.RELEASE.${OS_VERSION}"
+    BUILD_ARGS+=(--build-arg CODEBASE_VERSION_VAR="${CODEBASE_VERSION}")
+fi
+
+docker build --file "${DOCKERFILE}" \
+    "${BUILD_ARGS[@]}" \
+    --tag "${IMAGE_TAG}" .
+
+# Prepare shared cluster-ssh volume for multinode (keys live in /opt/cbdb/cluster-ssh inside the volume)
+if [[ "${MULTINODE}" == "true" ]]; then
+    prepare_cluster_ssh_volume "${IMAGE_TAG}"
+fi
+
+# Check if build only flag is set
+if [[ "${BUILD_ONLY}" == "true" ]]; then
+    echo "Docker image built successfully with OS version ${OS_VERSION} and codebase version ${CODEBASE_VERSION}. Build only mode, not running the container."
+    exit 0
+fi
+
+# Deploy container(s)
+if [[ "${MULTINODE}" == "true" ]]; then
+    docker compose -f "docker-compose-${OS_VERSION}.yml" up --detach
+else
+    docker run --interactive \
+           --tty \
+           --name cbdb-cdw \
+           --detach \
+           --volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
+           --hostname cdw \
+           --publish 122:22 \
+           --publish 15432:5432 \
+           "${IMAGE_TAG}"
+fi
\ No newline at end of file
diff --git a/devops/sandbox/sandbox-deployment.jpg b/devops/sandbox/sandbox-deployment.jpg
new file mode 100644
index 00000000000..bb1b2dc0741
Binary files /dev/null and b/devops/sandbox/sandbox-deployment.jpg differ
diff --git a/pom.xml b/pom.xml
index d6913ec5053..6d33d7e9de5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1748,6 +1748,10 @@ code or new licensing patterns.
--> devops/build/packaging/deb/ubuntu22.04/** + + devops/sandbox/configs/multinode-gpinit-hosts +