Skip to content

Commit 59265d3

Browse files
authored
Merge pull request #432 from kprinssu/upstream-master
Add experimental/dev AMD GPU support
2 parents 5c8a51e + 4a578cc commit 59265d3

File tree

6 files changed

+284
-16
lines changed

6 files changed

+284
-16
lines changed

.github/workflows/release.yml

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ jobs:
2828
id: get-version
2929
run: |
3030
VERSION_PLAIN=$(cat VERSION)
31-
31+
3232
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
3333
BRANCH_NAME="${{ inputs.branch_name }}"
3434
else
3535
BRANCH_NAME="${{ github.ref_name }}"
3636
fi
37-
37+
3838
if [[ "$BRANCH_NAME" == "release" ]]; then
3939
echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
4040
echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
@@ -61,15 +61,18 @@ jobs:
6161
- build_target: "cpu"
6262
platform: "linux/amd64"
6363
runs_on: "ubuntu-latest"
64-
- build_target: "gpu"
64+
- build_target: "gpu"
6565
platform: "linux/amd64"
6666
runs_on: "ubuntu-latest"
6767
- build_target: "cpu"
6868
platform: "linux/arm64"
6969
runs_on: "ubuntu-24.04-arm"
7070
- build_target: "gpu"
71-
platform: "linux/arm64"
71+
platform: "linux/arm64"
7272
runs_on: "ubuntu-24.04-arm"
73+
- build_target: "rocm"
74+
platform: "linux/amd64"
75+
runs_on: "ubuntu-latest"
7376
runs-on: ${{ matrix.runs_on }}
7477
steps:
7578
- name: Checkout repository
@@ -118,12 +121,12 @@ jobs:
118121
PLATFORM="${{ matrix.platform }}"
119122
BUILD_TARGET="${{ matrix.build_target }}"
120123
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
121-
124+
122125
echo "Building ${PLATFORM} image for ${BUILD_TARGET} version ${VERSION_TAG}"
123-
126+
124127
TARGET="${BUILD_TARGET}-$(echo ${PLATFORM} | cut -d'/' -f2)"
125128
echo "Using bake target: $TARGET"
126-
129+
127130
docker buildx bake $TARGET --push --progress=plain
128131
129132
create-manifests:
@@ -137,28 +140,28 @@ jobs:
137140
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
138141
strategy:
139142
matrix:
140-
build_target: ["cpu", "gpu"]
143+
build_target: ["cpu", "gpu", "rocm"]
141144
steps:
142145
- name: Log in to GitHub Container Registry
143146
uses: docker/login-action@v3
144147
with:
145148
registry: ghcr.io
146149
username: ${{ github.actor }}
147150
password: ${{ secrets.GITHUB_TOKEN }}
148-
151+
149152
- name: Create multi-platform manifest
150153
run: |
151154
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
152155
TARGET="${{ matrix.build_target }}"
153156
REGISTRY="${{ env.REGISTRY }}"
154157
OWNER="${{ env.OWNER }}"
155158
REPO="${{ env.REPO }}"
156-
159+
157160
docker buildx imagetools create -t \
158161
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \
159162
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
160163
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
161-
164+
162165
if [[ "$VERSION_TAG" != *"-"* ]]; then
163166
docker buildx imagetools create -t \
164167
${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \

docker-bake.hcl

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ target "gpu" {
6060
]
6161
}
6262

63+
# Base settings for AMD ROCm builds
64+
target "_rocm_base" {
65+
inherits = ["_common"]
66+
dockerfile = "docker/rocm/Dockerfile"
67+
}
68+
69+
6370
# Individual platform targets for debugging/testing
6471
target "cpu-amd64" {
6572
inherits = ["_cpu_base"]
@@ -97,6 +104,16 @@ target "gpu-arm64" {
97104
]
98105
}
99106

107+
# AMD ROCm only supports x86
108+
target "rocm-amd64" {
109+
inherits = ["_rocm_base"]
110+
platforms = ["linux/amd64"]
111+
tags = [
112+
"${REGISTRY}/${OWNER}/${REPO}-rocm:${VERSION}-amd64",
113+
"${REGISTRY}/${OWNER}/${REPO}-rocm:latest-amd64"
114+
]
115+
}
116+
100117
# Development targets for faster local builds
101118
target "cpu-dev" {
102119
inherits = ["_cpu_base"]
@@ -123,10 +140,14 @@ group "gpu-all" {
123140
targets = ["gpu", "gpu-amd64", "gpu-arm64"]
124141
}
125142

143+
group "rocm-all" {
144+
targets = ["rocm-amd64"]
145+
}
146+
126147
# Builds every image family in one invocation.
# ROCm is amd64-only: this change defines "_rocm_base" and "rocm-amd64" but no
# multi-platform "rocm" target, so the group references "rocm-amd64" directly
# (a group member that names an undefined target makes `docker buildx bake all` fail).
group "all" {
    targets = ["cpu", "gpu", "rocm-amd64"]
}
129150

130151
group "individual-platforms" {
131-
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64"]
152+
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm-amd64"]
132153
}

docker/rocm/Dockerfile

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
FROM rocm/dev-ubuntu-24.04:6.4.4-complete
2+
ENV DEBIAN_FRONTEND=noninteractive \
3+
PHONEMIZER_ESPEAK_PATH=/usr/bin \
4+
PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
5+
ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
6+
7+
# Install Python and other dependencies
8+
RUN apt-get update && apt upgrade -y && apt-get install -y --no-install-recommends \
9+
espeak-ng \
10+
espeak-ng-data \
11+
rocrand \
12+
git \
13+
libsndfile1 \
14+
curl \
15+
ffmpeg \
16+
wget \
17+
nano \
18+
g++ \
19+
zstd \
20+
&& apt-get clean \
21+
&& rm -rf /var/lib/apt/lists/* \
22+
&& mkdir -p /usr/share/espeak-ng-data \
23+
&& ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ \
24+
25+
# Install UV using the installer script
26+
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
27+
&& mv /root/.local/bin/uv /usr/local/bin/ \
28+
&& mv /root/.local/bin/uvx /usr/local/bin/ \
29+
30+
# Create non-root user and set up directories and permissions
31+
&& useradd -m -u 1001 appuser \
32+
&& mkdir -p /app/api/src/models/v1_0 \
33+
&& chown -R appuser:appuser /app \
34+
# Models folder
35+
&& mkdir -p /app/api/src/models/v1_0
36+
37+
USER appuser
38+
WORKDIR /app
39+
40+
# Copy dependency files
41+
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
42+
43+
ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
44+
PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
45+
ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
46+
47+
# Install dependencies with GPU extras (using cache mounts)
48+
RUN --mount=type=cache,target=/root/.cache/uv \
49+
uv venv --python 3.12 && \
50+
uv sync --extra rocm
51+
52+
# Install kdb files (MIOpen kernel database files)
53+
ENV ROCM_VERSION=6.4.4
54+
COPY --chown=appuser:appuser docker/rocm/kdb_install.sh /tmp/
55+
RUN /tmp/kdb_install.sh
56+
57+
# Support older GFX Arch
58+
RUN cd /tmp && wget https://archlinux.org/packages/extra/x86_64/rocblas/download -O rocblas.tar.zst \
59+
&& pwd && ls -lah ./ \
60+
&& tar --zstd -xvf rocblas.tar.zst && rm rocblas.tar.zst \
61+
&& rm -rf /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/library/ \
62+
&& mv ./opt/rocm/lib/rocblas/library/ /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/
63+
64+
# Copy project files including models
65+
COPY --chown=appuser:appuser api ./api
66+
COPY --chown=appuser:appuser web ./web
67+
COPY --chown=appuser:appuser docker/scripts/ ./
68+
69+
RUN chmod +x ./entrypoint.sh
70+
71+
# Set all environment variables in one go
72+
ENV PYTHONUNBUFFERED=1 \
73+
PYTHONPATH=/app:/app/api \
74+
PATH="/app/.venv/bin:$PATH" \
75+
UV_LINK_MODE=copy \
76+
USE_GPU=true \
77+
DOWNLOAD_MODEL=true \
78+
DEVICE="gpu"
79+
80+
# Run FastAPI server through entrypoint.sh
81+
CMD ["./entrypoint.sh"]

docker/rocm/docker-compose.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
services:
2+
kokoro-tts:
3+
image: kprinssu/kokoro-fastapi:rocm
4+
devices:
5+
- /dev/dri
6+
- /dev/kfd
7+
group_add:
8+
# NOTE: These groups are the group ids for: video, input, and render
9+
# Numbers can be found via running: getent group $GROUP_NAME | cut -d: -f3
10+
- 44
11+
- 993
12+
- 996
13+
restart: 'always'
14+
volumes:
15+
- ./kokoro-tts/config:/root/.config/miopen
16+
- ./kokoro-tts/cache:/root/.cache/miopen
17+
ports:
18+
- 8880:8880
19+
environment:
20+
- USE_GPU=true
21+
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
22+
# IMPORTANT: This is only required for RDNA 2 GPUs. You do not need the following steps if you use GPUs that are RDNA 1 (gfx1030) or older.
23+
# ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model.
24+
# See the documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
25+
# The volumes above cache the MIOpen shape files and user database for subsequent runs
26+
#
27+
# Steps:
28+
# 1. Run Kokoro once with the following environment variables set:
29+
# - MIOPEN_FIND_MODE=3
30+
# - MIOPEN_FIND_ENFORCE=3
31+
# 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
32+
# 3. Comment out/remove the previously set environment variables
33+
# 4. Add the following environment variables to enable caching of model shapes:
34+
# - MIOPEN_FIND_MODE=2
35+
# 5. Restart the container and run Kokoro again, it should be much faster

docker/rocm/kdb_install.sh

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
#######################################
# Encode a dotted version string as a fixed-width key that can be compared
# numerically (e.g. with -lt / -gt).
# Arguments: $1 - version such as "6.4.4" (up to four dot-separated fields)
# Outputs:   the fields formatted with "%3d%03d%03d%03d" on stdout; missing or
#            non-numeric fields count as 0, e.g. "6.2" -> "  6002000000"
#######################################
ver() {
  local -a fields=() nums=()
  local field
  # Split on dots with read instead of an unquoted expansion, so the input is
  # never subject to pathname expansion.
  IFS='.' read -ra fields <<< "$1"
  for field in "${fields[@]}"; do
    if [[ "$field" =~ ^[0-9]+$ ]]; then
      # Force base 10: printf %d would treat zero-padded fields such as "08"
      # as (invalid) octal.
      nums+=("$((10#$field))")
    else
      nums+=(0)
    fi
  done
  printf "%3d%03d%03d%03d" "${nums[@]}"
}
8+
9+
# Downloads the MIOpen kernel database (.kdb) packages for the requested GPU
# architectures from repo.radeon.com, unpacks them into a scratch directory
# and copies the resulting miopen share directory into the installed torch
# package.
#
# Environment:
#   GFX_ARCH      ';'-separated list of gfx architectures (optional)
#   ROCM_VERSION  ROCm release used to pick the package repo (optional)

# Sets GFX_ARCH to default if not set
if [[ -z "${GFX_ARCH:-}" ]]; then
  echo "WARNING: missing env var GFX_ARCH, using default (this will take longer)"
  GFX_ARCHS=("gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx1030")
else
  # Convert ';'-separated string to array
  IFS=';' read -ra GFX_ARCHS <<< "$GFX_ARCH"
fi

# Sets ROCM_VERSION to "latest" if not set
if [[ -z "${ROCM_VERSION:-}" ]]; then
  echo "WARNING: missing env var ROCM_VERSION, using latest kdb repo (NOT RECOMMENDED)"
  ROCM_VERSION="latest"
fi

# Locate the directory the torch wheel is installed in. Taking everything
# after "Location: " keeps paths that contain spaces intact.
TORCH_INSTALL_PATH=$(uv pip show torch | sed -n 's/^Location: //p')

# Check if Torch installation path exists
if [[ ! -d "$TORCH_INSTALL_PATH" ]]; then
  echo "Error: Torch installation path '$TORCH_INSTALL_PATH' does not exist." >&2
  exit 1
fi

# Print variable overview
echo "ROCM version: $ROCM_VERSION"
echo "GFX architectures: ${GFX_ARCHS[*]}"
echo "PyTorch installation path: $TORCH_INSTALL_PATH"

# Create directory for extraction (relative to the current directory)
EXTRACT_DIR=extract_miopen_dbs
rm -rf -- "$EXTRACT_DIR"
mkdir -p "$EXTRACT_DIR" && cd "$EXTRACT_DIR"

if [[ -f /etc/lsb-release ]]; then
  # Ubuntu: exit when running 24.04 with a ROCm version older than 6.2
  source /etc/lsb-release
  echo "DISTRIB_RELEASE: $DISTRIB_RELEASE"
  if [[ "$DISTRIB_RELEASE" != "20.04" && "$DISTRIB_RELEASE" != "22.04" ]]; then
    if [[ "$ROCM_VERSION" != "latest" && $(ver "$ROCM_VERSION") -lt $(ver 6.2) && "$DISTRIB_RELEASE" == "24.04" ]]; then
      echo "ERROR: Unsupported Ubuntu version." >&2
      exit 1
    fi
  fi

  for arch in "${GFX_ARCHS[@]}"; do
    # Download MIOpen .kdbs for ROCm version and GPU architecture on ubuntu
    echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
    deb_pattern="miopen-hip-${arch}*kdb_*${DISTRIB_RELEASE}*deb"
    # The accept pattern is quoted so wget receives it verbatim; unquoted, the
    # shell would expand it against files already present in this directory.
    wget -q -r -np -nd -A "$deb_pattern" \
      "https://repo.radeon.com/rocm/apt/${ROCM_VERSION}/pool/main/m/"

    # Check if files were downloaded. No KDB files in repo.radeon will result in error.
    if ! compgen -G "$deb_pattern" > /dev/null; then
      printf '%s\n' \
        "ERROR: No MIOpen kernel database files found for $arch" \
        "Please check https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/ for supported architectures" >&2
      exit 1
    fi
  done

  # Extract all .deb files to local directory
  echo "Extracting deb packages for ${GFX_ARCHS[*]} ..."
  for deb_file in *deb; do
    [[ -e "$deb_file" ]] || continue  # glob matched nothing
    echo "Extracting $deb_file..."
    dpkg-deb -xv "$deb_file" . > /dev/null 2>&1
  done

elif [[ -f /etc/centos-release || -f /etc/redhat-release ]]; then
  # CentOS/RHEL kdbs
  source /etc/os-release && RHEL_VERSION="$VERSION_ID"
  RHEL_MAJOR_VERSION=${RHEL_VERSION%%.*}
  echo "RHEL_VERSION: $RHEL_VERSION; RHEL_MAJOR_VERSION: $RHEL_MAJOR_VERSION"
  if [[ ! "$RHEL_VERSION" =~ ^(8|9) ]]; then
    echo "ERROR: Unsupported CentOS/RHEL release" >&2
    # Abort like every other ERROR path instead of continuing against a
    # release that was just reported as unsupported.
    exit 1
  fi
  for arch in "${GFX_ARCHS[@]}"; do
    # Download MIOpen .kdbs for ROCm version and GPU architecture on centos
    echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
    wget -q -r -np -nd -A "miopen-hip-${arch}*kdb-[0-9]*rpm" \
      "https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/${ROCM_VERSION}/main"

    # Check if files were downloaded. No KDB files in repo.radeon will result in error.
    if ! compgen -G "miopen-hip-${arch}*kdb-*rpm" > /dev/null; then
      printf '%s\n' \
        "ERROR: No MIOpen kernel database files found for $arch" \
        "Please check https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main for supported architectures" >&2
      exit 1
    fi
  done

  # Extract all RPM files to current directory
  echo "Extracting rpm packages for ${GFX_ARCHS[*]} ..."
  for rpm_file in *rpm; do
    [[ -e "$rpm_file" ]] || continue  # glob matched nothing
    echo "Extracting $rpm_file..."
    rpm2cpio "$rpm_file" | cpio -idmv 2> /dev/null
  done
else
  echo "ERROR: Unsupported operating system." >&2
  exit 1
fi

# Copy miopen db files to PyTorch installation path
echo "Copying kdb files to ${TORCH_INSTALL_PATH}/torch/share"
# The source glob is intentionally unquoted so it expands to the versioned
# opt/rocm-* directory produced by the extracted packages.
cp -ra opt/rocm-*/share/miopen "$TORCH_INSTALL_PATH/torch/share"

# Remove downloaded files and extract directory
cd .. && rm -rf -- "$EXTRACT_DIR"
echo "Successfully installed MIOpen kernel database files"

0 commit comments

Comments
 (0)