Skip to content
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,10 @@ cscope.*

/bazel-*
*.pyc

# Helm chart dependencies cache
**/Chart.lock
**/charts/*.tgz

# Helm chart output directory
ai/ai-starter-kit/out
64 changes: 64 additions & 0 deletions ai/ai-starter-kit/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Declare every command target as phony so a same-named file in the repo
# can never shadow it. Previously start_gpu, destroy and the validate_*
# targets were missing from this list.
.PHONY: check_hf_token check_OCI_target package_helm lint dep_update install install_gke start start_gpu uninstall destroy push_helm validate_jupyterhub validate_ray
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the usage of the make commands?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You want me to document each?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just in general in README. User can still following the current README to install via helm, so not sure when these make commands should be used.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documented in commit: 78a03d7


# Guard target: abort unless HF_TOKEN is set (env or command line).
# `ifndef` is evaluated at parse time; when the variable is unset the
# $(error ...) line becomes this target's recipe and aborts make as soon
# as the target is invoked.
check_hf_token:
ifndef HF_TOKEN
	$(error HF_TOKEN is not set)
endif

# Guard target for push_helm: require the OCI registry destination,
# e.g. OCI_HELM_TARGET=registry.example.com/charts
check_OCI_target:
ifndef OCI_HELM_TARGET
	$(error OCI_HELM_TARGET is not set)
endif

# Package the chart into a versioned .tgz under out/.
# NOTE(review): run `make dep_update` first so dependency charts are
# vendored into charts/ before packaging — confirm in CI ordering.
package_helm:
	helm package helm-chart/ai-starter-kit/ --destination out/

# Push the packaged chart to an OCI registry (requires OCI_HELM_TARGET).
# $$ defers expansion to the shell so the environment value is used.
# NOTE(review): the glob assumes a single packaged version exists in out/.
push_helm: check_OCI_target
	helm push out/ai-starter-kit* oci://$$OCI_HELM_TARGET

# Static-check the chart templates and values for errors.
lint:
	helm lint helm-chart/ai-starter-kit

# Fetch/refresh dependency charts declared in Chart.yaml into charts/.
dep_update:
	helm dependency update helm-chart/ai-starter-kit

# Install (or upgrade in place) the ai-starter-kit release into the
# current kubectl context. Requires HF_TOKEN in the environment
# (validated by check_hf_token); $$HF_TOKEN is expanded by the shell,
# not by make, so the env value is passed to helm.
install: check_hf_token
	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml

# Start a local minikube cluster with the host model cache mounted into
# the VM at /tmp/models-cache.
# BUG FIX: the recipe previously ran `mkdir -p /tmp/models-cache`, which
# creates the mount *destination* path on the host instead of the mount
# *source* ($HOME/models-cache) that --mount-string actually shares.
start:
	mkdir -p $$HOME/models-cache
	minikube start --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:/tmp/models-cache"

# Start minikube with the krunkit driver (GPU acceleration on macOS),
# mounting the host model cache at the same path inside the VM.
# BUG FIX: a literal `$HOME` in a recipe is expanded by make as the
# (empty) variable `$H` followed by "OME", so the shell saw
# "OME/models-cache". It must be escaped as `$$HOME`.
start_gpu:
	mkdir -p $$HOME/models-cache
	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:$$HOME/models-cache"

# Remove the helm release plus leftover resources the chart does not
# garbage-collect (the spawned user pod and the hub's DB PVC).
# --ignore-not-found makes the cleanup idempotent: re-running uninstall
# after a partial teardown no longer fails the target.
uninstall:
	helm uninstall ai-starter-kit
	kubectl delete pod jupyter-user --ignore-not-found
	kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir --ignore-not-found

# Delete the entire minikube cluster (irreversible; removes all state).
destroy:
	minikube delete

# Smoke-test JupyterHub: wait for pods, port-forward the public proxy,
# and run the CI probe against it.
# BUG FIX: the original chained everything with `; \` and ended with
# `kill $$PID`, so (a) a failed `kubectl wait` did not stop the recipe
# and (b) the recipe's exit status was kill's, masking a failing test.
# The test status is now captured before the port-forward is killed and
# re-raised with `exit`. (`sleep 5` instead of `5s` for POSIX sleep.)
validate_jupyterhub:
	kubectl get pods
	kubectl wait --for=condition=Ready pods -l 'component!=continuous-image-puller' --timeout=1800s
	kubectl get pods
	kubectl get services
	kubectl port-forward service/ai-starter-kit-jupyterhub-proxy-public 8081:80 & \
	PID=$$!; \
	echo "Port-forward PID=$${PID}"; \
	sleep 5; \
	python3 ./ci/test_hub.py "127.0.0.1:8081"; STATUS=$$?; \
	kill $$PID; \
	exit $$STATUS

# Smoke-test the Ray cluster: wait for kuberay-managed pods, port-forward
# the head service's dashboard port, and submit a trivial job.
# BUG FIX: as in validate_jupyterhub, the trailing `kill $$PID` used to
# determine the recipe's exit status, so a failed `ray job submit` never
# failed the target; the job status is now captured and re-raised.
validate_ray:
	kubectl wait --for=condition=Ready pods -l 'app.kubernetes.io/created-by=kuberay-operator' --timeout=1800s
	kubectl get pods
	kubectl get services
	kubectl port-forward service/ai-starter-kit-kuberay-head-svc 8265:8265 & \
	PID=$$!; \
	sleep 10; \
	ray job submit --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"; STATUS=$$?; \
	kill $$PID; \
	exit $$STATUS
23 changes: 23 additions & 0 deletions ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
45 changes: 45 additions & 0 deletions ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
apiVersion: v2
name: ai-starter-kit
description: An AI/ML starter environment for Kubernetes bundling JupyterHub, Ray (KubeRay), MLflow, and Ollama

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.1.0"


dependencies:
- name: kuberay-operator
condition: ray-cluster.enabled
version: "1.3.0"
repository: "https://ray-project.github.io/kuberay-helm"
- condition: ray-cluster.enabled
name: ray-cluster
version: "1.3.0"
repository: "https://ray-project.github.io/kuberay-helm"
- name: jupyterhub
version: "4.2.0"
repository: "https://hub.jupyter.org/helm-chart/"
- name: mlflow
version: "0.12.0"
repository: "https://community-charts.github.io/helm-charts"
- name: ollama
condition: ollama.enabled
version: "1.27.0"
repository: "https://helm.otwld.com"
233 changes: 233 additions & 0 deletions ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
# AI Starter Kit

A comprehensive Helm chart for deploying a complete AI/ML development environment on Kubernetes. This starter kit provides a ready-to-use platform with JupyterHub notebooks, model serving capabilities, and experiment tracking - perfect for teams starting their AI journey or prototyping AI applications.

## Purpose

The AI Starter Kit simplifies the deployment of AI infrastructure by providing:

- **JupyterHub**: Multi-user notebook environment with pre-configured AI/ML libraries
- **Model Serving**: Support for both Ollama and Ramalama model servers
- **MLflow**: Experiment tracking and model management
- **Model Caching**: Persistent storage for efficient model management
- **Example Notebooks**: Pre-loaded notebooks to get you started immediately

## Prerequisites

### General Requirements
- Kubernetes cluster (minikube, GKE)
- Helm 3.x installed
- kubectl configured to access your cluster
- Hugging Face token for accessing models

### Platform-Specific Requirements

#### Minikube (Local Development)
- Docker Desktop or similar container runtime
- Minimum 4 CPU cores and 16GB RAM available
- 40GB+ free disk space

## Installation

### Quick Start (Minikube)

1. **Create a folder for the persistent storage:**
```bash
mkdir -p $HOME/models-cache
```

2. **Start minikube with persistent storage:**
```bash
minikube start --cpus 4 --memory 15000 \
--mount --mount-string="$HOME/models-cache:/tmp/models-cache"
```

3. **Install the chart:**
```bash
cd ai/ai-starter-kit/helm-chart/ai-starter-kit
helm dependency update
helm install ai-starter-kit . \
--set huggingface.token="YOUR_HF_TOKEN" \
-f values.yaml
```

4. **Access JupyterHub:**
```bash
kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80
```
Navigate to http://localhost:8080 and login with any username and password `password`

## Configuration

### Key Configuration Options

| Parameter | Description | Default |
|-----------|-------------|---------|
| `huggingface.token` | HuggingFace token for models | `"YOUR_HF_TOKEN"` |
| `ollama.enabled` | Enable Ollama model server | `true` |
| `ramalama.enabled` | Enable Ramalama model server | `true` |
| `modelsCachePvc.size` | Size of model cache storage | `10Gi` |
| `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` |
| `mlflow.enabled` | Enable MLflow tracking server | `true` |

### Storage Configuration

The chart supports different storage configurations:

- **Local Development**: Uses hostPath volumes with minikube mount
- **Custom**: Configure via `modelsCachePvc.storageClassName`

### Using GPUs

In order to use GPUs for AI/ML workloads we need to add the necessary config to the services. Check the dependency charts documentation for the values. For example jupyterhub config would be:

```yaml
jupyterhub:
...
extraResource:
limits:
nvidia.com/gpu: 1
guarantees:
nvidia.com/gpu: 1

nodeSelector:
cloud.google.com/gke-accelerator: nvidia-l4
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's call out in the description above that this is using GKE as an example

Copy link

@alex-akv alex-akv Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Described in commit: ced46e9

```
### Model Servers
#### Ollama
Ollama is enabled by default and provides:
- Easy model management
- REST API for inference
- Support for popular models (Llama, Gemma, Qwen, etc.)
- GPU acceleration support
#### Ramalama
Ramalama provides:
- Alternative model serving solution
- Support for CUDA and Metal (macOS) acceleration
- Lightweight deployment option
## Usage
### Accessing Services
#### JupyterHub
```bash
# Port forward to access JupyterHub
kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80
# Access at: http://localhost:8080
# Default password: sneakypass
```

#### MLflow
```bash
# Port forward to access MLflow UI
kubectl port-forward svc/ai-starter-kit-mlflow 5000:5000
# Access at: http://localhost:5000
```

#### Ollama/Ramalama API
```bash
# For Ollama
kubectl port-forward svc/ai-starter-kit-ollama 11434:11434

# For Ramalama
kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080
```

### Pre-loaded Example Notebooks

The JupyterHub environment comes with pre-loaded example notebooks:
- `ray.ipynb`: Simple Ray and MLflow example
- `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI.
- `multi-agent.ipynb`: Multi-agent workflow demonstration using Ray.
- `multi-agent-ollama.ipynb`: Similar multi-agent workflow demonstration using Ollama.
- `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison.
- `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models.

These notebooks are automatically copied to your workspace on first login.

## Architecture

The AI Starter Kit consists of:

1. **JupyterHub**: Multi-user notebook server with persistent storage
2. **Model Serving**: Choice of Ollama or Ramalama for LLM inference
3. **MLflow**: Experiment tracking and model registry
4. **Persistent Storage**: Shared model cache to avoid redundant downloads
5. **Init Containers**: Automated setup of models and notebooks

## Cleanup

### Uninstall the chart
```bash
helm uninstall ai-starter-kit
```

### Delete persistent volumes (optional)
```bash
kubectl delete pvc ai-starter-kit-models-cache-pvc
kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir
```

### Delete GKE cluster
```bash
gcloud container clusters delete ${CLUSTER_NAME} --region=${REGION}
```
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed in commit: ced46e9


### Stop minikube
```bash
minikube stop
minikube delete # To completely remove the cluster
```

## Troubleshooting

### Common Issues

#### Pods stuck in Pending state
- Check available resources: `kubectl describe pod <pod-name>`
- Increase cluster resources or reduce resource requests

#### Model download failures
- Verify Hugging Face token is set correctly
- Check internet connectivity from pods
- Increase init container timeout in values

#### GPU not detected
- Verify GPU nodes are available: `kubectl get nodes -o wide`
- Check GPU driver installation
- Ensure correct node selectors and tolerations

#### Storage issues
- Verify PVC is bound: `kubectl get pvc`
- Check storage class availability: `kubectl get storageclass`
- Ensure sufficient disk space

### Debug Commands
```bash
# Check pod status
kubectl get pods -n default

# View pod logs
kubectl logs -f <pod-name>

# Describe pod for events
kubectl describe pod <pod-name>

# Check resource usage
kubectl top nodes
kubectl top pods
```

## Resources

- [JupyterHub Documentation](https://jupyterhub.readthedocs.io/)
- [MLflow Documentation](https://mlflow.org/docs/latest/index.html)
- [Ollama Documentation](https://ollama.ai/docs)
- [Kubernetes Documentation](https://kubernetes.io/docs/)
- [Helm Documentation](https://helm.sh/docs/)
Loading