Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 56 additions & 24 deletions src/ai/backend_onnx.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,40 +78,72 @@ static void _stderr_suppress_end(int saved)
}
#endif

// Load ORT API from a dynamically loaded module. Returns NULL on failure.
// Resolve the ONNX Runtime API table from an already-opened module.
// `label` is only used for log messages (library path or env-var value).
// Returns NULL if the module does not export OrtGetApiBase.
static const OrtApi *_ort_api_from_module(GModule *mod, const char *label)
{
  typedef const OrtApiBase *(*OrtGetApiBaseFn)(void);
  OrtGetApiBaseFn get_api_base = NULL;
  const gboolean found = g_module_symbol(mod, "OrtGetApiBase", (gpointer *)&get_api_base);
  if(!found || get_api_base == NULL)
  {
    dt_print(DT_DEBUG_AI, "[darktable_ai] OrtGetApiBase symbol not found in '%s'", label);
    return NULL;
  }
  // OrtGetApiBase returns a process-wide singleton; fetch it once for both uses.
  const OrtApiBase *base = get_api_base();
  dt_print(DT_DEBUG_AI, "[darktable_ai] loaded ORT %s from '%s'",
           base->GetVersionString(), label);
  return base->GetApi(ORT_API_VERSION);
}

static gpointer _init_ort_api(gpointer data)
{
(void)data;
const OrtApi *api = NULL;

#ifdef ORT_LAZY_LOAD
// Ubuntu/Debian's system ORT links against libonnx, causing harmless but noisy
// "already registered" ONNX schema warnings when the library is first loaded.
// suppress them by loading ORT explicitly, with stderr temporarily redirected.
// G_MODULE_BIND_LAZY = RTLD_LAZY; default (no BIND_LOCAL) = RTLD_GLOBAL so
// provider symbols remain visible to the rest of the process via dlsym(NULL).
const int saved = _stderr_suppress_begin();
// the handle is intentionally not stored: ORT must stay loaded for the process
// lifetime and g_module_close is never called, so the library stays resident.
GModule *ort_mod = g_module_open(ORT_LIBRARY_PATH, G_MODULE_BIND_LAZY);
_stderr_suppress_end(saved);
// DT_ORT_LIBRARY allows users to point to a GPU-enabled ORT build
// (e.g. CUDA or ROCm) without rebuilding darktable. On Linux this
// overrides the compile-time default; on Windows/macOS it dynamically
// loads a user-supplied library instead of the bundled DirectML/CoreML one.
const char *ort_override = g_getenv("DT_ORT_LIBRARY");

if(!ort_mod)
if(ort_override && ort_override[0])
{
dt_print(DT_DEBUG_AI,
"[darktable_ai] failed to load ORT library '%s': %s",
ORT_LIBRARY_PATH, g_module_error());
return NULL;
GModule *ort_mod = g_module_open(ort_override, G_MODULE_BIND_LAZY);
if(!ort_mod)
{
dt_print(DT_DEBUG_AI,
"[darktable_ai] failed to load ORT library '%s': %s",
ort_override, g_module_error());
return NULL;
}
api = _ort_api_from_module(ort_mod, ort_override);
}
typedef const OrtApiBase *(*OrtGetApiBaseFn)(void);
OrtGetApiBaseFn get_api_base = NULL;
if(!g_module_symbol(ort_mod, "OrtGetApiBase", (gpointer *)&get_api_base) || !get_api_base)
#ifdef ORT_LAZY_LOAD
else
{
dt_print(DT_DEBUG_AI, "[darktable_ai] OrtGetApiBase symbol not found");
return NULL;
// Linux default: lazy-load the bundled or system ORT library.
// Suppress stderr during load - Ubuntu/Debian's system ORT links against
// libonnx, causing harmless "already registered" ONNX schema warnings.
const int saved = _stderr_suppress_begin();
GModule *ort_mod = g_module_open(ORT_LIBRARY_PATH, G_MODULE_BIND_LAZY);
_stderr_suppress_end(saved);

if(!ort_mod)
{
dt_print(DT_DEBUG_AI,
"[darktable_ai] failed to load ORT library '%s': %s",
ORT_LIBRARY_PATH, g_module_error());
return NULL;
}
api = _ort_api_from_module(ort_mod, ORT_LIBRARY_PATH);
}
api = get_api_base()->GetApi(ORT_API_VERSION);
#else
api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
else
{
// Windows/macOS: use the directly linked ORT library (DirectML/CoreML).
const OrtApiBase *base = OrtGetApiBase();
dt_print(DT_DEBUG_AI, "[darktable_ai] loaded ORT %s (bundled)",
base->GetVersionString());
api = base->GetApi(ORT_API_VERSION);
}
#endif

if(!api)
Expand Down Expand Up @@ -876,7 +908,7 @@ dt_ai_onnx_load_ext(const char *model_dir, const char *model_file,
{
ctx->dynamic_outputs = TRUE;
dt_print(DT_DEBUG_AI,
"[darktable_ai] output[%zu] has dynamic dims using ORT-allocated outputs",
"[darktable_ai] output[%zu] has dynamic dims - using ORT-allocated outputs",
i);
break;
}
Expand Down
172 changes: 172 additions & 0 deletions tools/ai/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# GPU-Accelerated ONNX Runtime for darktable

darktable bundles a CPU-only ONNX Runtime by default on Linux, DirectML on
Windows, and CoreML on macOS. These scripts install a GPU-enabled ORT build
to accelerate AI features (denoise, upscale, segmentation).

## What's bundled by default

| Platform | Bundled ORT | GPU support |
|----------|------------|-------------|
| Linux | CPU only | None – use scripts below |
| Windows | DirectML | AMD, NVIDIA, Intel via DirectX 12 |
| macOS | CoreML | Apple Silicon Neural Engine |

## Installing GPU-accelerated ORT

### NVIDIA (CUDA) – Linux & Windows

**Requirements:**

- NVIDIA GPU with compute capability 6.0+ (GeForce GTX 1000 "Pascal" or newer)
- NVIDIA driver 525 or later
- CUDA 12.x runtime – included with the driver on Windows; on Linux install
the CUDA toolkit (`nvidia-cuda-toolkit` on Ubuntu/Debian, `cuda` on Arch)
- cuDNN 9.x – download from https://developer.nvidia.com/cudnn-downloads or
install via package manager (`libcudnn9-cuda-12` on Ubuntu/Debian, `cudnn`
on Arch)

Linux:
```bash
./tools/ai/install-ort-nvidia.sh
```

Windows (PowerShell):
```powershell
.\tools\ai\install-ort-nvidia.ps1
```

Downloads a prebuilt ORT with CUDA EP from GitHub (~200 MB, ~30 sec).
On Windows, use this instead of the bundled DirectML for potentially
better NVIDIA performance.

### AMD (MIGraphX) – Linux

**Requirements:**

- AMD GPU supported by ROCm:
- Consumer: Radeon RX 6000 series (RDNA2) or newer
- Data center: Instinct MI100 (CDNA) or newer
- ROCm 6.0 or later – install from AMD's repo:
https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
- Ubuntu/Debian: `sudo apt install rocm`
- Arch: `sudo pacman -S rocm-hip-sdk`
- Fedora: `sudo dnf install rocm`
- MIGraphX (included in ROCm, or install separately):
- Ubuntu/Debian: `sudo apt install migraphx migraphx-dev`
- Arch: `sudo pacman -S migraphx`
- For building from source: cmake 3.26+, gcc/g++, python3, git

Prebuilt (fast, ~30 sec):
```bash
./tools/ai/install-ort-amd.sh
```

Build from source (fallback if prebuilt doesn't work, 10-20 min):
```bash
./tools/ai/install-ort-amd-build.sh
```

The prebuilt script downloads a wheel from AMD's package repository. The
build script compiles ORT against your installed ROCm headers and
libraries – use it if the prebuilt version has ABI compatibility issues.
Both auto-detect your ROCm version and select the matching ORT release:

| ROCm | ORT version |
|------|-------------|
| 7.2 | 1.23.2 |
| 7.1 | 1.23.1 |
| 7.0 | 1.22.1 |
| 6.4 | 1.21.0 |
| 6.3 | 1.19.0 |
| 6.2 | 1.18.0 |
| 6.1 | 1.17.0 |
| 6.0 | 1.16.0 |

### Intel (OpenVINO) – Linux

**Requirements:**

- Intel GPU or any x86_64 CPU:
- Integrated: HD Graphics, UHD Graphics, Iris Xe (Gen9+)
- Discrete: Intel Arc A-series (A770, A750, A580, etc.)
- CPU-only mode works on any x86_64 processor (Intel or AMD)
- For GPU acceleration: Intel compute runtime with Level Zero
- Ubuntu/Debian: `sudo apt install intel-opencl-icd level-zero`
- Arch: `sudo pacman -S intel-compute-runtime level-zero-loader`
- For Arc GPUs: kernel 6.2 or later recommended
- pip3 (for downloading the wheel)
- OpenVINO runtime is bundled in the package – no separate install needed

```bash
./tools/ai/install-ort-intel.sh
```

Downloads a prebuilt ORT with OpenVINO EP from PyPI (~60 MB, ~30 sec).
Includes all OpenVINO runtime libraries.

## Using the custom ORT

All scripts install to `~/.local/lib/onnxruntime-<provider>/` and print
the path to use. Set the `DT_ORT_LIBRARY` environment variable to point
darktable to the custom build:

```bash
DT_ORT_LIBRARY=~/.local/lib/onnxruntime-cuda/libonnxruntime.so.1.24.4 darktable
```

Or add to `~/.bashrc` for persistence:
```bash
export DT_ORT_LIBRARY=~/.local/lib/onnxruntime-cuda/libonnxruntime.so.1.24.4
```

On Windows (PowerShell):
```powershell
$env:DT_ORT_LIBRARY="C:\Users\you\AppData\Local\onnxruntime-cuda\onnxruntime.dll"
darktable
```

Or set permanently via System → Environment Variables.

If `DT_ORT_LIBRARY` is not set, darktable uses the bundled ORT (CPU on
Linux, DirectML on Windows, CoreML on macOS).

## Manual installation (without scripts)

If you prefer to install manually or the scripts don't work for your setup:

1. **Get an ORT shared library with your desired EP compiled in:**
- NVIDIA CUDA: download `onnxruntime-linux-x64-gpu-VERSION.tgz` (Linux)
or `onnxruntime-win-x64-gpu-VERSION.zip` (Windows) from
https://github.com/microsoft/onnxruntime/releases
- AMD MIGraphX: download `onnxruntime_rocm` wheel from
https://repo.radeon.com/rocm/manylinux/ (match your ROCm version)
or build from source: `./build.sh --config Release --build_shared_lib --use_migraphx --migraphx_home /opt/rocm`
- Intel OpenVINO: `pip download --no-deps onnxruntime-openvino`

2. **Extract the shared library:**
- `.tgz`/`.zip`: extract `lib/libonnxruntime.so*` (or `lib/onnxruntime.dll`)
- `.whl`: rename to `.zip` and extract `onnxruntime/capi/libonnxruntime.so*`
and any `libonnxruntime_providers_*.so` files

3. **Point darktable to it:**
```bash
export DT_ORT_LIBRARY=/path/to/libonnxruntime.so.X.Y.Z
```

## Verifying

Run darktable with AI debug output to confirm which ORT is loaded:

```bash
DT_ORT_LIBRARY=... darktable -d ai
```

Look for:
```
[darktable_ai] loaded ORT 1.24.4 from '/home/user/.local/lib/onnxruntime-cuda/libonnxruntime.so.1.24.4'
```

Then check Preferences → Processing → AI execution provider to select
your GPU provider (CUDA, MIGraphX, OpenVINO).
Loading
Loading