diff --git a/.github/ISSUE_TEMPLATE/new-release.md b/.github/ISSUE_TEMPLATE/new-release.md index 27e837845..507d8fa97 100644 --- a/.github/ISSUE_TEMPLATE/new-release.md +++ b/.github/ISSUE_TEMPLATE/new-release.md @@ -66,7 +66,7 @@ This document defines the process for releasing Gateway API Inference Extension. git checkout -b release-${MAJOR}.${MINOR} ${REMOTE}/release-${MAJOR}.${MINOR} ``` -4. Update release-specific content, generate release artifacts, and stage the changes. +4. Update release-specific content, generate release artifacts, build the versioned docs, and stage the changes. ```shell make release @@ -149,8 +149,7 @@ Use the following steps to announce the release. ## Final Steps -1. Update docs in the `main` branch. -2. Close this issue. +Close this issue. [repo]: https://github.com/kubernetes-sigs/gateway-api-inference-extension [staging registry]: https://console.cloud.google.com/artifacts/docker/k8s-staging-images/us-central1/gateway-api-inference-extension/epp diff --git a/.gitignore b/.gitignore index 4442b6516..f259fa3df 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ go.work.sum # generated docs site + +# virtual environment for mkdocs +.venv diff --git a/Makefile b/Makefile index 884d42294..af0ef8771 100644 --- a/Makefile +++ b/Makefile @@ -260,28 +260,28 @@ bbr-image-kind: bbr-image-build ## Build the image and load it to kind cluster $ ##@ Docs -.PHONY: build-docs -build-docs: - docker build --pull -t gaie/mkdocs hack/mkdocs/image - docker run --rm -v ${PWD}:/docs gaie/mkdocs build -.PHONY: build-docs-netlify -build-docs-netlify: - pip install -r hack/mkdocs/image/requirements.txt - python -m mkdocs build +.PHONY: docs +docs: ## Deploy documentation using mike, determining latest version from git tags. 
+ chmod +x ./hack/mkdocs/make-docs.sh + ./hack/mkdocs/make-docs.sh .PHONY: live-docs live-docs: docker build -t gaie/mkdocs hack/mkdocs/image docker run --rm -it -p 3000:3000 -v ${PWD}:/docs gaie/mkdocs -.PHONY: api-ref-docs -api-ref-docs: - crd-ref-docs \ - --source-path=${PWD}/api \ - --config=crd-ref-docs.yaml \ - --renderer=markdown \ - --output-path=${PWD}/site-src/reference/spec.md +# Generate a virtualenv install, which is useful for hacking on the +# docs since it installs mkdocs and all the right dependencies. +# +# On Ubuntu, this requires the python3-venv package. +virtualenv: .venv +.venv: hack/mkdocs/image/requirements.txt + @echo Creating a virtualenv in $@"... " + @python3 -m venv $@ || (rm -rf $@ && exit 1) + @echo Installing packages in $@"... " + @$@/bin/python3 -m pip install -q -r hack/mkdocs/image/requirements.txt || (rm -rf $@ && exit 1) + @echo To enter the virtualenv type \"source $@/bin/activate\", to exit type \"deactivate\" ##@ Deployment @@ -321,7 +321,7 @@ artifacts: kustomize @$(call clean-manifests) .PHONY: release -release: artifacts release-quickstart verify test # Create a release. +release: artifacts release-quickstart verify test docs # Create a release. 
##@ Dependencies diff --git a/hack/mkdocs/image/Dockerfile b/hack/mkdocs/image/Dockerfile index 87e5bd7bf..4cc9f9e10 100644 --- a/hack/mkdocs/image/Dockerfile +++ b/hack/mkdocs/image/Dockerfile @@ -14,6 +14,9 @@ FROM python:3.13-alpine +# Install git, required for mike versioning +RUN apk add --no-cache git + COPY requirements.txt /requirements.txt RUN pip install -r /requirements.txt diff --git a/hack/mkdocs/image/entrypoint.sh b/hack/mkdocs/image/entrypoint.sh index 9e7accc0c..94faa23cc 100755 --- a/hack/mkdocs/image/entrypoint.sh +++ b/hack/mkdocs/image/entrypoint.sh @@ -22,7 +22,10 @@ CMD=$1 if [ "$CMD" == "build" ]; then mkdocs build + # Set the default version to latest after building + mike set-default --branch docs latest exit 0; fi -mkdocs serve --dev-addr=0.0.0.0:3000 --livereload \ No newline at end of file +# Use mike serve for versioning support +mike serve -a 0.0.0.0:3000 --branch docs diff --git a/hack/mkdocs/image/requirements.txt b/hack/mkdocs/image/requirements.txt index e7cb9b0c6..7f5144c0f 100644 --- a/hack/mkdocs/image/requirements.txt +++ b/hack/mkdocs/image/requirements.txt @@ -23,3 +23,4 @@ mkdocs-material==9.5.36 mkdocs-material-extensions==1.3.1 mkdocs-redirects==1.2.1 mkdocs-mermaid2-plugin==1.1.1 +mike==2.1.3 diff --git a/hack/mkdocs/make-docs.sh b/hack/mkdocs/make-docs.sh new file mode 100755 index 000000000..a566c4d4f --- /dev/null +++ b/hack/mkdocs/make-docs.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# Copyright 2021 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +readonly SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE}")"/../.. && pwd)" +cd $SCRIPT_ROOT + +# ----------------------------------------------------------------------------- +# Version extraction from git branch +# ----------------------------------------------------------------------------- +get_version_from_branch() { + # Get current branch name + local branch_name + branch_name=$(git rev-parse --abbrev-ref HEAD) + + # If the branch is main, set the version to main (and not a MAJOR.MINOR version) + if [[ "$branch_name" == "main" ]]; then + VERSION="main" + + # Extract version from branch name (e.g., release-0.3 -> 0.3) + elif [[ $branch_name =~ release-([0-9]+)\.([0-9]+) ]]; then + MAJOR="${BASH_REMATCH[1]}" + MINOR="${BASH_REMATCH[2]}" + VERSION="${MAJOR}.${MINOR}" + else + echo "Error: Could not extract version from branch name: $branch_name" + echo "Expected branch name format: 'release-X.Y' or 'main'" + exit 1 + fi +} + +# ----------------------------------------------------------------------------- +# Check if version should be marked as latest (ignore release candidates "-rc" or any hyphenated suffix) +# ----------------------------------------------------------------------------- +is_latest_version() { + # 1) List all tags matching semver-ish (vX.Y[.Z] or X.Y[.Z]), sort by version descending, + # and pick the very first one. + local latest_tag + latest_tag=$(git tag --list 'v[0-9]*.[0-9]*' --list '[0-9]*.[0-9]*' --sort=-v:refname | head -n1) + + if [[ -z "$latest_tag" ]]; then + echo "Error: Could not find any semver‐style tags." + return 1 + fi + + # 2) Strip leading 'v', then drop anything after the first hyphen (e.g. 
"0.3.0-rc.1" → "0.3.0") + local bare="${latest_tag#v}" # remove leading "v" if present + bare="${bare%%-*}" # drop "-" (so "0.3.0-rc.1" → "0.3.0") + + # 3) Now extract MAJOR and MINOR from e.g. "0.3.0" or "2.5" + if [[ "$bare" =~ ^([0-9]+)\.([0-9]+)(\.[0-9]+)?$ ]]; then + local latest_major="${BASH_REMATCH[1]}" + local latest_minor="${BASH_REMATCH[2]}" + else + echo "Error: Could not parse version from latest tag: ${latest_tag} (bare='${bare}')" + return 1 + fi + + # 4) Compare numeric MAJOR/MINOR for exact match + if (( MAJOR == latest_major && MINOR == latest_minor )); then + return 0 + fi + return 1 +} + +# Get version from current branch +get_version_from_branch + +# ----------------------------------------------------------------------------- +# Environment variables (defaults) +# ----------------------------------------------------------------------------- +# VERSION is now set by get_version_from_branch() + +# Wrap sed to deal with GNU and BSD sed flags. +run::sed() { + local -r vers="$(sed --version < /dev/null 2>&1 | grep -q GNU && echo gnu || echo bsd)" + case "$vers" in + gnu) sed -i "$@" ;; + *) sed -i '' "$@" ;; + esac +} + +# ----------------------------------------------------------------------------- +# Build versioned docs +# ----------------------------------------------------------------------------- + +# Generate API docs + +GOPATH=${GOPATH:-$(go env GOPATH)} + +# "go env" doesn't print anything if GOBIN is the default, so we +# have to manually default it. +GOBIN=${GOBIN:-$(go env GOBIN)} +GOBIN=${GOBIN:-${GOPATH}/bin} + +echo $GOBIN + +go install github.com/elastic/crd-ref-docs + +${GOBIN}/crd-ref-docs \ + --source-path=${PWD}/api \ + --config=crd-ref-docs.yaml \ + --renderer=markdown \ + --output-path=${PWD}/site-src/reference/spec.md + +# Deploy docs with mike +echo "Deploying docs for version ${VERSION}" +if [[ "$VERSION" == "main" ]]; then + echo "Deploying docs as 'main'." 
+ mike deploy --push --branch docs main +elif is_latest_version; then + echo "This version will be deployed and marked as 'latest'." + mike deploy --push --update-aliases --alias-type=copy --branch docs "${VERSION}" latest +else + echo "This version will be deployed, but not marked as 'latest'." + mike deploy --push --branch docs "${VERSION}" +fi + +# Always set the default version to 'latest' +echo "Setting default version to 'latest'." +mike set-default --branch docs latest diff --git a/mkdocs.yml b/mkdocs.yml index e5927ed53..7c745ddde 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,7 +2,12 @@ site_name: Kubernetes Gateway API Inference Extension repo_url: https://github.com/kubernetes-sigs/gateway-api-inference-extension repo_name: kubernetes-sigs/gateway-api-inference-extension site_dir: site +site_url: 'https://gateway-api-inference-extension.sigs.k8s.io/' docs_dir: site-src +extra: + version: + provider: mike + alias: true extra_css: - stylesheets/extra.css theme: @@ -51,9 +56,9 @@ nav: - Overview: - Introduction: index.md - Concepts: - API Overview: concepts/api-overview.md - Conformance: concepts/conformance.md - Roles and Personas: concepts/roles-and-personas.md + - API Overview: concepts/api-overview.md + - Conformance: concepts/conformance.md + - Roles and Personas: concepts/roles-and-personas.md - Implementations: - Gateways: implementations/gateways.md - Model Servers: implementations/model-servers.md @@ -77,3 +82,4 @@ nav: - Contributing: - How to Get Involved: contributing/index.md - Developer Guide: contributing/devguide.md + - Documentation Guide: contributing/docs.md diff --git a/netlify.toml b/netlify.toml index ff61e3bfc..e6167fd26 100644 --- a/netlify.toml +++ b/netlify.toml @@ -1,6 +1,14 @@ # netlify configuration [build] -publish = "site" -command = "make build-docs-netlify" -# available here https://github.com/netlify/build-image/blob/focal/included_software.md#languages -environment = { PYTHON_VERSION = "3.8" } \ No newline at end of file + 
# Base directory is the root (/) + base = "/" + # Publish directory is the current directory (.) + publish = "." + # No build command needed since we're publishing from the docs branch + command = "" + # available here https://github.com/netlify/build-image/blob/focal/included_software.md#languages + environment = { PYTHON_VERSION = "3.8" } + +# Specify that we want to build from the docs branch +[context.production] + branch = "docs" diff --git a/site-src/contributing/docs.md b/site-src/contributing/docs.md new file mode 100644 index 000000000..25809b202 --- /dev/null +++ b/site-src/contributing/docs.md @@ -0,0 +1,140 @@ +# Contributing to the docs + +This doc site is built using MkDocs. It includes a Docker image for you to preview local changes without needing to set up MkDocs and its related plug-ins. + +Branch sources of the docs content: + +- `main` branch for `main` version +- `release-MAJOR.MINOR` branches such as `release-0.1` for `0.1` version +- `docs` branch for the versioned directories that are published via Netlify to the website + +## Preview local changes + +1. In the `site-src` directory, make your changes to the Markdown files. + +2. If you add a new page, make sure to update the `nav` section in the `mkdocs.yml` file. + +3. From the root directory of this project, run the Docker image with the following command from the `Makefile`. + ```sh + make live-docs + ``` + +4. Open your browser to preview the local build, [http://localhost:3000](http://localhost:3000). + +!!! tip "One preview at a time" + For better performance, open one localhost preview at a time. If you have multiple browsers rendering `localhost:3000`, you might notice a lag time in loading pages. 
+ +## Style guides + +Refer to the following style guides: + +* [Gateway API](https://gateway-api.sigs.k8s.io/contributing/style-guide/) +* [Kubernetes](https://kubernetes.io/docs/contribute/style/style-guide/) + +If you need guidance on specific words that are not covered in one of those style guides, check a common cloud provider, such as [Google developer docs](https://developers.google.com/style). + +## Version the docs + +The Material theme uses `mike` to version the docs. + +### Automatic versioning for releases + +The `make docs` target in the Makefile runs the `hack/mkdocs/make-docs.sh` script. This script runs `mike` to version the docs based on the current branch. It works for `main` and major/minor release branches such as `release-0.1`. + +### Update versioned docs + +For main or release branches such as `release-0.1`, you can update doc content as follows: + +1. Check out the main or release branch. +2. Make changes to the markdown files in the `site-src` directory. +3. Run `make docs` to build the docs and push the changes to the `docs` branch. +4. Netlify gets triggered automatically and publishes the changes to the website. + +### Manual versioning + +Sometimes, you might need to manually update a doc version. For example, you might want to delete an old LTS version that is no longer needed. + +The following steps cover common workflows for versioning. For more information, see the following resources: + +* [Material theme versioning page](https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/) +* [`mike` readme](https://github.com/jimporter/mike) + +Example workflow for using `mike`: + +1. List the current versions. Aliases are included in brackets. + ```sh + mike list + + # Example output + 0.3 [main] + 0.2 [latest] + 0.1 + ``` + +2. Check out the branch that you want to build the docs from. + +3. In the `site-src` directory, make and save your doc changes. + +4. Add the changes to the versions that you want to publish them in. 
If the version has an alias such as latest, you can include that. Make sure to include the `--branch docs` flag, so as not to publish docs to the `mike` default `gh-pages` branch. + ```sh + mike deploy --push --branch docs main + mike deploy --push --update-aliases 0.4 --branch docs latest + ``` + +5. Delete an old version of the docs that you no longer need. The following example adds a new version 0.4 as main based on the current content, renames 0.3 to latest with the current content, removes the latest alias from 0.2 but leaves the version content untouched, and deletes version 0.1. + ```sh + mike delete 0.1 + ``` + +### How versioning works + +The `mike` commands add each version as a separate commit and directory on the `docs` branch. + +* The versioned directories contain the output of the MkDocs build for each version. +* The `latest` and `main` aliases are copies of the versioned directories. +* The `versions.json` file has the information for each version and alias that `mike` tracks. You can check this file to see which versions and aliases are currently deployed. + +Example directory structure: + +```plaintext +'docs' branch +│── 0.1/ +│── 0.2/ +│── 0.3/ +│── 0.4/ +│── latest/ +│── main/ +│── versions.json +``` + +The doc builds then publish the versioned content from this branch to the website. + +## Develop the MkDocs theme + +As you contribute to the Kubernetes Gateway API Inference Extension project, you might want to add features to the MkDocs theme or build process. + +Helpful resources: + +* [Customization, extensions, and overrides](https://squidfunk.github.io/mkdocs-material/customization/) +* [Setup features](https://squidfunk.github.io/mkdocs-material/setup/) +* [Plugins](https://squidfunk.github.io/mkdocs-material/plugins/) + +General steps: + +1. Set up a virtual environment with python, pip, mkdocs, and the plugins that this project uses. + ```sh + make virtualenv + ``` + +2. Try out the MkDocs Material theme features, plugins, or other customizations that you want to add locally. + +3. 
For plugins, add the plugin to the `/hack/mkdocs/image/requirements.txt` file. + +4. From the root directory, run the Docker image of the docs. Make sure that your changes build and work as you expect. + ```sh + make live-docs + ``` + +## Publish the docs + +The project uses Netlify to host the docs. Netlify automatically builds the docs based on the versioned directories in the `docs` branch. diff --git a/site-src/index.md b/site-src/index.md index 61bece27f..5328d9187 100644 --- a/site-src/index.md +++ b/site-src/index.md @@ -9,7 +9,7 @@ The overall resource model focuses on 2 new inference-focused they are expected to manage: -Gateway API Inference Extension Resource Model +Gateway API Inference Extension Resource Model ## Key Features Gateway API Inference Extension, along with a reference implementation in Envoy Proxy, provides the following key features: @@ -82,7 +82,7 @@ routed to. 5. The Gateway will route the request to the desired endpoint. -Gateway API Inference Extension Request Flow +Gateway API Inference Extension Request Flow ## Who is working on Gateway API Inference Extension? diff --git a/site-src/overrides/main.html b/site-src/overrides/main.html new file mode 100644 index 000000000..3cf182cfa --- /dev/null +++ b/site-src/overrides/main.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} + +{% block outdated %} + You're not viewing the latest version. + + Click here to go to latest. + +{% endblock %} \ No newline at end of file diff --git a/site-src/reference/spec.md b/site-src/reference/spec.md index d8e0c95bf..c76caa6a1 100644 --- a/site-src/reference/spec.md +++ b/site-src/reference/spec.md @@ -21,7 +21,7 @@ inference.networking.x-k8s.io API group. _Underlying type:_ _string_ Criticality defines how important it is to serve the model compared to other models. -Criticality is intentionally a bounded enum to contain the possibilities that need to be supported by the load balancing algorithm. 
Any reference to the Criticality field must be optional(use a pointer), and set no default. +Criticality is intentionally a bounded enum to contain the possibilities that need to be supported by the load balancing algorithm. Any reference to the Criticality field must be optional (use a pointer), and set no default. This allows us to union this with a oneOf field in the future should we wish to adjust/extend this behavior. _Validation:_ @@ -69,7 +69,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `group` _[Group](#group)_ | Group is the group of the referent.
The default value is "", representing the Core API group. | | MaxLength: 253
Pattern: `^$\|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
| -| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent. For example
"Service".
Defaults to "Service" when not specified.
ExternalName services can refer to CNAME DNS records that may live
outside of the cluster and as such are difficult to reason about in
terms of conformance. They also may not be safe to forward to (see
CVE-2021-25740 for more information). Implementations MUST NOT
support ExternalName Services. | Service | MaxLength: 63
MinLength: 1
Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
| +| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent. For example
"Service".

Defaults to "Service" when not specified.

ExternalName services can refer to CNAME DNS records that may live
outside of the cluster and as such are difficult to reason about in
terms of conformance. They also may not be safe to forward to (see
CVE-2021-25740 for more information). Implementations MUST NOT
support ExternalName Services. | Service | MaxLength: 63
MinLength: 1
Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
| | `name` _[ObjectName](#objectname)_ | Name is the name of the referent. | | MaxLength: 253
MinLength: 1
Required: \{\}
| | `portNumber` _[PortNumber](#portnumber)_ | The port number on the service running the extension. When unspecified,
implementations SHOULD infer a default value of 9002 when the Kind is
Service. | | Maximum: 65535
Minimum: 1
| | `failureMode` _[ExtensionFailureMode](#extensionfailuremode)_ | Configures how the gateway handles the case when the extension is not responsive.
Defaults to failClose. | FailClose | Enum: [FailOpen FailClose]
| @@ -125,7 +125,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `group` _[Group](#group)_ | Group is the group of the referent.
The default value is "", representing the Core API group. | | MaxLength: 253
Pattern: `^$\|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
| -| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent. For example
"Service".
Defaults to "Service" when not specified.
ExternalName services can refer to CNAME DNS records that may live
outside of the cluster and as such are difficult to reason about in
terms of conformance. They also may not be safe to forward to (see
CVE-2021-25740 for more information). Implementations MUST NOT
support ExternalName Services. | Service | MaxLength: 63
MinLength: 1
Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
| +| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent. For example
"Service".

Defaults to "Service" when not specified.

ExternalName services can refer to CNAME DNS records that may live
outside of the cluster and as such are difficult to reason about in
terms of conformance. They also may not be safe to forward to (see
CVE-2021-25740 for more information). Implementations MUST NOT
support ExternalName Services. | Service | MaxLength: 63
MinLength: 1
Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
| | `name` _[ObjectName](#objectname)_ | Name is the name of the referent. | | MaxLength: 253
MinLength: 1
Required: \{\}
| | `portNumber` _[PortNumber](#portnumber)_ | The port number on the service running the extension. When unspecified,
implementations SHOULD infer a default value of 9002 when the Kind is
Service. | | Maximum: 65535
Minimum: 1
| @@ -137,17 +137,22 @@ _Underlying type:_ _string_ Group refers to a Kubernetes Group. It must either be an empty string or a RFC 1123 subdomain. + This validation is based off of the corresponding Kubernetes validation: https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/util/validation/validation.go#L208 + Valid values include: + * "" - empty string implies core Kubernetes API group * "gateway.networking.k8s.io" * "foo.example.com" + Invalid values include: + * "example.com/bar" - "/" is an invalid character _Validation:_ @@ -191,6 +196,7 @@ InferenceModel is the Schema for the InferenceModels API. InferenceModelSpec represents the desired state of a specific model use case. This resource is managed by the "Inference Workload Owner" persona. + The Inference Workload Owner persona is someone that trains, verifies, and leverages a large language model from a model frontend, drives the lifecycle and rollout of new versions of those models, and defines the specific @@ -198,6 +204,7 @@ performance and latency goals for the model. These workloads are expected to operate within an InferencePool sharing compute capacity with other InferenceModels, defined by the Inference Platform Admin. + InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool, if the name is reused, an error will be shown on the status of a InferenceModel that attempted to reuse. The oldest InferenceModel, based on @@ -212,7 +219,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `modelName` _string_ | ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
ModelNames must be unique for a referencing InferencePool
(names can be reused for a different pool in the same cluster).
The modelName with the oldest creation timestamp is retained, and the incoming
InferenceModel is sets the Ready status to false with a corresponding reason.
In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
Names can be reserved without an underlying model configured in the pool.
This can be done by specifying a target model and setting the weight to zero,
an error will be returned specifying that no valid target model is found. | | MaxLength: 256
Required: \{\}
| -| `criticality` _[Criticality](#criticality)_ | Criticality defines how important it is to serve the model compared to other models referencing the same pool.
Criticality impacts how traffic is handled in resource constrained situations. It handles this by
queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
and the proportionality of fairness will be configurable.
Default values for this field will not be set, to allow for future additions of new field that may 'one of' with this field.
Any implementations that may consume this field may treat an unset value as the 'Standard' range. | | Enum: [Critical Standard Sheddable]
| +| `criticality` _[Criticality](#criticality)_ | Criticality defines how important it is to serve the model compared to other models referencing the same pool.
Criticality impacts how traffic is handled in resource constrained situations. It handles this by
queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
and the proportionality of fairness will be configurable.

Default values for this field will not be set, to allow for future additions of new field that may 'one of' with this field.
Any implementations that may consume this field may treat an unset value as the 'Standard' range. | | Enum: [Critical Standard Sheddable]
| | `targetModels` _[TargetModel](#targetmodel) array_ | TargetModels allow multiple versions of a model for traffic splitting.
If not specified, the target model name is defaulted to the modelName parameter.
modelName is often in reference to a LoRA adapter. | | MaxItems: 10
| | `poolRef` _[PoolObjectReference](#poolobjectreference)_ | PoolRef is a reference to the inference pool, the pool must exist in the same namespace. | | Required: \{\}
| @@ -230,7 +237,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | Conditions track the state of the InferenceModel.
Known condition types are:
* "Accepted" | [map[lastTransitionTime:1970-01-01T00:00:00Z message:Waiting for controller reason:Pending status:Unknown type:Ready]] | MaxItems: 8
| +| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | Conditions track the state of the InferenceModel.

Known condition types are:

* "Accepted" | [map[lastTransitionTime:1970-01-01T00:00:00Z message:Waiting for controller reason:Pending status:Unknown type:Ready]] | MaxItems: 8
| #### InferencePool @@ -287,7 +294,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `parent` _[PoolStatus](#poolstatus) array_ | Parents is a list of parent resources (usually Gateways) that are
associated with the route, and the status of the InferencePool with respect to
each parent.
A maximum of 32 Gateways will be represented in this list. An empty list
means the route has not been attached to any Gateway. | | MaxItems: 32
| +| `parent` _[PoolStatus](#poolstatus) array_ | Parents is a list of parent resources (usually Gateways) that are
associated with the route, and the status of the InferencePool with respect to
each parent.

A maximum of 32 Gateways will be represented in this list. An empty list
means the route has not been attached to any Gateway. | | MaxItems: 32
| #### Kind @@ -296,13 +303,17 @@ _Underlying type:_ _string_ Kind refers to a Kubernetes Kind. + Valid values include: + * "Service" * "HTTPRoute" + Invalid values include: + * "invalid/kind" - "/" is an invalid character _Validation:_ @@ -324,19 +335,24 @@ _Underlying type:_ _string_ LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 Duplicated as to not take an unexpected dependency on gw's API. + LabelKey is the key of a label. This is used for validation of maps. This matches the Kubernetes "qualified name" validation that is used for labels. Labels are case sensitive, so: my-label and My-Label are considered distinct. + Valid values include: + * example * example.com * example.com/path * example.com/path.html + Invalid values include: + * example~ - "~" is an invalid character * example.com. - can not start or end with "." @@ -360,8 +376,10 @@ of maps. This matches the Kubernetes label validation rules: * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), * could contain dashes (-), underscores (_), dots (.), and alphanumerics between. + Valid values include: + * MyValue * my.name * 123-my-value @@ -428,7 +446,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `parentRef` _[ObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#objectreference-v1-core)_ | GatewayRef indicates the gateway that observed state of InferencePool. | | | -| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | Conditions track the state of the InferencePool.
Known condition types are:
* "Accepted"
* "ResolvedRefs" | [map[lastTransitionTime:1970-01-01T00:00:00Z message:Waiting for controller reason:Pending status:Unknown type:Accepted]] | MaxItems: 8
| +| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#condition-v1-meta) array_ | Conditions track the state of the InferencePool.

Known condition types are:

* "Accepted"
* "ResolvedRefs" | [map[lastTransitionTime:1970-01-01T00:00:00Z message:Waiting for controller reason:Pending status:Unknown type:Accepted]] | MaxItems: 8
| #### PortNumber @@ -467,6 +485,6 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `name` _string_ | Name is the name of the adapter or base model, as expected by the ModelServer. | | MaxLength: 253
Required: \{\}
| -| `weight` _integer_ | Weight is used to determine the proportion of traffic that should be
sent to this model when multiple target models are specified.
Weight defines the proportion of requests forwarded to the specified
model. This is computed as weight/(sum of all weights in this
TargetModels list). For non-zero values, there may be some epsilon from
the exact proportion defined here depending on the precision an
implementation supports. Weight is not a percentage and the sum of
weights does not need to equal 100.
If a weight is set for any targetModel, it must be set for all targetModels.
Conversely weights are optional, so long as ALL targetModels do not specify a weight. | | Maximum: 1e+06
Minimum: 1
| +| `weight` _integer_ | Weight is used to determine the proportion of traffic that should be
sent to this model when multiple target models are specified.

Weight defines the proportion of requests forwarded to the specified
model. This is computed as weight/(sum of all weights in this
TargetModels list). For non-zero values, there may be some epsilon from
the exact proportion defined here depending on the precision an
implementation supports. Weight is not a percentage and the sum of
weights does not need to equal 100.

If a weight is set for any targetModel, it must be set for all targetModels.
Conversely weights are optional, so long as ALL targetModels do not specify a weight. | | Maximum: 1e+06
Minimum: 1
| diff --git a/site-src/stylesheets/extra.css b/site-src/stylesheets/extra.css index e42cfb893..ee9e61c2d 100644 --- a/site-src/stylesheets/extra.css +++ b/site-src/stylesheets/extra.css @@ -1,8 +1,13 @@ /* Hide title in favor of logo */ -.md-header__topic { +.md-header__topic .md-ellipsis { display: none; } +/* Ensure the version switcher remains visible */ +.md-header__topic .md-version { + display: block; +} + /* Use Kubernetes color as primary */ :root { --md-primary-fg-color: #326ce5;