Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
287 changes: 287 additions & 0 deletions apix/v1/inferencepool_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// InferencePool is the Schema for the InferencePools API.
//
// +kubebuilder:object:root=true
// TODO: change the annotation once it gets officially approved
// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only"
// +kubebuilder:resource:shortName=infpool
// +kubebuilder:subresource:status
// +kubebuilder:storageversion
// +genclient
type InferencePool struct {
metav1.TypeMeta `json:",inline"`

// +optional
metav1.ObjectMeta `json:"metadata,omitempty"`

// +required
Spec InferencePoolSpec `json:"spec,omitzero"`

// Status defines the observed state of InferencePool.
//
// +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}}
// +optional
Status InferencePoolStatus `json:"status,omitzero"`
}

// InferencePoolList contains a list of InferencePool.
//
// +kubebuilder:object:root=true
type InferencePoolList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []InferencePool `json:"items"`
}

// InferencePoolSpec defines the desired state of InferencePool
type InferencePoolSpec struct {
// Selector determines which Pods are members of this inference pool.
// It matches Pods by their labels only within the same namespace; cross-namespace
// selection is not supported.
//
// The structure of this LabelSelector is intentionally simple to be compatible
// with Kubernetes Service selectors, as some implementations may translate
// this configuration into a Service resource.
//
// +required
Selector LabelSelector `json:"selector,omitempty,omitzero"`

// TargetPorts defines a list of ports that are exposed by this InferencePool.
// Currently, the list may only include a single port definition.
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=1
// +listType=atomic
// +required
TargetPorts []Port `json:"targetPorts,omitempty"`

// Extension configures an endpoint picker as an extension service.
// +required
ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"`
}

// Port defines the network port that will be exposed by this InferencePool.
type Port struct {
// Number defines the port number to access the selected model server Pods.
// The number must be in the range 1 to 65535.
//
// +required
Number PortNumber `json:"number,omitempty"`
}

// Extension specifies how to configure an extension that runs the endpoint picker.
type Extension struct {
// Group is the group of the referent.
// The default value is "", representing the Core API group.
//
// +optional
// +kubebuilder:default=""
Group *Group `json:"group,omitempty"`

// Kind is the Kubernetes resource kind of the referent.
//
// Defaults to "Service" when not specified.
//
// ExternalName services can refer to CNAME DNS records that may live
// outside of the cluster and as such are difficult to reason about in
// terms of conformance. They also may not be safe to forward to (see
// CVE-2021-25740 for more information). Implementations MUST NOT
// support ExternalName Services.
//
// +optional
// +kubebuilder:default=Service
Kind Kind `json:"kind,omitempty"`

// Name is the name of the referent.
//
// +required
Name ObjectName `json:"name,omitempty"`

// The port number on the service running the extension. When unspecified,
// implementations SHOULD infer a default value of 9002 when the Kind is
// Service.
//
// +optional
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer here as 0 usually means all ports.
PortNumber *PortNumber `json:"portNumber,omitempty"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why "PortNumber" vs "Port" ?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will we need protocol here eventually? Did we discuss this one?

Why not use the same Port struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@robscott Do you have any context about this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I think the ideal would be to mirror BackendObjectReference in GW API where we have a port field with the same validation instead of portNumber. I think this started as portNumber to avoid any confusion over portName, but given that we've used port successfully for years in GW API without any issues or need for struct/other fields, I'd expect that to work well here as well.

I'm a bit split on whether or not we should make the change from portNumber to port at this late stage. Will defer to @danehans, @kfswain, and @nirrozenbaum.

Copy link
Contributor

@nirrozenbaum nirrozenbaum Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m fine with this change. portNumber was introduced only in v1 (different field exists in v1alpha2) and as long as the api has not been finalized, that’s ok to change.
we do need to keep in mind that v1 rc1 was already cut, but if we feel strongly that this is an improvement let’s go for it.

Copy link
Contributor Author

@capri-xiyue capri-xiyue Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the PR of approach 3 to make two different ports symmetric #1484

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why "PortNumber" vs "Port" ?

To reduce the nesting within EndpointPickerRef. To your point, if named ports and app protocol are anticipated future requirements, then +1 to use type Port instead of PortNumber. Currently, this spec defines the Gateway <> EPP protocol as ext-proc (gRPC), so I'm unsure if any additional app protocols are needed. Thoughts @robscott @kfswain @nirrozenbaum

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated PR to reflect the change. Let me know whether it works now or not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, +1. I think further app protocols may be useful at some point in the future, but it's not necessary now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned by @capri-xiyue, this has been addressed by #1484.


// Configures how the gateway handles the case when the extension is not responsive.
// Defaults to failClose.
//
// +optional
// +kubebuilder:default="FailClose"
FailureMode ExtensionFailureMode `json:"failureMode,omitempty"`
}

// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
// responsive.
// +kubebuilder:validation:Enum=FailOpen;FailClose
type ExtensionFailureMode string

const (
// FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails.
FailOpen ExtensionFailureMode = "FailOpen"
// FailClose specifies that the proxy should drop the request when the Endpoint Picker fails.
FailClose ExtensionFailureMode = "FailClose"
)

// InferencePoolStatus defines the observed state of InferencePool.
// +kubebuilder:validation:MinProperties=1
type InferencePoolStatus struct {
// Parents is a list of parent resources (usually Gateways) that are
// associated with the InferencePool, and the status of the InferencePool with respect to
// each parent.
//
// A maximum of 32 Gateways will be represented in this list. When the list contains
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not understand the comment about "kind: Status, name: default" -- where are those fields? They are not in PoolStatus.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't understand why you need a canary entry to signal "nothing to see" -- why is an empty list not sufficient? I find the imposition on the controllers to be very awkward.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was copied from Gateway API. Generally k8s users don't seem to treat the absence of status as a bad thing, so they may not realize their InferencePool is not being used/referenced.

This is particularly useful in Gateways where you made a typo on gatewayClass and nothing picks up/implements your Gateway. Having a baseline signal that a Gateway is waiting for a controller to pick it up is useful.

A similar pattern is useful for Routes if you make a typo referring to a Gateway - there's no controller empowered to warn you of this since each Gateway controller only populates status on Routes that are attached to their Gateway. If there's a reference to a nonexistent Gateway, there's nothing to warn you about that other than some kind of default status.

With that said, this doesn't seem quite as useful on InferencePool. If someone makes a typo when pointing a route to an InferencePool, they'll get a warning in the status of that Route. This status can be useful in indicating that the InferencePool has not actually been implemented yet, but an empty list may suffice here.

cc @danehans

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like the explicitness of the current approach, but I don't have a strong opinion here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make it as resolved for now seems the majority is ok with the current approach. Feel free to re-open it if further discussion is needed.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this very brittle and awkward. Doesn't this argue instead for something like an MAP which detects an empty list and inserts the canary?

Copy link
Contributor

@danehans danehans Aug 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xref kubernetes-sigs/gateway-api#3738 for adding a similar default status condition to HTTPRoute.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a quick update from Slack - we discussed this and decided to remove the defaulting for now. In the corresponding Gateway issue we've been looking into MAP as a potential solution as well. Although it's not clear that will work as we'd like, I think it makes sense to hold off on this until we have more time to look into a MAP-based solution.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I resolve this with the agreement to remove the defaulting for now?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#1427 is removing the defaulting.

// `kind: Status, name: default`, it indicates that the InferencePool is not
// associated with any Gateway and a controller must perform the following:
//
// - Remove the parent when setting the "Accepted" condition.
// - Add the parent when the controller will no longer manage the InferencePool
// and no other parents exist.
//
// +kubebuilder:validation:MaxItems=32
// +optional
// +listType=atomic
Parents []PoolStatus `json:"parent,omitempty"`
}

// PoolStatus defines the observed state of InferencePool from a Gateway.
type PoolStatus struct {
// Conditions track the state of the InferencePool.
//
// Known condition types are:
//
// * "Accepted"
// * "ResolvedRefs"
//
// +optional
// +listType=map
// +listMapKey=type
// +kubebuilder:validation:MaxItems=8
// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
Conditions []metav1.Condition `json:"conditions,omitempty"`

// GatewayRef indicates the gateway that observed state of InferencePool.
// +required
GatewayRef ParentGatewayReference `json:"parentRef,omitzero"`
}

// InferencePoolConditionType is a type of condition for the InferencePool
type InferencePoolConditionType string

// InferencePoolReason is the reason for a given InferencePoolConditionType
type InferencePoolReason string

const (
// This condition indicates whether the InferencePool has been accepted or rejected
// by a Gateway, and why.
//
// Possible reasons for this condition to be True are:
//
// * "Accepted"
//
// Possible reasons for this condition to be False are:
//
// * "NotSupportedByGateway"
// * "HTTPRouteNotAccepted"
//
// Possible reasons for this condition to be Unknown are:
//
// * "Pending"
//
// Controllers MAY raise this condition with other reasons, but should
// prefer to use the reasons listed above to improve interoperability.
InferencePoolConditionAccepted InferencePoolConditionType = "Accepted"

// This reason is used with the "Accepted" condition when the InferencePool has been
// accepted by the Gateway.
InferencePoolReasonAccepted InferencePoolReason = "Accepted"

// This reason is used with the "Accepted" condition when the InferencePool
// has not been accepted by a Gateway because the Gateway does not support
// InferencePool as a backend.
InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway"

// This reason is used with the "Accepted" condition when the InferencePool is
// referenced by an HTTPRoute that has been rejected by the Gateway. The user
// should inspect the status of the referring HTTPRoute for the specific reason.
InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted"

// This reason is used with the "Accepted" when a controller has not yet
// reconciled the InferencePool.
InferencePoolReasonPending InferencePoolReason = "Pending"
)

const (
// This condition indicates whether the controller was able to resolve all
// the object references for the InferencePool.
//
// Possible reasons for this condition to be True are:
//
// * "ResolvedRefs"
//
// Possible reasons for this condition to be False are:
//
// * "InvalidExtensionRef"
//
// Controllers MAY raise this condition with other reasons, but should
// prefer to use the reasons listed above to improve interoperability.
InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs"

// This reason is used with the "ResolvedRefs" condition when the condition
// is true.
InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"

// This reason is used with the "ResolvedRefs" condition when the
// Extension is invalid in some way. This can include an unsupported kind
// or API group, or a reference to a resource that can not be found.
InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
)

// ParentGatewayReference identifies an API object including its namespace,
// defaulting to Gateway.
type ParentGatewayReference struct {
// Group is the group of the referent.
//
// +optional
// +kubebuilder:default="gateway.networking.k8s.io"
Group *Group `json:"group,omitempty"`

// Kind is kind of the referent. For example "Gateway".
//
// +optional
// +kubebuilder:default=Gateway
Kind Kind `json:"kind,omitempty"`

// Name is the name of the referent.
// +required
Name ObjectName `json:"name,omitempty"`

// Namespace is the namespace of the referent. If not present,
// the namespace of the referent is assumed to be the same as
// the namespace of the referring object.
//
// +optional
Namespace Namespace `json:"namespace,omitempty"`
}
Loading