1- // pkg/apis/optimizer/v1alpha1/optimizationjob_types.go
2-
31package v1alpha1
42
53import (
@@ -8,78 +6,220 @@ import (
86 runtime "k8s.io/apimachinery/pkg/runtime"
97)
108
9+ // ObjectiveDirection names the direction in which an objective metric is optimized.
10+ // +kubebuilder:validation:Enum=minimize;maximize
11+ type ObjectiveDirection string
12+
13+ const ( // Allowed ObjectiveDirection values; they mirror the Enum marker on the type.
14+ ObjectiveDirectionMinimize ObjectiveDirection = "minimize" // ObjectiveDirectionMinimize asks for the metric to be minimized.
15+ ObjectiveDirectionMaximize ObjectiveDirection = "maximize" // ObjectiveDirectionMaximize asks for the metric to be maximized.
16+ )
17+
18+ // Distribution names the sampling distribution used for a continuous parameter.
19+ // +kubebuilder:validation:Enum=uniform;logUniform;normal;logNormal
20+ type Distribution string
21+
22+ const ( // Allowed Distribution values; they mirror the Enum marker on the type.
23+ DistributionUniform Distribution = "uniform" // DistributionUniform is also the default declared on ContinuousParam.Distribution.
24+ DistributionLogUniform Distribution = "logUniform" // DistributionLogUniform selects the "logUniform" distribution.
25+ DistributionNormal Distribution = "normal" // DistributionNormal selects the "normal" distribution.
26+ DistributionLogNormal Distribution = "logNormal" // DistributionLogNormal selects the "logNormal" distribution.
27+ )
28+
29+ // OptimizationJobConditionType names a condition type that can appear on an OptimizationJob.
30+ type OptimizationJobConditionType string
31+
32+ const ( // Known condition types; the same four names are listed on OptimizationJobStatus.Conditions.
33+ OptimizationJobInitializerReady OptimizationJobConditionType = "InitializerReady" // presumably reports on the optional Spec.Initializer step — confirm with the controller
34+ OptimizationJobRunning OptimizationJobConditionType = "Running" // OptimizationJobRunning is the "Running" condition type.
35+ OptimizationJobSucceeded OptimizationJobConditionType = "Succeeded" // OptimizationJobSucceeded is the "Succeeded" condition type.
36+ OptimizationJobFailed OptimizationJobConditionType = "Failed" // OptimizationJobFailed is the "Failed" condition type.
37+ )
38+
1139// Objective defines one metric to optimize, its direction, and an optional goal for the HPO job.
1240type Objective struct {
13- Metric string `json:"metric"`
14- Direction string `json:"direction"`
15- Goal * float64 `json:"goal,omitempty"`
41+ // Metric is the non-empty name of the metric to optimize (e.g., "accuracy", "loss").
42+ // +kubebuilder:validation:MinLength=1
43+ Metric string `json:"metric"`
44+
45+ // Direction specifies whether to minimize or maximize the metric; it defaults to maximize.
46+ // +kubebuilder:default=maximize
47+ Direction ObjectiveDirection `json:"direction"` // NOTE(review): has a default but no +optional/omitempty — confirm it should stay required in the schema
48+
49+ // Goal is the optional target value for the metric. When reached, the optimization stops.
50+ // +optional
51+ Goal * float64 `json:"goal,omitempty"`
1652}
1753
1854// Algorithm selects the optimization algorithm by name and carries its optional settings.
1955type Algorithm struct {
20- Name string `json:"name"`
21- Settings []SettingKV `json:"settings,omitempty"`
56+ // Name is the non-empty optimization algorithm name (e.g., "random", "bayesian", "tpe", "cmaes").
57+ // +kubebuilder:validation:MinLength=1
58+ Name string `json:"name"`
59+
60+ // Settings are optional algorithm-specific key-value parameters, kept as a list-map keyed by each entry's name.
61+ // +optional
62+ // +listType=map
63+ // +listMapKey=name
64+ Settings []AlgorithmSetting `json:"settings,omitempty"`
2265}
2366
24- // SettingKV is a key-value pair for algorithm settings.
25- type SettingKV struct {
26- Name string `json:"name"`
67+ // AlgorithmSetting is one name/value pair of algorithm configuration; both parts are strings.
68+ type AlgorithmSetting struct {
69+ // +kubebuilder:validation:MinLength=1
70+ Name string `json:"name"` // Name is the setting's key; it must be non-empty and is the list-map key of Algorithm.Settings.
71+
2772 Value string `json:"value"` // Value is the setting's value, carried as a string.
2873}
2974
3075// TrialConfig controls the orchestration of the trials: how many run, how many at once, and how many may fail.
3176type TrialConfig struct {
32- NumTrials * int32 `json:"num_trials,omitempty"`
33- ParallelTrials * int32 `json:"parallel_trials,omitempty"`
34- MaxFailedTrials * int32 `json:"max_failed_trials,omitempty"`
77+ // NumTrials is the maximum number of trials to run; when set it must be at least 1.
78+ // +kubebuilder:validation:Minimum=1
79+ // +optional
80+ NumTrials * int32 `json:"numTrials,omitempty"` // NOTE(review): behavior when unset is not defined in this file — confirm with the controller
81+
82+ // ParallelTrials is how many trials can run concurrently; it must be at least 1 and defaults to 1.
83+ // +kubebuilder:validation:Minimum=1
84+ // +kubebuilder:default=1
85+ // +optional
86+ ParallelTrials * int32 `json:"parallelTrials,omitempty"`
87+
88+ // MaxFailedTrials is the threshold of failures before marking the job as failed; it may be 0 but not negative.
89+ // +kubebuilder:validation:Minimum=0
90+ // +optional
91+ MaxFailedTrials * int32 `json:"maxFailedTrials,omitempty"`
92+ }
93+
94+ // ParameterSpec defines one hyperparameter and its search domain.
95+ // Exactly one of Continuous, Categorical, or Discrete must be set.
96+ type ParameterSpec struct { // NOTE(review): no marker in this block enforces the exactly-one rule — confirm it is validated elsewhere
97+ // +kubebuilder:validation:MinLength=1
98+ Name string `json:"name"` // Name identifies the hyperparameter; it must be non-empty and is the list-map key of OptimizationJobSpec.SearchSpace.
99+
100+ // Continuous, when set, defines a float-valued parameter with min/max bounds.
101+ // +optional
102+ Continuous * ContinuousParam `json:"continuous,omitempty"`
103+
104+ // Categorical, when set, defines a parameter that takes one of a fixed set of string values.
105+ // +optional
106+ Categorical * CategoricalParam `json:"categorical,omitempty"`
107+
108+ // Discrete, when set, defines a parameter that takes one of a fixed set of numeric values.
109+ // +optional
110+ Discrete * DiscreteParam `json:"discrete,omitempty"`
111+ }
112+
113+ // ContinuousParam defines a float-valued search range bounded by Min and Max.
114+ type ContinuousParam struct { // NOTE(review): no marker here enforces min <= max — confirm it is validated elsewhere
115+ Min float64 `json:"min"` // Min is the lower bound of the range.
116+ Max float64 `json:"max"` // Max is the upper bound of the range.
117+
118+ // Distribution controls how values are sampled within [min, max]; it defaults to uniform.
119+ // +kubebuilder:default=uniform
120+ // +optional
121+ Distribution Distribution `json:"distribution,omitempty"`
122+ }
123+
124+ // CategoricalParam defines the set of string values a categorical parameter may take.
125+ type CategoricalParam struct {
126+ // +kubebuilder:validation:MinItems=1
127+ Choices []string `json:"choices"` // Choices lists the allowed values; at least one is required.
128+ }
129+
130+ // DiscreteParam defines the set of numeric values a discrete parameter may take.
131+ type DiscreteParam struct {
132+ // +kubebuilder:validation:MinItems=1
133+ Values []float64 `json:"values"` // Values lists the allowed numbers; at least one is required.
134+ }
135+
136+ // MetricValue holds a single observation of one objective metric: its name and the value seen.
137+ type MetricValue struct {
138+ // Metric is the name of the objective metric.
139+ Metric string `json:"metric"` // presumably matches an Objective.Metric name — confirm with the controller
140+ // Value is the observed value of that metric.
141+ Value float64 `json:"value"`
35142}
36143
37144// BestTrial tracks the best performing trial, its metrics, and the parameters it used.
38145type BestTrial struct {
39- Name string `json:"name"`
40- Value float64 `json:"value"`
146+ // Name is the name of the best-performing Trial / TrainJob.
147+ Name string `json:"name"`
148+
149+ // Metrics are the observed objective metric values for this trial.
150+ Metrics []MetricValue `json:"metrics"` // The "Best Metric" print column on OptimizationJob reads the first entry's value.
151+
152+ // OptimalParameters is the optional map of hyperparameter names to the values used by this trial, stored as strings.
153+ // +optional
154+ OptimalParameters map [string ]string `json:"optimalParameters,omitempty"`
41155}
42156
43157// OptimizationJobSpec defines the desired state of OptimizationJob.
44158type OptimizationJobSpec struct {
159+ // Objectives defines the metrics to optimize, their direction, and optional goal; at least one is required.
160+ // +kubebuilder:validation:MinItems=1
45161 Objectives []Objective `json:"objectives"`
46- Algorithm Algorithm `json:"algorithm"`
47162
48- // Using map[string]string initially, can be refined to strict types later if needed.
49- SearchSpace map [string ]string `json:"searchSpace"`
163+ // Algorithm specifies the HPO algorithm and its settings.
164+ Algorithm Algorithm `json:"algorithm"`
165+
166+ // SearchSpace lists the hyperparameters to tune and their search domains, as a list-map keyed by parameter name; at least one entry is required.
167+ // +kubebuilder:validation:MinItems=1
168+ // +listType=map
169+ // +listMapKey=name
170+ SearchSpace []ParameterSpec `json:"searchSpace"`
50171
172+ // TrialConfig controls parallelism, trial limits, and failure thresholds.
51173 TrialConfig TrialConfig `json:"trialConfig"` // NOTE(review): required here although every TrialConfig field is optional — confirm that is intended
52174
175+ // Initializer, when set, runs once before any trials to download shared artifacts (models, datasets)
176+ // and stores them on a PVC that is mounted into every trial's TrainJob.
177+ // +optional
53178 Initializer * trainerv1alpha1.Initializer `json:"initializer,omitempty"`
54179
55- // Tighter TrainJob Integration: Strongly typed to TrainJob rather than arbitrary CRDs .
56- // runtime.RawExtension allows embedding the raw TrainJob Kubernetes object .
180+ // TrialTemplate is the TrainJob manifest used as the template for each trial.
181+ // The controller substitutes search-space values using ${searchSpace.<paramName>} placeholders.
57182 // +kubebuilder:pruning:PreserveUnknownFields
58183 TrialTemplate runtime.RawExtension `json:"trialTemplate"` // Held as a raw extension; the marker above keeps unknown fields inside it from being pruned.
59184}
60185
61186// OptimizationJobStatus defines the observed state of OptimizationJob.
62187type OptimizationJobStatus struct {
63- // Conditions track the overall lifecycle of the OptimizationJob (e.g., Created, Running, Succeeded, Failed).
188+ // Conditions track the overall lifecycle of the OptimizationJob, as a list-map keyed by condition type.
189+ // Known condition types: InitializerReady, Running, Succeeded, Failed.
190+ // +optional
191+ // +listType=map
192+ // +listMapKey=type
64193 Conditions []metav1.Condition `json:"conditions,omitempty"`
65194
66- // Active is the number of currently running trials.
67- Active int32 `json:"active,omitempty"`
195+ // StartTime is when the OptimizationJob controller first started processing this resource.
196+ // +optional
197+ StartTime * metav1.Time `json:"startTime,omitempty"`
68198
69- // Succeeded is the number of trials that successfully completed.
70- Succeeded int32 `json:"succeeded,omitempty"`
199+ // CompletionTime is when all trials finished (succeeded or hit failure threshold).
200+ // +optional
201+ CompletionTime * metav1.Time `json:"completionTime,omitempty"`
71202
72- // Failed is the number of trials that failed.
73- Failed int32 `json:"failed,omitempty"`
203+ // Trial counters.
204+ Active int32 `json:"active,omitempty"` // Active is the number of currently running trials.
205+ Succeeded int32 `json:"succeeded,omitempty"` // Succeeded is the number of trials that successfully completed.
206+ Failed int32 `json:"failed,omitempty"` // Failed is the number of trials that failed.
74207
75- // BestTrial holds the information about the best performing trial so far.
208+ // BestTrial holds the best performing trial observed so far; it is omitted when unset.
209+ // +optional
76210 BestTrial * BestTrial `json:"bestTrial,omitempty"`
77211}
78212
79213// +genclient
80214// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
81215// +kubebuilder:object:root=true
82216// +kubebuilder:subresource:status
217+ // +kubebuilder:resource:shortName=optjob
218+ // +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.conditions[?(@.status=="True")].type`
219+ // +kubebuilder:printcolumn:name="Best Metric",type=string,JSONPath=`.status.bestTrial.metrics[0].value`
220+ // +kubebuilder:printcolumn:name="Succeeded",type=integer,JSONPath=`.status.succeeded`
221+ // +kubebuilder:printcolumn:name="Failed",type=integer,JSONPath=`.status.failed`
222+ // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
83223
84224// OptimizationJob is the Schema for the optimizationjobs API.
85225type OptimizationJob struct {
0 commit comments