Skip to content

Commit 6c67982

Browse files
Patrick Schwager (pschwager)
authored and committed
Add argo rollouts support
1 parent e7c6f72 commit 6c67982

File tree

10 files changed

+383
-10
lines changed

10 files changed

+383
-10
lines changed

src/main/helm/templates/deployment.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ spec:
6161
{{- with .Values.env }}
6262
{{- toYaml . | nindent 12 }}
6363
{{- end }}
64+
{{- if eq .Values.argoRolloutSupport.enabled true }}
65+
- name: lmos.operator.rollout.enabled
66+
value: "true"
67+
{{- end }}
6468
{{- if eq .Values.embeddingEnabled true }}
6569
envFrom:
6670
- configMapRef:

src/main/helm/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ embeddingStoreTlsEnabled: false
144144
embeddingBlacklistTenants: []
145145
embeddingBlacklistChannels: []
146146

147+
# Set to true if argo rollout support should be enabled.
148+
argoRolloutSupport:
149+
enabled: false
150+
147151
# Set to true if the secret containing the LLM API key should be created by helm.
148152
# If set to false, a secret named <secretName> must be available in the cluster.
149153
# If set to true, a secret named <secretName> will be created.

src/main/kotlin/org/eclipse/lmos/operator/reconciler/AgentDeploymentReconciler.kt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,10 @@ import org.slf4j.LoggerFactory
2121
import org.springframework.stereotype.Component
2222
import java.util.concurrent.TimeUnit
2323

24-
private const val WELL_KNOWN_AGENT_SPEC_ENDPOINT = ".well-known/capabilities.json"
2524
private const val DEPLOYMENT_NOT_READY_RECONCILE_INTERVAL_SECONDS = 10L
26-
private const val ERROR_RETRY_INITIAL_INTERVAL_MS = 5000L
27-
private const val ERROR_RETRY_INTERVAL_MULTIPLIER = 1.5
28-
private const val ERROR_RETRY_MAX_ATTEMPTS = 3
2925

3026
@Component
31-
@ControllerConfiguration(labelSelector = "lmos-agent=true")
27+
@ControllerConfiguration(labelSelector = LABEL_SELECTOR)
3228
@GradualRetry(
3329
initialInterval = ERROR_RETRY_INITIAL_INTERVAL_MS,
3430
intervalMultiplier = ERROR_RETRY_INTERVAL_MULTIPLIER,

src/main/kotlin/org/eclipse/lmos/operator/reconciler/AgentEmbeddingReconciler.kt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,8 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty
2626
import org.springframework.boot.context.properties.ConfigurationProperties
2727
import org.springframework.stereotype.Component
2828

29-
private const val ERROR_RETRY_INITIAL_INTERVAL_MS = 5000L
30-
private const val ERROR_RETRY_INTERVAL_MULTIPLIER = 1.5
31-
private const val ERROR_RETRY_MAX_ATTEMPTS = 3
32-
3329
@Component
34-
@ControllerConfiguration(labelSelector = "lmos-agent=true")
30+
@ControllerConfiguration(labelSelector = LABEL_SELECTOR)
3531
@GradualRetry(
3632
initialInterval = ERROR_RETRY_INITIAL_INTERVAL_MS,
3733
intervalMultiplier = ERROR_RETRY_INTERVAL_MULTIPLIER,
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2025 Deutsche Telekom AG and others
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
package org.eclipse.lmos.operator.reconciler
8+
9+
import io.javaoperatorsdk.operator.api.reconciler.Cleaner
10+
import io.javaoperatorsdk.operator.api.reconciler.Context
11+
import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration
12+
import io.javaoperatorsdk.operator.api.reconciler.DeleteControl
13+
import io.javaoperatorsdk.operator.api.reconciler.Reconciler
14+
import io.javaoperatorsdk.operator.api.reconciler.UpdateControl
15+
import io.javaoperatorsdk.operator.processing.retry.GradualRetry
16+
import org.eclipse.lmos.operator.reconciler.client.AgentClient
17+
import org.eclipse.lmos.operator.reconciler.generator.AgentGenerator
18+
import org.eclipse.lmos.operator.reconciler.k8s.KubernetesResourceManager
19+
import org.eclipse.lmos.operator.resources.RolloutResource
20+
import org.slf4j.LoggerFactory
21+
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty
22+
import org.springframework.stereotype.Component
23+
import java.util.concurrent.TimeUnit
24+
25+
private const val ROLLOUT_NOT_READY_RECONCILE_INTERVAL_SECONDS = 10L
26+
27+
/**
 * Reconciles Argo `Rollout` resources matching [LABEL_SELECTOR] into agent resources.
 *
 * Once the rollout is reported ready, the agent specification is fetched from the rollout's
 * service at [WELL_KNOWN_AGENT_SPEC_ENDPOINT] and turned into an agent resource. While the rollout
 * is not ready, reconciliation is rescheduled after [ROLLOUT_NOT_READY_RECONCILE_INTERVAL_SECONDS].
 *
 * The bean is only registered when the property `lmos.operator.rollout.enabled` is `true`.
 */
@Component
@ControllerConfiguration(labelSelector = LABEL_SELECTOR)
@GradualRetry(
    initialInterval = ERROR_RETRY_INITIAL_INTERVAL_MS,
    intervalMultiplier = ERROR_RETRY_INTERVAL_MULTIPLIER,
    maxAttempts = ERROR_RETRY_MAX_ATTEMPTS,
)
@ConditionalOnProperty(
    prefix = "lmos.operator.rollout",
    name = ["enabled"],
    havingValue = "true",
    matchIfMissing = false,
)
class AgentRolloutReconciler(
    private val kubernetesResourceManager: KubernetesResourceManager,
    private val agentClient: AgentClient,
) : Reconciler<RolloutResource>,
    Cleaner<RolloutResource> {
    private val log = LoggerFactory.getLogger(javaClass)

    /**
     * Creates the agent resource for a ready rollout, or asks to be called again later.
     *
     * @param rollout the rollout being reconciled
     * @param context reconciliation context supplied by the operator SDK (unused here)
     * @return a no-update control; it carries a reschedule delay when the rollout is not ready yet
     * @throws IllegalStateException wrapping any failure while resolving the service, fetching the
     *   agent specification or creating the agent resource
     */
    override fun reconcile(
        rollout: RolloutResource,
        context: Context<RolloutResource>,
    ): UpdateControl<RolloutResource> {
        // Guard clause: nothing to do until the rollout reports itself ready.
        if (!kubernetesResourceManager.isRolloutReady(rollout)) {
            return UpdateControl
                .noUpdate<RolloutResource>()
                .rescheduleAfter(ROLLOUT_NOT_READY_RECONCILE_INTERVAL_SECONDS, TimeUnit.SECONDS)
        }

        log.info("Rollout reconcile: Create agent resource for rollout '{}'.", rollout.metadata.name)
        return try {
            val specUrl = kubernetesResourceManager.getServiceUrl(rollout, WELL_KNOWN_AGENT_SPEC_ENDPOINT)
            val fetchedSpec = agentClient.get(specUrl, AgentSpecification::class.java)
            val agent = AgentGenerator.createAgentResource(rollout, fetchedSpec)
            kubernetesResourceManager.createAgentResource(agent)
            log.info("Creating agent resource '{}' in namespace '{}'.", agent.metadata.name, agent.metadata.namespace)
            UpdateControl.noUpdate<RolloutResource>()
        } catch (e: Exception) {
            // Wrap every failure so the cause is reported together with the rollout name.
            throw IllegalStateException("Failed to create agent resource for rollout '${rollout.metadata.name}'.", e)
        }
    }

    /**
     * Deletes the agent resource belonging to the rollout that is being removed.
     *
     * @param rollout the rollout being deleted
     * @param context cleanup context supplied by the operator SDK (unused here)
     * @return the SDK's default delete control
     */
    override fun cleanup(
        rollout: RolloutResource,
        context: Context<RolloutResource?>?,
    ): DeleteControl {
        log.info("Rollout cleanup: Delete agent resource '{}' due to reconcile.", rollout.metadata.name)
        kubernetesResourceManager.deleteAgentResource(rollout)
        return DeleteControl.defaultDelete()
    }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2025 Deutsche Telekom AG and others
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
package org.eclipse.lmos.operator.reconciler
8+
9+
// Label selector passed to @ControllerConfiguration by the reconcilers in this package.
const val LABEL_SELECTOR = "lmos-agent=true"
10+
11+
// Path, relative to the agent's service URL, from which the agent specification is fetched.
const val WELL_KNOWN_AGENT_SPEC_ENDPOINT = ".well-known/capabilities.json"
12+
13+
// The three values below are the @GradualRetry settings shared by the reconcilers:
// initial interval (milliseconds, per the name), interval multiplier and maximum attempts.
const val ERROR_RETRY_INITIAL_INTERVAL_MS = 5000L
14+
const val ERROR_RETRY_INTERVAL_MULTIPLIER = 1.5
15+
const val ERROR_RETRY_MAX_ATTEMPTS = 3

src/main/kotlin/org/eclipse/lmos/operator/reconciler/generator/AgentGenerator.kt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.eclipse.lmos.operator.reconciler.AgentSpecification
1414
import org.eclipse.lmos.operator.resources.AgentResource
1515
import org.eclipse.lmos.operator.resources.AgentSpec
1616
import org.eclipse.lmos.operator.resources.ProvidedCapability
17+
import org.eclipse.lmos.operator.resources.RolloutResource
1718

1819
const val DEPLOYMENT_LABEL_KEY_AGENT = "lmos-agent"
1920

@@ -55,4 +56,42 @@ object AgentGenerator {
5556

5657
return agentResource
5758
}
59+
60+
/**
 * Builds the [AgentResource] describing the agent served by the given rollout.
 *
 * The agent takes its name, namespace and id from the rollout. Its subset label is copied from the
 * rollout, falling back to [DEPLOYMENT_SUBSET_LABEL_DEFAULT_VALUE] when the rollout has none.
 *
 * @param rollout the rollout the agent is derived from
 * @param agentSpecification the specification reported by the agent itself
 * @return a new, not yet persisted agent resource
 */
fun createAgentResource(
    rollout: RolloutResource,
    agentSpecification: AgentSpecification,
): AgentResource {
    val rolloutMeta = rollout.metadata
    val subsetLabel = rolloutMeta.labels[DEPLOYMENT_SUBSET_LABEL_KEY] ?: DEPLOYMENT_SUBSET_LABEL_DEFAULT_VALUE

    val resourceMetadata =
        ObjectMetaBuilder()
            .withName(rolloutMeta.name)
            .withNamespace(rolloutMeta.namespace)
            .addToLabels(DEPLOYMENT_LABEL_KEY_AGENT, "true")
            .addToLabels(DEPLOYMENT_SUBSET_LABEL_KEY, subsetLabel)
            .build()

    // Translate every capability reported by the agent into the resource's representation.
    val capabilities =
        agentSpecification.capabilities
            .map { capability ->
                ProvidedCapability(
                    id = capability.id,
                    name = capability.name,
                    version = capability.version,
                    description = capability.description,
                    examples = capability.examples,
                )
            }.toSet()

    val resourceSpec =
        AgentSpec(
            id = rolloutMeta.name,
            description = agentSpecification.description,
            supportedTenants = agentSpecification.supportedTenants,
            supportedChannels = agentSpecification.supportedChannels,
            providedCapabilities = capabilities,
        )

    return AgentResource().apply {
        metadata = resourceMetadata
        spec = resourceSpec
    }
}
5897
}

src/main/kotlin/org/eclipse/lmos/operator/reconciler/k8s/KubernetesResourceManager.kt

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import io.fabric8.kubernetes.api.model.StatusDetails
1111
import io.fabric8.kubernetes.api.model.apps.Deployment
1212
import io.fabric8.kubernetes.client.KubernetesClient
1313
import org.eclipse.lmos.operator.resources.AgentResource
14+
import org.eclipse.lmos.operator.resources.RolloutResource
1415
import org.slf4j.LoggerFactory
1516

1617
@org.springframework.stereotype.Service
@@ -34,6 +35,16 @@ class KubernetesResourceManager(
3435
log.info("Deleted Agent Resources for deployment '{}', status '{}'.", deployment.metadata.name, deleteStatus)
3536
}
3637

38+
/**
 * Deletes the agent resource that has the same name and namespace as the given rollout.
 *
 * @param rollout the rollout whose agent resource should be removed
 */
fun deleteAgentResource(rollout: RolloutResource) {
    val rolloutMeta = rollout.metadata
    val agentHandle =
        kubernetesClient
            .resources(AgentResource::class.java)
            .inNamespace(rolloutMeta.namespace)
            .withName(rolloutMeta.name)

    val statusDetails: List<StatusDetails> = agentHandle.delete()
    log.info("Deleted Agent Resources for rollout '{}', status '{}'.", rolloutMeta.name, statusDetails)
}
47+
3748
fun isDeploymentReady(deployment: Deployment): Boolean {
3849
val replicas = deployment.status.replicas
3950
val desiredReplicas = deployment.spec.replicas
@@ -46,6 +57,19 @@ class KubernetesResourceManager(
4657
)
4758
}
4859

60+
/**
 * Tells whether the rollout runs exactly its desired number of replicas and all are available.
 *
 * The desired count is `spec.replicas`. Argo Rollouts documents that an omitted `spec.replicas`
 * defaults to 1, so a missing value is treated as 1. Previously a missing value made this method
 * return `false` forever, which kept such rollouts in an endless reschedule loop.
 *
 * @param rollout the rollout to inspect
 * @return `true` when `status.replicas` and `status.availableReplicas` both equal the desired
 *   count; `false` when the status, or either of those status fields, is missing
 */
fun isRolloutReady(rollout: RolloutResource): Boolean {
    // Default applied by Argo Rollouts when spec.replicas is omitted.
    val argoDefaultReplicas = 1
    val desiredReplicas = rollout.spec?.replicas ?: argoDefaultReplicas

    // Without a status nothing has been observed yet, so the rollout cannot be ready.
    val status = rollout.status ?: return false

    // Comparing a nullable Int with `==` is false for null, so missing status fields mean "not ready".
    return status.replicas == desiredReplicas && status.availableReplicas == desiredReplicas
}
72+
4973
fun getServiceUrl(
5074
deployment: Deployment,
5175
path: String,
@@ -59,6 +83,19 @@ class KubernetesResourceManager(
5983
}.apply { log.info("Determined service URL: $this") }
6084
}
6185

86+
/**
 * Resolves the URL of [path] on the service that fronts the given rollout.
 *
 * @param rollout the rollout whose service is looked up
 * @param path path to append to the service base URL; a leading slash is optional
 * @return the base URL and the path joined by exactly one added or existing leading slash
 */
fun getServiceUrl(
    rollout: RolloutResource,
    path: String,
): String {
    val baseUrl = getBaseUrl(findService(rollout))
    // Only insert a separator when the path does not already bring its own.
    val separator = if (path.startsWith("/")) "" else "/"
    val serviceUrl = "$baseUrl$separator$path"
    log.info("Determined service URL: $serviceUrl")
    return serviceUrl
}
98+
6299
private fun findService(deployment: Deployment): Service {
63100
val deploymentPodLabels =
64101
deployment.spec
@@ -90,6 +127,37 @@ class KubernetesResourceManager(
90127
return matchingServices.first()
91128
}
92129

130+
/**
 * Finds the single service in the rollout's namespace whose selector matches the rollout's pods.
 *
 * A service matches when its selector is non-empty and every selector entry is present, with the
 * same value, in the labels of the rollout's pod template.
 *
 * @param rollout the rollout whose service is looked up
 * @return the one matching service
 * @throws IllegalStateException when zero or more than one service matches
 */
private fun findService(rollout: RolloutResource): Service {
    val podLabels =
        rollout.spec
            ?.template
            ?.metadata
            ?.labels
            .orEmpty()

    val namespaceServices =
        kubernetesClient
            .services()
            .inNamespace(rollout.metadata.namespace)
            .list()
            .items

    val matchingServices =
        namespaceServices.filter { candidate ->
            val selector = candidate.spec?.selector.orEmpty()
            // Services without a selector are never considered a match.
            selector.isNotEmpty() && selector.all { (key, value) -> podLabels[key] == value }
        }

    if (matchingServices.size == 1) {
        return matchingServices.first()
    }

    val name = rollout.metadata.name
    log.error(
        "Expected exactly one service for rollout $name, but got ${matchingServices.size}; $matchingServices",
    )
    throw IllegalStateException("Expected exactly one service for rollout $name, but got ${matchingServices.size}")
}
160+
93161
private fun getBaseUrl(service: Service): String {
94162
val ports = service.spec.ports
95163

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2025 Deutsche Telekom AG and others
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
package org.eclipse.lmos.operator.resources
8+
9+
import io.fabric8.kubernetes.api.model.LabelSelector
10+
import io.fabric8.kubernetes.api.model.Namespaced
11+
import io.fabric8.kubernetes.api.model.PodTemplateSpec
12+
import io.fabric8.kubernetes.client.CustomResource
13+
import io.fabric8.kubernetes.model.annotation.Group
14+
import io.fabric8.kubernetes.model.annotation.Kind
15+
import io.fabric8.kubernetes.model.annotation.Plural
16+
import io.fabric8.kubernetes.model.annotation.Singular
17+
import io.fabric8.kubernetes.model.annotation.Version
18+
19+
/**
 * Typed client model for the namespaced custom resource of kind `Rollout` in API group
 * `argoproj.io`, version `v1alpha1` (plural `rollouts`, singular `rollout`).
 *
 * Only the parts of the resource described by [RolloutSpec] and [RolloutStatus] are modelled.
 */
@Group("argoproj.io")
20+
@Version("v1alpha1")
21+
@Plural("rollouts")
22+
@Singular("rollout")
23+
@Kind("Rollout")
24+
class RolloutResource :
25+
CustomResource<RolloutSpec, RolloutStatus>(),
26+
Namespaced
27+
28+
/**
 * Subset of a rollout's `spec` used by the operator. Every property is optional and defaults to null.
 *
 * NOTE(review): the properties are mutable with null defaults, presumably so the class can be
 * instantiated and populated during deserialisation; confirm before changing them to `val`.
 */
data class RolloutSpec(
29+
// Desired replica count; compared against the status counters to decide readiness.
var replicas: Int? = null,
30+
// Label selector of the rollout; not read by the code visible in this change.
var selector: LabelSelector? = null,
31+
// Pod template; its metadata labels are matched against service selectors.
var template: PodTemplateSpec? = null,
32+
)
33+
34+
/**
 * Subset of a rollout's `status` used by the operator. Every property is optional and defaults to null.
 *
 * Only `replicas` and `availableReplicas` are read by the readiness check; the remaining
 * properties are not read by the code visible in this change.
 */
data class RolloutStatus(
35+
// Replica count reported by the rollout; must equal the desired count for readiness.
var replicas: Int? = null,
36+
// Available replica count; must equal the desired count for readiness.
var availableReplicas: Int? = null,
37+
var readyReplicas: Int? = null,
38+
var updatedReplicas: Int? = null,
39+
// NOTE(review): presumably the rollout's overall phase as reported by Argo; confirm the value set.
var phase: String? = null,
40+
)

0 commit comments

Comments
 (0)