diff --git a/CHANGELOG.md b/CHANGELOG.md index 54f6d9db..81a52d45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,8 +14,13 @@ All notable changes to this project will be documented in this file. - Remove ZooKeeper 3.8.4 from tests and docs ([#857]). +### Fixed + +- Failing to parse one `ZookeeperCluster`/`ZookeeperZnode` should no longer cause the whole operator to stop functioning ([#872]). + [#853]: https://github.com/stackabletech/zookeeper-operator/pull/853 [#857]: https://github.com/stackabletech/zookeeper-operator/pull/857 +[#872]: https://github.com/stackabletech/zookeeper-operator/pull/872 ## [24.7.0] - 2024-07-24 diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 0971dd89..83840b63 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -8,7 +8,11 @@ use stackable_operator::{ apps::v1::StatefulSet, core::v1::{ConfigMap, Endpoints, Service}, }, - kube::runtime::{reflector::ObjectRef, watcher, Controller}, + kube::{ + core::DeserializeGuard, + runtime::{reflector::ObjectRef, watcher, Controller}, + Resource, + }, logging::controller::report_controller_reconciled, CustomResourceExt, }; @@ -71,36 +75,40 @@ async fn main() -> anyhow::Result<()> { let client = stackable_operator::client::create_client(Some(OPERATOR_NAME.to_string())).await?; let zk_controller_builder = Controller::new( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ); let zk_store = zk_controller_builder.store(); let zk_controller = zk_controller_builder .owns( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ) .watches( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), move |endpoints| { zk_store .state() .into_iter() .filter(move |zk| { - zk.metadata.namespace == endpoints.metadata.namespace - && zk.server_role_service_name() == endpoints.metadata.name + let Ok(zk) = &zk.0 else { + return false; + }; + let endpoints_meta = endpoints.meta(); + zk.metadata.namespace == endpoints_meta.namespace + && zk.server_role_service_name() == endpoints_meta.name }) .map(|zk| ObjectRef::from_obj(&*zk)) }, ) .owns( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ) .owns( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ) .shutdown_on_signal() @@ -120,25 +128,29 @@ async fn main() -> anyhow::Result<()> { ); }); let znode_controller_builder = Controller::new( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ); let znode_store = znode_controller_builder.store(); let znode_controller = znode_controller_builder .owns( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ) .watches( - watch_namespace.get_api::(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), move |zk| { znode_store .state() .into_iter() .filter(move |znode| { - zk.metadata.namespace == znode.spec.cluster_ref.namespace - && zk.metadata.name == znode.spec.cluster_ref.name + let Ok(znode) = &znode.0 else { + return false; + }; + let zk_meta = zk.meta(); + zk_meta.namespace == znode.spec.cluster_ref.namespace + && zk_meta.name == znode.spec.cluster_ref.name }) .map(|znode| ObjectRef::from_obj(&*znode)) }, diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index 85cf5c9a..dce670fb 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -38,7 +38,12 @@ use stackable_operator::{ apimachinery::pkg::apis::meta::v1::LabelSelector, DeepMerge, }, - kube::{api::DynamicObject, runtime::controller, Resource}, + kube::{ + api::DynamicObject, + core::{error_boundary, DeserializeGuard}, + runtime::controller, + Resource, + }, kvp::{Label, LabelError, Labels}, logging::controller::ReconcilerError, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, @@ -89,6 +94,11 @@ type Result = std::result::Result; #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("ZookeeperCluster object is invalid"))] + InvalidZookeeperCluster { + source: error_boundary::InvalidObject, + }, + #[snafu(display("crd validation failure"))] CrdValidationFailure { source: stackable_zookeeper_crd::Error, @@ -253,6 +263,7 @@ impl ReconcilerError for Error { } fn secondary_object(&self) -> Option> { match self { + Error::InvalidZookeeperCluster { source: _ } => None, Error::CrdValidationFailure { .. } => None, Error::NoServerRole => None, Error::RoleParseFailure { .. } => None, @@ -289,8 +300,15 @@ impl ReconcilerError for Error { } } -pub async fn reconcile_zk(zk: Arc, ctx: Arc) -> Result { +pub async fn reconcile_zk( + zk: Arc>, + ctx: Arc, +) -> Result { tracing::info!("Starting reconcile"); + let zk = + zk.0.as_ref() + .map_err(error_boundary::InvalidObject::clone) + .context(InvalidZookeeperClusterSnafu)?; let client = &ctx.client; let resolved_product_image = zk @@ -310,7 +328,7 @@ pub async fn reconcile_zk(zk: Arc, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, ctx: Arc) -> Result, - _error: &Error, + _obj: Arc>, + error: &Error, _ctx: Arc, ) -> controller::Action { - controller::Action::requeue(*Duration::from_secs(5)) + match error { + // root object is invalid, will be requeued when modified anyway + Error::InvalidZookeeperCluster { .. } => controller::Action::await_change(), + + _ => controller::Action::requeue(*Duration::from_secs(5)), + } } #[cfg(test)] diff --git a/rust/operator-binary/src/znode_controller.rs b/rust/operator-binary/src/znode_controller.rs index a6f8867c..e6424fd6 100644 --- a/rust/operator-binary/src/znode_controller.rs +++ b/rust/operator-binary/src/znode_controller.rs @@ -11,7 +11,7 @@ use stackable_operator::{ kube::{ self, api::ObjectMeta, - core::DynamicObject, + core::{error_boundary, DeserializeGuard, DynamicObject}, runtime::{controller, finalizer, reflector::ObjectRef}, Resource, }, @@ -40,6 +40,11 @@ pub struct Ctx { #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("ZookeeperZnode object is invalid"))] + InvalidZookeeperZnode { + source: error_boundary::InvalidObject, + }, + #[snafu(display( "object is missing metadata that should be created by the Kubernetes cluster", ))] @@ -147,6 +152,7 @@ impl ReconcilerError for Error { fn secondary_object(&self) -> Option> { match self { + Error::InvalidZookeeperZnode { source: _ } => None, Error::ObjectMissingMetadata => None, Error::InvalidZkReference => None, Error::FindZk { zk, .. } => Some(zk.clone().erase()), @@ -168,10 +174,15 @@ impl ReconcilerError for Error { } pub async fn reconcile_znode( - znode: Arc, + znode: Arc>, ctx: Arc, ) -> Result { tracing::info!("Starting reconcile"); + let znode = znode + .0 + .as_ref() + .map_err(error_boundary::InvalidObject::clone) + .context(InvalidZookeeperZnodeSnafu)?; let (ns, uid) = if let ObjectMeta { namespace: Some(ns), uid: Some(uid), @@ -184,7 +195,7 @@ pub async fn reconcile_znode( }; let client = &ctx.client; - let zk = find_zk_of_znode(client, &znode).await; + let zk = find_zk_of_znode(client, znode).await; let mut default_status_updates: Option = None; // Store the znode path in the status rather than the object itself, to ensure that only K8s administrators can override it let znode_path = match znode.status.as_ref().and_then(|s| s.znode_path.as_deref()) { @@ -210,7 +221,7 @@ pub async fn reconcile_znode( if let Some(status) = default_status_updates { info!("Writing default configuration to status"); ctx.client - .merge_patch_status(&*znode, &status) + .merge_patch_status(znode, &status) .await .context(ApplyStatusSnafu)?; } @@ -218,7 +229,7 @@ pub async fn reconcile_znode( finalizer( &client.get_api::(&ns), &format!("{OPERATOR_NAME}/znode"), - znode.clone(), + Arc::new(znode.clone()), |ev| async { match ev { finalizer::Event::Apply(znode) => { @@ -381,7 +392,7 @@ async fn find_zk_of_znode( } pub fn error_policy( - _obj: Arc, + _obj: Arc>, _error: &Error, _ctx: Arc, ) -> controller::Action {