 package main
 
 import (
+	"strings"
 	"time"
 
 	"context"
 	"errors"
 	"fmt"
+
 	"github.com/golang/glog"
 	osconfigv1 "github.com/openshift/api/config/v1"
+	caov1alpha1 "github.com/openshift/cluster-autoscaler-operator/pkg/apis/autoscaling/v1alpha1"
 	cvoresourcemerge "github.com/openshift/cluster-version-operator/lib/resourcemerge"
 	kappsapi "k8s.io/api/apps/v1"
+	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
+	resource "k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -340,3 +346,265 @@ MachineLoop:
 	}
 	return nil
 }
+
+// ExpectAutoscalerScalesOut is a smoke test for the autoscaling feature:
+// Create a clusterAutoscaler object
+// Create a machineAutoscaler object
+// Create a workLoad to force autoscaling
+// Validate that the targeted machineSet scales out to the expected number of replicas
+// Validate that the number of nodes in the cluster grows
+// Delete the workLoad
+// Delete the clusterAutoscaler and machineAutoscaler objects
+// Ensure the initial number of replicas and nodes is restored
+func (tc *testConfig) ExpectAutoscalerScalesOut() error {
+	listOptions := client.ListOptions{
+		Namespace: namespace,
+	}
+	glog.Info("Get one machineSet")
+	machineSetList := capiv1alpha1.MachineSetList{}
+	err := wait.PollImmediate(1*time.Second, waitMedium, func() (bool, error) {
+		if err := tc.client.List(context.TODO(), &listOptions, &machineSetList); err != nil {
+			glog.Errorf("error querying api for machineSetList object: %v, retrying...", err)
+			return false, nil
+		}
+		return len(machineSetList.Items) > 0, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// Once the installer, cluster-autoscaler and cluster-autoscaler-operator support machineDeployments,
+	// we should test against those instead, so for now we skip machineSets owned by a machineDeployment.
+	targetMachineSet := machineSetList.Items[0]
+	if ownerReferences := targetMachineSet.GetOwnerReferences(); len(ownerReferences) > 0 {
+		glog.Infof("MachineSet %s is owned by a machineDeployment. Please run tests against machineDeployments instead", targetMachineSet.Name)
+		return nil
+	}
+
+	glog.Infof("Create ClusterAutoscaler and MachineAutoscaler objects. Targeting machineSet %s", targetMachineSet.Name)
+	initialNumberOfReplicas := targetMachineSet.Spec.Replicas
+	clusterAutoscaler := caov1alpha1.ClusterAutoscaler{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "default",
+			Namespace: namespace,
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "ClusterAutoscaler",
+			APIVersion: "autoscaling.openshift.io/v1alpha1",
+		},
+	}
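+	// The MachineAutoscaler below enrolls the targeted machineSet with the cluster autoscaler
+	// and bounds its size between MinReplicas and MaxReplicas.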
+	machineAutoscaler := caov1alpha1.MachineAutoscaler{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: fmt.Sprintf("autoscale-%s", targetMachineSet.Name),
+			Namespace:    namespace,
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "MachineAutoscaler",
+			APIVersion: "autoscaling.openshift.io/v1alpha1",
+		},
+		Spec: caov1alpha1.MachineAutoscalerSpec{
+			MaxReplicas: 12,
+			MinReplicas: 1,
+			ScaleTargetRef: caov1alpha1.CrossVersionObjectReference{
+				Name:       targetMachineSet.Name,
+				Kind:       "MachineSet",
+				APIVersion: "cluster.k8s.io/v1alpha1",
+			},
+		},
+	}
+	err = wait.PollImmediate(1*time.Second, waitMedium, func() (bool, error) {
+		if err := tc.client.Create(context.TODO(), &clusterAutoscaler); err != nil {
+			if !strings.Contains(err.Error(), "already exists") {
+				glog.Errorf("error querying api for clusterAutoscaler object: %v, retrying...", err)
+				return false, nil
+			}
+		}
+		if err := tc.client.Create(context.TODO(), &machineAutoscaler); err != nil {
+			if !strings.Contains(err.Error(), "already exists") {
+				glog.Errorf("error querying api for machineAutoscaler object: %v, retrying...", err)
+				return false, nil
+			}
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Get nodeList")
+	nodeList := corev1.NodeList{}
+	err = wait.PollImmediate(1*time.Second, waitMedium, func() (bool, error) {
+		if err := tc.client.List(context.TODO(), &listOptions, &nodeList); err != nil {
+			glog.Errorf("error querying api for nodeList object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
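+	// Record the initial node count so we can later verify both the scale out and the return to the original size.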
+	clusterInitialTotalNodes := len(nodeList.Items)
+	glog.Infof("Cluster initial number of nodes is %d", clusterInitialTotalNodes)
+
+	glog.Info("Create workload")
+	mem, err := resource.ParseQuantity("500Mi")
+	if err != nil {
+		glog.Fatalf("failed to ParseQuantity %v", err)
+	}
+	cpu, err := resource.ParseQuantity("500m")
+	if err != nil {
+		glog.Fatalf("failed to ParseQuantity %v", err)
+	}
+	backoffLimit := int32(4)
+	completions := int32(50)
+	parallelism := int32(50)
+	activeDeadlineSeconds := int64(100)
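+	// The Job below runs 50 parallel pods, each requesting 500m of CPU and 500Mi of memory,
+	// so the pending pods should exceed the current capacity and force the autoscaler to scale out.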
+	workLoad := batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "workload",
+			Namespace: namespace,
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Job",
+			APIVersion: "batch/v1",
+		},
+		Spec: batchv1.JobSpec{
+			Template: corev1.PodTemplateSpec{
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:  "workload",
+							Image: "busybox",
+							Command: []string{
+								"sleep",
+								"300",
+							},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									"memory": mem,
+									"cpu":    cpu,
+								},
+							},
+						},
+					},
+					RestartPolicy: corev1.RestartPolicy("Never"),
+				},
+			},
+			ActiveDeadlineSeconds: &activeDeadlineSeconds,
+			BackoffLimit:          &backoffLimit,
+			Completions:           &completions,
+			Parallelism:           &parallelism,
+		},
+	}
+	err = wait.PollImmediate(1*time.Second, waitMedium, func() (bool, error) {
+		if err := tc.client.Create(context.TODO(), &workLoad); err != nil {
+			glog.Errorf("error querying api for workLoad object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Wait for cluster to scale out number of replicas")
+	err = wait.PollImmediate(1*time.Second, waitLong, func() (bool, error) {
+		msKey := types.NamespacedName{
+			Namespace: namespace,
+			Name:      targetMachineSet.Name,
+		}
+		ms := &capiv1alpha1.MachineSet{}
+		if err := tc.client.Get(context.TODO(), msKey, ms); err != nil {
+			glog.Errorf("error querying api for machineSet object: %v, retrying...", err)
+			return false, nil
+		}
+		glog.Infof("MachineSet %s. Initial number of replicas: %d. New number of replicas: %d", targetMachineSet.Name, *initialNumberOfReplicas, *ms.Spec.Replicas)
+		return *ms.Spec.Replicas > *initialNumberOfReplicas, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Wait for cluster to scale out nodes")
+	err = wait.PollImmediate(1*time.Second, waitLong, func() (bool, error) {
+		nodeList := corev1.NodeList{}
+		if err := tc.client.List(context.TODO(), &listOptions, &nodeList); err != nil {
+			glog.Errorf("error querying api for nodeList object: %v, retrying...", err)
+			return false, nil
+		}
+		glog.Info("Expect at least a new node to come up")
+		glog.Infof("Initial number of nodes: %d. New number of nodes: %d", clusterInitialTotalNodes, len(nodeList.Items))
+		return len(nodeList.Items) > clusterInitialTotalNodes, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Delete workload")
+	err = wait.PollImmediate(1*time.Second, waitMedium, func() (bool, error) {
+		if err := tc.client.Delete(context.TODO(), &workLoad); err != nil {
+			glog.Errorf("error querying api for workLoad object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// We delete the clusterAutoscaler and restore the initial number of replicas so the cluster
+	// returns to its initial number of nodes.
+	// TODO: validate that the autoscaler scales down
+	glog.Info("Delete clusterAutoscaler object")
+	err = wait.PollImmediate(1*time.Second, waitShort, func() (bool, error) {
+		if err := tc.client.Delete(context.TODO(), &clusterAutoscaler); err != nil {
+			glog.Errorf("error querying api for clusterAutoscaler object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Delete machineAutoscaler object")
+	err = wait.PollImmediate(1*time.Second, waitShort, func() (bool, error) {
+		if err := tc.client.Delete(context.TODO(), &machineAutoscaler); err != nil {
+			glog.Errorf("error querying api for machineAutoscaler object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Infof("Ensure initial number of replicas: %d", *initialNumberOfReplicas)
+	err = wait.PollImmediate(1*time.Second, waitShort, func() (bool, error) {
+		msKey := types.NamespacedName{
+			Namespace: namespace,
+			Name:      targetMachineSet.Name,
+		}
+		ms := &capiv1alpha1.MachineSet{}
+		if err := tc.client.Get(context.TODO(), msKey, ms); err != nil {
+			glog.Errorf("error querying api for machineSet object: %v, retrying...", err)
+			return false, nil
+		}
+		ms.Spec.Replicas = initialNumberOfReplicas
+		if err := tc.client.Update(context.TODO(), ms); err != nil {
+			glog.Errorf("error updating machineSet object: %v, retrying...", err)
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return err
+	}
+
+	glog.Info("Wait for cluster to match initial number of nodes")
+	return wait.PollImmediate(1*time.Second, waitLong, func() (bool, error) {
+		nodeList := corev1.NodeList{}
+		if err := tc.client.List(context.TODO(), &listOptions, &nodeList); err != nil {
+			glog.Errorf("error querying api for nodeList object: %v, retrying...", err)
+			return false, nil
+		}
+		glog.Infof("Initial number of nodes: %d. Current number of nodes: %d", clusterInitialTotalNodes, len(nodeList.Items))
+		return len(nodeList.Items) == clusterInitialTotalNodes, nil
+	})
+}