diff --git a/pkg/defaultmonitortests/types.go b/pkg/defaultmonitortests/types.go
index 822bc08fbbb3..37baa2742a7f 100644
--- a/pkg/defaultmonitortests/types.go
+++ b/pkg/defaultmonitortests/types.go
@@ -22,6 +22,7 @@ import (
 	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/disruptionnewapiserver"
 	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/faultyloadbalancer"
 	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/generationanalyzer"
+	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/kasloganalyzer"
 	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/legacykubeapiservermonitortests"
 	"github.com/openshift/origin/pkg/monitortests/kubeapiserver/staticpodinstall"
 	"github.com/openshift/origin/pkg/monitortests/kubelet/containerfailures"
@@ -179,6 +180,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
 	monitorTestRegistry.AddMonitorTestOrDie("audit-log-analyzer", "kube-apiserver", auditloganalyzer.NewAuditLogAnalyzer(info))
 	monitorTestRegistry.AddMonitorTestOrDie("legacy-kube-apiserver-invariants", "kube-apiserver", legacykubeapiservermonitortests.NewLegacyTests())
 	monitorTestRegistry.AddMonitorTestOrDie("graceful-shutdown-analyzer", "kube-apiserver", apiservergracefulrestart.NewGracefulShutdownAnalyzer())
+	monitorTestRegistry.AddMonitorTestOrDie("kas-log-analyzer", "kube-apiserver", kasloganalyzer.NewKASLogAnalyzer())
 
 	monitorTestRegistry.AddMonitorTestOrDie("legacy-networking-invariants", "Networking / cluster-network-operator", legacynetworkmonitortests.NewLegacyTests())
 
diff --git a/pkg/monitor/monitorapi/types.go b/pkg/monitor/monitorapi/types.go
index bf68bcd45a5a..be753c0686fd 100644
--- a/pkg/monitor/monitorapi/types.go
+++ b/pkg/monitor/monitorapi/types.go
@@ -82,7 +82,6 @@ func ConditionLevelFromString(s string) (IntervalLevel, error) {
 	default:
 		return Error, fmt.Errorf("did not define event level string for %q", s)
 	}
-
 }
 
 type Condition struct {
@@ -368,6 +367,7 @@
 	APIServerGracefulShutdown       IntervalSource = "APIServerGracefulShutdown"
 	APIServerClusterOperatorWatcher IntervalSource = "APIServerClusterOperatorWatcher"
 	SourceAuditLog                  IntervalSource = "AuditLog"
+	SourceKubeAPIServerLog          IntervalSource = "KubeAPIServerLog"
 	SourceTestData                  IntervalSource = "TestData" // some tests have no real source to assign
 
 	SourceOVSVswitchdLog IntervalSource = "OVSVswitchdLog"
@@ -488,7 +488,6 @@ func (i Locator) HasKey(k LocatorKey) bool {
 // This will hopefully eventually go away but for now we need it.
 // Courtesy of ChatGPT but unit tested.
 func sortKeys(keys []string) []string {
-
 	// Ensure these keys appear in this order. Other keys can be mixed in and will appear at the end in alphabetical
 	// order.
 	orderedKeys := []string{"namespace", "node", "pod", "uid", "server", "container", "shutdown", "row"}
@@ -875,5 +874,7 @@ type InstanceKey struct {
 	UID string
 }
 
-type InstanceMap map[InstanceKey]runtime.Object
-type ResourcesMap map[string]InstanceMap
+type (
+	InstanceMap  map[InstanceKey]runtime.Object
+	ResourcesMap map[string]InstanceMap
+)
diff --git a/pkg/monitortests/kubeapiserver/kasloganalyzer/monitortest.go b/pkg/monitortests/kubeapiserver/kasloganalyzer/monitortest.go
new file mode 100644
index 000000000000..528e593bce72
--- /dev/null
+++ b/pkg/monitortests/kubeapiserver/kasloganalyzer/monitortest.go
@@ -0,0 +1,154 @@
+package kasloganalyzer
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"time"
+
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
+	"k8s.io/client-go/informers"
+	coreinformers "k8s.io/client-go/informers/core/v1"
+	"k8s.io/client-go/kubernetes"
+
+	"github.com/openshift/origin/pkg/monitor/monitorapi"
+	"github.com/openshift/origin/pkg/monitortestframework"
+	"github.com/openshift/origin/pkg/monitortestlibrary/podaccess"
+	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
+	"k8s.io/client-go/rest"
+)
+
+type kasLogAnalyzer struct {
+	stopCollection     context.CancelFunc
+	finishedCollecting chan struct{}
+
+	evaluator evaluator
+}
+
+func NewKASLogAnalyzer() monitortestframework.MonitorTest {
+	return &kasLogAnalyzer{
+		finishedCollecting: make(chan struct{}),
+		evaluator:          newEvaluator(),
+	}
+}
+
+func (w *kasLogAnalyzer) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+	kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
+	if err != nil {
+		return err
+	}
+	kubeInformers := informers.NewSharedInformerFactory(kubeClient, 0)
+	namespaceScopedCoreInformers := coreinformers.New(kubeInformers, "openshift-kube-apiserver", nil)
+
+	okasAppLabel, err := labels.NewRequirement("app", selection.Equals, []string{"openshift-kube-apiserver"})
+	if err != nil {
+		return err
+	}
+
+	okasApiServerLabel, err := labels.NewRequirement("apiserver", selection.Equals, []string{"true"})
+	if err != nil {
+		return err
+	}
+
+	ctx, w.stopCollection = context.WithCancel(ctx)
+	podStreamer := podaccess.NewPodsStreamer(
+		kubeClient,
+		labels.NewSelector().Add(*okasAppLabel, *okasApiServerLabel),
+		"openshift-kube-apiserver",
+		"kube-apiserver",
+		w.evaluator,
+		namespaceScopedCoreInformers.Pods(),
+	)
+
+	go kubeInformers.Start(ctx.Done())
+	go podStreamer.Run(ctx, w.finishedCollecting)
+
+	return nil
+}
+
+func (w *kasLogAnalyzer) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+	return nil
+}
+
+func (w *kasLogAnalyzer) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
+	w.stopCollection()
+
+	// wait until we're drained
+	<-w.finishedCollecting
+
+	return nil, w.evaluator.Reports(), nil
+}
+
+func (w *kasLogAnalyzer) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
+	return nil, nil
+}
+
+func (w *kasLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
+	return nil, nil
+}
+
+func (w *kasLogAnalyzer) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
+	return nil
+}
+
+func (*kasLogAnalyzer) Cleanup(ctx context.Context) error {
+	return nil
+}
+
+type evaluator struct {
+	evaluations []evaluation
+}
+
+func (e evaluator) Reports() []*junitapi.JUnitTestCase {
+	out := []*junitapi.JUnitTestCase{}
+
+	for _, eval := range e.evaluations {
+		out = append(out, eval.Report())
+	}
+
+	return out
+}
+
+func newEvaluator() evaluator {
+	return evaluator{
+		evaluations: defaultEvaluations,
+	}
+}
+
+func (e evaluator) HandleLogLine(logLine podaccess.LogLineContent) {
+	for i := range e.evaluations {
+		if e.evaluations[i].regex.MatchString(logLine.Line) {
+			e.evaluations[i].count++
+		}
+	}
+}
+
+type evaluation struct {
+	name      string
+	threshold int
+	count     int
+	regex     *regexp.Regexp
+}
+
+var defaultEvaluations = []evaluation{
+	{
+		name:      "[Jira:\"kube-apiserver\"] should not excessively log informer reflector unhandled errors",
+		regex:     regexp.MustCompile(`reflector\.go.+\"Failed to watch\".+err=\"failed to list.+\".+logger=\"UnhandledError\"`),
+		threshold: 0,
+	},
+}
+
+func (e evaluation) Report() *junitapi.JUnitTestCase {
+	out := &junitapi.JUnitTestCase{
+		Name: e.name,
+	}
+
+	if e.count > e.threshold {
+		out.FailureOutput = &junitapi.FailureOutput{
+			Message: fmt.Sprintf("kube-apiserver logged %d informer-related unhandled errors. Should not log more than %d", e.count, e.threshold),
+		}
+	}
+
+	return out
+}
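
Not part of the patch above: a minimal in-package test sketch of how the evaluator's matching and threshold behaviour could be exercised. The test name and the sample log lines are illustrative (not captured from a real cluster), and the sketch assumes podaccess.LogLineContent only needs its Line field populated for this purpose.

package kasloganalyzer

import (
	"testing"

	"github.com/openshift/origin/pkg/monitortestlibrary/podaccess"
)

func TestReflectorUnhandledErrorEvaluation(t *testing.T) {
	e := newEvaluator()

	// One line that should match the reflector/UnhandledError pattern, one that should not.
	matching := `E0101 12:00:00.000000      17 reflector.go:561] "Failed to watch" err="failed to list *v1.ConfigMap: configmaps is forbidden" logger="UnhandledError"`
	e.HandleLogLine(podaccess.LogLineContent{Line: matching})
	e.HandleLogLine(podaccess.LogLineContent{Line: `I0101 12:00:01.000000      17 healthz.go:261] healthz check passed`})

	reports := e.Reports()
	if len(reports) != 1 {
		t.Fatalf("expected one junit report per evaluation, got %d", len(reports))
	}
	if reports[0].FailureOutput == nil {
		t.Fatalf("expected the matching line to exceed the zero threshold and produce a failure")
	}
}

Note that newEvaluator() hands out the package-level defaultEvaluations slice, so counts accumulate in shared state across evaluator values; a per-test copy of the evaluations would be needed if more cases were added.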