Skip to content

Commit 69329e3

Browse files
Merge pull request #30014 from hongkailiu/servicemonitor
NO-JIRA: Ensure Platform Prometheus targets are protected
2 parents 1718cf2 + 90a26ea commit 69329e3

File tree

3 files changed

+114
-0
lines changed

3 files changed

+114
-0
lines changed

test/extended/prometheus/prometheus.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"context"
66
"encoding/json"
77
"fmt"
8+
"net/http"
9+
"os"
810
"regexp"
911
"strings"
1012
"time"
@@ -49,6 +51,101 @@ type TelemeterClientConfig struct {
4951
Enabled *bool `json:"enabled"`
5052
}
5153

54+
// Set $MONITORING_AUTH_TEST_NAMESPACE to focus on the targets from a single namespace
55+
// monitoringAuthTestNamespace holds the value of $MONITORING_AUTH_TEST_NAMESPACE.
// When it is empty, no namespace filter is applied and the targets from every
// namespace are checked.
var monitoringAuthTestNamespace = os.Getenv("MONITORING_AUTH_TEST_NAMESPACE")
56+
57+
var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", func() {
58+
defer g.GinkgoRecover()
59+
var (
60+
oc = exutil.NewCLIWithPodSecurityLevel("prometheus", admissionapi.LevelBaseline)
61+
prometheusURL, bearerToken string
62+
63+
// TODO: remove the namespace when the bug is fixed.
64+
namespacesToSkip = sets.New[string]("openshift-marketplace", // https://issues.redhat.com/browse/OCPBUGS-59763
65+
"openshift-image-registry", // https://issues.redhat.com/browse/OCPBUGS-59767
66+
"openshift-operator-lifecycle-manager", // https://issues.redhat.com/browse/OCPBUGS-59768
67+
"openshift-cluster-samples-operator", // https://issues.redhat.com/browse/OCPBUGS-59769
68+
"openshift-cluster-version", // https://issues.redhat.com/browse/OCPBUGS-57585
69+
"openshift-cluster-csi-drivers", // https://issues.redhat.com/browse/OCPBUGS-60159
70+
"openshift-cluster-node-tuning-operator", // https://issues.redhat.com/browse/OCPBUGS-60258
71+
"openshift-etcd", // https://issues.redhat.com/browse/OCPBUGS-60263
72+
)
73+
)
74+
75+
g.BeforeEach(func(ctx g.SpecContext) {
76+
var err error
77+
78+
kubeClient, err := kubernetes.NewForConfig(oc.AdminConfig())
79+
o.Expect(err).NotTo(o.HaveOccurred())
80+
nsExist, err := exutil.IsNamespaceExist(kubeClient, "openshift-monitoring")
81+
o.Expect(err).NotTo(o.HaveOccurred())
82+
if !nsExist {
83+
g.Skip("openshift-monitoring namespace does not exist, skipping")
84+
}
85+
86+
prometheusURL, err = helper.PrometheusRouteURL(ctx, oc)
87+
o.Expect(err).NotTo(o.HaveOccurred(), "Get public url of prometheus")
88+
bearerToken, err = helper.RequestPrometheusServiceAccountAPIToken(ctx, oc)
89+
o.Expect(err).NotTo(o.HaveOccurred(), "Request prometheus service account API token")
90+
91+
if namespacesToSkip.Has(monitoringAuthTestNamespace) {
92+
e2e.Logf("The namespace %s is not skipped because $MONITORING_AUTH_TEST_NAMESPACE is set to it", monitoringAuthTestNamespace)
93+
namespacesToSkip.Delete(monitoringAuthTestNamespace)
94+
}
95+
})
96+
97+
g.It("should not be accessible without auth [Serial]", func() {
98+
var errs []error
99+
100+
g.By("checking that targets reject the requests with 401 or 403")
101+
execPod := exutil.CreateExecPodOrFail(oc.AdminKubeClient(), oc.Namespace(), "execpod-targets-authorization")
102+
defer func() {
103+
err := oc.AdminKubeClient().CoreV1().Pods(execPod.Namespace).Delete(context.Background(), execPod.Name, *metav1.NewDeleteOptions(1))
104+
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Delete pod %s/%s", execPod.Namespace, execPod.Name))
105+
}()
106+
107+
contents, err := helper.GetURLWithToken(helper.MustJoinUrlPath(prometheusURL, "api/v1/targets"), bearerToken)
108+
o.Expect(err).NotTo(o.HaveOccurred())
109+
110+
targets := &prometheusTargets{}
111+
err = json.Unmarshal([]byte(contents), targets)
112+
o.Expect(err).NotTo(o.HaveOccurred())
113+
o.Expect(len(targets.Data.ActiveTargets)).Should(o.BeNumerically(">=", 5))
114+
115+
expected := sets.New[int](http.StatusUnauthorized, http.StatusForbidden)
116+
for _, target := range targets.Data.ActiveTargets {
117+
ns := target.Labels["namespace"]
118+
o.Expect(ns).NotTo(o.BeEmpty())
119+
if monitoringAuthTestNamespace != "" && ns != monitoringAuthTestNamespace {
120+
continue
121+
}
122+
pod := target.Labels["pod"]
123+
e2e.Logf("Checking via pod exec status code from the scrape url %s for pod %s/%s without authorization (skip=%t)", target.ScrapeUrl, ns, pod, namespacesToSkip.Has(ns))
124+
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, time.Minute, true, func(context.Context) (bool, error) {
125+
statusCode, execError := helper.URLStatusCodeExecViaPod(execPod.Namespace, execPod.Name, target.ScrapeUrl)
126+
e2e.Logf("The scrape url %s for pod %s/%s without authorization returned %d, %v (skip=%t)", target.ScrapeUrl, ns, pod, statusCode, execError, namespacesToSkip.Has(ns))
127+
if expected.Has(statusCode) {
128+
return true, nil
129+
}
130+
// retry on those cases
131+
if execError != nil ||
132+
statusCode/100 == 5 ||
133+
statusCode == http.StatusRequestTimeout ||
134+
statusCode == http.StatusTooManyRequests {
135+
return false, nil
136+
}
137+
return false, fmt.Errorf("expecting status code %v but returned %d", expected.UnsortedList(), statusCode)
138+
})
139+
if err != nil && !namespacesToSkip.Has(ns) {
140+
errs = append(errs, fmt.Errorf("the scrape url %s for pod %s/%s is accessible without authorization: %w", target.ScrapeUrl, ns, pod, err))
141+
}
142+
}
143+
144+
o.Expect(errs).To(o.BeEmpty())
145+
})
146+
147+
})
148+
52149
var _ = g.Describe("[sig-instrumentation][Late] OpenShift alerting rules [apigroup:image.openshift.io]", func() {
53150
defer g.GinkgoRecover()
54151

test/extended/util/annotate/generated/zz_generated.annotations.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/extended/util/prometheus/helpers.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,21 @@ func ExpectURLStatusCodeExecViaPod(ns, execPodName, url string, statusCodes ...i
299299
return fmt.Errorf("last response from server was not in %v: %s", statusCodes, output)
300300
}
301301

302+
// URLStatusCodeExecViaPod attempts connection to url via exec pod and returns the status code
303+
// or an error if any errors happens during the process.
304+
func URLStatusCodeExecViaPod(ns, name, url string) (int, error) {
305+
cmd := fmt.Sprintf("curl -k -s -o /dev/null -w '%%{http_code}' %q", url)
306+
output, err := e2eoutput.RunHostCmd(ns, name, cmd)
307+
if err != nil {
308+
return 0, fmt.Errorf("host command failed: %v\n%s", err, output)
309+
}
310+
ret, err := strconv.Atoi(output)
311+
if err != nil {
312+
return 0, fmt.Errorf("unable to parse status code out of the command's ouput: %v\n%s", err, output)
313+
}
314+
return ret, nil
315+
}
316+
302317
// ExpectPrometheusEndpoint attempts to connect to the metrics endpoint with
303318
// delayed retries upon failure.
304319
func ExpectPrometheusEndpoint(url string) {

0 commit comments

Comments
 (0)