Skip to content

Commit f0eda25

Browse files
pracuccigotjosh
andauthored
Fix -ruler.alertmanager-url flag (#2989) (#2997)
* fix: `-ruler.alertmanager-url` should be a string This was never space separated, you needed to register the flag multiple times for it to append to the list of strings. On top of that, the previous change implied that when using a config file you would need to provide a a list instead of a string thus breaking exisiting configuration. Signed-off-by: gotjosh <[email protected]> * Unify the use of StringSlice flags We had two very similar flag extensions: `Strings` and `StringSlice`. With this, we unify its use by removing `Strings` in favour of `StringSlice` but keeping the string representation favoured in `Strings`. The difference here is that by using `Sprintf` we would get the brackets representation as part of the string. Signed-off-by: gotjosh <[email protected]> * Rename `api_ruler_test` to `ruler_test` This is no longer only about the API. Signed-off-by: gotjosh <[email protected]> * Create an integration test for Alertmanager discovery Signed-off-by: gotjosh <[email protected]> * Fix existing test Signed-off-by: gotjosh <[email protected]> Co-authored-by: gotjosh <[email protected]>
1 parent b399437 commit f0eda25

File tree

14 files changed

+102
-71
lines changed

14 files changed

+102
-71
lines changed

docs/configuration/config-file-reference.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,12 +1011,12 @@ storage:
10111011
# CLI flag: -ruler.rule-path
10121012
[rule_path: <string> | default = "/rules"]
10131013

1014-
# Space-separated list of URL(s) of the Alertmanager(s) to send notifications
1014+
# Comma-separated list of URL(s) of the Alertmanager(s) to send notifications
10151015
# to. Each Alertmanager URL is treated as a separate group in the configuration.
10161016
# Multiple Alertmanagers in HA per group can be supported by using DNS
10171017
# resolution via -ruler.alertmanager-discovery.
10181018
# CLI flag: -ruler.alertmanager-url
1019-
[alertmanager_url: <list of string> | default = ]
1019+
[alertmanager_url: <string> | default = ""]
10201020

10211021
# Use DNS SRV records to discover Alertmanager hosts.
10221022
# CLI flag: -ruler.alertmanager-discovery
@@ -1152,7 +1152,7 @@ The `alertmanager_config` configures the Cortex alertmanager.
11521152
11531153
# Initial peers (may be repeated).
11541154
# CLI flag: -cluster.peer
1155-
[peers: <list of string> | default = ]
1155+
[peers: <list of string> | default = []]
11561156
11571157
# Time to wait between peers to send notifications.
11581158
# CLI flag: -cluster.peer-timeout
@@ -1888,7 +1888,7 @@ cassandra:
18881888
# If set, when authenticating with cassandra a custom authenticator will be
18891889
# expected during the handshake. This flag can be set multiple times.
18901890
# CLI flag: -cassandra.custom-authenticator
1891-
[custom_authenticators: <list of string> | default = ]
1891+
[custom_authenticators: <list of string> | default = []]
18921892

18931893
# Timeout when connecting to cassandra.
18941894
# CLI flag: -cassandra.timeout
@@ -2514,7 +2514,7 @@ The `memberlist_config` configures the Gossip memberlist.
25142514
# https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery
25152515
# for more details).
25162516
# CLI flag: -memberlist.join
2517-
[join_members: <list of string> | default = ]
2517+
[join_members: <list of string> | default = []]
25182518
25192519
# Min backoff duration to join other cluster members.
25202520
# CLI flag: -memberlist.min-join-backoff
@@ -2552,7 +2552,7 @@ The `memberlist_config` configures the Gossip memberlist.
25522552
# IP address to listen on for gossip messages. Multiple addresses may be
25532553
# specified. Defaults to 0.0.0.0
25542554
# CLI flag: -memberlist.bind-addr
2555-
[bind_addr: <list of string> | default = ]
2555+
[bind_addr: <list of string> | default = []]
25562556
25572557
# Port to listen on for gossip messages.
25582558
# CLI flag: -memberlist.bind-port
@@ -2602,7 +2602,7 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
26022602
# ingestion within the distributor and can be repeated in order to drop multiple
26032603
# labels.
26042604
# CLI flag: -distributor.drop-label
2605-
[drop_labels: <list of string> | default = ]
2605+
[drop_labels: <list of string> | default = []]
26062606
26072607
# Maximum length accepted for label names
26082608
# CLI flag: -validation.max-length-label-name

integration/api_ruler_test.go renamed to integration/ruler_test.go

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package main
44

55
import (
66
"path/filepath"
7+
"strings"
78
"testing"
89

910
"github.com/prometheus/prometheus/pkg/labels"
@@ -20,22 +21,9 @@ func TestRulerAPI(t *testing.T) {
2021
var (
2122
namespaceOne = "test_/encoded_+namespace/?"
2223
namespaceTwo = "test_/encoded_+namespace/?/two"
23-
24-
recordNode = yaml.Node{}
25-
exprNode = yaml.Node{}
2624
)
27-
recordNode.SetString("test_rule")
28-
exprNode.SetString("up")
29-
ruleGroup := rulefmt.RuleGroup{
30-
Name: "test_encoded_+\"+group_name/?",
31-
Interval: 100,
32-
Rules: []rulefmt.RuleNode{
33-
{
34-
Record: recordNode,
35-
Expr: exprNode,
36-
},
37-
},
38-
}
25+
ruleGroup := createTestRuleGroup(t)
26+
3927
s, err := e2e.NewScenario(networkName)
4028
require.NoError(t, err)
4129
defer s.Close()
@@ -144,3 +132,74 @@ func TestRulerAPISingleBinary(t *testing.T) {
144132
require.NoError(t, cortex.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"prometheus_engine_queries"}, e2e.WithLabelMatchers(
145133
labels.MustNewMatcher(labels.MatchEqual, "engine", "ruler"))))
146134
}
135+
136+
func TestRulerAlertmanager(t *testing.T) {
137+
var namespaceOne = "test_/encoded_+namespace/?"
138+
ruleGroup := createTestRuleGroup(t)
139+
140+
s, err := e2e.NewScenario(networkName)
141+
require.NoError(t, err)
142+
defer s.Close()
143+
144+
// Start dependencies.
145+
dynamo := e2edb.NewDynamoDB()
146+
minio := e2edb.NewMinio(9000, RulerConfigs["-ruler.storage.s3.buckets"])
147+
require.NoError(t, s.StartAndWaitReady(minio, dynamo))
148+
149+
// Have at least one alertmanager configuration.
150+
require.NoError(t, writeFileToSharedDir(s, "alertmanager_configs/user-1.yaml", []byte(cortexAlertmanagerUserConfigYaml)))
151+
152+
// Start Alertmanagers.
153+
am1 := e2ecortex.NewAlertmanager("alertmanager1", mergeFlags(AlertmanagerFlags, AlertmanagerLocalFlags), "")
154+
require.NoError(t, s.StartAndWaitReady(am1))
155+
am2 := e2ecortex.NewAlertmanager("alertmanager2", mergeFlags(AlertmanagerFlags, AlertmanagerLocalFlags), "")
156+
require.NoError(t, s.StartAndWaitReady(am2))
157+
158+
am1URL := "http://" + am1.HTTPEndpoint()
159+
am2URL := "http://" + am2.HTTPEndpoint()
160+
161+
// Connect the ruler to Alertmanagers
162+
configOverrides := map[string]string{
163+
"-ruler.alertmanager-url": strings.Join([]string{am1URL, am2URL}, ","),
164+
}
165+
166+
// Start Ruler.
167+
require.NoError(t, writeFileToSharedDir(s, cortexSchemaConfigFile, []byte(cortexSchemaConfigYaml)))
168+
ruler := e2ecortex.NewRuler("ruler", mergeFlags(ChunksStorageFlags, RulerConfigs, configOverrides), "")
169+
require.NoError(t, s.StartAndWaitReady(ruler))
170+
171+
// Create a client with the ruler address configured
172+
c, err := e2ecortex.NewClient("", "", "", ruler.HTTPEndpoint(), "user-1")
173+
require.NoError(t, err)
174+
175+
// Set the rule group into the ruler
176+
require.NoError(t, c.SetRuleGroup(ruleGroup, namespaceOne))
177+
178+
// Wait until the user manager is created
179+
require.NoError(t, ruler.WaitSumMetrics(e2e.Equals(1), "cortex_ruler_managers_total"))
180+
181+
// Wait until we've discovered the alertmanagers.
182+
require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"cortex_prometheus_notifications_alertmanagers_discovered"}, e2e.WaitMissingMetrics))
183+
}
184+
185+
func createTestRuleGroup(t *testing.T) rulefmt.RuleGroup {
186+
t.Helper()
187+
188+
var (
189+
recordNode = yaml.Node{}
190+
exprNode = yaml.Node{}
191+
)
192+
193+
recordNode.SetString("test_rule")
194+
exprNode.SetString("up")
195+
return rulefmt.RuleGroup{
196+
Name: "test_encoded_+\"+group_name/?",
197+
Interval: 100,
198+
Rules: []rulefmt.RuleNode{
199+
{
200+
Record: recordNode,
201+
Expr: exprNode,
202+
},
203+
},
204+
}
205+
}

pkg/compactor/compactor_ring.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
4747

4848
// Instance flags
4949
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
50-
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "compactor.ring.instance-interface", "Name of network interface to read address from.")
50+
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "compactor.ring.instance-interface", "Name of network interface to read address from.")
5151
f.StringVar(&cfg.InstanceAddr, "compactor.ring.instance-addr", "", "IP address to advertise in the ring.")
5252
f.IntVar(&cfg.InstancePort, "compactor.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
5353
f.StringVar(&cfg.InstanceID, "compactor.ring.instance-id", hostname, "Instance ID to register in the ring.")

pkg/distributor/distributor_ring.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
4747

4848
// Instance flags
4949
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
50-
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "distributor.ring.instance-interface", "Name of network interface to read address from.")
50+
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "distributor.ring.instance-interface", "Name of network interface to read address from.")
5151
f.StringVar(&cfg.InstanceAddr, "distributor.ring.instance-addr", "", "IP address to advertise in the ring.")
5252
f.IntVar(&cfg.InstancePort, "distributor.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
5353
f.StringVar(&cfg.InstanceID, "distributor.ring.instance-id", hostname, "Instance ID to register in the ring.")

pkg/ring/kv/etcd/etcd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ type Client struct {
3131
// RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet.
3232
func (cfg *Config) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
3333
cfg.Endpoints = []string{}
34-
f.Var((*flagext.Strings)(&cfg.Endpoints), prefix+"etcd.endpoints", "The etcd endpoints to connect to.")
34+
f.Var((*flagext.StringSlice)(&cfg.Endpoints), prefix+"etcd.endpoints", "The etcd endpoints to connect to.")
3535
f.DurationVar(&cfg.DialTimeout, prefix+"etcd.dial-timeout", 10*time.Second, "The dial timeout for the etcd connection.")
3636
f.IntVar(&cfg.MaxRetries, prefix+"etcd.max-retries", 10, "The maximum number of retries to do for failed ops.")
3737
}

pkg/ring/lifecycler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func (cfg *LifecyclerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.Flag
9797
}
9898

9999
cfg.InfNames = []string{"eth0", "en0"}
100-
f.Var((*flagext.Strings)(&cfg.InfNames), prefix+"lifecycler.interface", "Name of network interface to read address from.")
100+
f.Var((*flagext.StringSlice)(&cfg.InfNames), prefix+"lifecycler.interface", "Name of network interface to read address from.")
101101
f.StringVar(&cfg.Addr, prefix+"lifecycler.addr", "", "IP address to advertise in consul.")
102102
f.IntVar(&cfg.Port, prefix+"lifecycler.port", 0, "port to advertise in consul (defaults to server.grpc-listen-port).")
103103
f.StringVar(&cfg.ID, prefix+"lifecycler.ID", hostname, "ID to register into consul.")

pkg/ruler/notifier.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"net/url"
77
"regexp"
8+
"strings"
89
"sync"
910

1011
gklog "github.com/go-kit/kit/log"
@@ -75,10 +76,11 @@ func (rn *rulerNotifier) stop() {
7576
// Builds a Prometheus config.Config from a ruler.Config with just the required
7677
// options to configure notifications to Alertmanager.
7778
func buildNotifierConfig(rulerConfig *Config) (*config.Config, error) {
78-
validURLs := make([]*url.URL, 0, len(rulerConfig.AlertmanagerURL))
79+
amURLs := strings.Split(rulerConfig.AlertmanagerURL, ",")
80+
validURLs := make([]*url.URL, 0, len(amURLs))
7981

8082
srvDNSregexp := regexp.MustCompile(`^_.+._.+`)
81-
for _, h := range rulerConfig.AlertmanagerURL {
83+
for _, h := range amURLs {
8284
url, err := url.Parse(h)
8385
if err != nil {
8486
return nil, err

pkg/ruler/notifier_test.go

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func TestBuildNotifierConfig(t *testing.T) {
2929
{
3030
name: "with a single URL and no service discovery",
3131
cfg: &Config{
32-
AlertmanagerURL: []string{"http://alertmanager.default.svc.cluster.local/alertmanager"},
32+
AlertmanagerURL: "http://alertmanager.default.svc.cluster.local/alertmanager",
3333
},
3434
ncfg: &config.Config{
3535
AlertingConfig: config.AlertingConfig{
@@ -49,7 +49,7 @@ func TestBuildNotifierConfig(t *testing.T) {
4949
{
5050
name: "with a single URL and service discovery",
5151
cfg: &Config{
52-
AlertmanagerURL: []string{"http://_http._tcp.alertmanager.default.svc.cluster.local/alertmanager"},
52+
AlertmanagerURL: "http://_http._tcp.alertmanager.default.svc.cluster.local/alertmanager",
5353
AlertmanagerDiscovery: true,
5454
AlertmanagerRefreshInterval: time.Duration(60),
5555
},
@@ -74,18 +74,15 @@ func TestBuildNotifierConfig(t *testing.T) {
7474
{
7575
name: "with service discovery and an invalid URL",
7676
cfg: &Config{
77-
AlertmanagerURL: []string{"http://_http.default.svc.cluster.local/alertmanager"},
77+
AlertmanagerURL: "http://_http.default.svc.cluster.local/alertmanager",
7878
AlertmanagerDiscovery: true,
7979
},
8080
err: fmt.Errorf("when alertmanager-discovery is on, host name must be of the form _portname._tcp.service.fqdn (is \"alertmanager.default.svc.cluster.local\")"),
8181
},
8282
{
8383
name: "with multiple URLs and no service discovery",
8484
cfg: &Config{
85-
AlertmanagerURL: []string{
86-
"http://alertmanager-0.default.svc.cluster.local/alertmanager",
87-
"http://alertmanager-1.default.svc.cluster.local/alertmanager",
88-
},
85+
AlertmanagerURL: "http://alertmanager-0.default.svc.cluster.local/alertmanager,http://alertmanager-1.default.svc.cluster.local/alertmanager",
8986
},
9087
ncfg: &config.Config{
9188
AlertingConfig: config.AlertingConfig{
@@ -113,10 +110,7 @@ func TestBuildNotifierConfig(t *testing.T) {
113110
{
114111
name: "with multiple URLs and service discovery",
115112
cfg: &Config{
116-
AlertmanagerURL: []string{
117-
"http://_http._tcp.alertmanager-0.default.svc.cluster.local/alertmanager",
118-
"http://_http._tcp.alertmanager-1.default.svc.cluster.local/alertmanager",
119-
},
113+
AlertmanagerURL: "http://_http._tcp.alertmanager-0.default.svc.cluster.local/alertmanager,http://_http._tcp.alertmanager-1.default.svc.cluster.local/alertmanager",
120114
AlertmanagerDiscovery: true,
121115
AlertmanagerRefreshInterval: time.Duration(60),
122116
},
@@ -152,9 +146,7 @@ func TestBuildNotifierConfig(t *testing.T) {
152146
{
153147
name: "with Basic Authentication",
154148
cfg: &Config{
155-
AlertmanagerURL: []string{
156-
"http://marco:[email protected]/alertmanager",
157-
},
149+
AlertmanagerURL: "http://marco:[email protected]/alertmanager",
158150
},
159151
ncfg: &config.Config{
160152
AlertingConfig: config.AlertingConfig{

pkg/ruler/ruler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ type Config struct {
7979
RulePath string `yaml:"rule_path"`
8080

8181
// URL of the Alertmanager to send notifications to.
82-
AlertmanagerURL flagext.StringSlice `yaml:"alertmanager_url"`
82+
AlertmanagerURL string `yaml:"alertmanager_url"`
8383
// Whether to use DNS SRV records to discover Alertmanager.
8484
AlertmanagerDiscovery bool `yaml:"enable_alertmanager_discovery"`
8585
// How long to wait between refreshing the list of Alertmanager based on DNS service discovery.
@@ -132,7 +132,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
132132
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex.")
133133
f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")
134134

135-
f.Var(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "Space-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
135+
f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
136136
f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover Alertmanager hosts.")
137137
f.DurationVar(&cfg.AlertmanagerRefreshInterval, "ruler.alertmanager-refresh-interval", 1*time.Minute, "How long to wait between refreshing DNS resolutions of Alertmanager hosts.")
138138
f.BoolVar(&cfg.AlertmanangerEnableV2API, "ruler.alertmanager-use-v2", false, "If enabled requests to Alertmanager will utilize the V2 API.")

pkg/ruler/ruler_ring.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
5959

6060
// Instance flags
6161
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
62-
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface", "Name of network interface to read address from.")
62+
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface", "Name of network interface to read address from.")
6363
f.StringVar(&cfg.InstanceAddr, "ruler.ring.instance-addr", "", "IP address to advertise in the ring.")
6464
f.IntVar(&cfg.InstancePort, "ruler.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
6565
f.StringVar(&cfg.InstanceID, "ruler.ring.instance-id", hostname, "Instance ID to register in the ring.")

0 commit comments

Comments
 (0)