Skip to content

Commit f7dcba4

Browse files
committed
change to check config load validation only for some targets
Signed-off-by: SungJin1212 <[email protected]>
1 parent 960b372 commit f7dcba4

File tree

5 files changed

+213
-5
lines changed

5 files changed

+213
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
* [BUGFIX] Ingester: Allow shipper to skip corrupted blocks. #6786
7575
* [BUGFIX] Compactor: Delete the prefix `blocks_meta` from the metadata fetcher metrics. #6832
7676
* [BUGFIX] Store Gateway: Avoid race condition by deduplicating entries in bucket stores user scan. #6863
77+
* [BUGFIX] Runtime-config: Change to check tenant limit validation when loading runtime config only for `all`, `distributor`, `querier`, and `ruler` targets. #6880
7778

7879
## 1.19.0 2025-02-27
7980

docs/configuration/arguments.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ The next three options only apply when the querier is used together with the Que
9797

9898
Set this flag to `true` for the new behaviour.
9999

100-
Important to note is that when setting this flag to `true`, it has to be set on both the distributor and the querier (called `-distributor.shard-by-all-labels` on Querier as well). If the flag is only set on the distributor and not on the querier, you will get incomplete query results because not all ingesters are queried.
100+
Important to note is that when setting this flag to `true`, it has to be set on the distributor, the querier, and the ruler (called `-distributor.shard-by-all-labels` on Querier as well). If the flag is only set on the distributor and not on the querier, you will get incomplete query results because not all ingesters are queried.
101101

102102
**Upgrade notes**: As this flag also makes all queries always read from all ingesters, the upgrade path is pretty trivial; just enable the flag. When you do enable it, you'll see a spike in the number of active series as the writes are "reshuffled" amongst the ingesters, but over the next stale period all the old series will be flushed, and you should end up with much better load balancing. With this flag enabled in the queriers, reads will always catch all the data from all ingesters.
103103

integration/runtime_config_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"io"
1111
"os"
1212
"path/filepath"
13+
"strings"
1314
"testing"
1415
"time"
1516

@@ -164,6 +165,99 @@ func TestLoadRuntimeConfigFromCloudStorage(t *testing.T) {
164165
require.NoError(t, s.Stop(cortexSvc))
165166
}
166167

168+
// Verify components are successfully started with `-distributor.shard-by-all-labels=false`
169+
// except for "distributor", "querier", and "ruler"
170+
// refer to https://github.com/cortexproject/cortex/issues/6741#issuecomment-3067244929
171+
func Test_VerifyComponentsAreSuccessfullyStarted_WithRuntimeConfigLoad(t *testing.T) {
172+
s, err := e2e.NewScenario(networkName)
173+
require.NoError(t, err)
174+
defer s.Close()
175+
176+
runtimeConfigYamlFile := `
177+
overrides:
178+
'user-1':
179+
max_global_series_per_user: 15000
180+
`
181+
182+
require.NoError(t, writeFileToSharedDir(s, runtimeConfigFile, []byte(runtimeConfigYamlFile)))
183+
filePath := filepath.Join(e2e.ContainerSharedDir, runtimeConfigFile)
184+
185+
flags := mergeFlags(BlocksStorageFlags(), map[string]string{
186+
"-runtime-config.file": filePath,
187+
"-runtime-config.backend": "filesystem",
188+
189+
// alert manager
190+
"-alertmanager.web.external-url": "http://localhost/alertmanager",
191+
"-alertmanager-storage.backend": "local",
192+
"-alertmanager-storage.local.path": filepath.Join(e2e.ContainerSharedDir, "alertmanager_configs"),
193+
194+
// store-gateway
195+
"-querier.store-gateway-addresses": "localhost:12345",
196+
197+
// distributor.shard-by-all-labels is false
198+
"-distributor.shard-by-all-labels": "false",
199+
})
200+
201+
// Start dependencies.
202+
consul := e2edb.NewConsul()
203+
minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"])
204+
require.NoError(t, s.StartAndWaitReady(consul, minio))
205+
206+
// make alert manager config dir
207+
require.NoError(t, writeFileToSharedDir(s, "alertmanager_configs", []byte{}))
208+
209+
// Ingester and Store gateway start
210+
ingester := e2ecortex.NewIngester("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
211+
storeGateway := e2ecortex.NewStoreGateway("store-gateway", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
212+
require.NoError(t, s.StartAndWaitReady(ingester, storeGateway))
213+
214+
// Querier start, but fail with "-distributor.shard-by-all-labels": "false"
215+
querier := e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
216+
"-querier.store-gateway-addresses": strings.Join([]string{storeGateway.NetworkGRPCEndpoint()}, ","),
217+
}), "")
218+
require.Error(t, s.StartAndWaitReady(querier))
219+
220+
// Start Query frontend
221+
queryFrontend := e2ecortex.NewQueryFrontendWithConfigFile("query-frontend", "", flags, "")
222+
require.NoError(t, s.Start(queryFrontend))
223+
224+
// Querier start, should success with "-distributor.shard-by-all-labels": "true"
225+
querier = e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
226+
"-querier.store-gateway-addresses": strings.Join([]string{storeGateway.NetworkGRPCEndpoint()}, ","),
227+
"-distributor.shard-by-all-labels": "true",
228+
"-querier.frontend-address": queryFrontend.NetworkGRPCEndpoint(),
229+
}), "")
230+
require.NoError(t, s.StartAndWaitReady(querier))
231+
232+
// Ruler start, but fail with "-distributor.shard-by-all-labels": "false"
233+
ruler := e2ecortex.NewRuler("ruler", consul.NetworkHTTPEndpoint(), mergeFlags(flags, RulerFlags()), "")
234+
require.Error(t, s.StartAndWaitReady(ruler))
235+
236+
// Ruler start, should success with "-distributor.shard-by-all-labels": "true"
237+
ruler = e2ecortex.NewRuler("ruler", consul.NetworkHTTPEndpoint(), mergeFlags(flags, RulerFlags(), map[string]string{
238+
"-distributor.shard-by-all-labels": "true",
239+
}), "")
240+
require.NoError(t, s.StartAndWaitReady(ruler))
241+
242+
// Start the query-scheduler
243+
queryScheduler := e2ecortex.NewQueryScheduler("query-scheduler", flags, "")
244+
require.NoError(t, s.StartAndWaitReady(queryScheduler))
245+
246+
// Start Alertmanager
247+
alertmanager := e2ecortex.NewAlertmanager("alertmanager", mergeFlags(flags, AlertmanagerFlags()), "")
248+
require.NoError(t, s.StartAndWaitReady(alertmanager))
249+
250+
// Distributor start, but fail with "-distributor.shard-by-all-labels": "false"
251+
distributor := e2ecortex.NewQuerier("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{}), "")
252+
require.Error(t, s.StartAndWaitReady(distributor))
253+
254+
// Distributor start, should success with "-distributor.shard-by-all-labels": "true"
255+
distributor = e2ecortex.NewQuerier("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
256+
"-distributor.shard-by-all-labels": "true",
257+
}), "")
258+
require.NoError(t, s.StartAndWaitReady(distributor))
259+
}
260+
167261
func assertRuntimeConfigLoadedCorrectly(t *testing.T, cortexSvc *e2ecortex.CortexService) {
168262
runtimeConfig := getRuntimeConfig(t, cortexSvc)
169263

pkg/cortex/runtime_config.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"io"
66
"net/http"
7+
"strings"
78

89
"gopkg.in/yaml.v2"
910

@@ -15,7 +16,8 @@ import (
1516
)
1617

1718
var (
18-
errMultipleDocuments = errors.New("the provided runtime configuration contains multiple documents")
19+
errMultipleDocuments = errors.New("the provided runtime configuration contains multiple documents")
20+
tenantLimitCheckTargets = []string{All, Distributor, Querier, Ruler}
1921
)
2022

2123
// RuntimeConfigValues are values that can be reloaded from configuration file while Cortex is running.
@@ -78,9 +80,16 @@ func (l runtimeConfigLoader) load(r io.Reader) (interface{}, error) {
7880
return nil, errMultipleDocuments
7981
}
8082

81-
for _, ul := range overrides.TenantLimits {
82-
if err := ul.Validate(l.cfg.Distributor.ShardByAllLabels, l.cfg.Ingester.ActiveSeriesMetricsEnabled); err != nil {
83-
return nil, err
83+
targetStr := l.cfg.Target.String()
84+
for _, target := range tenantLimitCheckTargets {
85+
if strings.Contains(targetStr, target) {
86+
// only check if target is `all`, `distributor`, "querier", and "ruler"
87+
// refer to https://github.com/cortexproject/cortex/issues/6741#issuecomment-3067244929
88+
for _, ul := range overrides.TenantLimits {
89+
if err := ul.Validate(l.cfg.Distributor.ShardByAllLabels, l.cfg.Ingester.ActiveSeriesMetricsEnabled); err != nil {
90+
return nil, err
91+
}
92+
}
8493
}
8594
}
8695

pkg/cortex/runtime_config_test.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/stretchr/testify/require"
99

1010
"github.com/cortexproject/cortex/pkg/distributor"
11+
"github.com/cortexproject/cortex/pkg/ingester"
1112
"github.com/cortexproject/cortex/pkg/util/validation"
1213
)
1314

@@ -109,3 +110,106 @@ overrides:
109110
assert.Nil(t, actual)
110111
}
111112
}
113+
114+
func TestLoad_ShouldNotErrorWithCertainTarget(t *testing.T) {
115+
116+
tests := []struct {
117+
desc string
118+
target []string
119+
shardByAllLabels bool
120+
isErr bool
121+
}{
122+
{
123+
desc: "all",
124+
target: []string{All},
125+
shardByAllLabels: true,
126+
},
127+
{
128+
desc: "all, shardByAllLabels:false",
129+
target: []string{All},
130+
shardByAllLabels: false,
131+
isErr: true,
132+
},
133+
{
134+
desc: "distributor",
135+
target: []string{Distributor},
136+
shardByAllLabels: true,
137+
},
138+
{
139+
desc: "distributor, shardByAllLabels:false",
140+
target: []string{Distributor},
141+
shardByAllLabels: false,
142+
isErr: true,
143+
},
144+
{
145+
desc: "querier",
146+
target: []string{Querier},
147+
shardByAllLabels: true,
148+
},
149+
{
150+
desc: "querier, shardByAllLabels:false",
151+
target: []string{Querier},
152+
shardByAllLabels: false,
153+
isErr: true,
154+
},
155+
{
156+
desc: "ruler",
157+
target: []string{Ruler},
158+
shardByAllLabels: true,
159+
},
160+
{
161+
desc: "ruler, shardByAllLabels:false",
162+
target: []string{Ruler},
163+
shardByAllLabels: false,
164+
isErr: true,
165+
},
166+
{
167+
desc: "ingester",
168+
target: []string{Ingester},
169+
},
170+
{
171+
desc: "query frontend",
172+
target: []string{QueryFrontend},
173+
},
174+
{
175+
desc: "alertmanager",
176+
target: []string{AlertManager},
177+
},
178+
{
179+
desc: "store gateway",
180+
target: []string{StoreGateway},
181+
},
182+
{
183+
desc: "compactor",
184+
target: []string{Compactor},
185+
},
186+
{
187+
desc: "overrides exporter",
188+
target: []string{OverridesExporter},
189+
},
190+
}
191+
192+
for _, tc := range tests {
193+
t.Run(tc.desc, func(t *testing.T) {
194+
yamlFile := strings.NewReader(`
195+
overrides:
196+
'user-1':
197+
max_global_series_per_user: 15000
198+
`)
199+
200+
loader := runtimeConfigLoader{}
201+
loader.cfg = Config{
202+
Target: tc.target,
203+
Distributor: distributor.Config{ShardByAllLabels: tc.shardByAllLabels},
204+
Ingester: ingester.Config{ActiveSeriesMetricsEnabled: true},
205+
}
206+
207+
_, err := loader.load(yamlFile)
208+
if tc.isErr {
209+
require.Error(t, err)
210+
} else {
211+
require.NoError(t, err)
212+
}
213+
})
214+
}
215+
}

0 commit comments

Comments
 (0)