Skip to content

Commit a5f5156

Browse files
authored
Merge branch 'main' into configure-head-chunks-write-queue-size
2 parents f0ff5c2 + 5038e0c commit a5f5156

24 files changed

+193
-697
lines changed

.github/workflows/build-image.yaml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: Build Image
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
paths:
7+
- 'build-image/Dockerfile'
8+
- '.github/workflows/build-image.yaml'
9+
pull_request:
10+
branches: [ main ]
11+
paths:
12+
- 'build-image/Dockerfile'
13+
- '.github/workflows/build-image.yaml'
14+
15+
jobs:
16+
build:
17+
runs-on: ubuntu-latest
18+
steps:
19+
- uses: actions/checkout@v4
20+
name: Checkout
21+
22+
- name: Build & save image
23+
run: make build-image save-build-image
24+
25+
- name: Upload Docker Images Artifact
26+
uses: actions/upload-artifact@v4
27+
with:
28+
name: build-image
29+
path: ./build-image.tar
30+
if-no-files-found: error
31+
32+
push:
33+
if: github.ref == 'refs/heads/main' && github.repository == 'cortexproject/cortex-jsonnet'
34+
needs: build
35+
runs-on: ubuntu-latest
36+
steps:
37+
- uses: actions/checkout@v4
38+
name: Checkout
39+
40+
- name: Download Docker Images Artifacts
41+
uses: actions/download-artifact@v4
42+
with:
43+
name: build-image
44+
45+
- name: Load image
46+
run: make load-build-image
47+
48+
- name: Login to Quay.io
49+
uses: docker/login-action@v3
50+
with:
51+
registry: quay.io
52+
username: ${{secrets.QUAY_REGISTRY_USER}}
53+
password: ${{secrets.QUAY_REGISTRY_PASSWORD}}
54+
55+
- name: Push image
56+
run: make publish-build-image

.github/workflows/ci.yaml

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,57 @@ name: CI
33
on:
44
push:
55
branches: [ main ]
6+
paths-ignore:
7+
- 'build-image/Dockerfile'
8+
- '.github/workflows/build-image.yaml'
69
pull_request:
710
branches: [ main ]
11+
paths-ignore:
12+
- 'build-image/Dockerfile'
13+
- '.github/workflows/build-image.yaml'
814

915
jobs:
1016
lint:
1117
runs-on: ubuntu-latest
12-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
18+
container: quay.io/cortexproject/cortex-jsonnet-build-image:fbe4726
1319
steps:
14-
- uses: actions/checkout@v2
20+
- uses: actions/checkout@v4
1521
name: Checkout
1622
with:
1723
fetch-depth: 0
18-
24+
1925
- name: "Lint mixin"
2026
run: make lint-mixin
21-
27+
2228
- name: "Lint playbooks"
2329
run: make lint-playbooks
30+
2431
build:
2532
runs-on: ubuntu-latest
26-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
33+
container: quay.io/cortexproject/cortex-jsonnet-build-image:fbe4726
2734
steps:
28-
- uses: actions/checkout@v2
35+
- uses: actions/checkout@v4
2936
name: Checkout
3037
with:
3138
fetch-depth: 0
32-
39+
3340
- name: "Build mixin"
3441
run: make build-mixin
42+
3543
readme:
3644
runs-on: ubuntu-latest
37-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
45+
container: quay.io/cortexproject/cortex-jsonnet-build-image:fbe4726
3846
steps:
39-
- uses: actions/checkout@v2
47+
- uses: actions/checkout@v4
4048
name: Checkout
4149
with:
4250
fetch-depth: 0
43-
44-
- name: "Test readme"
45-
run: make test-readme
51+
52+
- name: "Test readme s3"
53+
run: make test-readme/s3
54+
55+
- name: "Test readme azure"
56+
run: make test-readme/azure
57+
58+
- name: "Test readme gcs"
59+
run: make test-readme/gcs

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ cortex-mixin.zip
22
cortex-mixin/out
33
cortex-mixin/vendor
44
/test-readme/
5+
.vscode
6+
build-image.tar

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
11
# Changelog
22

33
## master / unreleased
4+
* [CHANGE] Use cortex v1.17.1
5+
* [CHANGE] Enable shuffle sharding in compactors
6+
* [CHANGE] Remove chunks support for dashboards
7+
* [ENHANCEMENT] Configure `-ingester.client.grpc-compression` to be `snappy-block`
8+
* [ENHANCEMENT] Support Grafana 11 in Cortex Service Scaling Dashboard
9+
10+
## 1.16.1
411
* [CHANGE] Upgrade memcached to 1.6.23-alpine and memcached-exporter to v0.14.2
5-
* [CHANGE] Use cortex v1.16.0
12+
* [CHANGE] Use cortex v1.16.1
613
* [ENHANCEMENT] Enable frontend query stats by default
714
* [ENHANCEMENT] Enable ruler query stats by default
15+
* [ENHANCEMENT] Configure `-blocks-storage.bucket-store.ignore-blocks-within` in queriers, rulers and store-gateways
816

917
## 1.15.3 / 2023-11-24
1018
* [CHANGE] Add default instance max series for ingesters

Makefile

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: lint build-image publish-build-image test-readme
1+
.PHONY: lint build-image publish-build-image test-readme clean
22

33
JSONNET_FMT := jsonnetfmt
44

@@ -34,6 +34,12 @@ fmt:
3434
build-image:
3535
docker build -t quay.io/cortexproject/cortex-jsonnet-build-image:$(shell git rev-parse --short HEAD) build-image
3636

37+
save-build-image:
38+
docker save quay.io/cortexproject/cortex-jsonnet-build-image:$(shell git rev-parse --short HEAD) > build-image.tar
39+
40+
load-build-image:
41+
docker load < build-image.tar
42+
3743
publish-build-image:
3844
docker push quay.io/cortexproject/cortex-jsonnet-build-image:$(shell git rev-parse --short HEAD)
3945

@@ -47,18 +53,17 @@ build-mixin:
4753
test-readme: test-readme/azure test-readme/gcs test-readme/s3
4854

4955
test-readme/%:
50-
rm -rf $@ && \
51-
mkdir -p $@ && cd $@ && \
52-
tk init --k8s=1.24 && \
53-
jb install github.com/cortexproject/cortex-jsonnet/cortex@main && \
54-
rm -fr ./vendor/cortex && \
55-
cp -r ../../cortex ./vendor/ && \
56-
cp vendor/cortex/$(notdir $@)/main.jsonnet.example environments/default/main.jsonnet && \
57-
PAGER=cat tk show environments/default
56+
@./scripts/test-readme.sh $@
5857

5958
clean-white-noise:
6059
@$(FIND) . -type f -regextype posix-extended -regex '.*(md|libsonnet)' -print | \
6160
SED_BIN="$(SED)" xargs ./scripts/cleanup-white-noise.sh
6261

6362
check-white-noise: clean-white-noise
6463
@git diff --exit-code --quiet || (echo "Please remove trailing whitespaces running 'make clean-white-noise'" && false)
64+
65+
clean:
66+
rm -rf cortex-mixin/out
67+
rm -rf cortex-mixin/vendor
68+
rm -f cortex-mixin/cortex-mixin.zip
69+
rm -rf test-readme

cortex-mixin/alerts/alerts.libsonnet

Lines changed: 0 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -71,27 +71,6 @@
7171
|||,
7272
},
7373
},
74-
{
75-
// We're syncing every 10mins, and this means with a 5min rate, we will have a NaN when syncs fail
76-
// and we will never trigger the alert.
77-
// We also have a 3h grace-period for creation of tables which means that we can fail for 3h before it's an outage.
78-
alert: 'CortexTableSyncFailure',
79-
expr: |||
80-
100 * rate(cortex_table_manager_sync_duration_seconds_count{status_code!~"2.."}[15m])
81-
/
82-
rate(cortex_table_manager_sync_duration_seconds_count[15m])
83-
> 10
84-
|||,
85-
'for': '30m',
86-
labels: {
87-
severity: 'critical',
88-
},
89-
annotations: {
90-
message: |||
91-
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% errors syncing tables.
92-
|||,
93-
},
94-
},
9574
{
9675
alert: 'CortexQueriesIncorrect',
9776
expr: |||
@@ -206,41 +185,6 @@
206185
|||,
207186
},
208187
},
209-
{
210-
alert: 'CortexTransferFailed',
211-
expr: |||
212-
max_over_time(cortex_shutdown_duration_seconds_count{op="transfer",status!="success"}[15m])
213-
|||,
214-
'for': '5m',
215-
labels: {
216-
severity: 'critical',
217-
},
218-
annotations: {
219-
message: |||
220-
{{ $labels.job }}/{{ $labels.instance }} transfer failed.
221-
|||,
222-
},
223-
},
224-
{
225-
alert: 'CortexOldChunkInMemory',
226-
// Even though we should flush chunks after 6h, we see that 99p of age of flushed chunks is closer
227-
// to 10 hours.
228-
// Ignore cortex_oldest_unflushed_chunk_timestamp_seconds that are zero (eg. distributors).
229-
expr: |||
230-
(time() - cortex_oldest_unflushed_chunk_timestamp_seconds > 36000)
231-
and
232-
(cortex_oldest_unflushed_chunk_timestamp_seconds > 0)
233-
|||,
234-
'for': '5m',
235-
labels: {
236-
severity: 'warning',
237-
},
238-
annotations: {
239-
message: |||
240-
{{ $labels.job }}/{{ $labels.instance }} has very old unflushed chunk in memory.
241-
|||,
242-
},
243-
},
244188
{
245189
alert: 'CortexKVStoreFailure',
246190
expr: |||
@@ -379,87 +323,6 @@
379323
},
380324
],
381325
},
382-
{
383-
name: 'cortex_wal_alerts',
384-
rules: [
385-
{
386-
// Alert immediately if WAL is corrupt.
387-
alert: 'CortexWALCorruption',
388-
expr: |||
389-
increase(cortex_ingester_wal_corruptions_total[5m]) > 0
390-
|||,
391-
labels: {
392-
severity: 'critical',
393-
},
394-
annotations: {
395-
message: |||
396-
{{ $labels.job }}/{{ $labels.instance }} has a corrupted WAL or checkpoint.
397-
|||,
398-
},
399-
},
400-
{
401-
// One or more failed checkpoint creation is a warning.
402-
alert: 'CortexCheckpointCreationFailed',
403-
expr: |||
404-
increase(cortex_ingester_checkpoint_creations_failed_total[10m]) > 0
405-
|||,
406-
labels: {
407-
severity: 'warning',
408-
},
409-
annotations: {
410-
message: |||
411-
{{ $labels.job }}/{{ $labels.instance }} failed to create checkpoint.
412-
|||,
413-
},
414-
},
415-
{
416-
// Two or more failed checkpoint creation in 1h means something is wrong.
417-
alert: 'CortexCheckpointCreationFailed',
418-
expr: |||
419-
increase(cortex_ingester_checkpoint_creations_failed_total[1h]) > 1
420-
|||,
421-
labels: {
422-
severity: 'critical',
423-
},
424-
annotations: {
425-
message: |||
426-
{{ $labels.job }}/{{ $labels.instance }} is failing to create checkpoint.
427-
|||,
428-
},
429-
},
430-
{
431-
// One or more failed checkpoint deletion is a warning.
432-
alert: 'CortexCheckpointDeletionFailed',
433-
expr: |||
434-
increase(cortex_ingester_checkpoint_deletions_failed_total[10m]) > 0
435-
|||,
436-
labels: {
437-
severity: 'warning',
438-
},
439-
annotations: {
440-
message: |||
441-
{{ $labels.job }}/{{ $labels.instance }} failed to delete checkpoint.
442-
|||,
443-
},
444-
},
445-
{
446-
// Two or more failed checkpoint deletion in 2h means something is wrong.
447-
// We give this more buffer than creation as this is a less critical operation.
448-
alert: 'CortexCheckpointDeletionFailed',
449-
expr: |||
450-
increase(cortex_ingester_checkpoint_deletions_failed_total[2h]) > 1
451-
|||,
452-
labels: {
453-
severity: 'critical',
454-
},
455-
annotations: {
456-
message: |||
457-
{{ $labels.instance }} is failing to delete checkpoint.
458-
|||,
459-
},
460-
},
461-
],
462-
},
463326
{
464327
name: 'cortex-rollout-alerts',
465328
rules: [
@@ -524,30 +387,6 @@
524387
{
525388
name: 'cortex-provisioning',
526389
rules: [
527-
{
528-
alert: 'CortexProvisioningMemcachedTooSmall',
529-
// 4 x in-memory series size = 24hrs of data.
530-
expr: |||
531-
(
532-
4 *
533-
sum by (%s) (cortex_ingester_memory_series * cortex_ingester_chunk_size_bytes_sum / cortex_ingester_chunk_size_bytes_count)
534-
/ 1e9
535-
)
536-
>
537-
(
538-
sum by (%s) (memcached_limit_bytes{job=~".+/memcached"}) / 1e9
539-
)
540-
||| % [$._config.alert_aggregation_labels, $._config.alert_aggregation_labels],
541-
'for': '15m',
542-
labels: {
543-
severity: 'warning',
544-
},
545-
annotations: {
546-
message: |||
547-
Chunk memcached cluster in %(alert_aggregation_variables)s is too small, should be at least {{ printf "%%.2f" $value }}GB.
548-
||| % $._config,
549-
},
550-
},
551390
{
552391
alert: 'CortexProvisioningTooManyActiveSeries',
553392
// We target each ingester to 1.5M in-memory series. This alert fires if the average

0 commit comments

Comments
 (0)