diff --git a/CHANGELOG.md b/CHANGELOG.md
index 678b8fe1133..77e9869a0d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@
* [FEATURE] Distributor/Ingester: Implemented experimental feature to use gRPC stream connection for push requests. This can be enabled by setting `-distributor.use-stream-push=true`. #6580
* [FEATURE] Compactor: Add support for percentage based sharding for compactors. #6738
* [FEATURE] Querier: Allow choosing PromQL engine via header. #6777
+* [FEATURE] Querier: Support for configuring query optimizers and enabling XFunctions in the Thanos engine. #6873
* [ENHANCEMENT] Tenant Federation: Add a # of query result limit logic when the `-tenant-federation.regex-matcher-enabled` is enabled. #6845
* [ENHANCEMENT] Query Frontend: Add a `cortex_slow_queries_total` metric to track # of slow queries per user. #6859
* [ENHANCEMENT] Query Frontend: Change to return 400 when the tenant resolving fail. #6715
@@ -57,6 +58,7 @@
* [ENHANCEMENT] Distributor: Add native histograms max sample size bytes limit validation. #6834
* [ENHANCEMENT] Querier: Support caching parquet labels file in parquet queryable. #6835
* [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
+* [ENHANCEMENT] Ring: Add zone label to `ring_members` metric. #6900
* [ENHANCEMENT] Ingester: Add new metric `cortex_ingester_push_errors_total` to track reasons for ingester request failures. #6901
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
diff --git a/README.md b/README.md
index 515b199a295..470ffe3ed50 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,14 @@
# Cortex
-Cortex is a horizontally scalable, highly available, multi-tenant, long term storage solution for [Prometheus](https://prometheus.io) and [OpenTelemetry Metrics](https://opentelemetry.io/docs/specs/otel/metrics/)
+Cortex is a horizontally scalable, highly available, multi-tenant, long-term storage solution for [Prometheus](https://prometheus.io) and [OpenTelemetry Metrics](https://opentelemetry.io/docs/specs/otel/metrics/).
## Features
- **Horizontally scalable:** Cortex can run across multiple machines in a cluster, exceeding the throughput and storage of a single machine.
- **Highly available:** When run in a cluster, Cortex can replicate data between machines.
- **Multi-tenant:** Cortex can isolate data and queries from multiple different independent Prometheus sources in a single cluster.
-- **Long term storage:** Cortex supports S3, GCS, Swift and Microsoft Azure for long term storage of metric data.
+- **Long-term storage:** Cortex supports S3, GCS, Swift and Microsoft Azure for long-term storage of metric data.
## Documentation
@@ -76,13 +76,13 @@ Join us in shaping the future of Cortex, and let's build something amazing toget
- Sep 2020 KubeCon talk "Scaling Prometheus: How We Got Some Thanos Into Cortex" ([video](https://www.youtube.com/watch?v=Z5OJzRogAS4), [slides](https://static.sched.com/hosted_files/kccnceu20/ec/2020-08%20-%20KubeCon%20EU%20-%20Cortex%20blocks%20storage.pdf))
- Jul 2020 PromCon talk "Sharing is Caring: Leveraging Open Source to Improve Cortex & Thanos" ([video](https://www.youtube.com/watch?v=2oTLouUvsac), [slides](https://docs.google.com/presentation/d/1OuKYD7-k9Grb7unppYycdmVGWN0Bo0UwdJRySOoPdpg/edit))
- Nov 2019 KubeCon talks "[Cortex 101: Horizontally Scalable Long Term Storage for Prometheus][kubecon-cortex-101]" ([video][kubecon-cortex-101-video], [slides][kubecon-cortex-101-slides]), "[Configuring Cortex for Max
- Performance][kubecon-cortex-201]" ([video][kubecon-cortex-201-video], [slides][kubecon-cortex-201-slides], [write up][kubecon-cortex-201-writeup]) and "[Blazin’ Fast PromQL][kubecon-blazin]" ([slides][kubecon-blazin-slides], [video][kubecon-blazin-video], [write up][kubecon-blazin-writeup])
+ Performance][kubecon-cortex-201]" ([video][kubecon-cortex-201-video], [slides][kubecon-cortex-201-slides], [write up][kubecon-cortex-201-writeup]) and "[Blazin' Fast PromQL][kubecon-blazin]" ([slides][kubecon-blazin-slides], [video][kubecon-blazin-video], [write up][kubecon-blazin-writeup])
- Nov 2019 PromCon talk "[Two Households, Both Alike in Dignity: Cortex and Thanos][promcon-two-households]" ([video][promcon-two-households-video], [slides][promcon-two-households-slides], [write up][promcon-two-households-writeup])
- May 2019 KubeCon talks; "[Cortex: Intro][kubecon-cortex-intro]" ([video][kubecon-cortex-intro-video], [slides][kubecon-cortex-intro-slides], [blog post][kubecon-cortex-intro-blog]) and "[Cortex: Deep Dive][kubecon-cortex-deepdive]" ([video][kubecon-cortex-deepdive-video], [slides][kubecon-cortex-deepdive-slides])
- Nov 2018 CloudNative London meetup talk; "Cortex: Horizontally Scalable, Highly Available Prometheus" ([slides][cloudnative-london-2018-slides])
- Aug 2018 PromCon panel; "[Prometheus Long-Term Storage Approaches][promcon-2018-panel]" ([video][promcon-2018-video])
- Dec 2018 KubeCon talk; "[Cortex: Infinitely Scalable Prometheus][kubecon-2018-talk]" ([video][kubecon-2018-video], [slides][kubecon-2018-slides])
-- Aug 2017 PromCon talk; "[Cortex: Prometheus as a Service, One Year On][promcon-2017-talk]" ([videos][promcon-2017-video], [slides][promcon-2017-slides], write up [part 1][promcon-2017-writeup-1], [part 2][promcon-2017-writeup-2], [part 3][promcon-2017-writeup-3])
+- Aug 2017 PromCon talk; "[Cortex: Prometheus as a Service, One Year On][promcon-2017-talk]" ([video][promcon-2017-video], [slides][promcon-2017-slides], write up [part 1][promcon-2017-writeup-1], [part 2][promcon-2017-writeup-2], [part 3][promcon-2017-writeup-3])
- Jun 2017 Prometheus London meetup talk; "Cortex: open-source, horizontally-scalable, distributed Prometheus" ([video][prometheus-london-2017-video])
- Dec 2016 KubeCon talk; "Weave Cortex: Multi-tenant, horizontally scalable Prometheus as a Service" ([video][kubecon-2016-video], [slides][kubecon-2016-slides])
- Aug 2016 PromCon talk; "Project Frankenstein: Multitenant, Scale-Out Prometheus": ([video][promcon-2016-video], [slides][promcon-2016-slides])
@@ -90,10 +90,10 @@ Join us in shaping the future of Cortex, and let's build something amazing toget
### Blog Posts
- Dec 2020 blog post "[How AWS and Grafana Labs are scaling Cortex for the cloud](https://aws.amazon.com/blogs/opensource/how-aws-and-grafana-labs-are-scaling-cortex-for-the-cloud/)"
-- Oct 2020 blog post "[How to switch Cortex from chunks to blocks storage (and why you won’t look back)](https://grafana.com/blog/2020/10/19/how-to-switch-cortex-from-chunks-to-blocks-storage-and-why-you-wont-look-back/)"
+- Oct 2020 blog post "[How to switch Cortex from chunks to blocks storage (and why you won't look back)](https://grafana.com/blog/2020/10/19/how-to-switch-cortex-from-chunks-to-blocks-storage-and-why-you-wont-look-back/)"
- Oct 2020 blog post "[Now GA: Cortex blocks storage for running Prometheus at scale with reduced operational complexity](https://grafana.com/blog/2020/10/06/now-ga-cortex-blocks-storage-for-running-prometheus-at-scale-with-reduced-operational-complexity/)"
- Sep 2020 blog post "[A Tale of Tail Latencies](https://www.weave.works/blog/a-tale-of-tail-latencies)"
-- Aug 2020 blog post "[Scaling Prometheus: How we’re pushing Cortex blocks storage to its limit and beyond](https://grafana.com/blog/2020/08/12/scaling-prometheus-how-were-pushing-cortex-blocks-storage-to-its-limit-and-beyond/)"
+- Aug 2020 blog post "[Scaling Prometheus: How we're pushing Cortex blocks storage to its limit and beyond](https://grafana.com/blog/2020/08/12/scaling-prometheus-how-were-pushing-cortex-blocks-storage-to-its-limit-and-beyond/)"
- Jul 2020 blog post "[How blocks storage in Cortex reduces operational complexity for running Prometheus at massive scale](https://grafana.com/blog/2020/07/29/how-blocks-storage-in-cortex-reduces-operational-complexity-for-running-prometheus-at-massive-scale/)"
- Mar 2020 blog post "[Cortex: Zone Aware Replication](https://kenhaines.net/cortex-zone-aware-replication/)"
- Mar 2020 blog post "[How we're using gossip to improve Cortex and Loki availability](https://grafana.com/blog/2020/03/25/how-were-using-gossip-to-improve-cortex-and-loki-availability/)"
@@ -157,7 +157,7 @@ Join us in shaping the future of Cortex, and let's build something amazing toget
### Amazon Managed Service for Prometheus (AMP)
-[Amazon Managed Service for Prometheus (AMP)](https://aws.amazon.com/prometheus/) is a Prometheus-compatible monitoring service that makes it easy to monitor containerized applications at scale. It is a highly available, secure, and managed monitoring for your containers. Get started [here](https://console.aws.amazon.com/prometheus/home). To learn more about the AMP, reference our [documentation](https://docs.aws.amazon.com/prometheus/latest/userguide/what-is-Amazon-Managed-Service-Prometheus.html) and [Getting Started with AMP blog](https://aws.amazon.com/blogs/mt/getting-started-amazon-managed-service-for-prometheus/).
+[Amazon Managed Service for Prometheus (AMP)](https://aws.amazon.com/prometheus/) is a Prometheus-compatible monitoring service that makes it easy to monitor containerized applications at scale. It is a highly available, secure, and managed monitoring service for your containers. Get started [here](https://console.aws.amazon.com/prometheus/home). To learn more about AMP, reference our [documentation](https://docs.aws.amazon.com/prometheus/latest/userguide/what-is-Amazon-Managed-Service-Prometheus.html) and [Getting Started with AMP blog](https://aws.amazon.com/blogs/mt/getting-started-amazon-managed-service-for-prometheus/).
## Emeritus Maintainers
diff --git a/docs/architecture.md b/docs/architecture.md
index bbb2ed7ae08..b532d83239a 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -21,9 +21,9 @@ Incoming samples (writes from Prometheus) are handled by the [distributor](#dist
## Blocks storage
-The blocks storage is based on [Prometheus TSDB](https://prometheus.io/docs/prometheus/latest/storage/): it stores each tenant's time series into their own TSDB which write out their series to a on-disk Block (defaults to 2h block range periods). Each Block is composed by a few files storing the chunks and the block index.
+The blocks storage is based on [Prometheus TSDB](https://prometheus.io/docs/prometheus/latest/storage/): it stores each tenant's time series in its own TSDB, which writes them out to an on-disk Block (defaults to 2h block range periods). Each Block is composed of a few files storing the chunks and the block index.
-The TSDB chunk files contain the samples for multiple series. The series inside the Chunks are then indexed by a per-block index, which indexes metric names and labels to time series in the chunk files.
+The TSDB chunk files contain the samples for multiple series. The series inside the chunks are then indexed by a per-block index, which indexes metric names and labels to time series in the chunk files.
The blocks storage doesn't require a dedicated storage backend for the index. The only requirement is an object store for the Block files, which can be:
@@ -60,7 +60,7 @@ The **distributor** service is responsible for handling incoming samples from Pr
The validation done by the distributor includes:
-- The metric labels name are formally correct
+- The metric label names are formally correct
- The configured max number of labels per metric is respected
- The configured max length of a label name and value is respected
- The timestamp is not older/newer than the configured min/max time range
@@ -80,7 +80,7 @@ The supported KV stores for the HA tracker are:
* [Consul](https://www.consul.io)
* [Etcd](https://etcd.io)
-Note: Memberlist is not supported. Memberlist-based KV store propagates updates using gossip, which is very slow for HA purposes: result is that different distributors may see different Prometheus server as elected HA replica, which is definitely not desirable.
+Note: Memberlist is not supported. Memberlist-based KV store propagates updates using gossip, which is very slow for HA purposes: the result is that different distributors may see different Prometheus servers as the elected HA replica, which is definitely not desirable.
For more information, please refer to [config for sending HA pairs data to Cortex](guides/ha-pair-handling.md) in the documentation.
@@ -97,11 +97,11 @@ The trade-off associated with the latter is that writes are more balanced across
#### The hash ring
-A hash ring (stored in a key-value store) is used to achieve consistent hashing for the series sharding and replication across the ingesters. All [ingesters](#ingester) register themselves into the hash ring with a set of tokens they own; each token is a random unsigned 32-bit number. Each incoming series is [hashed](#hashing) in the distributor and then pushed to the ingester owning the tokens range for the series hash number plus N-1 subsequent ingesters in the ring, where N is the replication factor.
+A hash ring (stored in a key-value store) is used to achieve consistent hashing for the series sharding and replication across the ingesters. All [ingesters](#ingester) register themselves into the hash ring with a set of tokens they own; each token is a random unsigned 32-bit number. Each incoming series is [hashed](#hashing) in the distributor and then pushed to the ingester owning the token range for the series hash number, and to the N-1 subsequent ingesters in the ring, where N is the replication factor.
To do the hash lookup, distributors find the smallest appropriate token whose value is larger than the [hash of the series](#hashing). When the replication factor is larger than 1, the next subsequent tokens (clockwise in the ring) that belong to different ingesters will also be included in the result.
-The effect of this hash set up is that each token that an ingester owns is responsible for a range of hashes. If there are three tokens with values 0, 25, and 50, then a hash of 3 would be given to the ingester that owns the token 25; the ingester owning token 25 is responsible for the hash range of 1-25.
+The effect of this hash setup is that each token that an ingester owns is responsible for a range of hashes. If there are three tokens with values 0, 25, and 50, then a hash of 3 would be given to the ingester that owns token 25; the ingester owning token 25 is responsible for the hash range of 1-25.
The supported KV stores for the hash ring are:
@@ -111,7 +111,7 @@ The supported KV stores for the hash ring are:
#### Quorum consistency
-Since all distributors share access to the same hash ring, write requests can be sent to any distributor and you can setup a stateless load balancer in front of it.
+Since all distributors share access to the same hash ring, write requests can be sent to any distributor and you can set up a stateless load balancer in front of it.
To ensure consistent query results, Cortex uses [Dynamo-style](https://www.allthingsdistributed.com/files/amazon-dynamo-sosp2007.pdf) quorum consistency on reads and writes. This means that the distributor will wait for a positive response of at least one half plus one of the ingesters to send the sample to before successfully responding to the Prometheus write request.
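+
+As a quick worked example (a sketch, reading "one half plus one" as integer division), with the typical replication factor of 3:
+
+```latex
+\text{quorum} = \left\lfloor \frac{RF}{2} \right\rfloor + 1 = \left\lfloor \frac{3}{2} \right\rfloor + 1 = 2
+```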
@@ -125,35 +125,35 @@ The **ingester** service is responsible for writing incoming series to a [long-t
Incoming series are not immediately written to the storage but kept in memory and periodically flushed to the storage (by default, 2 hours). For this reason, the [queriers](#querier) may need to fetch samples both from ingesters and long-term storage while executing a query on the read path.
-Ingesters contain a **lifecycler** which manages the lifecycle of an ingester and stores the **ingester state** in the [hash ring](#the-hash-ring). Each ingester could be in one of the following states:
+Ingesters contain a **lifecycler** which manages the lifecycle of an ingester and stores the **ingester state** in the [hash ring](#the-hash-ring). Each ingester can be in one of the following states:
- **`PENDING`**
- The ingester has just started. While in this state, the ingester doesn't receive neither write and read requests.
+ The ingester has just started. While in this state, the ingester doesn't receive either write or read requests.
- **`JOINING`**
- The ingester is starting up and joining the ring. While in this state the ingester doesn't receive neither write and read requests. The ingester will join the ring using tokens loaded from disk (if `-ingester.tokens-file-path` is configured) or generate a set of new random ones. Finally, the ingester optionally observes the ring for tokens conflicts and then, once any conflict is resolved, will move to `ACTIVE` state.
+ The ingester is starting up and joining the ring. While in this state the ingester doesn't receive either write or read requests. The ingester will join the ring using tokens loaded from disk (if `-ingester.tokens-file-path` is configured) or generate a set of new random ones. Finally, the ingester optionally observes the ring for token conflicts and then, once any conflict is resolved, will move to `ACTIVE` state.
- **`ACTIVE`**
The ingester is up and running. While in this state the ingester can receive both write and read requests.
- **`LEAVING`**
- The ingester is shutting down and leaving the ring. While in this state the ingester doesn't receive write requests, while it could receive read requests.
+  The ingester is shutting down and leaving the ring. While in this state the ingester doesn't receive write requests, but it can still receive read requests.
- **`UNHEALTHY`**
The ingester has failed to heartbeat to the ring's KV Store. While in this state, distributors skip the ingester while building the replication set for incoming series and the ingester does not receive write or read requests.
Ingesters are **semi-stateful**.
-#### Ingesters failure and data loss
+#### Ingester failure and data loss
If an ingester process crashes or exits abruptly, all the in-memory series that have not yet been flushed to the long-term storage will be lost. There are two main ways to mitigate this failure mode:
1. Replication
2. Write-ahead log (WAL)
-The **replication** is used to hold multiple (typically 3) replicas of each time series in the ingesters. If the Cortex cluster loses an ingester, the in-memory series held by the lost ingester are also replicated to at least another ingester. In the event of a single ingester failure, no time series samples will be lost. However, in the event of multiple ingester failures, time series may be potentially lost if the failures affect all the ingesters holding the replicas of a specific time series.
+**Replication** is used to hold multiple (typically 3) replicas of each time series in the ingesters. If the Cortex cluster loses an ingester, the in-memory series held by the lost ingester are also replicated to at least one other ingester. In the event of a single ingester failure, no time series samples will be lost. However, in the event of multiple ingester failures, time series may be lost if the failures affect all the ingesters holding the replicas of a specific time series.
The **write-ahead log** (WAL) is used to write to a persistent disk all incoming series samples until they're flushed to the long-term storage. In the event of an ingester failure, a subsequent process restart will replay the WAL and recover the in-memory series samples.
-Contrary to the sole replication and given the persistent disk data is not lost, in the event of multiple ingesters failure each ingester will recover the in-memory series samples from WAL upon subsequent restart. The replication is still recommended in order to ensure no temporary failures on the read path in the event of a single ingester failure.
+Unlike replication alone, and given that the persistent disk data is not lost, in the event of multiple ingester failures each ingester will recover the in-memory series samples from the WAL upon a subsequent restart. Replication is still recommended in order to ensure no temporary failures on the read path in the event of a single ingester failure.
-#### Ingesters write de-amplification
+#### Ingester write de-amplification
Ingesters store recently received samples in-memory in order to perform write de-amplification. If the ingesters would immediately write received samples to the long-term storage, the system would be very difficult to scale due to the very high pressure on the storage. For this reason, the ingesters batch and compress samples in-memory and periodically flush them out to the storage.
@@ -169,10 +169,10 @@ Queriers are **stateless** and can be scaled up and down as needed.
### Compactor
-The **compactor** is a service which is responsible to:
+The **compactor** is a service which is responsible for:
-- Compact multiple blocks of a given tenant into a single optimized larger block. This helps to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
-- Keep the per-tenant bucket index updated. The [bucket index](./blocks-storage/bucket-index.md) is used by [queriers](./blocks-storage/querier.md), [store-gateways](#store-gateway) and rulers to discover new blocks in the storage.
+- Compacting multiple blocks of a given tenant into a single optimized larger block. This helps to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
+- Keeping the per-tenant bucket index updated. The [bucket index](./blocks-storage/bucket-index.md) is used by [queriers](./blocks-storage/querier.md), [store-gateways](#store-gateway) and rulers to discover new blocks in the storage.
For more information, see the [compactor documentation](./blocks-storage/compactor.md).
@@ -190,7 +190,7 @@ The store gateway is **semi-stateful**.
### Query frontend
-The **query frontend** is an **optional service** providing the querier's API endpoints and can be used to accelerate the read path. When the query frontend is in place, incoming query requests should be directed to the query frontend instead of the queriers. The querier service will be still required within the cluster, in order to execute the actual queries.
+The **query frontend** is an **optional service** providing the querier's API endpoints and can be used to accelerate the read path. When the query frontend is in place, incoming query requests should be directed to the query frontend instead of the queriers. The querier service will still be required within the cluster, in order to execute the actual queries.
The query frontend internally performs some query adjustments and holds queries in an internal queue. In this setup, queriers act as workers which pull jobs from the queue, execute them, and return them to the query-frontend for aggregation. Queriers need to be configured with the query frontend address (via the `-querier.frontend-address` CLI flag) in order to allow them to connect to the query frontends.
@@ -199,15 +199,15 @@ Query frontends are **stateless**. However, due to how the internal queue works,
Flow of the query in the system when using query-frontend:
1) Query is received by query frontend, which can optionally split it or serve from the cache.
-2) Query frontend stores the query into in-memory queue, where it waits for some querier to pick it up.
+2) Query frontend stores the query into an in-memory queue, where it waits for some querier to pick it up.
3) Querier picks up the query, and executes it.
4) Querier sends result back to query-frontend, which then forwards it to the client.
-Query frontend can also be used with any Prometheus-API compatible service. In this mode Cortex can be used as an query accelerator with it's caching and splitting features on other prometheus query engines like Thanos Querier or your own Prometheus server. Query frontend needs to be configured with downstream url address(via the `-frontend.downstream-url` CLI flag), which is the endpoint of the prometheus server intended to be connected with Cortex.
+The query frontend can also be used with any Prometheus API-compatible service. In this mode Cortex acts as a query accelerator, applying its caching and splitting features to other PromQL engines such as Thanos Querier or your own Prometheus server. The query frontend needs to be configured with a downstream URL (via the `-frontend.downstream-url` CLI flag), which is the endpoint of the Prometheus server to be connected with Cortex.
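+
+A minimal sketch of this mode (assuming the downstream URL maps to the `downstream_url` field of the query frontend configuration block; the URL is illustrative only):
+
+```yaml
+frontend:
+  # Cortex only accelerates queries here; the actual PromQL evaluation happens downstream.
+  downstream_url: http://prometheus.mycompany.svc:9090
+```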
#### Queueing
-The query frontend queuing mechanism is used to:
+The query frontend queueing mechanism is used to:
* Ensure that large queries, that could cause an out-of-memory (OOM) error in the querier, will be retried on failure. This allows administrators to under-provision memory for queries, or optimistically run more small queries in parallel, which helps to reduce the total cost of ownership (TCO).
* Prevent multiple large requests from being convoyed on a single querier by distributing them across all queriers using a first-in/first-out queue (FIFO).
@@ -223,7 +223,7 @@ The query frontend supports caching query results and reuses them on subsequent
### Query Scheduler
-Query Scheduler is an **optional** service that moves the internal queue from query frontend into separate component.
+Query Scheduler is an **optional** service that moves the internal queue from query frontend into a separate component.
This enables independent scaling of query frontends and number of queues (query scheduler).
In order to use query scheduler, both query frontend and queriers must be configured with query scheduler address
@@ -232,10 +232,10 @@ In order to use query scheduler, both query frontend and queriers must be config
Flow of the query in the system changes when using query scheduler:
1) Query is received by query frontend, which can optionally split it or serve from the cache.
-2) Query frontend forwards the query to random query scheduler process.
-3) Query scheduler stores the query into in-memory queue, where it waits for some querier to pick it up.
-3) Querier picks up the query, and executes it.
-4) Querier sends result back to query-frontend, which then forwards it to the client.
+2) Query frontend forwards the query to a random query scheduler process.
+3) Query scheduler stores the query into an in-memory queue, where it waits for some querier to pick it up.
+4) Querier picks up the query, and executes it.
+5) Querier sends result back to query-frontend, which then forwards it to the client.
Query schedulers are **stateless**. It is recommended to run two replicas to make sure queries can still be serviced while one replica is restarting.
@@ -263,7 +263,7 @@ If all of the alertmanager nodes failed simultaneously there would be a loss of
### Configs API
The **configs API** is an **optional service** managing the configuration of Rulers and Alertmanagers.
-It provides APIs to get/set/update the ruler and alertmanager configurations and store them into backend.
-Current supported backend are PostgreSQL and in-memory.
+It provides APIs to get/set/update the ruler and alertmanager configurations and store them in the backend.
+Current supported backends are PostgreSQL and in-memory.
Configs API is **stateless**.
diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md
index ac9dd92d291..04d74307420 100644
--- a/docs/blocks-storage/querier.md
+++ b/docs/blocks-storage/querier.md
@@ -252,11 +252,22 @@ querier:
# CLI flag: -querier.shuffle-sharding-ingesters-lookback-period
[shuffle_sharding_ingesters_lookback_period: | default = 0s]
- # Experimental. Use Thanos promql engine
- # https://github.com/thanos-io/promql-engine rather than the Prometheus promql
- # engine.
- # CLI flag: -querier.thanos-engine
- [thanos_engine: | default = false]
+ thanos_engine:
+ # Experimental. Use Thanos promql engine
+ # https://github.com/thanos-io/promql-engine rather than the Prometheus
+ # promql engine.
+ # CLI flag: -querier.thanos-engine
+    [enabled: <boolean> | default = false]
+
+ # Enable xincrease, xdelta, xrate etc from Thanos engine.
+ # CLI flag: -querier.enable-x-functions
+    [enable_x_functions: <boolean> | default = false]
+
+ # Logical plan optimizers. Multiple optimizers can be provided as a
+ # comma-separated list. Supported values: default, all, propagate-matchers,
+ # sort-matchers, merge-selects, detect-histogram-stats
+ # CLI flag: -querier.optimizers
+    [optimizers: <string> | default = "default"]
# If enabled, ignore max query length check at Querier select method. Users
# can choose to ignore it since the validation can be done before Querier
diff --git a/docs/configuration/arguments.md b/docs/configuration/arguments.md
index 943d319aee3..a99fe4daced 100644
--- a/docs/configuration/arguments.md
+++ b/docs/configuration/arguments.md
@@ -73,7 +73,7 @@ The next three options only apply when the querier is used together with the Que
- `-frontend.forward-headers-list`
- Request headers forwarded by query frontend to downstream queriers. Multiple headers may be specified. Defaults to empty.
+ Request headers forwarded by query frontend to downstream queriers. Multiple headers may be specified. Defaults to empty.
- `-frontend.max-cache-freshness`
@@ -113,7 +113,7 @@ The next three options only apply when the querier is used together with the Que
Enable the distributors HA tracker so that it can accept samples from Prometheus HA replicas gracefully (requires labels). Global (for distributors), this ensures that the necessary internal data structures for the HA handling are created. The option `enable-for-all-users` is still needed to enable ingestion of HA samples for all users.
- `distributor.drop-label`
- This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.
+ This flag can be used to specify label names to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.
### Ring/HA Tracker Store
@@ -123,7 +123,7 @@ The KVStore client is used by both the Ring and HA Tracker (HA Tracker doesn't s
- `{ring,distributor.ha-tracker}.store`
Backend storage to use for the HA Tracker (consul, etcd, inmemory, multi).
- **Warning:** The `inmemory` store will not work correctly with multiple distributors as each distributor can have a different state, causing injestion errors.
+ **Warning:** The `inmemory` store will not work correctly with multiple distributors as each distributor can have a different state, causing ingestion errors.
- `{ring,distributor.ring}.store`
Backend storage to use for the Ring (consul, etcd, inmemory, memberlist, multi).
@@ -162,8 +162,8 @@ prefix these flags with `distributor.ha-tracker.`
The trusted CA file path.
- `etcd.tls-insecure-skip-verify`
Skip validating server certificate.
-- `etcd.ping-without-stream-allowd'`
- Enable/Disable PermitWithoutStream parameter
+- `etcd.ping-without-stream-allowed`
+ Enable/Disable PermitWithoutStream parameter
#### memberlist
@@ -178,7 +178,7 @@ All nodes run the following two loops:
1. Every "gossip interval", pick random "gossip nodes" number of nodes, and send recent ring updates to them.
2. Every "push/pull sync interval", choose random single node, and exchange full ring information with it (push/pull sync). After this operation, rings on both nodes are the same.
-When a node receives a ring update, node will merge it into its own ring state, and if that resulted in a change, node will add that update to the list of gossiped updates.
+When a node receives a ring update, the node will merge it into its own ring state, and if that resulted in a change, the node will add that update to the list of gossiped updates.
Such update will be gossiped `R * log(N+1)` times by this node (R = retransmit multiplication factor, N = number of gossiping nodes in the cluster).
If you find the propagation to be too slow, there are some tuning possibilities (default values are memberlist settings for LAN networks):
@@ -187,14 +187,14 @@ If you find the propagation to be too slow, there are some tuning possibilities
- Decrease push/pull sync interval (default 30s)
- Increase retransmit multiplication factor (default 4)
-To find propagation delay, you can use `cortex_ring_oldest_member_timestamp{state="ACTIVE"}` metric.
+To find propagation delay, you can use the `cortex_ring_oldest_member_timestamp{state="ACTIVE"}` metric.
Flags for configuring KV store based on memberlist library:
- `memberlist.nodename`
Name of the node in memberlist cluster. Defaults to hostname.
- `memberlist.randomize-node-name`
- This flag adds extra random suffix to the node name used by memberlist. Defaults to true. Using random suffix helps to prevent issues when running multiple memberlist nodes on the same machine, or when node names are reused (eg. in stateful sets).
+ This flag adds an extra random suffix to the node name used by memberlist. Defaults to true. Using a random suffix helps to prevent issues when running multiple memberlist nodes on the same machine, or when node names are reused (e.g. in stateful sets).
- `memberlist.retransmit-factor`
Multiplication factor used when sending out messages (factor * log(N+1)). If not set, default value is used.
- `memberlist.join`
@@ -228,29 +228,29 @@ Flags for configuring KV store based on memberlist library:
- `memberlist.gossip-to-dead-nodes-time`
How long to keep gossiping to the nodes that seem to be dead. After this time, dead node is removed from list of nodes. If "dead" node appears again, it will simply join the cluster again, if its name is not reused by other node in the meantime. If the name has been reused, such a reanimated node will be ignored by other members.
- `memberlist.dead-node-reclaim-time`
- How soon can dead's node name be reused by a new node (using different IP). Disabled by default, name reclaim is not allowed until `gossip-to-dead-nodes-time` expires. This can be useful to set to low numbers when reusing node names, eg. in stateful sets.
- If memberlist library detects that new node is trying to reuse the name of previous node, it will log message like this: `Conflicting address for ingester-6. Mine: 10.44.12.251:7946 Theirs: 10.44.12.54:7946 Old state: 2`. Node states are: "alive" = 0, "suspect" = 1 (doesn't respond, will be marked as dead if it doesn't respond), "dead" = 2.
+ How soon can a dead node's name be reused by a new node (using different IP). Disabled by default, name reclaim is not allowed until `gossip-to-dead-nodes-time` expires. This can be useful to set to low numbers when reusing node names, e.g. in stateful sets.
+ If memberlist library detects that a new node is trying to reuse the name of a previous node, it will log a message like this: `Conflicting address for ingester-6. Mine: 10.44.12.251:7946 Theirs: 10.44.12.54:7946 Old state: 2`. Node states are: "alive" = 0, "suspect" = 1 (doesn't respond, will be marked as dead if it doesn't respond), "dead" = 2.
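+
+As a sketch, the flags above map onto a `memberlist` configuration block roughly like this (YAML field names are assumed to mirror the flag names; check the config file reference for the authoritative spelling):
+
+```yaml
+memberlist:
+  randomize_node_name: true   # -memberlist.randomize-node-name
+  retransmit_factor: 4        # -memberlist.retransmit-factor (default 4)
+  join_members:               # -memberlist.join
+    - cortex-gossip.cortex.svc.cluster.local:7946
+```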
#### Multi KV
-This is a special key-value implementation that uses two different KV stores (eg. consul, etcd or memberlist). One of them is always marked as primary, and all reads and writes go to primary store. Other one, secondary, is only used for writes. The idea is that operator can use multi KV store to migrate from primary to secondary store in runtime.
+This is a special key-value implementation that uses two different KV stores (e.g. consul, etcd or memberlist). One of them is always marked as primary, and all reads and writes go to the primary store. The other one, secondary, is only used for writes. The idea is that an operator can use multi KV store to migrate from primary to secondary store at runtime.
For example, migration from Consul to Etcd would look like this:
- Set `ring.store` to use `multi` store. Set `-multi.primary=consul` and `-multi.secondary=etcd`. All consul and etcd settings must still be specified.
-- Start all Cortex microservices. They will still use Consul as primary KV, but they will also write share ring via etcd.
-- Operator can now use "runtime config" mechanism to switch primary store to etcd.
-- After all Cortex microservices have picked up new primary store, and everything looks correct, operator can now shut down Consul, and modify Cortex configuration to use `-ring.store=etcd` only.
+- Start all Cortex microservices. They will still use Consul as the primary KV, but they will also write the ring to etcd.
+- The operator can now use the "runtime config" mechanism to switch the primary store to etcd.
+- After all Cortex microservices have picked up the new primary store, and everything looks correct, the operator can modify the Cortex configuration to use `-ring.store=etcd` only.
- At this point, Consul can be shut down.
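+
+As a minimal sketch, the starting point of that migration could look like this in YAML (the exact nesting depends on which ring configuration block you are editing; field names are assumed to mirror the `multi.*` flags below):
+
+```yaml
+kvstore:
+  store: multi
+  multi:
+    primary: consul      # all reads and writes go to the primary
+    secondary: etcd      # receives mirrored writes only
+    mirror_enabled: true
+```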
-Multi KV has following parameters:
+Multi KV has the following parameters:
- `multi.primary` - name of primary KV store. Same values as in `ring.store` are supported, except `multi`.
- `multi.secondary` - name of secondary KV store.
- `multi.mirror-enabled` - enable mirroring of values to secondary store, defaults to true
-- `multi.mirror-timeout` - wait max this time to write to secondary store to finish. Default to 2 seconds. Errors writing to secondary store are not reported to caller, but are logged and also reported via `cortex_multikv_mirror_write_errors_total` metric.
+- `multi.mirror-timeout` - maximum time to wait for a write to the secondary store to finish. Defaults to 2 seconds. Errors writing to the secondary store are not reported to the caller, but are logged and also reported via the `cortex_multikv_mirror_write_errors_total` metric.
-Multi KV also reacts on changes done via runtime configuration. It uses this section:
+Multi KV also reacts to changes done via runtime configuration. It uses this section:
```yaml
multi_kv_config:
@@ -268,7 +268,7 @@ HA tracking has two of its own flags:
- `distributor.ha-tracker.replica`
Prometheus label to look for in samples to identify a Prometheus HA replica. (default "`__replica__`")
-It's reasonable to assume people probably already have a `cluster` label, or something similar. If not, they should add one along with `__replica__` via external labels in their Prometheus config. If you stick to these default values your Prometheus config could look like this (`POD_NAME` is an environment variable which must be set by you):
+It's reasonable to assume people probably already have a `cluster` label, or something similar. If not, they should add one along with `__replica__` via external labels in their Prometheus config. If you stick to these default values, your Prometheus config could look like this (`POD_NAME` is an environment variable which must be set by you):
```yaml
global:
@@ -277,9 +277,9 @@ global:
__replica__: $POD_NAME
```
-HA Tracking looks for the two labels (which can be overwritten per user)
+HA Tracking looks for the two labels (which can be overridden per user).
-It also talks to a KVStore and has it's own copies of the same flags used by the Distributor to connect to for the ring.
+It also talks to a KVStore and has its own copies of the same flags used by the Distributor to connect to the ring.
- `distributor.ha-tracker.failover-timeout`
If we don't receive any samples from the accepted replica for a cluster in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout (default 30s)
- `distributor.ha-tracker.store`
@@ -307,9 +307,9 @@ It also talks to a KVStore and has it's own copies of the same flags used by the
## Runtime Configuration file
-Cortex has a concept of "runtime config" file, which is simply a file that is reloaded while Cortex is running. It is used by some Cortex components to allow operator to change some aspects of Cortex configuration without restarting it. File is specified by using `-runtime-config.file=` flag and reload period (which defaults to 10 seconds) can be changed by `-runtime-config.reload-period=` flag. Previously this mechanism was only used by limits overrides, and flags were called `-limits.per-user-override-config=` and `-limits.per-user-override-period=10s` respectively. These are still used, if `-runtime-config.file=` is not specified.
+Cortex has a concept of a "runtime config" file, which is simply a file that is reloaded while Cortex is running. It is used by some Cortex components to allow an operator to change some aspects of the Cortex configuration without restarting it. The file is specified using the `-runtime-config.file=` flag, and the reload period (which defaults to 10 seconds) can be changed with the `-runtime-config.reload-period=` flag. Previously this mechanism was only used by limits overrides, and the flags were called `-limits.per-user-override-config=` and `-limits.per-user-override-period=10s` respectively. These are still used if `-runtime-config.file=` is not specified.
-At the moment runtime configuration may contain per-user limits, multi KV store, and ingester instance limits.
+At the moment, runtime configuration may contain per-user limits, multi KV store, and ingester instance limits.
Example runtime configuration file:
@@ -333,15 +333,15 @@ ingester_limits:
max_inflight_push_requests: 10000
```
-When running Cortex on Kubernetes, store this file in a config map and mount it in each services' containers. When changing the values there is no need to restart the services, unless otherwise specified.
+When running Cortex on Kubernetes, store this file in a config map and mount it in each service's container. When changing the values there is no need to restart the services, unless otherwise specified.
The `/runtime_config` endpoint returns the whole runtime configuration, including the overrides. In case you want to get only the non-default values of the configuration you can pass the `mode` parameter with the `diff` value.
-## Ingester, Distributor & Querier limits.
+## Ingester, Distributor & Querier limits
-Cortex implements various limits on the requests it can process, in order to prevent a single tenant overwhelming the cluster. There are various default global limits which apply to all tenants which can be set on the command line. These limits can also be overridden on a per-tenant basis by using `overrides` field of runtime configuration file.
+Cortex implements various limits on the requests it can process, in order to prevent a single tenant from overwhelming the cluster. There are various default global limits which apply to all tenants which can be set on the command line. These limits can also be overridden on a per-tenant basis by using the `overrides` field of the runtime configuration file.
-The `overrides` field is a map of tenant ID (same values as passed in the `X-Scope-OrgID` header) to the various limits. An example could look like:
+The `overrides` field is a map of tenant ID (same values as passed in the `X-Scope-OrgID` header) to the various limits. An example could look like:
```yaml
overrides:
@@ -363,9 +363,9 @@ Valid per-tenant limits are (with their corresponding flags for default values):
The per-tenant rate limit (and burst size), in samples per second. It supports two strategies: `local` (default) and `global`.
- The `local` strategy enforces the limit on a per distributor basis, actual effective rate limit will be N times higher, where N is the number of distributor replicas.
+ The `local` strategy enforces the limit on a per distributor basis; the actual effective rate limit will be N times higher, where N is the number of distributor replicas.
- The `global` strategy enforces the limit globally, configuring a per-distributor local rate limiter as `ingestion_rate / N`, where N is the number of distributor replicas (it's automatically adjusted if the number of replicas change). The `ingestion_burst_size` refers to the per-distributor local rate limiter (even in the case of the `global` strategy) and should be set at least to the maximum number of samples expected in a single push request. For this reason, the `global` strategy requires that push requests are evenly distributed across the pool of distributors; if you use a load balancer in front of the distributors you should be already covered, while if you have a custom setup (ie. an authentication gateway in front) make sure traffic is evenly balanced across distributors.
+ The `global` strategy enforces the limit globally, configuring a per-distributor local rate limiter as `ingestion_rate / N`, where N is the number of distributor replicas (it's automatically adjusted if the number of replicas changes). The `ingestion_burst_size` refers to the per-distributor local rate limiter (even in the case of the `global` strategy) and should be set at least to the maximum number of samples expected in a single push request. For this reason, the `global` strategy requires that push requests are evenly distributed across the pool of distributors; if you use a load balancer in front of the distributors you should already be covered, while if you have a custom setup (i.e. an authentication gateway in front) make sure traffic is evenly balanced across distributors.
The `global` strategy requires the distributors to form their own ring, which is used to keep track of the current number of healthy distributor replicas. The ring is configured by `distributor: { ring: {}}` / `-distributor.ring.*`.
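+
+As a quick worked example of the `global` strategy (illustrative numbers only):
+
+```latex
+\text{local limit per distributor} = \frac{\text{ingestion\_rate}}{N} = \frac{100000}{5} = 20000 \ \text{samples/s} \qquad (N = 5 \ \text{distributor replicas})
+```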
@@ -373,37 +373,37 @@ Valid per-tenant limits are (with their corresponding flags for default values):
- `max_label_value_length` / `-validation.max-length-label-value`
- `max_label_names_per_series` / `-validation.max-label-names-per-series`
- Also enforced by the distributor, limits on the on length of labels and their values, and the total number of labels allowed per series.
+ Also enforced by the distributor; limits on the length of labels and their values, and the total number of labels allowed per series.
- `reject_old_samples` / `-validation.reject-old-samples`
- `reject_old_samples_max_age` / `-validation.reject-old-samples.max-age`
- `creation_grace_period` / `-validation.create-grace-period`
- Also enforce by the distributor, limits on how far in the past (and future) timestamps that we accept can be.
+ Also enforced by the distributor; limits on how far in the past (and future) timestamps that we accept can be.
- `max_series_per_user` / `-ingester.max-series-per-user`
- `max_series_per_metric` / `-ingester.max-series-per-metric`
- Enforced by the ingesters; limits the number of active series a user (or a given metric) can have. When running with `-distributor.shard-by-all-labels=false` (the default), this limit will enforce the maximum number of series a metric can have 'globally', as all series for a single metric will be sent to the same replication set of ingesters. This is not the case when running with `-distributor.shard-by-all-labels=true`, so the actual limit will be N/RF times higher, where N is number of ingester replicas and RF is configured replication factor.
+ Enforced by the ingesters; limits the number of active series a user (or a given metric) can have. When running with `-distributor.shard-by-all-labels=false` (the default), this limit will enforce the maximum number of series a metric can have 'globally', as all series for a single metric will be sent to the same replication set of ingesters. This is not the case when running with `-distributor.shard-by-all-labels=true`, so the actual limit will be N/RF times higher, where N is the number of ingester replicas and RF is the configured replication factor.
- `max_global_series_per_user` / `-ingester.max-global-series-per-user`
- `max_global_series_per_metric` / `-ingester.max-global-series-per-metric`
- Like `max_series_per_user` and `max_series_per_metric`, but the limit is enforced across the cluster. Each ingester is configured with a local limit based on the replication factor, the `-distributor.shard-by-all-labels` setting and the current number of healthy ingesters, and is kept updated whenever the number of ingesters change.
+ Like `max_series_per_user` and `max_series_per_metric`, but the limit is enforced across the cluster. Each ingester is configured with a local limit based on the replication factor, the `-distributor.shard-by-all-labels` setting and the current number of healthy ingesters, and is kept updated whenever the number of ingesters changes.
Requires `-distributor.replication-factor`, `-distributor.shard-by-all-labels`, `-distributor.sharding-strategy` and `-distributor.zone-awareness-enabled` set for the ingesters too.
- `max_metadata_per_user` / `-ingester.max-metadata-per-user`
- `max_metadata_per_metric` / `-ingester.max-metadata-per-metric`
- Enforced by the ingesters; limits the number of active metadata a user (or a given metric) can have. When running with `-distributor.shard-by-all-labels=false` (the default), this limit will enforce the maximum number of metadata a metric can have 'globally', as all metadata for a single metric will be sent to the same replication set of ingesters. This is not the case when running with `-distributor.shard-by-all-labels=true`, so the actual limit will be N/RF times higher, where N is number of ingester replicas and RF is configured replication factor.
+ Enforced by the ingesters; limits the number of active metadata a user (or a given metric) can have. When running with `-distributor.shard-by-all-labels=false` (the default), this limit will enforce the maximum number of metadata a metric can have 'globally', as all metadata for a single metric will be sent to the same replication set of ingesters. This is not the case when running with `-distributor.shard-by-all-labels=true`, so the actual limit will be N/RF times higher, where N is the number of ingester replicas and RF is the configured replication factor.
- `max_fetched_series_per_query` / `querier.max-fetched-series-per-query`
- When running Cortex with blocks storage this limit is enforced in the queriers on unique series fetched from ingesters and store-gateways (long-term storage).
+ When running Cortex with blocks storage, this limit is enforced in the queriers on unique series fetched from ingesters and store-gateways (long-term storage).
- `max_global_metadata_per_user` / `-ingester.max-global-metadata-per-user`
- `max_global_metadata_per_metric` / `-ingester.max-global-metadata-per-metric`
- Like `max_metadata_per_user` and `max_metadata_per_metric`, but the limit is enforced across the cluster. Each ingester is configured with a local limit based on the replication factor, the `-distributor.shard-by-all-labels` setting and the current number of healthy ingesters, and is kept updated whenever the number of ingesters change.
+ Like `max_metadata_per_user` and `max_metadata_per_metric`, but the limit is enforced across the cluster. Each ingester is configured with a local limit based on the replication factor, the `-distributor.shard-by-all-labels` setting and the current number of healthy ingesters, and is kept updated whenever the number of ingesters changes.
Requires `-distributor.replication-factor`, `-distributor.shard-by-all-labels`, `-distributor.sharding-strategy` and `-distributor.zone-awareness-enabled` set for the ingesters too.
@@ -423,25 +423,25 @@ ingester_limits:
Valid ingester instance limits are (with their corresponding flags):
-- `max_ingestion_rate` \ `--ingester.instance-limits.max-ingestion-rate`
+- `max_ingestion_rate` / `-ingester.instance-limits.max-ingestion-rate`
Limit the ingestion rate in samples per second for an ingester. When this limit is reached, new requests will fail with an HTTP 500 error.
-- `max_series` \ `-ingester.instance-limits.max-series`
+- `max_series` / `-ingester.instance-limits.max-series`
Limit the total number of series that an ingester keeps in memory, across all users. When this limit is reached, requests that create new series will fail with an HTTP 500 error.
-- `max_tenants` \ `-ingester.instance-limits.max-tenants`
+- `max_tenants` / `-ingester.instance-limits.max-tenants`
Limit the maximum number of users an ingester will accept metrics for. When this limit is reached, requests from new users will fail with an HTTP 500 error.
-- `max_inflight_push_requests` \ `-ingester.instance-limits.max-inflight-push-requests`
+- `max_inflight_push_requests` / `-ingester.instance-limits.max-inflight-push-requests`
Limit the maximum number of requests being handled by an ingester at once. This setting is critical for preventing ingesters from using an excessive amount of memory during high load or temporary slow downs. When this limit is reached, new requests will fail with an HTTP 500 error.
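+
+Pulling the four limits together, the corresponding runtime-config section (values are illustrative only) could look like:
+
+```yaml
+ingester_limits:
+  max_ingestion_rate: 20000
+  max_series: 1500000
+  max_tenants: 1000
+  max_inflight_push_requests: 30000
+```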
## DNS Service Discovery
-Some clients in Cortex support service discovery via DNS to find addresses of backend servers to connect to (ie. caching servers). The clients supporting it are:
+Some clients in Cortex support service discovery via DNS to find addresses of backend servers to connect to (e.g. caching servers). The clients supporting it are:
- [Blocks storage's memcached cache](../blocks-storage/store-gateway.md#caching)
- [All caching memcached servers](./config-file-reference.md#memcached-client-config)
@@ -449,7 +449,7 @@ Some clients in Cortex support service discovery via DNS to find addresses of ba
### Supported discovery modes
-The DNS service discovery, inspired from Thanos DNS SD, supports different discovery modes. A discovery mode is selected adding a specific prefix to the address. The supported prefixes are:
+The DNS service discovery, inspired by Thanos DNS SD, supports different discovery modes. A discovery mode is selected by adding a specific prefix to the address. The supported prefixes are:
- **`dns+`**
The domain name after the prefix is looked up as an A/AAAA query. For example: `dns+memcached.local:11211`
@@ -458,13 +458,13 @@ The DNS service discovery, inspired from Thanos DNS SD, supports different disco
- **`dnssrvnoa+`**
The domain name after the prefix is looked up as a SRV query, with no A/AAAA lookup made after that. For example: `dnssrvnoa+_memcached._tcp.memcached.namespace.svc.cluster.local`
-If **no prefix** is provided, the provided IP or hostname will be used straightaway without pre-resolving it.
+If **no prefix** is provided, the provided IP or hostname will be used directly without pre-resolving it.
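+
+For example, a memcached client block using SRV-based discovery might look like this (a sketch; the `addresses` field name is an assumption — apply it to whichever cache client block you are configuring):
+
+```yaml
+memcached:
+  addresses: dnssrvnoa+_memcached._tcp.memcached.namespace.svc.cluster.local
+```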
If you are using a managed memcached service from [Google Cloud](https://cloud.google.com/memorystore/docs/memcached/auto-discovery-overview), or [AWS](https://docs.aws.amazon.com/AmazonElastiCache/latest/mem-ug/AutoDiscovery.HowAutoDiscoveryWorks.html), use the [auto-discovery](./config-file-reference.md#memcached-client-config) flag instead of DNS discovery, then use the discovery/configuration endpoint as the domain name without any prefix.
## Logging of IP of reverse proxy
-If a reverse proxy is used in front of Cortex it might be difficult to troubleshoot errors. The following 3 settings can be used to log the IP address passed along by the reverse proxy in headers like X-Forwarded-For.
+If a reverse proxy is used in front of Cortex, it might be difficult to troubleshoot errors. The following 3 settings can be used to log the IP address passed along by the reverse proxy in headers like X-Forwarded-For.
- `-server.log_source_ips_enabled`
@@ -472,8 +472,8 @@ If a reverse proxy is used in front of Cortex it might be difficult to troublesh
- `-server.log-source-ips-header`
- Header field storing the source IPs. It is only used if `-server.log-source-ips-enabled` is true and if `-server.log-source-ips-regex` is set. If not set the default Forwarded, X-Real-IP or X-Forwarded-For headers are searched.
+ Header field storing the source IPs. It is only used if `-server.log-source-ips-enabled` is true and if `-server.log-source-ips-regex` is set. If not set, the default Forwarded, X-Real-IP or X-Forwarded-For headers are searched.
- `-server.log-source-ips-regex`
- Regular expression for matching the source IPs. It should contain at least one capturing group the first of which will be returned. Only used if `-server.log-source-ips-enabled` is true and if `-server.log-source-ips-header` is set. If not set the default Forwarded, X-Real-IP or X-Forwarded-For headers are searched.
+ Regular expression for matching the source IPs. It should contain at least one capturing group, the first of which will be returned. Only used if `-server.log-source-ips-enabled` is true and if `-server.log-source-ips-header` is set. If not set, the default Forwarded, X-Real-IP or X-Forwarded-For headers are searched.
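+
+A sketch of the three settings together (YAML field names are assumed to mirror the flags):
+
+```yaml
+server:
+  log_source_ips_enabled: true
+  log_source_ips_header: X-Forwarded-For
+  log_source_ips_regex: "(.+)"   # must contain at least one capturing group
+```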
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
index 2963a87348c..0ce98cb65af 100644
--- a/docs/configuration/config-file-reference.md
+++ b/docs/configuration/config-file-reference.md
@@ -4302,11 +4302,22 @@ store_gateway_client:
# CLI flag: -querier.shuffle-sharding-ingesters-lookback-period
[shuffle_sharding_ingesters_lookback_period: | default = 0s]
-# Experimental. Use Thanos promql engine
-# https://github.com/thanos-io/promql-engine rather than the Prometheus promql
-# engine.
-# CLI flag: -querier.thanos-engine
-[thanos_engine: | default = false]
+thanos_engine:
+ # Experimental. Use Thanos promql engine
+ # https://github.com/thanos-io/promql-engine rather than the Prometheus promql
+ # engine.
+ # CLI flag: -querier.thanos-engine
+ [enabled: | default = false]
+
+ # Enable xincrease, xdelta, xrate, etc. from the Thanos engine.
+ # CLI flag: -querier.enable-x-functions
+ [enable_x_functions: | default = false]
+
+ # Logical plan optimizers. Multiple optimizers can be provided as a
+ # comma-separated list. Supported values: default, all, propagate-matchers,
+ # sort-matchers, merge-selects, detect-histogram-stats
+ # CLI flag: -querier.optimizers
+ [optimizers: | default = "default"]
# If enabled, ignore max query length check at Querier select method. Users can
# choose to ignore it since the validation can be done before Querier evaluation
@@ -5023,6 +5034,23 @@ ring:
# ruler.enable-ha-evaluation is true.
# CLI flag: -ruler.liveness-check-timeout
[liveness_check_timeout: | default = 1s]
+
+thanos_engine:
+ # Experimental. Use Thanos promql engine
+ # https://github.com/thanos-io/promql-engine rather than the Prometheus promql
+ # engine.
+ # CLI flag: -ruler.thanos-engine
+ [enabled: | default = false]
+
+ # Enable xincrease, xdelta, xrate, etc. from the Thanos engine.
+ # CLI flag: -ruler.enable-x-functions
+ [enable_x_functions: | default = false]
+
+ # Logical plan optimizers. Multiple optimizers can be provided as a
+ # comma-separated list. Supported values: default, all, propagate-matchers,
+ # sort-matchers, merge-selects, detect-histogram-stats
+ # CLI flag: -ruler.optimizers
+ [optimizers: | default = "default"]
```
### `ruler_storage_config`
diff --git a/integration/parquet_querier_test.go b/integration/parquet_querier_test.go
index ca31a019c9a..570b4c0c45a 100644
--- a/integration/parquet_querier_test.go
+++ b/integration/parquet_querier_test.go
@@ -63,8 +63,9 @@ func TestParquetFuzz(t *testing.T) {
"-store-gateway.sharding-enabled": "false",
"--querier.store-gateway-addresses": "nonExistent", // Make sure we do not call Store gateways
// alert manager
- "-alertmanager.web.external-url": "http://localhost/alertmanager",
- "-frontend.query-vertical-shard-size": "1",
+ "-alertmanager.web.external-url": "http://localhost/alertmanager",
+ // Enable vertical sharding.
+ "-frontend.query-vertical-shard-size": "3",
"-frontend.max-cache-freshness": "1m",
// enable experimental promQL funcs
"-querier.enable-promql-experimental-functions": "true",
@@ -130,16 +131,20 @@ func TestParquetFuzz(t *testing.T) {
// Wait until we convert the blocks
cortex_testutil.Poll(t, 30*time.Second, true, func() interface{} {
found := false
+ foundBucketIndex := false
err := bkt.Iter(context.Background(), "", func(name string) error {
fmt.Println(name)
if name == fmt.Sprintf("parquet-markers/%v-parquet-converter-mark.json", id.String()) {
found = true
}
+ if name == "bucket-index.json.gz" {
+ foundBucketIndex = true
+ }
return nil
}, objstore.WithRecursiveIter())
require.NoError(t, err)
- return found
+ return found && foundBucketIndex
})
att, err := bkt.Attributes(context.Background(), "bucket-index.json.gz")
@@ -178,7 +183,7 @@ func TestParquetFuzz(t *testing.T) {
}
ps := promqlsmith.New(rnd, lbls, opts...)
- runQueryFuzzTestCases(t, ps, c1, c2, end, start, end, scrapeInterval, 500, false)
+ runQueryFuzzTestCases(t, ps, c1, c2, end, start, end, scrapeInterval, 1000, false)
require.NoError(t, cortex.WaitSumMetricsWithOptions(e2e.Greater(0), []string{"cortex_parquet_queryable_blocks_queried_total"}, e2e.WithLabelMatchers(
labels.MustNewMatcher(labels.MatchEqual, "type", "parquet"))))
diff --git a/integration/querier_test.go b/integration/querier_test.go
index 6305b4433c5..a39f47dd277 100644
--- a/integration/querier_test.go
+++ b/integration/querier_test.go
@@ -19,6 +19,7 @@ import (
"github.com/prometheus/prometheus/prompb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+ "github.com/thanos-io/promql-engine/execution/parse"
"github.com/cortexproject/cortex/integration/e2e"
e2ecache "github.com/cortexproject/cortex/integration/e2e/cache"
@@ -416,6 +417,7 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
"-blocks-storage.bucket-store.bucket-index.enabled": strconv.FormatBool(testCfg.bucketIndexEnabled),
"-querier.query-store-for-labels-enabled": "true",
"-querier.thanos-engine": strconv.FormatBool(thanosEngine),
+ "-querier.enable-x-functions": strconv.FormatBool(thanosEngine),
// Ingester.
"-ring.store": "consul",
"-consul.hostname": consul.NetworkHTTPEndpoint(),
@@ -1310,3 +1312,172 @@ func TestQuerierMaxSamplesLimit(t *testing.T) {
Error: "query processing would load too many samples into memory in query execution",
})
}
+
+func TestQuerierEngineConfigs(t *testing.T) {
+ const blockRangePeriod = 5 * time.Second
+
+ s, err := e2e.NewScenario(networkName)
+ require.NoError(t, err)
+ defer s.Close()
+
+ // Configure the blocks storage to frequently compact TSDB head
+ // and ship blocks to the storage.
+ flags := mergeFlags(BlocksStorageFlags(), map[string]string{
+ "-blocks-storage.tsdb.block-ranges-period": blockRangePeriod.String(),
+ "-blocks-storage.tsdb.ship-interval": "1s",
+ "-blocks-storage.tsdb.retention-period": ((blockRangePeriod * 2) - 1).String(),
+ "-querier.thanos-engine": "true",
+ "-querier.enable-x-functions": "true",
+ "-querier.optimizers": "all",
+ })
+
+ // Start dependencies.
+ minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"])
+ consul := e2edb.NewConsul()
+ require.NoError(t, s.StartAndWaitReady(consul, minio))
+
+ // Start Cortex components for the write path.
+ distributor := e2ecortex.NewDistributor("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ ingester := e2ecortex.NewIngester("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ require.NoError(t, s.StartAndWaitReady(distributor, ingester))
+
+ queryFrontend := e2ecortex.NewQueryFrontendWithConfigFile("query-frontend", "", flags, "")
+ require.NoError(t, s.Start(queryFrontend))
+
+ querier := e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
+ "-querier.frontend-address": queryFrontend.NetworkGRPCEndpoint(),
+ }), "")
+ require.NoError(t, s.StartAndWaitReady(querier))
+
+ // Wait until the distributor and querier have updated the ring.
+ require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+ require.NoError(t, querier.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+
+ c, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), queryFrontend.HTTPEndpoint(), "", "", "user-1")
+ require.NoError(t, err)
+
+ // Push some series to Cortex.
+ series1Timestamp := time.Now()
+ series1, _ := generateSeries("series_1", series1Timestamp, prompb.Label{Name: "job", Value: "test"})
+ series2, _ := generateSeries("series_2", series1Timestamp, prompb.Label{Name: "job", Value: "test"})
+
+ res, err := c.Push(series1)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+ res, err = c.Push(series2)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+
+ for xFunc := range parse.XFunctions {
+ result, err := c.Query(fmt.Sprintf(`%s(series_1{job="test"}[1m])`, xFunc), series1Timestamp)
+ require.NoError(t, err)
+ require.Equal(t, model.ValVector, result.Type())
+ }
+
+}
+
+func TestQuerierDistributedExecution(t *testing.T) {
+ // e2e test setup
+ s, err := e2e.NewScenario(networkName)
+ require.NoError(t, err)
+ defer s.Close()
+
+ consul := e2edb.NewConsulWithName("consul")
+ memcached := e2ecache.NewMemcached()
+ require.NoError(t, s.StartAndWaitReady(consul, memcached))
+
+ // initialize the flags
+ baseFlags := mergeFlags(AlertmanagerLocalFlags(), BlocksStorageFlags())
+ flags := mergeFlags(
+ baseFlags,
+ map[string]string{
+ "-blocks-storage.tsdb.head-compaction-interval": "4m",
+ "-blocks-storage.tsdb.block-ranges-period": "2h",
+ "-blocks-storage.tsdb.ship-interval": "1h",
+ "-blocks-storage.bucket-store.sync-interval": "1s",
+ "-blocks-storage.tsdb.retention-period": "24h",
+ "-blocks-storage.bucket-store.index-cache.backend": tsdb.IndexCacheBackendInMemory,
+ "-querier.query-store-for-labels-enabled": "true",
+ // Ingester.
+ "-ring.store": "consul",
+ "-consul.hostname": consul.NetworkHTTPEndpoint(),
+ // Distributor.
+ "-distributor.replication-factor": "1",
+ // Store-gateway.
+ "-store-gateway.sharding-enabled": "false",
+ // Alert manager
+ "-alertmanager.web.external-url": "http://localhost/alertmanager",
+ "-frontend.query-vertical-shard-size": "1",
+ "-frontend.max-cache-freshness": "1m",
+ // enable experimental promQL funcs
+ "-querier.enable-promql-experimental-functions": "true",
+ // enable distributed execution (logical plan execution)
+ "-querier.distributed-exec-enabled": "true",
+ },
+ )
+
+ minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"])
+ require.NoError(t, s.StartAndWaitReady(minio))
+
+ // start services
+ queryScheduler := e2ecortex.NewQueryScheduler("query-scheduler", flags, "")
+ require.NoError(t, s.StartAndWaitReady(queryScheduler))
+ flags["-frontend.scheduler-address"] = queryScheduler.NetworkGRPCEndpoint()
+ flags["-querier.scheduler-address"] = queryScheduler.NetworkGRPCEndpoint()
+
+ queryFrontend := e2ecortex.NewQueryFrontend("query-frontend", flags, "")
+ require.NoError(t, s.Start(queryFrontend))
+
+ ingester := e2ecortex.NewIngester("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ distributor := e2ecortex.NewDistributor("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ querier1 := e2ecortex.NewQuerier("querier-1", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ querier2 := e2ecortex.NewQuerier("querier-2", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+
+ require.NoError(t, s.StartAndWaitReady(querier1, querier2, ingester, distributor))
+ require.NoError(t, s.WaitReady(queryFrontend))
+
+ // wait until distributor and queriers have updated the ring.
+ require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+ require.NoError(t, querier1.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+ require.NoError(t, querier2.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+
+ // push some series to Cortex.
+ distClient, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), "", "", "", userID)
+ require.NoError(t, err)
+
+ series1Timestamp := time.Now()
+ series2Timestamp := series1Timestamp.Add(time.Minute * 1)
+ series1, expectedVector1 := generateSeries("series_1", series1Timestamp, prompb.Label{Name: "series_1", Value: "series_1"})
+ series2, expectedVector2 := generateSeries("series_2", series2Timestamp, prompb.Label{Name: "series_2", Value: "series_2"})
+
+ res, err := distClient.Push(series1)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+
+ res, err = distClient.Push(series2)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+
+ for _, q := range []*e2ecortex.CortexService{querier1, querier2} {
+ c, err := e2ecortex.NewClient("", q.HTTPEndpoint(), "", "", userID)
+ require.NoError(t, err)
+
+ _, err = c.Query("series_1", series1Timestamp)
+ require.NoError(t, err)
+ }
+
+ require.NoError(t, queryScheduler.WaitSumMetrics(e2e.Equals(2), "cortex_query_scheduler_connected_querier_clients"))
+
+ // main tests
+ // - make sure queries are still executable with distributed execution enabled
+ var body []byte
+ res, body, err = distClient.QueryRaw(`sum({job="test"})`, series1Timestamp, map[string]string{})
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+ require.Equal(t, expectedVector1, string(body))
+
+ res, body, err = distClient.QueryRaw(`sum({job="test"})`, series2Timestamp, map[string]string{})
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+ require.Equal(t, expectedVector2, string(body))
+}
diff --git a/integration/query_fuzz_test.go b/integration/query_fuzz_test.go
index d4c501737e3..cc8d272fd2f 100644
--- a/integration/query_fuzz_test.go
+++ b/integration/query_fuzz_test.go
@@ -799,7 +799,7 @@ func TestVerticalShardingFuzz(t *testing.T) {
}
ps := promqlsmith.New(rnd, lbls, opts...)
- runQueryFuzzTestCases(t, ps, c1, c2, now, start, end, scrapeInterval, 1000, false)
+ runQueryFuzzTestCases(t, ps, c1, c2, end, start, end, scrapeInterval, 1000, false)
}
func TestProtobufCodecFuzz(t *testing.T) {
@@ -1838,7 +1838,7 @@ func runQueryFuzzTestCases(t *testing.T, ps *promqlsmith.PromQLSmith, c1, c2 *e2
failures++
}
} else if !cmp.Equal(tc.res1, tc.res2, comparer) {
- t.Logf("case %d results mismatch.\n%s: %s\nres1: %s\nres2: %s\n", i, qt, tc.query, tc.res1.String(), tc.res2.String())
+ t.Logf("case %d results mismatch.\n%s: %s\nres1 len: %d data: %s\nres2 len: %d data: %s\n", i, qt, tc.query, resultLength(tc.res1), tc.res1.String(), resultLength(tc.res2), tc.res2.String())
failures++
}
}
@@ -1872,3 +1872,17 @@ func isValidQuery(generatedQuery parser.Expr, skipStdAggregations bool) bool {
}
return isValid
}
+
+func resultLength(x model.Value) int {
+ vx, xvec := x.(model.Vector)
+ if xvec {
+ return vx.Len()
+ }
+
+ mx, xMatrix := x.(model.Matrix)
+ if xMatrix {
+ return mx.Len()
+ }
+ // Other type, return 0
+ return 0
+}
diff --git a/integration/query_response_compression_test.go b/integration/query_response_compression_test.go
index 58f54635190..8de7dafd1d6 100644
--- a/integration/query_response_compression_test.go
+++ b/integration/query_response_compression_test.go
@@ -6,10 +6,15 @@ package integration
import (
"compress/gzip"
"fmt"
+ "strings"
+
"net/http"
+ "net/url"
+
"testing"
"time"
+ "github.com/prometheus/prometheus/prompb"
"github.com/stretchr/testify/require"
"github.com/cortexproject/cortex/integration/e2e"
@@ -17,7 +22,7 @@ import (
"github.com/cortexproject/cortex/integration/e2ecortex"
)
-func TestQueryResponseCompression(t *testing.T) {
+func TestQuerierResponseCompression(t *testing.T) {
s, err := e2e.NewScenario(networkName)
require.NoError(t, err)
defer s.Close()
@@ -43,10 +48,16 @@ func TestQueryResponseCompression(t *testing.T) {
c, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), "", "", "", "user-1")
require.NoError(t, err)
- series, _ := generateSeries("series_1", now)
- res, err := c.Push(series)
- require.NoError(t, err)
- require.Equal(t, 200, res.StatusCode)
+ for i := 0; i < 200; i++ {
+ series, _ := generateSeries(
+ fmt.Sprintf("series_%d", i),
+ now,
+ prompb.Label{Name: fmt.Sprintf("label_%d", i), Value: strings.Repeat("val_", 10)},
+ )
+ res, err := c.Push(series)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+ }
querier := e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
require.NoError(t, s.StartAndWaitReady(querier))
@@ -54,7 +65,110 @@ func TestQueryResponseCompression(t *testing.T) {
// Wait until the querier has updated the ring.
require.NoError(t, querier.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
- endpoint := fmt.Sprintf("http://%s/api/prom/api/v1/query?query=series_1", querier.HTTPEndpoint())
+ query := `{__name__=~"series_.*"}`
+ u := &url.URL{
+ Scheme: "http",
+ Path: fmt.Sprintf("%s/api/prom/api/v1/query", querier.HTTPEndpoint()),
+ }
+ q := u.Query()
+ q.Set("query", query)
+ q.Set("time", e2ecortex.FormatTime(now))
+ u.RawQuery = q.Encode()
+ endpoint := u.String()
+
+ t.Run("Compressed", func(t *testing.T) {
+ req, err := http.NewRequest("GET", endpoint, nil)
+ require.NoError(t, err)
+ req.Header.Set("X-Scope-OrgID", "user-1")
+ req.Header.Set("Accept-Encoding", "gzip")
+
+ resp, err := http.DefaultClient.Do(req)
+ require.NoError(t, err)
+
+ defer resp.Body.Close()
+
+ require.Equal(t, http.StatusOK, resp.StatusCode)
+ require.Equal(t, "gzip", resp.Header.Get("Content-Encoding"))
+
+ gzipReader, err := gzip.NewReader(resp.Body)
+ require.NoError(t, err)
+ defer gzipReader.Close()
+ })
+
+ t.Run("Uncompressed", func(t *testing.T) {
+ req, err := http.NewRequest("GET", endpoint, nil)
+ require.NoError(t, err)
+ req.Header.Set("X-Scope-OrgID", "user-1")
+
+ resp, err := http.DefaultClient.Do(req)
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ require.Equal(t, http.StatusOK, resp.StatusCode)
+ require.Empty(t, resp.Header.Get("Content-Encoding"))
+ })
+}
+
+func TestQueryFrontendResponseCompression(t *testing.T) {
+ s, err := e2e.NewScenario(networkName)
+ require.NoError(t, err)
+ defer s.Close()
+
+ // Start dependencies.
+ consul := e2edb.NewConsul()
+ minio := e2edb.NewMinio(9000, bucketName)
+ require.NoError(t, s.StartAndWaitReady(consul, minio))
+
+ flags := mergeFlags(BlocksStorageFlags(), map[string]string{
+ "-api.response-compression-enabled": "true",
+ })
+
+ // Start the query-frontend.
+ queryFrontend := e2ecortex.NewQueryFrontend("query-frontend", flags, "")
+ require.NoError(t, s.Start(queryFrontend))
+
+ distributor := e2ecortex.NewDistributor("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ ingester := e2ecortex.NewIngester("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
+ require.NoError(t, s.StartAndWaitReady(distributor, ingester))
+
+ // Wait until the distributor has updated the ring.
+ require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+
+ querier := e2ecortex.NewQuerier("querierWithFrontend", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
+ "-querier.frontend-address": queryFrontend.NetworkGRPCEndpoint(),
+ }), "")
+
+ require.NoError(t, s.StartAndWaitReady(querier))
+ require.NoError(t, s.WaitReady(queryFrontend))
+
+ now := time.Now()
+
+ c, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), queryFrontend.HTTPEndpoint(), "", "", "user-1")
+ require.NoError(t, err)
+
+ for i := 0; i < 200; i++ {
+ series, _ := generateSeries(
+ fmt.Sprintf("series_%d", i),
+ now,
+ prompb.Label{Name: fmt.Sprintf("label_%d", i), Value: strings.Repeat("val_", 10)},
+ )
+ res, err := c.Push(series)
+ require.NoError(t, err)
+ require.Equal(t, 200, res.StatusCode)
+ }
+
+ require.NoError(t, querier.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
+
+ query := `{__name__=~"series_.*"}`
+ u := &url.URL{
+ Scheme: "http",
+ Path: fmt.Sprintf("%s/api/prom/api/v1/query", queryFrontend.HTTPEndpoint()),
+ }
+ q := u.Query()
+ q.Set("query", query)
+ q.Set("time", e2ecortex.FormatTime(now))
+ u.RawQuery = q.Encode()
+ endpoint := u.String()
t.Run("Compressed", func(t *testing.T) {
req, err := http.NewRequest("GET", endpoint, nil)
diff --git a/pkg/api/handlers.go b/pkg/api/handlers.go
index 9bcc6a6906e..9c65ec07683 100644
--- a/pkg/api/handlers.go
+++ b/pkg/api/handlers.go
@@ -19,13 +19,13 @@ import (
"github.com/prometheus/common/route"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/config"
- "github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/storage"
v1 "github.com/prometheus/prometheus/web/api/v1"
"github.com/weaveworks/common/instrument"
"github.com/weaveworks/common/middleware"
"github.com/cortexproject/cortex/pkg/api/queryapi"
+ "github.com/cortexproject/cortex/pkg/engine"
"github.com/cortexproject/cortex/pkg/querier"
"github.com/cortexproject/cortex/pkg/querier/codec"
"github.com/cortexproject/cortex/pkg/querier/stats"
@@ -163,7 +163,7 @@ func NewQuerierHandler(
cfg Config,
queryable storage.SampleAndChunkQueryable,
exemplarQueryable storage.ExemplarQueryable,
- engine promql.QueryEngine,
+ engine engine.QueryEngine,
metadataQuerier querier.MetadataQuerier,
reg prometheus.Registerer,
logger log.Logger,
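
Editor's note: `NewQuerierHandler` now takes the Cortex `engine.QueryEngine` wrapper instead of `promql.QueryEngine`. A minimal, illustrative construction of that wrapper (the option values are arbitrary, not defaults from this change) could look like:

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/prometheus/promql"

	"github.com/cortexproject/cortex/pkg/engine"
)

func main() {
	// engine.New wraps the Prometheus engine and, when enabled, the Thanos
	// engine behind the engine.QueryEngine interface consumed by NewQuerierHandler.
	var qe engine.QueryEngine = engine.New(
		promql.EngineOpts{MaxSamples: 100, Timeout: 2 * time.Minute},
		engine.ThanosEngineConfig{Enabled: true},
		prometheus.NewRegistry(),
	)
	_ = qe
}
```
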
diff --git a/pkg/api/queryapi/query_api.go b/pkg/api/queryapi/query_api.go
index e3793ef5bee..d3f578d5410 100644
--- a/pkg/api/queryapi/query_api.go
+++ b/pkg/api/queryapi/query_api.go
@@ -15,6 +15,7 @@ import (
"github.com/prometheus/prometheus/util/annotations"
"github.com/prometheus/prometheus/util/httputil"
v1 "github.com/prometheus/prometheus/web/api/v1"
+ "github.com/thanos-io/promql-engine/logicalplan"
"github.com/weaveworks/common/httpgrpc"
"github.com/cortexproject/cortex/pkg/engine"
@@ -25,7 +26,7 @@ import (
type QueryAPI struct {
queryable storage.SampleAndChunkQueryable
- queryEngine promql.QueryEngine
+ queryEngine engine.QueryEngine
now func() time.Time
statsRenderer v1.StatsRenderer
logger log.Logger
@@ -34,7 +35,7 @@ type QueryAPI struct {
}
func NewQueryAPI(
- qe promql.QueryEngine,
+ qe engine.QueryEngine,
q storage.SampleAndChunkQueryable,
statsRenderer v1.StatsRenderer,
logger log.Logger,
@@ -100,10 +101,29 @@ func (q *QueryAPI) RangeQueryHandler(r *http.Request) (result apiFuncResult) {
ctx = engine.AddEngineTypeToContext(ctx, r)
ctx = querier.AddBlockStoreTypeToContext(ctx, r.Header.Get(querier.BlockStoreTypeHeader))
- qry, err := q.queryEngine.NewRangeQuery(ctx, q.queryable, opts, r.FormValue("query"), convertMsToTime(start), convertMsToTime(end), convertMsToDuration(step))
- if err != nil {
- return invalidParamError(httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()), "query")
+
+ var qry promql.Query
+ startTime := convertMsToTime(start)
+ endTime := convertMsToTime(end)
+ stepDuration := convertMsToDuration(step)
+
+ byteLP := []byte(r.PostFormValue("plan"))
+ if len(byteLP) != 0 {
+ logicalPlan, err := logicalplan.Unmarshal(byteLP)
+ if err != nil {
+ return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("invalid logical plan: %v", err)}, nil, nil}
+ }
+ qry, err = q.queryEngine.MakeRangeQueryFromPlan(ctx, q.queryable, opts, logicalPlan, startTime, endTime, stepDuration, r.FormValue("query"))
+ if err != nil {
+ return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("failed to create range query from logical plan: %v", err)}, nil, nil}
+ }
+ } else { // if the logical plan field is empty, fall back to the query string
+ qry, err = q.queryEngine.NewRangeQuery(ctx, q.queryable, opts, r.FormValue("query"), startTime, endTime, stepDuration)
+ if err != nil {
+ return invalidParamError(httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()), "query")
+ }
}
+
// From now on, we must only return with a finalizer in the result (to
// be called by the caller) or call qry.Close ourselves (which is
// required in the case of a panic).
@@ -156,9 +176,25 @@ func (q *QueryAPI) InstantQueryHandler(r *http.Request) (result apiFuncResult) {
ctx = engine.AddEngineTypeToContext(ctx, r)
ctx = querier.AddBlockStoreTypeToContext(ctx, r.Header.Get(querier.BlockStoreTypeHeader))
- qry, err := q.queryEngine.NewInstantQuery(ctx, q.queryable, opts, r.FormValue("query"), convertMsToTime(ts))
- if err != nil {
- return invalidParamError(httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()), "query")
+
+ var qry promql.Query
+ tsTime := convertMsToTime(ts)
+
+ byteLP := []byte(r.PostFormValue("plan"))
+ if len(byteLP) != 0 {
+ logicalPlan, err := logicalplan.Unmarshal(byteLP)
+ if err != nil {
+ return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("invalid logical plan: %v", err)}, nil, nil}
+ }
+ qry, err = q.queryEngine.MakeInstantQueryFromPlan(ctx, q.queryable, opts, logicalPlan, tsTime, r.FormValue("query"))
+ if err != nil {
+ return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("failed to create instant query from logical plan: %v", err)}, nil, nil}
+ }
+ } else { // if the logical plan field is empty, fall back to the query string
+ qry, err = q.queryEngine.NewInstantQuery(ctx, q.queryable, opts, r.FormValue("query"), tsTime)
+ if err != nil {
+ return invalidParamError(httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()), "query")
+ }
}
// From now on, we must only return with a finalizer in the result (to
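
Editor's note: to make the handler change above concrete, here is a rough client-side sketch (not part of this change, mirroring the test helpers further down) of how a caller could hand the handler a pre-built logical plan. The plan is marshalled into the `plan` form field, while the `query` parameter stays populated so the handler can fall back to plain PromQL if the plan cannot be used; the endpoint address and tenant header are placeholders.

```go
package main

import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/prometheus/prometheus/promql/parser"
	"github.com/thanos-io/promql-engine/logicalplan"
	"github.com/thanos-io/promql-engine/query"
)

func main() {
	// Build a logical plan for `up` over a small range.
	expr, err := parser.NewParser("up", parser.WithFunctions(parser.Functions)).ParseExpr()
	if err != nil {
		panic(err)
	}
	start, end := time.Unix(1536673665, 0), time.Unix(1536673680, 0)
	plan := logicalplan.NewFromAST(expr, &query.Options{
		Start: start, End: end, Step: 5 * time.Second, StepsBatch: 10,
	}, logicalplan.PlanOptions{})
	planBytes, err := logicalplan.Marshal(plan.Root())
	if err != nil {
		panic(err)
	}

	// POST the marshalled plan in the "plan" form field; the query string in
	// the URL is kept as the fallback path.
	form := url.Values{}
	form.Set("plan", string(planBytes))
	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/prom/api/v1/query_range?query=up&start=1536673665&end=1536673680&step=5",
		strings.NewReader(form.Encode()))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("X-Scope-OrgID", "user-1")
	fmt.Println(req.URL.String())
}
```
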
diff --git a/pkg/api/queryapi/query_api_test.go b/pkg/api/queryapi/query_api_test.go
index 028184a12b8..0532bb6e227 100644
--- a/pkg/api/queryapi/query_api_test.go
+++ b/pkg/api/queryapi/query_api_test.go
@@ -7,21 +7,28 @@ import (
"io"
"net/http"
"net/http/httptest"
+ "net/url"
+ "strings"
"testing"
"time"
"github.com/go-kit/log"
"github.com/gorilla/mux"
"github.com/grafana/regexp"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
+ "github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/annotations"
v1 "github.com/prometheus/prometheus/web/api/v1"
"github.com/stretchr/testify/require"
+ "github.com/thanos-io/promql-engine/logicalplan"
+ "github.com/thanos-io/promql-engine/query"
"github.com/weaveworks/common/user"
+ engine2 "github.com/cortexproject/cortex/pkg/engine"
"github.com/cortexproject/cortex/pkg/querier"
"github.com/cortexproject/cortex/pkg/querier/series"
"github.com/cortexproject/cortex/pkg/querier/stats"
@@ -64,10 +71,14 @@ func (mockQuerier) Close() error {
}
func Test_CustomAPI(t *testing.T) {
- engine := promql.NewEngine(promql.EngineOpts{
- MaxSamples: 100,
- Timeout: time.Second * 2,
- })
+ engine := engine2.New(
+ promql.EngineOpts{
+ MaxSamples: 100,
+ Timeout: time.Second * 2,
+ },
+ engine2.ThanosEngineConfig{Enabled: false},
+ prometheus.NewRegistry())
+
mockQueryable := &mockSampleAndChunkQueryable{
queryableFn: func(_, _ int64) (storage.Querier, error) {
return mockQuerier{
@@ -175,10 +186,10 @@ func Test_CustomAPI(t *testing.T) {
c := NewQueryAPI(engine, mockQueryable, querier.StatsRenderer, log.NewNopLogger(), []v1.Codec{v1.JSONCodec{}}, regexp.MustCompile(".*"))
router := mux.NewRouter()
- router.Path("/api/v1/query").Methods("GET").Handler(c.Wrap(c.InstantQueryHandler))
- router.Path("/api/v1/query_range").Methods("GET").Handler(c.Wrap(c.RangeQueryHandler))
+ router.Path("/api/v1/query").Methods("POST").Handler(c.Wrap(c.InstantQueryHandler))
+ router.Path("/api/v1/query_range").Methods("POST").Handler(c.Wrap(c.RangeQueryHandler))
- req := httptest.NewRequest(http.MethodGet, test.path, nil)
+ req := httptest.NewRequest(http.MethodPost, test.path, nil)
ctx := context.Background()
_, ctx = stats.ContextWithEmptyStats(ctx)
req = req.WithContext(user.InjectOrgID(ctx, "user1"))
@@ -209,10 +220,14 @@ func (m *mockCodec) Encode(_ *v1.Response) ([]byte, error) {
}
func Test_InvalidCodec(t *testing.T) {
- engine := promql.NewEngine(promql.EngineOpts{
- MaxSamples: 100,
- Timeout: time.Second * 2,
- })
+ engine := engine2.New(
+ promql.EngineOpts{
+ MaxSamples: 100,
+ Timeout: time.Second * 2,
+ },
+ engine2.ThanosEngineConfig{Enabled: false},
+ prometheus.NewRegistry())
+
mockQueryable := &mockSampleAndChunkQueryable{
queryableFn: func(_, _ int64) (storage.Querier, error) {
return mockQuerier{
@@ -231,9 +246,9 @@ func Test_InvalidCodec(t *testing.T) {
queryAPI := NewQueryAPI(engine, mockQueryable, querier.StatsRenderer, log.NewNopLogger(), []v1.Codec{&mockCodec{}}, regexp.MustCompile(".*"))
router := mux.NewRouter()
- router.Path("/api/v1/query").Methods("GET").Handler(queryAPI.Wrap(queryAPI.InstantQueryHandler))
+ router.Path("/api/v1/query").Methods("POST").Handler(queryAPI.Wrap(queryAPI.InstantQueryHandler))
- req := httptest.NewRequest(http.MethodGet, "/api/v1/query?query=test", nil)
+ req := httptest.NewRequest(http.MethodPost, "/api/v1/query?query=test", nil)
ctx := context.Background()
_, ctx = stats.ContextWithEmptyStats(ctx)
req = req.WithContext(user.InjectOrgID(ctx, "user1"))
@@ -244,10 +259,14 @@ func Test_InvalidCodec(t *testing.T) {
}
func Test_CustomAPI_StatsRenderer(t *testing.T) {
- engine := promql.NewEngine(promql.EngineOpts{
- MaxSamples: 100,
- Timeout: time.Second * 2,
- })
+ engine := engine2.New(
+ promql.EngineOpts{
+ MaxSamples: 100,
+ Timeout: time.Second * 2,
+ },
+ engine2.ThanosEngineConfig{Enabled: false},
+ prometheus.NewRegistry())
+
mockQueryable := &mockSampleAndChunkQueryable{
queryableFn: func(_, _ int64) (storage.Querier, error) {
return mockQuerier{
@@ -269,9 +288,9 @@ func Test_CustomAPI_StatsRenderer(t *testing.T) {
queryAPI := NewQueryAPI(engine, mockQueryable, querier.StatsRenderer, log.NewNopLogger(), []v1.Codec{v1.JSONCodec{}}, regexp.MustCompile(".*"))
router := mux.NewRouter()
- router.Path("/api/v1/query_range").Methods("GET").Handler(queryAPI.Wrap(queryAPI.RangeQueryHandler))
+ router.Path("/api/v1/query_range").Methods("POST").Handler(queryAPI.Wrap(queryAPI.RangeQueryHandler))
- req := httptest.NewRequest(http.MethodGet, "/api/v1/query_range?end=1536673680&query=test&start=1536673665&step=5", nil)
+ req := httptest.NewRequest(http.MethodPost, "/api/v1/query_range?end=1536673680&query=test&start=1536673665&step=5", nil)
ctx := context.Background()
_, ctx = stats.ContextWithEmptyStats(ctx)
req = req.WithContext(user.InjectOrgID(ctx, "user1"))
@@ -285,3 +304,201 @@ func Test_CustomAPI_StatsRenderer(t *testing.T) {
require.Equal(t, uint64(4), queryStats.LoadPeakSamples())
require.Equal(t, uint64(4), queryStats.LoadScannedSamples())
}
+
+func Test_Logicalplan_Requests(t *testing.T) {
+ engine := engine2.New(
+ promql.EngineOpts{
+ MaxSamples: 100,
+ Timeout: time.Second * 2,
+ },
+ engine2.ThanosEngineConfig{Enabled: true},
+ prometheus.NewRegistry(),
+ )
+
+ mockMatrix := model.Matrix{
+ {
+ Metric: model.Metric{"__name__": "test", "foo": "bar"},
+ Values: []model.SamplePair{
+ {Timestamp: 1536673665000, Value: 0},
+ {Timestamp: 1536673670000, Value: 1},
+ },
+ },
+ }
+
+ mockQueryable := &mockSampleAndChunkQueryable{
+ queryableFn: func(_, _ int64) (storage.Querier, error) {
+ return mockQuerier{matrix: mockMatrix}, nil
+ },
+ }
+
+ tests := []struct {
+ name string
+ path string
+ start int64
+ end int64
+ stepDuration int64
+ requestBody func(t *testing.T) []byte
+ expectedCode int
+ expectedBody string
+ }{
+ {
+ name: "[Range Query] with valid logical plan and empty query string",
+ path: "/api/v1/query_range?end=1536673680&query=&start=1536673665&step=5",
+ start: 1536673665,
+ end: 1536673680,
+ stepDuration: 5,
+ requestBody: func(t *testing.T) []byte {
+ return createTestLogicalPlan(t, 1536673665, 1536673680, 5)
+ },
+ expectedCode: http.StatusOK,
+ expectedBody: `{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"test","foo":"bar"},"values":[[1536673665,"0"],[1536673670,"1"],[1536673675,"1"],[1536673680,"1"]]}]}}`,
+ },
+ {
+ name: "[Range Query] with corrupted logical plan", // will throw an error from the unmarshal step
+ path: "/api/v1/query_range?end=1536673680&query=test&start=1536673665&step=5",
+ start: 1536673665,
+ end: 1536673680,
+ stepDuration: 5,
+ requestBody: func(t *testing.T) []byte {
+ return append(createTestLogicalPlan(t, 1536673665, 1536673680, 5), []byte("random data")...)
+ },
+ expectedCode: http.StatusInternalServerError,
+ expectedBody: `{"status":"error","errorType":"server_error","error":"invalid logical plan: invalid character 'r' after top-level value"}`,
+ },
+ {
+ name: "[Range Query] with empty body and non-empty query string", // fall back to promql query execution
+ path: "/api/v1/query_range?end=1536673680&query=test&start=1536673665&step=5",
+ start: 1536673665,
+ end: 1536673680,
+ stepDuration: 5,
+ requestBody: func(t *testing.T) []byte {
+ return []byte{}
+ },
+ expectedCode: http.StatusOK,
+ expectedBody: `{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"test","foo":"bar"},"values":[[1536673665,"0"],[1536673670,"1"],[1536673675,"1"],[1536673680,"1"]]}]}}`,
+ },
+ {
+ name: "[Range Query] with empty body and empty query string", // falls back to promql query execution, but errors because the query string is empty
+ path: "/api/v1/query_range?end=1536673680&query=&start=1536673665&step=5",
+ start: 1536673665,
+ end: 1536673680,
+ stepDuration: 5,
+ requestBody: func(t *testing.T) []byte {
+ return []byte{}
+ },
+ expectedCode: http.StatusBadRequest,
+ expectedBody: "{\"status\":\"error\",\"errorType\":\"bad_data\",\"error\":\"invalid parameter \\\"query\\\"; unknown position: parse error: no expression found in input\"}",
+ },
+ {
+ name: "[Instant Query] with valid logical plan and non-empty query string",
+ path: "/api/v1/query?query=test&time=1536673670",
+ start: 1536673670,
+ end: 1536673670,
+ stepDuration: 0,
+ requestBody: func(t *testing.T) []byte {
+ return createTestLogicalPlan(t, 1536673670, 1536673670, 0)
+ },
+ expectedCode: http.StatusOK,
+ expectedBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"test","foo":"bar"},"value":[1536673670,"1"]}]}}`,
+ },
+ {
+ name: "[Instant Query] with corrupted logical plan",
+ path: "/api/v1/query?query=test&time=1536673670",
+ start: 1536673670,
+ end: 1536673670,
+ stepDuration: 0,
+ requestBody: func(t *testing.T) []byte {
+ return append(createTestLogicalPlan(t, 1536673670, 1536673670, 0), []byte("random data")...)
+ },
+ expectedCode: http.StatusInternalServerError,
+ expectedBody: `{"status":"error","errorType":"server_error","error":"invalid logical plan: invalid character 'r' after top-level value"}`,
+ },
+ {
+ name: "[Instant Query] with empty body and non-empty query string",
+ path: "/api/v1/query?query=test&time=1536673670",
+ start: 1536673670,
+ end: 1536673670,
+ stepDuration: 0,
+ requestBody: func(t *testing.T) []byte {
+ return []byte{}
+ },
+ expectedCode: http.StatusOK,
+ expectedBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"test","foo":"bar"},"value":[1536673670,"1"]}]}}`,
+ },
+ {
+ name: "[Instant Query] with empty body and empty query string",
+ path: "/api/v1/query?query=&time=1536673670",
+ start: 1536673670,
+ end: 1536673670,
+ stepDuration: 0,
+ requestBody: func(t *testing.T) []byte {
+ return []byte{}
+ },
+ expectedCode: http.StatusBadRequest,
+ expectedBody: "{\"status\":\"error\",\"errorType\":\"bad_data\",\"error\":\"invalid parameter \\\"query\\\"; unknown position: parse error: no expression found in input\"}",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c := NewQueryAPI(engine, mockQueryable, querier.StatsRenderer, log.NewNopLogger(), []v1.Codec{v1.JSONCodec{}}, regexp.MustCompile(".*"))
+ router := mux.NewRouter()
+ router.Path("/api/v1/query").Methods("POST").Handler(c.Wrap(c.InstantQueryHandler))
+ router.Path("/api/v1/query_range").Methods("POST").Handler(c.Wrap(c.RangeQueryHandler))
+
+ req := createTestRequest(tt.path, tt.requestBody(t))
+ rec := httptest.NewRecorder()
+ router.ServeHTTP(rec, req)
+
+ require.Equal(t, tt.expectedCode, rec.Code)
+ body, err := io.ReadAll(rec.Body)
+ require.NoError(t, err)
+ require.Equal(t, tt.expectedBody, string(body))
+ })
+ }
+}
+
+func createTestRequest(path string, planBytes []byte) *http.Request {
+ form := url.Values{}
+ form.Set("plan", string(planBytes))
+ req := httptest.NewRequest(http.MethodPost, path, io.NopCloser(strings.NewReader(form.Encode())))
+
+ req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+ ctx := context.Background()
+ _, ctx = stats.ContextWithEmptyStats(ctx)
+ return req.WithContext(user.InjectOrgID(ctx, "user1"))
+}
+
+func createTestLogicalPlan(t *testing.T, start, end int64, stepDuration int64) []byte {
+ startTime, endTime := convertMsToTime(start), convertMsToTime(end)
+ step := convertMsToDuration(stepDuration)
+
+ qOpts := query.Options{
+ Start: startTime,
+ End: startTime,
+ Step: 0,
+ StepsBatch: 10,
+ LookbackDelta: 0,
+ EnablePerStepStats: false,
+ }
+
+ if step != 0 {
+ qOpts.End = endTime
+ qOpts.Step = step
+ }
+
+ // use a different metric name ("up") in the plan than in the query string ("test") so that a debugger
+ // can tell which of the two (logical plan vs. query string parameter) the queriers actually executed
+ expr, err := parser.NewParser("up", parser.WithFunctions(parser.Functions)).ParseExpr()
+ require.NoError(t, err)
+
+ planOpts := logicalplan.PlanOptions{
+ DisableDuplicateLabelCheck: false,
+ }
+
+ logicalPlan := logicalplan.NewFromAST(expr, &qOpts, planOpts)
+ byteval, err := logicalplan.Marshal(logicalPlan.Root())
+ require.NoError(t, err)
+
+ return byteval
+}
diff --git a/pkg/configs/userconfig/config.go b/pkg/configs/userconfig/config.go
index 3900ff9f41b..25e7d39b38b 100644
--- a/pkg/configs/userconfig/config.go
+++ b/pkg/configs/userconfig/config.go
@@ -9,10 +9,10 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"gopkg.in/yaml.v3"
+ "github.com/cortexproject/cortex/pkg/parser"
util_log "github.com/cortexproject/cortex/pkg/util/log"
)
diff --git a/pkg/configs/userconfig/config_test.go b/pkg/configs/userconfig/config_test.go
index 06893bd605a..392ca911ca9 100644
--- a/pkg/configs/userconfig/config_test.go
+++ b/pkg/configs/userconfig/config_test.go
@@ -12,12 +12,12 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
+ "github.com/cortexproject/cortex/pkg/parser"
util_log "github.com/cortexproject/cortex/pkg/util/log"
)
diff --git a/pkg/cortex/cortex.go b/pkg/cortex/cortex.go
index b141adc8127..2a09f2154b0 100644
--- a/pkg/cortex/cortex.go
+++ b/pkg/cortex/cortex.go
@@ -14,7 +14,6 @@ import (
"github.com/go-kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/prometheus/promql"
prom_storage "github.com/prometheus/prometheus/storage"
"github.com/weaveworks/common/server"
"github.com/weaveworks/common/signals"
@@ -35,6 +34,7 @@ import (
"github.com/cortexproject/cortex/pkg/cortex/storage"
"github.com/cortexproject/cortex/pkg/cortexpb"
"github.com/cortexproject/cortex/pkg/distributor"
+ "github.com/cortexproject/cortex/pkg/engine"
"github.com/cortexproject/cortex/pkg/flusher"
"github.com/cortexproject/cortex/pkg/frontend"
frontendv1 "github.com/cortexproject/cortex/pkg/frontend/v1"
@@ -322,7 +322,7 @@ type Cortex struct {
QuerierQueryable prom_storage.SampleAndChunkQueryable
ExemplarQueryable prom_storage.ExemplarQueryable
MetadataQuerier querier.MetadataQuerier
- QuerierEngine promql.QueryEngine
+ QuerierEngine engine.QueryEngine
QueryFrontendTripperware tripperware.Tripperware
ResourceMonitor *resource.Monitor
@@ -379,6 +379,7 @@ func New(cfg Config) (*Cortex, error) {
return nil, err
}
+ cortex.setupPromQLFunctions()
return cortex, nil
}
@@ -537,3 +538,9 @@ func (t *Cortex) readyHandler(sm *services.Manager) http.HandlerFunc {
util.WriteTextResponse(w, "ready")
}
}
+
+func (t *Cortex) setupPromQLFunctions() {
+ // The holt_winters function was renamed to double_exponential_smoothing and has been experimental since Prometheus v3 (https://github.com/prometheus/prometheus/pull/14930).
+ // Cortex keeps holt_winters available for users who still rely on this function.
+ querier.EnableExperimentalPromQLFunctions(t.Cfg.Querier.EnablePromQLExperimentalFunctions, true)
+}
diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go
index a13f35e6a9d..1a147bcdf0a 100644
--- a/pkg/cortex/modules.go
+++ b/pkg/cortex/modules.go
@@ -4,6 +4,7 @@ import (
"context"
"flag"
"fmt"
+
"log/slog"
"net/http"
"runtime"
@@ -18,8 +19,6 @@ import (
"github.com/prometheus/prometheus/rules"
prom_storage "github.com/prometheus/prometheus/storage"
"github.com/thanos-io/objstore"
- "github.com/thanos-io/promql-engine/engine"
- "github.com/thanos-io/promql-engine/logicalplan"
"github.com/thanos-io/thanos/pkg/discovery/dns"
"github.com/thanos-io/thanos/pkg/querysharding"
httpgrpc_server "github.com/weaveworks/common/httpgrpc/server"
@@ -32,6 +31,7 @@ import (
configAPI "github.com/cortexproject/cortex/pkg/configs/api"
"github.com/cortexproject/cortex/pkg/configs/db"
"github.com/cortexproject/cortex/pkg/distributor"
+ "github.com/cortexproject/cortex/pkg/engine"
"github.com/cortexproject/cortex/pkg/flusher"
"github.com/cortexproject/cortex/pkg/frontend"
"github.com/cortexproject/cortex/pkg/frontend/transport"
@@ -44,6 +44,7 @@ import (
"github.com/cortexproject/cortex/pkg/querier/tripperware/instantquery"
"github.com/cortexproject/cortex/pkg/querier/tripperware/queryrange"
querier_worker "github.com/cortexproject/cortex/pkg/querier/worker"
+ cortexquerysharding "github.com/cortexproject/cortex/pkg/querysharding"
"github.com/cortexproject/cortex/pkg/ring"
"github.com/cortexproject/cortex/pkg/ring/kv/codec"
"github.com/cortexproject/cortex/pkg/ring/kv/memberlist"
@@ -511,7 +512,13 @@ func (t *Cortex) initFlusher() (serv services.Service, err error) {
// initQueryFrontendTripperware instantiates the tripperware used by the query frontend
// to optimize Prometheus query requests.
func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err error) {
- queryAnalyzer := querysharding.NewQueryAnalyzer()
+ var queryAnalyzer querysharding.Analyzer
+ queryAnalyzer = querysharding.NewQueryAnalyzer()
+ if t.Cfg.Querier.EnableParquetQueryable {
+ // Disable vertical sharding for binary expressions with `ignoring` when the parquet queryable is enabled.
+ queryAnalyzer = cortexquerysharding.NewDisableBinaryExpressionAnalyzer(queryAnalyzer)
+ }
+
// PrometheusCodec is a codec to encode and decode Prometheus query range requests and responses.
prometheusCodec := queryrange.NewPrometheusCodec(false, t.Cfg.Querier.ResponseCompression, t.Cfg.API.QuerierDefaultCodec)
// ShardedPrometheusCodec is same as PrometheusCodec but to be used on the sharded queries (it sum up the stats)
@@ -534,7 +541,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
shardedPrometheusCodec,
t.Cfg.Querier.LookbackDelta,
t.Cfg.Querier.DefaultEvaluationInterval,
- t.Cfg.Frontend.DistributedExecEnabled,
+ t.Cfg.Querier.DistributedExecEnabled,
)
if err != nil {
return nil, err
@@ -547,7 +554,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
queryAnalyzer,
t.Cfg.Querier.LookbackDelta,
t.Cfg.Querier.DefaultEvaluationInterval,
- t.Cfg.Frontend.DistributedExecEnabled)
+ t.Cfg.Querier.DistributedExecEnabled)
if err != nil {
return nil, err
}
@@ -564,7 +571,6 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
t.Cfg.Querier.DefaultEvaluationInterval,
t.Cfg.Querier.MaxSubQuerySteps,
t.Cfg.Querier.LookbackDelta,
- t.Cfg.Querier.EnablePromQLExperimentalFunctions,
)
return services.NewIdleService(nil, func(_ error) error {
@@ -643,7 +649,6 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
if t.Cfg.ExternalPusher != nil && t.Cfg.ExternalQueryable != nil {
rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer)
- var queryEngine promql.QueryEngine
opts := promql.EngineOpts{
Logger: util_log.SLogger,
Reg: rulerRegisterer,
@@ -658,15 +663,7 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
return t.Cfg.Querier.DefaultEvaluationInterval.Milliseconds()
},
}
- if t.Cfg.Querier.ThanosEngine {
- queryEngine = engine.New(engine.Opts{
- EngineOpts: opts,
- LogicalOptimizers: logicalplan.AllOptimizers,
- EnableAnalysis: true,
- })
- } else {
- queryEngine = promql.NewEngine(opts)
- }
+ queryEngine := engine.New(opts, t.Cfg.Ruler.ThanosEngine, rulerRegisterer)
managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Cfg.ExternalPusher, t.Cfg.ExternalQueryable, queryEngine, t.Overrides, metrics, prometheus.DefaultRegisterer)
manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.Overrides, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger)
diff --git a/pkg/engine/config.go b/pkg/engine/config.go
new file mode 100644
index 00000000000..e964196bb0e
--- /dev/null
+++ b/pkg/engine/config.go
@@ -0,0 +1,63 @@
+package engine
+
+import (
+ "flag"
+ "fmt"
+ "strings"
+
+ "github.com/thanos-io/promql-engine/logicalplan"
+)
+
+var supportedOptimizers = []string{"default", "all", "propagate-matchers", "sort-matchers", "merge-selects", "detect-histogram-stats"}
+
+// ThanosEngineConfig contains the configuration to create the query engine.
+type ThanosEngineConfig struct {
+ Enabled bool `yaml:"enabled"`
+ EnableXFunctions bool `yaml:"enable_x_functions"`
+ Optimizers string `yaml:"optimizers"`
+ LogicalOptimizers []logicalplan.Optimizer `yaml:"-"`
+}
+
+func (cfg *ThanosEngineConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
+ f.BoolVar(&cfg.Enabled, prefix+"thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.")
+ f.BoolVar(&cfg.EnableXFunctions, prefix+"enable-x-functions", false, "Enable xincrease, xdelta, xrate, etc. from the Thanos engine.")
+ f.StringVar(&cfg.Optimizers, prefix+"optimizers", "default", "Logical plan optimizers. Multiple optimizers can be provided as a comma-separated list. Supported values: "+strings.Join(supportedOptimizers, ", "))
+}
+
+func (cfg *ThanosEngineConfig) Validate() error {
+ splitOptimizers := strings.Split(cfg.Optimizers, ",")
+
+ for _, optimizer := range splitOptimizers {
+ if optimizer == "all" || optimizer == "default" {
+ if len(splitOptimizers) > 1 {
+ return fmt.Errorf("special optimizer %s cannot be combined with other optimizers", optimizer)
+ }
+ }
+ optimizers, err := getOptimizer(optimizer)
+ if err != nil {
+ return err
+ }
+ cfg.LogicalOptimizers = append(cfg.LogicalOptimizers, optimizers...)
+ }
+
+ return nil
+}
+
+func getOptimizer(name string) ([]logicalplan.Optimizer, error) {
+ switch name {
+ case "default":
+ return logicalplan.DefaultOptimizers, nil
+ case "all":
+ return logicalplan.AllOptimizers, nil
+ case "propagate-matchers":
+ return []logicalplan.Optimizer{logicalplan.PropagateMatchersOptimizer{}}, nil
+ case "sort-matchers":
+ return []logicalplan.Optimizer{logicalplan.SortMatchers{}}, nil
+ case "merge-selects":
+ return []logicalplan.Optimizer{logicalplan.MergeSelectsOptimizer{}}, nil
+ case "detect-histogram-stats":
+ return []logicalplan.Optimizer{logicalplan.DetectHistogramStatsOptimizer{}}, nil
+ default:
+ return nil, fmt.Errorf("unknown optimizer %s", name)
+ }
+}
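
Editor's note: a small usage sketch (illustrative only, not part of the change) of how the optimizer flag above resolves into logical optimizers; the flag value shown is just an example.

```go
package main

import (
	"fmt"

	"github.com/cortexproject/cortex/pkg/engine"
)

func main() {
	cfg := engine.ThanosEngineConfig{
		Enabled:    true,
		Optimizers: "merge-selects,sort-matchers", // comma-separated value of -querier.optimizers
	}
	// Validate resolves the names into logicalplan optimizers; "default" and
	// "all" are special values that cannot be combined with other entries.
	if err := cfg.Validate(); err != nil {
		panic(err)
	}
	fmt.Printf("resolved %d logical optimizers\n", len(cfg.LogicalOptimizers))
}
```
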
diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go
index 2468bcfccbc..0701fae5891 100644
--- a/pkg/engine/engine.go
+++ b/pkg/engine/engine.go
@@ -44,6 +44,13 @@ func GetEngineType(ctx context.Context) Type {
return None
}
+type QueryEngine interface {
+ NewInstantQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, ts time.Time) (promql.Query, error)
+ NewRangeQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, interval time.Duration) (promql.Query, error)
+ MakeInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, ts time.Time, qs string) (promql.Query, error)
+ MakeRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, start time.Time, end time.Time, interval time.Duration, qs string) (promql.Query, error)
+}
+
type Engine struct {
prometheusEngine *promql.Engine
thanosEngine *thanosengine.Engine
@@ -52,15 +59,16 @@ type Engine struct {
engineSwitchQueriesTotal *prometheus.CounterVec
}
-func New(opts promql.EngineOpts, enableThanosEngine bool, reg prometheus.Registerer) *Engine {
+func New(opts promql.EngineOpts, thanosEngineCfg ThanosEngineConfig, reg prometheus.Registerer) *Engine {
prometheusEngine := promql.NewEngine(opts)
var thanosEngine *thanosengine.Engine
- if enableThanosEngine {
+ if thanosEngineCfg.Enabled {
thanosEngine = thanosengine.New(thanosengine.Opts{
EngineOpts: opts,
- LogicalOptimizers: logicalplan.DefaultOptimizers,
+ LogicalOptimizers: thanosEngineCfg.LogicalOptimizers,
EnableAnalysis: true,
+ EnableXFunctions: thanosEngineCfg.EnableXFunctions,
})
}
@@ -127,6 +135,55 @@ prom:
return qf.prometheusEngine.NewRangeQuery(ctx, q, opts, qs, start, end, interval)
}
+func (qf *Engine) MakeInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, ts time.Time, qs string) (promql.Query, error) {
+ if engineType := GetEngineType(ctx); engineType == Prometheus {
+ qf.engineSwitchQueriesTotal.WithLabelValues(string(Prometheus)).Inc()
+ goto prom
+ } else if engineType == Thanos {
+ qf.engineSwitchQueriesTotal.WithLabelValues(string(Thanos)).Inc()
+ }
+
+ if qf.thanosEngine != nil {
+ res, err := qf.thanosEngine.MakeInstantQueryFromPlan(ctx, q, fromPromQLOpts(opts), root, ts)
+ if err != nil {
+ if thanosengine.IsUnimplemented(err) {
+ // fall back to the Prometheus engine
+ qf.fallbackQueriesTotal.Inc()
+ goto prom
+ }
+ return nil, err
+ }
+ return res, nil
+ }
+
+prom:
+ return qf.prometheusEngine.NewInstantQuery(ctx, q, opts, qs, ts)
+}
+
+func (qf *Engine) MakeRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, start time.Time, end time.Time, interval time.Duration, qs string) (promql.Query, error) {
+ if engineType := GetEngineType(ctx); engineType == Prometheus {
+ qf.engineSwitchQueriesTotal.WithLabelValues(string(Prometheus)).Inc()
+ goto prom
+ } else if engineType == Thanos {
+ qf.engineSwitchQueriesTotal.WithLabelValues(string(Thanos)).Inc()
+ }
+ if qf.thanosEngine != nil {
+ res, err := qf.thanosEngine.MakeRangeQueryFromPlan(ctx, q, fromPromQLOpts(opts), root, start, end, interval)
+ if err != nil {
+ if thanosengine.IsUnimplemented(err) {
+ // fall back to the Prometheus engine
+ qf.fallbackQueriesTotal.Inc()
+ goto prom
+ }
+ return nil, err
+ }
+ return res, nil
+ }
+
+prom:
+ return qf.prometheusEngine.NewRangeQuery(ctx, q, opts, qs, start, end, interval)
+}
+
func fromPromQLOpts(opts promql.QueryOpts) *thanosengine.QueryOpts {
if opts == nil {
return &thanosengine.QueryOpts{}
diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go
index facae793edc..30caa4828d1 100644
--- a/pkg/engine/engine_test.go
+++ b/pkg/engine/engine_test.go
@@ -3,6 +3,7 @@ package engine
import (
"bytes"
"context"
+ "fmt"
"net/http"
"testing"
"time"
@@ -13,6 +14,10 @@ import (
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
+ "github.com/thanos-io/promql-engine/execution/parse"
+ "github.com/thanos-io/promql-engine/logicalplan"
+ "github.com/thanos-io/promql-engine/query"
+
"github.com/stretchr/testify/require"
utillog "github.com/cortexproject/cortex/pkg/util/log"
@@ -37,7 +42,7 @@ func TestEngine_Fallback(t *testing.T) {
Logger: utillog.GoKitLogToSlog(log.NewNopLogger()),
Reg: reg,
}
- queryEngine := New(opts, true, reg)
+ queryEngine := New(opts, ThanosEngineConfig{Enabled: true}, reg)
// instant query, should go to fallback
_, _ = queryEngine.NewInstantQuery(ctx, queryable, nil, "unimplemented(foo)", now)
@@ -68,7 +73,7 @@ func TestEngine_Switch(t *testing.T) {
Logger: utillog.GoKitLogToSlog(log.NewNopLogger()),
Reg: reg,
}
- queryEngine := New(opts, true, reg)
+ queryEngine := New(opts, ThanosEngineConfig{Enabled: true}, reg)
// Query Prometheus engine
r := &http.Request{Header: http.Header{}}
@@ -96,3 +101,122 @@ func TestEngine_Switch(t *testing.T) {
cortex_engine_switch_queries_total{engine_type="thanos"} 2
`), "cortex_engine_switch_queries_total"))
}
+
+func TestEngine_XFunctions(t *testing.T) {
+ ctx := context.Background()
+ reg := prometheus.NewRegistry()
+
+ now := time.Now()
+ start := time.Now().Add(-time.Minute * 5)
+ step := time.Minute
+ queryable := promqltest.LoadedStorage(t, "")
+ opts := promql.EngineOpts{
+ Logger: utillog.GoKitLogToSlog(log.NewNopLogger()),
+ Reg: reg,
+ }
+ queryEngine := New(opts, ThanosEngineConfig{Enabled: true, EnableXFunctions: true}, reg)
+
+ for name := range parse.XFunctions {
+ t.Run(name, func(t *testing.T) {
+ _, err := queryEngine.NewInstantQuery(ctx, queryable, nil, fmt.Sprintf("%s(foo[1m])", name), now)
+ require.NoError(t, err)
+
+ _, err = queryEngine.NewRangeQuery(ctx, queryable, nil, fmt.Sprintf("%s(foo[1m])", name), start, now, step)
+ require.NoError(t, err)
+ })
+ }
+}
+
+func TestEngine_With_Logical_Plan(t *testing.T) {
+ ctx := context.Background()
+ reg := prometheus.NewRegistry()
+
+ now := time.Now()
+ start := time.Now().Add(-time.Minute * 5)
+ step := time.Minute
+ queryable := promqltest.LoadedStorage(t, "")
+ opts := promql.EngineOpts{
+ Logger: utillog.GoKitLogToSlog(log.NewNopLogger()),
+ Reg: reg,
+ }
+ queryEngine := New(opts, ThanosEngineConfig{Enabled: true}, reg)
+
+ range_lp := createTestLogicalPlan(t, start, now, step, "up")
+ instant_lp := createTestLogicalPlan(t, now, now, 0, "up")
+
+ r := &http.Request{Header: http.Header{}}
+ r.Header.Set(TypeHeader, string(Thanos))
+ ctx = AddEngineTypeToContext(ctx, r)
+
+ // Case 1: Executing logical plan with thanos engine
+ _, _ = queryEngine.MakeInstantQueryFromPlan(ctx, queryable, nil, instant_lp.Root(), now, "up")
+ require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
+ # HELP cortex_thanos_engine_fallback_queries_total Total number of fallback queries due to not implementation in thanos engine
+ # TYPE cortex_thanos_engine_fallback_queries_total counter
+ cortex_thanos_engine_fallback_queries_total 0
+ `), "cortex_thanos_engine_fallback_queries_total"))
+
+ _, _ = queryEngine.MakeRangeQueryFromPlan(ctx, queryable, nil, range_lp.Root(), start, now, step, "up")
+ require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
+ # HELP cortex_thanos_engine_fallback_queries_total Total number of fallback queries due to not implementation in thanos engine
+ # TYPE cortex_thanos_engine_fallback_queries_total counter
+ cortex_thanos_engine_fallback_queries_total 0
+ `), "cortex_thanos_engine_fallback_queries_total"))
+
+ // Case 2: Logical plan that thanos engine cannot execute (so it will fall back to prometheus engine)
+ err_range_lp := createTestLogicalPlan(t, start, now, step, "up[10]")
+ _, _ = queryEngine.MakeRangeQueryFromPlan(ctx, queryable, nil, err_range_lp.Root(), start, now, step, "up")
+ require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
+ # HELP cortex_thanos_engine_fallback_queries_total Total number of fallback queries due to not implementation in thanos engine
+ # TYPE cortex_thanos_engine_fallback_queries_total counter
+ cortex_thanos_engine_fallback_queries_total 1
+ `), "cortex_thanos_engine_fallback_queries_total"))
+
+ // Case 3: executing with prometheus engine
+ r.Header.Set(TypeHeader, string(Prometheus))
+ ctx = AddEngineTypeToContext(ctx, r)
+
+ _, _ = queryEngine.MakeInstantQueryFromPlan(ctx, queryable, nil, instant_lp.Root(), now, "up")
+ require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
+ # HELP cortex_engine_switch_queries_total Total number of queries where engine_type is set explicitly
+ # TYPE cortex_engine_switch_queries_total counter
+ cortex_engine_switch_queries_total{engine_type="prometheus"} 1
+ cortex_engine_switch_queries_total{engine_type="thanos"} 3
+ `), "cortex_engine_switch_queries_total"))
+
+ _, _ = queryEngine.MakeRangeQueryFromPlan(ctx, queryable, nil, range_lp.Root(), start, now, step, "up")
+ require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
+ # HELP cortex_engine_switch_queries_total Total number of queries where engine_type is set explicitly
+ # TYPE cortex_engine_switch_queries_total counter
+ cortex_engine_switch_queries_total{engine_type="prometheus"} 2
+ cortex_engine_switch_queries_total{engine_type="thanos"} 3
+ `), "cortex_engine_switch_queries_total"))
+}
+
+func createTestLogicalPlan(t *testing.T, startTime time.Time, endTime time.Time, step time.Duration, q string) logicalplan.Plan {
+
+ qOpts := query.Options{
+ Start: startTime,
+ End: startTime,
+ Step: 0,
+ StepsBatch: 10,
+ LookbackDelta: 0,
+ EnablePerStepStats: false,
+ }
+
+ if step != 0 {
+ qOpts.End = endTime
+ qOpts.Step = step
+ }
+
+ expr, err := parser.NewParser(q, parser.WithFunctions(parser.Functions)).ParseExpr()
+ require.NoError(t, err)
+
+ planOpts := logicalplan.PlanOptions{
+ DisableDuplicateLabelCheck: false,
+ }
+
+ logicalPlan := logicalplan.NewFromAST(expr, &qOpts, planOpts)
+
+ return logicalPlan
+}
diff --git a/pkg/frontend/config.go b/pkg/frontend/config.go
index a1109f213ad..03dff13980e 100644
--- a/pkg/frontend/config.go
+++ b/pkg/frontend/config.go
@@ -20,8 +20,7 @@ type CombinedFrontendConfig struct {
FrontendV1 v1.Config `yaml:",inline"`
FrontendV2 v2.Config `yaml:",inline"`
- DownstreamURL string `yaml:"downstream_url"`
- DistributedExecEnabled bool `yaml:"distributed_exec_enabled" doc:"hidden"`
+ DownstreamURL string `yaml:"downstream_url"`
}
func (cfg *CombinedFrontendConfig) RegisterFlags(f *flag.FlagSet) {
@@ -30,7 +29,6 @@ func (cfg *CombinedFrontendConfig) RegisterFlags(f *flag.FlagSet) {
cfg.FrontendV2.RegisterFlags(f)
f.StringVar(&cfg.DownstreamURL, "frontend.downstream-url", "", "URL of downstream Prometheus.")
- f.BoolVar(&cfg.DistributedExecEnabled, "frontend.distributed-exec-enabled", false, "Experimental: Enables distributed execution of queries by passing logical query plan fragments to downstream components.")
}
// InitFrontend initializes frontend (either V1 -- without scheduler, or V2 -- with scheduler) or no frontend at
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
new file mode 100644
index 00000000000..07940c6aaaa
--- /dev/null
+++ b/pkg/parser/parser.go
@@ -0,0 +1,21 @@
+package parser
+
+import (
+ "maps"
+
+ promqlparser "github.com/prometheus/prometheus/promql/parser"
+ "github.com/thanos-io/promql-engine/execution/parse"
+)
+
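+// functions is the Prometheus function set extended with the Thanos engine XFunctions.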
+var functions = buildFunctions()
+
+func buildFunctions() map[string]*promqlparser.Function {
+ fns := make(map[string]*promqlparser.Function, len(promqlparser.Functions))
+ maps.Copy(fns, promqlparser.Functions)
+ maps.Copy(fns, parse.XFunctions)
+ return fns
+}
+
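+// ParseExpr parses a PromQL expression using the combined function set, so
+// expressions that use the Thanos engine XFunctions also parse successfully.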
+func ParseExpr(qs string) (promqlparser.Expr, error) {
+ return promqlparser.NewParser(qs, promqlparser.WithFunctions(functions)).ParseExpr()
+}
diff --git a/pkg/querier/parquet_queryable.go b/pkg/querier/parquet_queryable.go
index 8d7fe7152ed..520438c5414 100644
--- a/pkg/querier/parquet_queryable.go
+++ b/pkg/querier/parquet_queryable.go
@@ -6,13 +6,13 @@ import (
"time"
"github.com/go-kit/log"
- "github.com/go-kit/log/level"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/opentracing/opentracing-go"
"github.com/parquet-go/parquet-go"
"github.com/pkg/errors"
"github.com/prometheus-community/parquet-common/queryable"
"github.com/prometheus-community/parquet-common/schema"
+ "github.com/prometheus-community/parquet-common/search"
parquet_storage "github.com/prometheus-community/parquet-common/storage"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -20,17 +20,18 @@ import (
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/util/annotations"
+ "github.com/thanos-io/thanos/pkg/store/storepb"
"github.com/thanos-io/thanos/pkg/strutil"
"golang.org/x/sync/errgroup"
"github.com/cortexproject/cortex/pkg/cortexpb"
+ "github.com/cortexproject/cortex/pkg/querysharding"
"github.com/cortexproject/cortex/pkg/storage/bucket"
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
"github.com/cortexproject/cortex/pkg/tenant"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/limiter"
- util_log "github.com/cortexproject/cortex/pkg/util/log"
"github.com/cortexproject/cortex/pkg/util/multierror"
"github.com/cortexproject/cortex/pkg/util/services"
"github.com/cortexproject/cortex/pkg/util/validation"
@@ -153,6 +154,7 @@ func NewParquetQueryable(
userID, _ := tenant.TenantID(ctx)
return int64(limits.ParquetMaxFetchedDataBytes(userID))
}),
+ queryable.WithMaterializedLabelsFilterCallback(materializedLabelsFilterCallback),
queryable.WithMaterializedSeriesCallback(func(ctx context.Context, cs []storage.ChunkSeries) error {
queryLimiter := limiter.QueryLimiterFromContextWithFallback(ctx)
lbls := make([][]cortexpb.LabelAdapter, 0, len(cs))
@@ -432,17 +434,11 @@ func (q *parquetQuerierWithFallback) Select(ctx context.Context, sortSeries bool
span, ctx := opentracing.StartSpanFromContext(ctx, "parquetQuerierWithFallback.Select")
defer span.Finish()
- userID, err := tenant.TenantID(ctx)
+ newMatchers, shardMatcher, err := querysharding.ExtractShardingMatchers(matchers)
if err != nil {
return storage.ErrSeriesSet(err)
}
-
- if q.limits.QueryVerticalShardSize(userID) > 1 {
- uLogger := util_log.WithUserID(userID, q.logger)
- level.Warn(uLogger).Log("msg", "parquet queryable enabled but vertical sharding > 1. Falling back to the block storage")
-
- return q.blocksStoreQuerier.Select(ctx, sortSeries, h, matchers...)
- }
+ defer shardMatcher.Close()
hints := storage.SelectHints{
Start: q.minT,
@@ -483,7 +479,11 @@ func (q *parquetQuerierWithFallback) Select(ctx context.Context, sortSeries bool
go func() {
span, _ := opentracing.StartSpanFromContext(ctx, "parquetQuerier.Select")
defer span.Finish()
- p <- q.parquetQuerier.Select(InjectBlocksIntoContext(ctx, parquet...), sortSeries, &hints, matchers...)
+ parquetCtx := InjectBlocksIntoContext(ctx, parquet...)
+ if shardMatcher != nil {
+ parquetCtx = injectShardMatcherIntoContext(parquetCtx, shardMatcher)
+ }
+ p <- q.parquetQuerier.Select(parquetCtx, sortSeries, &hints, newMatchers...)
}()
}
@@ -570,6 +570,26 @@ func (q *parquetQuerierWithFallback) incrementOpsMetric(method string, remaining
}
}
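+// shardMatcherLabelsFilter adapts a storepb.ShardMatcher to the
+// search.MaterializedLabelsFilter interface used by the parquet queryable.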
+type shardMatcherLabelsFilter struct {
+ shardMatcher *storepb.ShardMatcher
+}
+
+func (f *shardMatcherLabelsFilter) Filter(lbls labels.Labels) bool {
+ return f.shardMatcher.MatchesLabels(lbls)
+}
+
+func (f *shardMatcherLabelsFilter) Close() {
+ f.shardMatcher.Close()
+}
+
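+// materializedLabelsFilterCallback returns a labels filter built from the shard
+// matcher injected into the context by Select(). It reports false when no shard
+// matcher is present or the query is not sharded.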
+func materializedLabelsFilterCallback(ctx context.Context, _ *storage.SelectHints) (search.MaterializedLabelsFilter, bool) {
+ shardMatcher, exists := extractShardMatcherFromContext(ctx)
+ if !exists || !shardMatcher.IsSharded() {
+ return nil, false
+ }
+ return &shardMatcherLabelsFilter{shardMatcher: shardMatcher}, true
+}
+
type cacheInterface[T any] interface {
Get(path string) T
Set(path string, reader T)
@@ -655,3 +675,19 @@ func (n noopCache[T]) Get(_ string) (r T) {
func (n noopCache[T]) Set(_ string, _ T) {
}
+
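+// shardMatcherCtxKey carries the query-sharding matcher from Select() down to
+// materializedLabelsFilterCallback through the request context.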
+var (
+ shardMatcherCtxKey contextKey = 1
+)
+
+func injectShardMatcherIntoContext(ctx context.Context, sm *storepb.ShardMatcher) context.Context {
+ return context.WithValue(ctx, shardMatcherCtxKey, sm)
+}
+
+func extractShardMatcherFromContext(ctx context.Context) (*storepb.ShardMatcher, bool) {
+ if sm := ctx.Value(shardMatcherCtxKey); sm != nil {
+ return sm.(*storepb.ShardMatcher), true
+ }
+
+ return nil, false
+}
diff --git a/pkg/querier/parquet_queryable_test.go b/pkg/querier/parquet_queryable_test.go
index 13cdde6cd57..73f7c50af21 100644
--- a/pkg/querier/parquet_queryable_test.go
+++ b/pkg/querier/parquet_queryable_test.go
@@ -5,6 +5,7 @@ import (
"fmt"
"math/rand"
"path/filepath"
+ "sync"
"testing"
"time"
@@ -75,49 +76,6 @@ func TestParquetQueryableFallbackLogic(t *testing.T) {
}
ctx := user.InjectOrgID(context.Background(), "user-1")
- t.Run("should fallback when vertical sharding is enabled", func(t *testing.T) {
- finder := &blocksFinderMock{}
- stores := createStore()
-
- q := &blocksStoreQuerier{
- minT: minT,
- maxT: maxT,
- finder: finder,
- stores: stores,
- consistency: NewBlocksConsistencyChecker(0, 0, log.NewNopLogger(), nil),
- logger: log.NewNopLogger(),
- metrics: newBlocksStoreQueryableMetrics(prometheus.NewPedanticRegistry()),
- limits: &blocksStoreLimitsMock{},
-
- storeGatewayConsistencyCheckMaxAttempts: 3,
- }
-
- mParquetQuerier := &mockParquetQuerier{}
- pq := &parquetQuerierWithFallback{
- minT: minT,
- maxT: maxT,
- finder: finder,
- blocksStoreQuerier: q,
- parquetQuerier: mParquetQuerier,
- metrics: newParquetQueryableFallbackMetrics(prometheus.NewRegistry()),
- limits: defaultOverrides(t, 4),
- logger: log.NewNopLogger(),
- defaultBlockStoreType: parquetBlockStore,
- }
-
- finder.On("GetBlocks", mock.Anything, "user-1", minT, maxT).Return(bucketindex.Blocks{
- &bucketindex.Block{ID: block1, Parquet: &parquet.ConverterMarkMeta{Version: 1}},
- &bucketindex.Block{ID: block2, Parquet: &parquet.ConverterMarkMeta{Version: 1}},
- }, map[ulid.ULID]*bucketindex.BlockDeletionMark(nil), nil)
-
- t.Run("select", func(t *testing.T) {
- ss := pq.Select(ctx, true, nil, matchers...)
- require.NoError(t, ss.Err())
- require.Len(t, stores.queriedBlocks, 2)
- require.Len(t, mParquetQuerier.queriedBlocks, 0)
- })
- })
-
t.Run("should fallback all blocks", func(t *testing.T) {
finder := &blocksFinderMock{}
stores := createStore()
@@ -671,3 +629,90 @@ func (m *mockParquetQuerier) Reset() {
func (mockParquetQuerier) Close() error {
return nil
}
+
+func TestMaterializedLabelsFilterCallback(t *testing.T) {
+ tests := []struct {
+ name string
+ setupContext func() context.Context
+ expectedFilterReturned bool
+ expectedCallbackReturned bool
+ }{
+ {
+ name: "no shard matcher in context",
+ setupContext: func() context.Context {
+ return context.Background()
+ },
+ expectedFilterReturned: false,
+ expectedCallbackReturned: false,
+ },
+ {
+ name: "shard matcher exists but is not sharded",
+ setupContext: func() context.Context {
+ // Create a ShardInfo with TotalShards = 0 (not sharded)
+ shardInfo := &storepb.ShardInfo{
+ ShardIndex: 0,
+ TotalShards: 0, // Not sharded
+ By: true,
+ Labels: []string{"__name__"},
+ }
+
+ buffers := &sync.Pool{New: func() interface{} {
+ b := make([]byte, 0, 100)
+ return &b
+ }}
+ shardMatcher := shardInfo.Matcher(buffers)
+
+ return injectShardMatcherIntoContext(context.Background(), shardMatcher)
+ },
+ expectedFilterReturned: false,
+ expectedCallbackReturned: false,
+ },
+ {
+ name: "shard matcher exists and is sharded",
+ setupContext: func() context.Context {
+ // Create a ShardInfo with TotalShards > 0 (sharded)
+ shardInfo := &storepb.ShardInfo{
+ ShardIndex: 0,
+ TotalShards: 2, // Sharded
+ By: true,
+ Labels: []string{"__name__"},
+ }
+
+ buffers := &sync.Pool{New: func() interface{} {
+ b := make([]byte, 0, 100)
+ return &b
+ }}
+ shardMatcher := shardInfo.Matcher(buffers)
+
+ return injectShardMatcherIntoContext(context.Background(), shardMatcher)
+ },
+ expectedFilterReturned: true,
+ expectedCallbackReturned: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx := tt.setupContext()
+
+ filter, exists := materializedLabelsFilterCallback(ctx, nil)
+
+ require.Equal(t, tt.expectedCallbackReturned, exists)
+
+ if tt.expectedFilterReturned {
+ require.NotNil(t, filter)
+
+ // Test that the filter can be used
+ testLabels := labels.FromStrings("__name__", "test_metric", "label1", "value1")
+ // We can't easily test the actual filtering logic without knowing the internal
+ // shard matching implementation, but we can at least verify the filter interface works
+ _ = filter.Filter(testLabels)
+
+ // Cleanup
+ filter.Close()
+ } else {
+ require.Nil(t, filter)
+ }
+ })
+ }
+}
diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go
index b1a94f1d40f..7ef15d9c037 100644
--- a/pkg/querier/querier.go
+++ b/pkg/querier/querier.go
@@ -85,9 +85,7 @@ type Config struct {
ShuffleShardingIngestersLookbackPeriod time.Duration `yaml:"shuffle_sharding_ingesters_lookback_period"`
- // Experimental. Use https://github.com/thanos-io/promql-engine rather than
- // the Prometheus query engine.
- ThanosEngine bool `yaml:"thanos_engine"`
+ ThanosEngine engine.ThanosEngineConfig `yaml:"thanos_engine"`
// Ignore max query length check at Querier.
IgnoreMaxQueryLength bool `yaml:"ignore_max_query_length"`
@@ -97,6 +95,7 @@ type Config struct {
EnableParquetQueryable bool `yaml:"enable_parquet_queryable" doc:"hidden"`
ParquetQueryableShardCacheSize int `yaml:"parquet_queryable_shard_cache_size" doc:"hidden"`
ParquetQueryableDefaultBlockStore string `yaml:"parquet_queryable_default_block_store" doc:"hidden"`
+ DistributedExecEnabled bool `yaml:"distributed_exec_enabled" doc:"hidden"`
}
var (
@@ -111,6 +110,8 @@ var (
// RegisterFlags adds the flags required to config this to the given FlagSet.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
+ cfg.ThanosEngine.RegisterFlagsWithPrefix("querier.", f)
+
//lint:ignore faillint Need to pass the global logger like this for warning on deprecated methods
flagext.DeprecatedFlag(f, "querier.ingester-streaming", "Deprecated: Use streaming RPCs to query ingester. QueryStream is always enabled and the flag is not effective anymore.", util_log.Logger)
//lint:ignore faillint Need to pass the global logger like this for warning on deprecated methods
@@ -139,13 +140,13 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.IngesterQueryMaxAttempts, "querier.ingester-query-max-attempts", 1, "The maximum number of times we attempt fetching data from ingesters for retryable errors (ex. partial data returned).")
f.DurationVar(&cfg.LookbackDelta, "querier.lookback-delta", 5*time.Minute, "Time since the last sample after which a time series is considered stale and ignored by expression evaluations.")
f.DurationVar(&cfg.ShuffleShardingIngestersLookbackPeriod, "querier.shuffle-sharding-ingesters-lookback-period", 0, "When distributor's sharding strategy is shuffle-sharding and this setting is > 0, queriers fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since 'now - lookback period'. The lookback period should be greater or equal than the configured 'query store after' and 'query ingesters within'. If this setting is 0, queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).")
- f.BoolVar(&cfg.ThanosEngine, "querier.thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.")
f.Int64Var(&cfg.MaxSubQuerySteps, "querier.max-subquery-steps", 0, "Max number of steps allowed for every subquery expression in query. Number of steps is calculated using subquery range / step. A value > 0 enables it.")
f.BoolVar(&cfg.IgnoreMaxQueryLength, "querier.ignore-max-query-length", false, "If enabled, ignore max query length check at Querier select method. Users can choose to ignore it since the validation can be done before Querier evaluation like at Query Frontend or Ruler.")
f.BoolVar(&cfg.EnablePromQLExperimentalFunctions, "querier.enable-promql-experimental-functions", false, "[Experimental] If true, experimental promQL functions are enabled.")
f.BoolVar(&cfg.EnableParquetQueryable, "querier.enable-parquet-queryable", false, "[Experimental] If true, querier will try to query the parquet files if available.")
f.IntVar(&cfg.ParquetQueryableShardCacheSize, "querier.parquet-queryable-shard-cache-size", 512, "[Experimental] [Experimental] Maximum size of the Parquet queryable shard cache. 0 to disable.")
f.StringVar(&cfg.ParquetQueryableDefaultBlockStore, "querier.parquet-queryable-default-block-store", string(parquetBlockStore), "Parquet queryable's default block store to query. Valid options are tsdb and parquet. If it is set to tsdb, parquet queryable always fallback to store gateway.")
+ f.BoolVar(&cfg.DistributedExecEnabled, "querier.distributed-exec-enabled", false, "Experimental: Enables distributed execution of queries by passing logical query plan fragments to downstream components.")
}
// Validate the config
@@ -181,6 +182,10 @@ func (cfg *Config) Validate() error {
}
}
+ if err := cfg.ThanosEngine.Validate(); err != nil {
+ return err
+ }
+
return nil
}
@@ -197,7 +202,7 @@ func getChunksIteratorFunction(_ Config) chunkIteratorFunc {
}
// New builds a queryable and promql engine.
-func New(cfg Config, limits *validation.Overrides, distributor Distributor, stores []QueryableWithFilter, reg prometheus.Registerer, logger log.Logger, isPartialDataEnabled partialdata.IsCfgEnabledFunc) (storage.SampleAndChunkQueryable, storage.ExemplarQueryable, promql.QueryEngine) {
+func New(cfg Config, limits *validation.Overrides, distributor Distributor, stores []QueryableWithFilter, reg prometheus.Registerer, logger log.Logger, isPartialDataEnabled partialdata.IsCfgEnabledFunc) (storage.SampleAndChunkQueryable, storage.ExemplarQueryable, engine.QueryEngine) {
iteratorFunc := getChunksIteratorFunction(cfg)
distributorQueryable := newDistributorQueryable(distributor, cfg.IngesterMetadataStreaming, cfg.IngesterLabelNamesWithMatchers, iteratorFunc, cfg.QueryIngestersWithin, isPartialDataEnabled, cfg.IngesterQueryMaxAttempts)
@@ -228,10 +233,6 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor
})
maxConcurrentMetric.Set(float64(cfg.MaxConcurrent))
- // The holt_winters function is renamed to double_exponential_smoothing and has been experimental since Prometheus v3. (https://github.com/prometheus/prometheus/pull/14930)
- // The cortex supports holt_winters for users using this function.
- EnableExperimentalPromQLFunctions(cfg.EnablePromQLExperimentalFunctions, true)
-
opts := promql.EngineOpts{
Logger: util_log.GoKitLogToSlog(logger),
Reg: reg,
diff --git a/pkg/querier/tripperware/instantquery/instant_query.go b/pkg/querier/tripperware/instantquery/instant_query.go
index a3977207199..d34805e527e 100644
--- a/pkg/querier/tripperware/instantquery/instant_query.go
+++ b/pkg/querier/tripperware/instantquery/instant_query.go
@@ -183,7 +183,7 @@ func (c instantQueryCodec) EncodeRequest(ctx context.Context, r tripperware.Requ
}
}
- h.Add("Content-Type", "application/json")
+ h.Add("Content-Type", "application/x-www-form-urlencoded")
isSourceRuler := strings.Contains(h.Get("User-Agent"), tripperware.RulerUserAgent)
if !isSourceRuler {
@@ -191,16 +191,19 @@ func (c instantQueryCodec) EncodeRequest(ctx context.Context, r tripperware.Requ
tripperware.SetRequestHeaders(h, c.defaultCodecType, c.compression)
}
- byteBody, err := c.getSerializedBody(promReq)
+ bodyBytes, err := c.getSerializedBody(promReq)
if err != nil {
return nil, err
}
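+	// The serialized logical plan is sent as a "plan" form field, so downstream
+	// handlers can read it via PostFormValue("plan").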
+ form := url.Values{}
+ form.Set("plan", string(bodyBytes))
+ formEncoded := form.Encode()
req := &http.Request{
Method: "POST",
RequestURI: u.String(), // This is what the httpgrpc code looks at.
URL: u,
- Body: io.NopCloser(bytes.NewReader(byteBody)),
+ Body: io.NopCloser(strings.NewReader(formEncoded)),
Header: h,
}
diff --git a/pkg/querier/tripperware/instantquery/instant_query_middlewares_test.go b/pkg/querier/tripperware/instantquery/instant_query_middlewares_test.go
index b6d445fe20b..0b1de391f8e 100644
--- a/pkg/querier/tripperware/instantquery/instant_query_middlewares_test.go
+++ b/pkg/querier/tripperware/instantquery/instant_query_middlewares_test.go
@@ -79,7 +79,6 @@ func TestRoundTrip(t *testing.T) {
time.Minute,
0,
0,
- false,
)
for i, tc := range []struct {
@@ -192,7 +191,6 @@ func TestRoundTripWithAndWithoutDistributedExec(t *testing.T) {
time.Minute,
0,
0,
- false,
)
ctx := user.InjectOrgID(context.Background(), "1")
@@ -214,8 +212,7 @@ func TestRoundTripWithAndWithoutDistributedExec(t *testing.T) {
require.NoError(t, err)
// check request body
- body, err := io.ReadAll(req.Body)
- require.NoError(t, err)
+ body := []byte(req.PostFormValue("plan"))
if tc.expectEmptyBody {
require.Empty(t, body)
} else {
diff --git a/pkg/querier/tripperware/instantquery/limits.go b/pkg/querier/tripperware/instantquery/limits.go
index 5c9514b957b..477fe4c36f4 100644
--- a/pkg/querier/tripperware/instantquery/limits.go
+++ b/pkg/querier/tripperware/instantquery/limits.go
@@ -5,9 +5,9 @@ import (
"net/http"
"time"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/weaveworks/common/httpgrpc"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/tenant"
"github.com/cortexproject/cortex/pkg/util/promql"
@@ -45,7 +45,7 @@ func (l limitsMiddleware) Do(ctx context.Context, r tripperware.Request) (trippe
// Enforce the max query length.
if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLength); maxQueryLength > 0 {
- expr, err := parser.ParseExpr(r.GetQuery())
+ expr, err := cortexparser.ParseExpr(r.GetQuery())
if err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
}
diff --git a/pkg/querier/tripperware/instantquery/limits_test.go b/pkg/querier/tripperware/instantquery/limits_test.go
index 74f5c7c4d86..a365eab414c 100644
--- a/pkg/querier/tripperware/instantquery/limits_test.go
+++ b/pkg/querier/tripperware/instantquery/limits_test.go
@@ -6,13 +6,13 @@ import (
"testing"
"time"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/util/validation"
)
@@ -24,7 +24,7 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
)
wrongQuery := `up[`
- _, parserErr := parser.ParseExpr(wrongQuery)
+ _, parserErr := cortexparser.ParseExpr(wrongQuery)
tests := map[string]struct {
maxQueryLength time.Duration
diff --git a/pkg/querier/tripperware/merge.go b/pkg/querier/tripperware/merge.go
index 0fd385cecf2..0e3d8aabb4b 100644
--- a/pkg/querier/tripperware/merge.go
+++ b/pkg/querier/tripperware/merge.go
@@ -10,6 +10,7 @@ import (
"github.com/thanos-io/thanos/pkg/strutil"
"github.com/cortexproject/cortex/pkg/cortexpb"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
)
const StatusSuccess = "success"
@@ -284,7 +285,7 @@ func getSortValueFromPair(samples []*pair, i int) float64 {
}
func sortPlanForQuery(q string) (sortPlan, error) {
- expr, err := promqlparser.ParseExpr(q)
+ expr, err := cortexparser.ParseExpr(q)
if err != nil {
return 0, err
}
diff --git a/pkg/querier/tripperware/query_attribute_matcher.go b/pkg/querier/tripperware/query_attribute_matcher.go
index 7edd9f0b098..002568b7a4e 100644
--- a/pkg/querier/tripperware/query_attribute_matcher.go
+++ b/pkg/querier/tripperware/query_attribute_matcher.go
@@ -6,9 +6,9 @@ import (
"time"
"github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/weaveworks/common/httpgrpc"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/stats"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/validation"
@@ -24,7 +24,7 @@ func rejectQueryOrSetPriority(r *http.Request, now time.Time, lookbackDelta time
if op == "query" || op == "query_range" {
query := r.FormValue("query")
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
return httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
}
diff --git a/pkg/querier/tripperware/queryrange/limits.go b/pkg/querier/tripperware/queryrange/limits.go
index 817fcf50834..7b1f17b55a9 100644
--- a/pkg/querier/tripperware/queryrange/limits.go
+++ b/pkg/querier/tripperware/queryrange/limits.go
@@ -7,9 +7,9 @@ import (
"github.com/go-kit/log/level"
"github.com/prometheus/prometheus/model/timestamp"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/weaveworks/common/httpgrpc"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/tenant"
"github.com/cortexproject/cortex/pkg/util"
@@ -82,7 +82,7 @@ func (l limitsMiddleware) Do(ctx context.Context, r tripperware.Request) (trippe
return nil, httpgrpc.Errorf(http.StatusBadRequest, validation.ErrQueryTooLong, queryLen, maxQueryLength)
}
- expr, err := parser.ParseExpr(r.GetQuery())
+ expr, err := cortexparser.ParseExpr(r.GetQuery())
if err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
}
diff --git a/pkg/querier/tripperware/queryrange/limits_test.go b/pkg/querier/tripperware/queryrange/limits_test.go
index 9a7668b9812..3690e1e0386 100644
--- a/pkg/querier/tripperware/queryrange/limits_test.go
+++ b/pkg/querier/tripperware/queryrange/limits_test.go
@@ -6,13 +6,13 @@ import (
"testing"
"time"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"
+ "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/validation"
diff --git a/pkg/querier/tripperware/queryrange/query_range.go b/pkg/querier/tripperware/queryrange/query_range.go
index df721146f66..b46d8f9b34d 100644
--- a/pkg/querier/tripperware/queryrange/query_range.go
+++ b/pkg/querier/tripperware/queryrange/query_range.go
@@ -189,8 +189,7 @@ func (c prometheusCodec) EncodeRequest(ctx context.Context, r tripperware.Reques
h.Add(n, v)
}
}
-
- h.Add("Content-Type", "application/json")
+ h.Add("Content-Type", "application/x-www-form-urlencoded")
tripperware.SetRequestHeaders(h, c.defaultCodecType, c.compression)
@@ -199,11 +198,15 @@ func (c prometheusCodec) EncodeRequest(ctx context.Context, r tripperware.Reques
return nil, err
}
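+	// Same encoding as the instant query codec: the serialized logical plan is
+	// sent as a "plan" form field.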
+ form := url.Values{}
+ form.Set("plan", string(bodyBytes))
+ formEncoded := form.Encode()
+
req := &http.Request{
Method: "POST",
RequestURI: u.String(), // This is what the httpgrpc code looks at.
URL: u,
- Body: io.NopCloser(bytes.NewReader(bodyBytes)),
+ Body: io.NopCloser(strings.NewReader(formEncoded)),
Header: h,
}
diff --git a/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go b/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go
index 4e19fe84a02..acf66698c16 100644
--- a/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go
+++ b/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go
@@ -86,7 +86,6 @@ func TestRoundTrip(t *testing.T) {
time.Minute,
0,
0,
- false,
)
for i, tc := range []struct {
@@ -211,7 +210,6 @@ func TestRoundTripWithAndWithoutDistributedExec(t *testing.T) {
time.Minute,
0,
0,
- false,
)
ctx := user.InjectOrgID(context.Background(), "1")
@@ -233,8 +231,7 @@ func TestRoundTripWithAndWithoutDistributedExec(t *testing.T) {
require.NoError(t, err)
// check request body
- body, err := io.ReadAll(req.Body)
- require.NoError(t, err)
+ body := []byte(req.PostFormValue("plan"))
if tc.expectEmptyBody {
require.Empty(t, body)
} else {
diff --git a/pkg/querier/tripperware/queryrange/results_cache.go b/pkg/querier/tripperware/queryrange/results_cache.go
index 4ae249efc89..db6d2f284f5 100644
--- a/pkg/querier/tripperware/queryrange/results_cache.go
+++ b/pkg/querier/tripperware/queryrange/results_cache.go
@@ -27,6 +27,7 @@ import (
"github.com/cortexproject/cortex/pkg/chunk/cache"
"github.com/cortexproject/cortex/pkg/cortexpb"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier"
"github.com/cortexproject/cortex/pkg/querier/partialdata"
querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
@@ -326,7 +327,7 @@ func (s resultsCache) isAtModifierCachable(ctx context.Context, r tripperware.Re
if !strings.Contains(query, "@") {
return true
}
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
// We are being pessimistic in such cases.
level.Warn(util_log.WithContext(ctx, s.logger)).Log("msg", "failed to parse query, considering @ modifier as not cacheable", "query", query, "err", err)
@@ -371,7 +372,7 @@ func (s resultsCache) isOffsetCachable(ctx context.Context, r tripperware.Reques
if !strings.Contains(query, "offset") {
return true
}
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
level.Warn(util_log.WithContext(ctx, s.logger)).Log("msg", "failed to parse query, considering offset as not cacheable", "query", query, "err", err)
return false
diff --git a/pkg/querier/tripperware/queryrange/split_by_interval.go b/pkg/querier/tripperware/queryrange/split_by_interval.go
index 6ff1f1a15e6..980d2867a87 100644
--- a/pkg/querier/tripperware/queryrange/split_by_interval.go
+++ b/pkg/querier/tripperware/queryrange/split_by_interval.go
@@ -11,6 +11,7 @@ import (
"github.com/thanos-io/thanos/pkg/querysharding"
"github.com/weaveworks/common/httpgrpc"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/tenant"
@@ -111,7 +112,7 @@ func splitQuery(r tripperware.Request, interval time.Duration) ([]tripperware.Re
// For example given the start of the query is 10.00, `http_requests_total[1h] @ start()` query will be replaced with `http_requests_total[1h] @ 10.00`
// If the modifier is already a constant, it will be returned as is.
func evaluateAtModifierFunction(query string, start, end int64) (string, error) {
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
return "", httpgrpc.Errorf(http.StatusBadRequest, "%s", err)
}
@@ -167,7 +168,7 @@ func dynamicIntervalFn(cfg Config, limits tripperware.Limits, queryAnalyzer quer
return ctx, baseInterval, nil
}
- queryExpr, err := parser.ParseExpr(r.GetQuery())
+ queryExpr, err := cortexparser.ParseExpr(r.GetQuery())
if err != nil {
return ctx, baseInterval, err
}
diff --git a/pkg/querier/tripperware/queryrange/split_by_interval_test.go b/pkg/querier/tripperware/queryrange/split_by_interval_test.go
index 2f219182bdb..31b0d82541d 100644
--- a/pkg/querier/tripperware/queryrange/split_by_interval_test.go
+++ b/pkg/querier/tripperware/queryrange/split_by_interval_test.go
@@ -10,15 +10,14 @@ import (
"testing"
"time"
+ "github.com/stretchr/testify/require"
"github.com/thanos-io/thanos/pkg/querysharding"
"github.com/weaveworks/common/httpgrpc"
-
- "github.com/prometheus/prometheus/promql/parser"
- "github.com/stretchr/testify/require"
"github.com/weaveworks/common/middleware"
"github.com/weaveworks/common/user"
"go.uber.org/atomic"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier/tripperware"
)
@@ -434,7 +433,7 @@ func Test_evaluateAtModifier(t *testing.T) {
require.Equal(t, tt.expectedErrorCode, int(httpResp.Code))
} else {
require.NoError(t, err)
- expectedExpr, err := parser.ParseExpr(tt.expected)
+ expectedExpr, err := cortexparser.ParseExpr(tt.expected)
require.NoError(t, err)
require.Equal(t, expectedExpr.String(), out)
}
@@ -1044,7 +1043,7 @@ func Test_analyzeDurationFetchedByQuery(t *testing.T) {
},
} {
t.Run(tc.name, func(t *testing.T) {
- expr, err := parser.ParseExpr(tc.req.GetQuery())
+ expr, err := cortexparser.ParseExpr(tc.req.GetQuery())
require.Nil(t, err)
durationFetchedByRange, durationFetchedBySelectors := analyzeDurationFetchedByQueryExpr(expr, tc.req.GetStart(), tc.req.GetEnd(), tc.baseSplitInterval, tc.lookbackDelta)
require.Equal(t, tc.expectedDurationFetchedByRange, durationFetchedByRange)
diff --git a/pkg/querier/tripperware/roundtrip.go b/pkg/querier/tripperware/roundtrip.go
index b9be569d6d9..144bb04da36 100644
--- a/pkg/querier/tripperware/roundtrip.go
+++ b/pkg/querier/tripperware/roundtrip.go
@@ -31,7 +31,6 @@ import (
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"
- "github.com/cortexproject/cortex/pkg/querier"
"github.com/cortexproject/cortex/pkg/tenant"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/limiter"
@@ -117,13 +116,8 @@ func NewQueryTripperware(
defaultSubQueryInterval time.Duration,
maxSubQuerySteps int64,
lookbackDelta time.Duration,
- enablePromQLExperimentalFunctions bool,
) Tripperware {
- // The holt_winters function is renamed to double_exponential_smoothing and has been experimental since Prometheus v3. (https://github.com/prometheus/prometheus/pull/14930)
- // The cortex supports holt_winters for users using this function.
- querier.EnableExperimentalPromQLFunctions(enablePromQLExperimentalFunctions, true)
-
// Per tenant query metrics.
queriesPerTenant := promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
Name: "cortex_query_frontend_queries_total",
diff --git a/pkg/querier/tripperware/roundtrip_test.go b/pkg/querier/tripperware/roundtrip_test.go
index a73623a0b70..ceb4510d479 100644
--- a/pkg/querier/tripperware/roundtrip_test.go
+++ b/pkg/querier/tripperware/roundtrip_test.go
@@ -347,7 +347,6 @@ cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1
time.Minute,
tc.maxSubQuerySteps,
0,
- false,
)
resp, err := tw(downstream).RoundTrip(req)
if tc.expectedErr == nil {
diff --git a/pkg/querier/tripperware/subquery.go b/pkg/querier/tripperware/subquery.go
index cebce45f261..2226192a89a 100644
--- a/pkg/querier/tripperware/subquery.go
+++ b/pkg/querier/tripperware/subquery.go
@@ -6,6 +6,8 @@ import (
"github.com/prometheus/prometheus/promql/parser"
"github.com/weaveworks/common/httpgrpc"
+
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
)
var (
@@ -18,7 +20,7 @@ const (
// SubQueryStepSizeCheck ensures the query doesn't contain too small step size in subqueries.
func SubQueryStepSizeCheck(query string, defaultSubQueryInterval time.Duration, maxStep int64) error {
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
// If query fails to parse, we don't throw step size error
// but fail query later on querier.
diff --git a/pkg/querier/tripperware/test_shard_by_query_utils.go b/pkg/querier/tripperware/test_shard_by_query_utils.go
index f03555f7fd4..d39a5dd2317 100644
--- a/pkg/querier/tripperware/test_shard_by_query_utils.go
+++ b/pkg/querier/tripperware/test_shard_by_query_utils.go
@@ -20,6 +20,7 @@ import (
"github.com/thanos-io/thanos/pkg/store/storepb"
"github.com/weaveworks/common/user"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querysharding"
"github.com/cortexproject/cortex/pkg/util/validation"
)
@@ -413,7 +414,7 @@ http_requests_total`,
s := httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
q := r.FormValue("query")
- expr, _ := parser.ParseExpr(q)
+ expr, _ := cortexparser.ParseExpr(q)
shardIndex := int64(0)
parser.Inspect(expr, func(n parser.Node, _ []parser.Node) error {
diff --git a/pkg/querysharding/util.go b/pkg/querysharding/util.go
index 3f2a2d82432..eafc3a71b4f 100644
--- a/pkg/querysharding/util.go
+++ b/pkg/querysharding/util.go
@@ -4,9 +4,13 @@ import (
"encoding/base64"
"sync"
+ "github.com/pkg/errors"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql/parser"
+ "github.com/thanos-io/thanos/pkg/querysharding"
"github.com/thanos-io/thanos/pkg/store/storepb"
+
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
)
const (
@@ -18,10 +22,12 @@ var (
b := make([]byte, 0, 100)
return &b
}}
+
+ stop = errors.New("stop")
)
func InjectShardingInfo(query string, shardInfo *storepb.ShardInfo) (string, error) {
- expr, err := parser.ParseExpr(query)
+ expr, err := cortexparser.ParseExpr(query)
if err != nil {
return "", err
}
@@ -75,3 +81,43 @@ func ExtractShardingMatchers(matchers []*labels.Matcher) ([]*labels.Matcher, *st
return r, shardInfo.Matcher(&buffers), nil
}
+
+type disableBinaryExpressionAnalyzer struct {
+ analyzer querysharding.Analyzer
+}
+
+// NewDisableBinaryExpressionAnalyzer wraps an analyzer so that binary expressions
+// between vectors that do not use 'on' matching are reported as not shardable.
+func NewDisableBinaryExpressionAnalyzer(analyzer querysharding.Analyzer) *disableBinaryExpressionAnalyzer {
+ return &disableBinaryExpressionAnalyzer{analyzer: analyzer}
+}
+
+func (d *disableBinaryExpressionAnalyzer) Analyze(query string) (querysharding.QueryAnalysis, error) {
+ analysis, err := d.analyzer.Analyze(query)
+ if err != nil || !analysis.IsShardable() {
+ return analysis, err
+ }
+
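+	// The wrapped analyzer has already parsed the query successfully, so the
+	// parse error can be ignored here.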
+ expr, _ := cortexparser.ParseExpr(query)
+ isShardable := true
+ parser.Inspect(expr, func(node parser.Node, nodes []parser.Node) error {
+ switch n := node.(type) {
+ case *parser.BinaryExpr:
+			// No vector matching means one operand is not a vector. Skip it.
+ if n.VectorMatching == nil {
+ return nil
+ }
+			// Vector matching with 'ignoring' adds MetricNameLabel as a sharding label.
+			// Mark this type of query as not shardable.
+ if !n.VectorMatching.On {
+ isShardable = false
+ return stop
+ }
+ }
+ return nil
+ })
+ if !isShardable {
+ // Mark as not shardable.
+ return querysharding.QueryAnalysis{}, nil
+ }
+ return analysis, nil
+}
diff --git a/pkg/querysharding/util_test.go b/pkg/querysharding/util_test.go
new file mode 100644
index 00000000000..cba23190723
--- /dev/null
+++ b/pkg/querysharding/util_test.go
@@ -0,0 +1,145 @@
+package querysharding
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/thanos-io/thanos/pkg/querysharding"
+)
+
+func TestDisableBinaryExpressionAnalyzer_Analyze(t *testing.T) {
+ tests := []struct {
+ name string
+ query string
+ expectShardable bool
+ expectError bool
+ description string
+ }{
+ {
+ name: "binary expression with vector matching on",
+ query: `up{job="prometheus"} + on(instance) rate(cpu_usage[5m])`,
+ expectShardable: true,
+ expectError: false,
+ description: "Binary expression with 'on' matching should remain shardable",
+ },
+ {
+ name: "binary expression without explicit vector matching",
+ query: `up{job="prometheus"} + rate(cpu_usage[5m])`,
+ expectShardable: false,
+ expectError: false,
+			description:     "No explicit vector matching is treated like 'ignoring' (no 'on' clause), so it is not shardable.",
+ },
+ {
+ name: "binary expression with vector matching ignoring",
+ query: `up{job="prometheus"} + ignoring(instance) rate(cpu_usage[5m])`,
+ expectShardable: false,
+ expectError: false,
+ description: "Binary expression with 'ignoring' matching should not be shardable",
+ },
+ {
+ name: "complex expression with binary expr using on",
+ query: `sum(rate(http_requests_total[5m])) by (job) + on(job) avg(cpu_usage) by (job)`,
+ expectShardable: true,
+ expectError: false,
+ description: "Complex expression with 'on' matching should remain shardable",
+ },
+ {
+ name: "complex expression with binary expr using ignoring",
+ query: `sum(rate(http_requests_total[5m])) by (job) + ignoring(instance) avg(cpu_usage) by (job)`,
+ expectShardable: false,
+ expectError: false,
+ description: "Complex expression with 'ignoring' matching should not be shardable",
+ },
+ {
+ name: "nested binary expressions with one ignoring",
+ query: `(up + on(job) rate(cpu[5m])) * ignoring(instance) memory_usage`,
+ expectShardable: false,
+ expectError: false,
+ description: "Nested expressions with any 'ignoring' should not be shardable",
+ },
+ {
+ name: "aggregation",
+ query: `sum(rate(http_requests_total[5m])) by (job)`,
+ expectShardable: true,
+ expectError: false,
+ description: "Aggregations should remain shardable",
+ },
+ {
+ name: "aggregation with binary expression and scalar",
+ query: `sum(rate(http_requests_total[5m])) by (job) * 100`,
+ expectShardable: true,
+ expectError: false,
+ description: "Aggregations should remain shardable",
+ },
+ {
+ name: "invalid query",
+ query: "invalid{query",
+ expectShardable: false,
+ expectError: true,
+ description: "Invalid queries should return error",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+			// Create the actual Thanos analyzer
+ thanosAnalyzer := querysharding.NewQueryAnalyzer()
+
+ // Wrap it with our disable binary expression analyzer
+ analyzer := NewDisableBinaryExpressionAnalyzer(thanosAnalyzer)
+
+ // Test the wrapped analyzer
+ result, err := analyzer.Analyze(tt.query)
+
+ if tt.expectError {
+ require.Error(t, err, tt.description)
+ return
+ }
+
+ require.NoError(t, err, tt.description)
+ assert.Equal(t, tt.expectShardable, result.IsShardable(), tt.description)
+ })
+ }
+}
+
+func TestDisableBinaryExpressionAnalyzer_ComparedToOriginal(t *testing.T) {
+ // Test cases that verify the wrapper correctly modifies behavior
+ testCases := []struct {
+ name string
+ query string
+ }{
+ {
+ name: "ignoring expression should be disabled",
+ query: `up + ignoring(instance) rate(cpu[5m])`,
+ },
+ {
+ name: "nested ignoring expression should be disabled",
+ query: `(sum(rate(http_requests_total[5m])) by (job)) + ignoring(instance) avg(cpu_usage) by (job)`,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ // Test with original analyzer
+ originalAnalyzer := querysharding.NewQueryAnalyzer()
+ originalResult, err := originalAnalyzer.Analyze(tc.query)
+ require.NoError(t, err)
+
+ // Test with wrapped analyzer
+ wrappedAnalyzer := NewDisableBinaryExpressionAnalyzer(originalAnalyzer)
+ wrappedResult, err := wrappedAnalyzer.Analyze(tc.query)
+ require.NoError(t, err)
+
+ // The wrapped analyzer should make previously shardable queries non-shardable
+ // if they contain binary expressions with ignoring
+ if originalResult.IsShardable() {
+ assert.False(t, wrappedResult.IsShardable(),
+ "Wrapped analyzer should disable sharding for queries with ignoring vector matching")
+ } else {
+ // If original wasn't shardable, wrapped shouldn't be either
+ assert.False(t, wrappedResult.IsShardable())
+ }
+ })
+ }
+}
diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go
index 6235bae797c..92c343d6849 100644
--- a/pkg/ring/ring.go
+++ b/pkg/ring/ring.go
@@ -201,7 +201,8 @@ type Ring struct {
// List of zones for which there's at least 1 instance in the ring. This list is guaranteed
// to be sorted alphabetically.
- ringZones []string
+ ringZones []string
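+	// previousRingZones remembers the zones from the previous ring state so that
+	// per-zone metrics keep being emitted with a zero value when a zone loses all of its members.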
+ previousRingZones []string
// Cache of shuffle-sharded subrings per identifier. Invalidated when topology changes.
// If set to nil, no caching is done (used by tests, and subrings).
@@ -262,7 +263,7 @@ func NewWithStoreClientAndStrategy(cfg Config, name, key string, store kv.Client
Name: "ring_members",
Help: "Number of members in the ring",
ConstLabels: map[string]string{"name": name}},
- []string{"state"}),
+ []string{"state", "zone"}),
totalTokensGauge: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "ring_tokens_total",
Help: "Number of tokens in the ring",
@@ -362,6 +363,7 @@ func (r *Ring) updateRingState(ringDesc *Desc) {
r.ringTokensByZone = ringTokensByZone
r.ringInstanceByToken = ringInstanceByToken
r.ringInstanceIdByAddr = ringInstanceByAddr
+ r.previousRingZones = r.ringZones
r.ringZones = ringZones
r.lastTopologyChange = now
if r.shuffledSubringCache != nil {
@@ -665,12 +667,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
return
}
- numByState := map[string]int{}
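+	// Count members per state and per zone so the ring_members metric can be
+	// reported with a zone label.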
+ numByStateByZone := map[string]map[string]int{}
oldestTimestampByState := map[string]int64{}
// Initialized to zero so we emit zero-metrics (instead of not emitting anything)
for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String(), READONLY.String()} {
- numByState[s] = 0
+ numByStateByZone[s] = map[string]int{}
+		// make sure removed zones get a zero value
+ for _, zone := range r.previousRingZones {
+ numByStateByZone[s][zone] = 0
+ }
+ for _, zone := range r.ringZones {
+ numByStateByZone[s][zone] = 0
+ }
oldestTimestampByState[s] = 0
}
@@ -679,14 +688,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
if !r.IsHealthy(&instance, Reporting, r.KVClient.LastUpdateTime(r.key)) {
s = unhealthy
}
- numByState[s]++
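+		// Defensive: an instance may be in a state that was not pre-initialized above.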
+ if _, ok := numByStateByZone[s]; !ok {
+ numByStateByZone[s] = map[string]int{}
+ }
+ numByStateByZone[s][instance.Zone]++
if oldestTimestampByState[s] == 0 || instance.Timestamp < oldestTimestampByState[s] {
oldestTimestampByState[s] = instance.Timestamp
}
}
- for state, count := range numByState {
- r.numMembersGaugeVec.WithLabelValues(state).Set(float64(count))
+ for state, zones := range numByStateByZone {
+ for zone, count := range zones {
+ r.numMembersGaugeVec.WithLabelValues(state, zone).Set(float64(count))
+ }
}
for state, timestamp := range oldestTimestampByState {
r.oldestTimestampGaugeVec.WithLabelValues(state).Set(float64(timestamp))
diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go
index e2f7e0a8d1c..682cb7d942d 100644
--- a/pkg/ring/ring_test.go
+++ b/pkg/ring/ring_test.go
@@ -3202,12 +3202,12 @@ func TestUpdateMetrics(t *testing.T) {
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
- ring_members{name="test",state="ACTIVE"} 2
- ring_members{name="test",state="JOINING"} 0
- ring_members{name="test",state="LEAVING"} 0
- ring_members{name="test",state="PENDING"} 0
- ring_members{name="test",state="READONLY"} 0
- ring_members{name="test",state="Unhealthy"} 0
+ ring_members{name="test",state="ACTIVE",zone=""} 2
+ ring_members{name="test",state="JOINING",zone=""} 0
+ ring_members{name="test",state="LEAVING",zone=""} 0
+ ring_members{name="test",state="PENDING",zone=""} 0
+ ring_members{name="test",state="READONLY",zone=""} 0
+ ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3230,12 +3230,12 @@ func TestUpdateMetrics(t *testing.T) {
Expected: `
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
- ring_members{name="test",state="ACTIVE"} 2
- ring_members{name="test",state="JOINING"} 0
- ring_members{name="test",state="LEAVING"} 0
- ring_members{name="test",state="PENDING"} 0
- ring_members{name="test",state="READONLY"} 0
- ring_members{name="test",state="Unhealthy"} 0
+ ring_members{name="test",state="ACTIVE",zone=""} 2
+ ring_members{name="test",state="JOINING",zone=""} 0
+ ring_members{name="test",state="LEAVING",zone=""} 0
+ ring_members{name="test",state="PENDING",zone=""} 0
+ ring_members{name="test",state="READONLY",zone=""} 0
+ ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3310,12 +3310,12 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
- ring_members{name="test",state="ACTIVE"} 2
- ring_members{name="test",state="JOINING"} 0
- ring_members{name="test",state="LEAVING"} 0
- ring_members{name="test",state="PENDING"} 0
- ring_members{name="test",state="READONLY"} 0
- ring_members{name="test",state="Unhealthy"} 0
+ ring_members{name="test",state="ACTIVE",zone=""} 2
+ ring_members{name="test",state="JOINING",zone=""} 0
+ ring_members{name="test",state="LEAVING",zone=""} 0
+ ring_members{name="test",state="PENDING",zone=""} 0
+ ring_members{name="test",state="READONLY",zone=""} 0
+ ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3347,12 +3347,130 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
ring_member_ownership_percent{member="A",name="test"} 1
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
- ring_members{name="test",state="ACTIVE"} 1
- ring_members{name="test",state="JOINING"} 0
- ring_members{name="test",state="LEAVING"} 0
- ring_members{name="test",state="PENDING"} 0
- ring_members{name="test",state="READONLY"} 0
- ring_members{name="test",state="Unhealthy"} 0
+ ring_members{name="test",state="ACTIVE",zone=""} 1
+ ring_members{name="test",state="JOINING",zone=""} 0
+ ring_members{name="test",state="LEAVING",zone=""} 0
+ ring_members{name="test",state="PENDING",zone=""} 0
+ ring_members{name="test",state="READONLY",zone=""} 0
+ ring_members{name="test",state="Unhealthy",zone=""} 0
+ # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
+ # TYPE ring_oldest_member_timestamp gauge
+ ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22
+ ring_oldest_member_timestamp{name="test",state="JOINING"} 0
+ ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
+ ring_oldest_member_timestamp{name="test",state="PENDING"} 0
+ ring_oldest_member_timestamp{name="test",state="READONLY"} 0
+ ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
+ # HELP ring_tokens_owned The number of tokens in the ring owned by the member
+ # TYPE ring_tokens_owned gauge
+ ring_tokens_owned{member="A",name="test"} 2
+ # HELP ring_tokens_total Number of tokens in the ring
+ # TYPE ring_tokens_total gauge
+ ring_tokens_total{name="test"} 2
+ `))
+ assert.NoError(t, err)
+}
+
+func TestUpdateMetricsWithZone(t *testing.T) {
+ cfg := Config{
+ KVStore: kv.Config{},
+ HeartbeatTimeout: 0, // get healthy stats
+ ReplicationFactor: 3,
+ ZoneAwarenessEnabled: true,
+ DetailedMetricsEnabled: true,
+ }
+
+ registry := prometheus.NewRegistry()
+
+ // create the ring to set up metrics, but do not start
+ ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, &MockClient{}, NewDefaultReplicationStrategy(), registry, log.NewNopLogger())
+ require.NoError(t, err)
+
+ ringDesc := Desc{
+ Ingesters: map[string]InstanceDesc{
+ "A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
+ "B": {Addr: "127.0.0.2", Timestamp: 11, Zone: "zone2", Tokens: []uint32{(math.MaxUint32 / 6) * 2, (math.MaxUint32 / 6) * 5}},
+ "C": {Addr: "127.0.0.3", Timestamp: 33, Zone: "zone3", Tokens: []uint32{(math.MaxUint32 / 6) * 3, math.MaxUint32}},
+ },
+ }
+ ring.updateRingState(&ringDesc)
+
+ err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
+ # HELP ring_member_ownership_percent The percent ownership of the ring by member
+ # TYPE ring_member_ownership_percent gauge
+ ring_member_ownership_percent{member="A",name="test"} 0.3333333332557231
+ ring_member_ownership_percent{member="B",name="test"} 0.3333333330228925
+ ring_member_ownership_percent{member="C",name="test"} 0.3333333337213844
+ # HELP ring_members Number of members in the ring
+ # TYPE ring_members gauge
+ ring_members{name="test",state="ACTIVE",zone="zone1"} 1
+ ring_members{name="test",state="ACTIVE",zone="zone2"} 1
+ ring_members{name="test",state="ACTIVE",zone="zone3"} 1
+ ring_members{name="test",state="JOINING",zone="zone1"} 0
+ ring_members{name="test",state="JOINING",zone="zone2"} 0
+ ring_members{name="test",state="JOINING",zone="zone3"} 0
+ ring_members{name="test",state="LEAVING",zone="zone1"} 0
+ ring_members{name="test",state="LEAVING",zone="zone2"} 0
+ ring_members{name="test",state="LEAVING",zone="zone3"} 0
+ ring_members{name="test",state="PENDING",zone="zone1"} 0
+ ring_members{name="test",state="PENDING",zone="zone2"} 0
+ ring_members{name="test",state="PENDING",zone="zone3"} 0
+ ring_members{name="test",state="READONLY",zone="zone1"} 0
+ ring_members{name="test",state="READONLY",zone="zone2"} 0
+ ring_members{name="test",state="READONLY",zone="zone3"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone1"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone2"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone3"} 0
+ # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
+ # TYPE ring_oldest_member_timestamp gauge
+ ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
+ ring_oldest_member_timestamp{name="test",state="JOINING"} 0
+ ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
+ ring_oldest_member_timestamp{name="test",state="PENDING"} 0
+ ring_oldest_member_timestamp{name="test",state="READONLY"} 0
+ ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
+ # HELP ring_tokens_owned The number of tokens in the ring owned by the member
+ # TYPE ring_tokens_owned gauge
+ ring_tokens_owned{member="A",name="test"} 2
+ ring_tokens_owned{member="B",name="test"} 2
+ ring_tokens_owned{member="C",name="test"} 2
+ # HELP ring_tokens_total Number of tokens in the ring
+ # TYPE ring_tokens_total gauge
+ ring_tokens_total{name="test"} 6
+ `))
+ require.NoError(t, err)
+
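+	// After shrinking the ring to a single zone, the zones that lost all of their
+	// members should still be reported with zero values.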
+ ringDescNew := Desc{
+ Ingesters: map[string]InstanceDesc{
+ "A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
+ },
+ }
+ ring.updateRingState(&ringDescNew)
+
+ err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
+ # HELP ring_member_ownership_percent The percent ownership of the ring by member
+ # TYPE ring_member_ownership_percent gauge
+ ring_member_ownership_percent{member="A",name="test"} 1
+ # HELP ring_members Number of members in the ring
+ # TYPE ring_members gauge
+ ring_members{name="test",state="ACTIVE",zone="zone1"} 1
+ ring_members{name="test",state="ACTIVE",zone="zone2"} 0
+ ring_members{name="test",state="ACTIVE",zone="zone3"} 0
+ ring_members{name="test",state="JOINING",zone="zone1"} 0
+ ring_members{name="test",state="JOINING",zone="zone2"} 0
+ ring_members{name="test",state="JOINING",zone="zone3"} 0
+ ring_members{name="test",state="LEAVING",zone="zone1"} 0
+ ring_members{name="test",state="LEAVING",zone="zone2"} 0
+ ring_members{name="test",state="LEAVING",zone="zone3"} 0
+ ring_members{name="test",state="PENDING",zone="zone1"} 0
+ ring_members{name="test",state="PENDING",zone="zone2"} 0
+ ring_members{name="test",state="PENDING",zone="zone3"} 0
+ ring_members{name="test",state="READONLY",zone="zone1"} 0
+ ring_members{name="test",state="READONLY",zone="zone2"} 0
+ ring_members{name="test",state="READONLY",zone="zone3"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone1"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone2"} 0
+ ring_members{name="test",state="Unhealthy",zone="zone3"} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22
diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go
index 862bcc54706..c8d8302e27a 100644
--- a/pkg/ruler/compat.go
+++ b/pkg/ruler/compat.go
@@ -15,13 +15,13 @@ import (
"github.com/prometheus/prometheus/model/metadata"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"
"github.com/cortexproject/cortex/pkg/cortexpb"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/querier"
"github.com/cortexproject/cortex/pkg/querier/stats"
"github.com/cortexproject/cortex/pkg/ring/client"
@@ -171,7 +171,7 @@ func EngineQueryFunc(engine promql.QueryEngine, frontendClient *frontendClient,
// Enforce the max query length.
maxQueryLength := overrides.MaxQueryLength(userID)
if maxQueryLength > 0 {
- expr, err := parser.ParseExpr(qs)
+ expr, err := cortexparser.ParseExpr(qs)
// If failed to parse expression, skip checking select range.
// Fail the query in the engine.
if err == nil {
diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go
index 22d475fe720..70c07233f41 100644
--- a/pkg/ruler/ruler.go
+++ b/pkg/ruler/ruler.go
@@ -29,6 +29,8 @@ import (
"golang.org/x/sync/errgroup"
"github.com/cortexproject/cortex/pkg/cortexpb"
+ "github.com/cortexproject/cortex/pkg/engine"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/ring"
"github.com/cortexproject/cortex/pkg/ring/kv"
"github.com/cortexproject/cortex/pkg/ruler/rulespb"
@@ -172,6 +174,8 @@ type Config struct {
EnableHAEvaluation bool `yaml:"enable_ha_evaluation"`
LivenessCheckTimeout time.Duration `yaml:"liveness_check_timeout"`
+
+ ThanosEngine engine.ThanosEngineConfig `yaml:"thanos_engine"`
}
// Validate config and returns error on failure
@@ -199,6 +203,11 @@ func (cfg *Config) Validate(limits validation.Limits, log log.Logger) error {
if !util.StringsContain(supportedQueryResponseFormats, cfg.QueryResponseFormat) {
return errInvalidQueryResponseFormat
}
+
+ if err := cfg.ThanosEngine.Validate(); err != nil {
+ return err
+ }
+
return nil
}
@@ -208,6 +217,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.GRPCClientConfig.RegisterFlagsWithPrefix("ruler.frontendClient", "", f)
cfg.Ring.RegisterFlags(f)
cfg.Notifier.RegisterFlags(f)
+ cfg.ThanosEngine.RegisterFlagsWithPrefix("ruler.", f)
// Deprecated Flags that will be maintained to avoid user disruption
@@ -1278,7 +1288,7 @@ func (r *Ruler) ruleGroupListToGroupStateDesc(userID string, backupGroups rulesp
}
var ruleDesc *RuleStateDesc
- query, err := parser.ParseExpr(r.GetExpr())
+ query, err := cortexparser.ParseExpr(r.GetExpr())
if err != nil {
return nil, errors.Errorf("failed to parse rule query '%v'", r.GetExpr())
}
diff --git a/pkg/util/promql/promql_test.go b/pkg/util/promql/promql_test.go
index ed35b89c2e3..284bd09ca6c 100644
--- a/pkg/util/promql/promql_test.go
+++ b/pkg/util/promql/promql_test.go
@@ -4,8 +4,9 @@ import (
"testing"
"time"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/stretchr/testify/require"
+
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
)
func TestFindNonOverlapQueryLength(t *testing.T) {
@@ -78,7 +79,7 @@ func TestFindNonOverlapQueryLength(t *testing.T) {
},
} {
t.Run(tc.name, func(t *testing.T) {
- expr, err := parser.ParseExpr(tc.query)
+ expr, err := cortexparser.ParseExpr(tc.query)
require.NoError(t, err)
duration := FindNonOverlapQueryLength(expr, 0, 0, time.Minute*5)
require.Equal(t, tc.expectedLength, duration)
diff --git a/pkg/util/time_test.go b/pkg/util/time_test.go
index 239c4eb5b0b..6bdeb231938 100644
--- a/pkg/util/time_test.go
+++ b/pkg/util/time_test.go
@@ -8,11 +8,11 @@ import (
"testing"
"time"
- "github.com/prometheus/prometheus/promql/parser"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
+ cortexparser "github.com/cortexproject/cortex/pkg/parser"
"github.com/cortexproject/cortex/pkg/util/test"
)
@@ -178,7 +178,7 @@ func TestFindMinMaxTime(t *testing.T) {
for testName, testData := range tests {
t.Run(testName, func(t *testing.T) {
- expr, _ := parser.ParseExpr(testData.query)
+ expr, _ := cortexparser.ParseExpr(testData.query)
url := "/query_range?query=" + testData.query +
"&start=" + strconv.FormatInt(testData.queryStartTime.Truncate(time.Minute).Unix(), 10) +