From 887efd9bfd71d212edb8b637dc50a5cc6065f128 Mon Sep 17 00:00:00 2001 From: Frederik Berg Date: Thu, 3 Jul 2025 13:30:26 +0200 Subject: [PATCH 1/3] rebase - allowing any order and structure --- detection_rules/rule.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 0c293141b70..39c3ba5539a 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -935,8 +935,15 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: query_lower = data["query"].lower() # Combine both patterns using an OR operator and compile the regex + # The first part matches the metadata fields in the from clause by allowing one or + # multiple indices and any order of the metadata fields + # The second part matches the stats command with the by clause combined_pattern = re.compile( - r"(from\s+\S+\s+metadata\s+_id,\s*_version,\s*_index)|(\bstats\b.*?\bby\b)", re.DOTALL + r"(from\s+(?:\S+\s*,\s*)*\S+\s+metadata\s+" + r"(_id,\s*_version,\s*_index|_id,\s*_index,\s*_version|_version,\s*_id,\s*_index|" + r"_version,\s*_index,\s*_id|_index,\s*_id,\s*_version|_index,\s*_version,\s*_id))" + r"|(\bstats\b.*?\bby\b)", + re.DOTALL ) # Ensure that non-aggregate queries have metadata @@ -948,9 +955,12 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: ) # Enforce KEEP command for ESQL rules - if "| keep" not in query_lower: + # Match | followed by optional whitespace/newlines and then 'keep' + keep_pattern = re.compile(r"\|\s*keep\b", re.IGNORECASE | re.DOTALL) + if not keep_pattern.search(query_lower): raise ValidationError( - f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query." + f"Rule: {data['name']} does not contain a 'keep' command ->" + f" Add a 'keep' command to the query." ) From 5722e96c9253ad9d328471600ce0526b4266773e Mon Sep 17 00:00:00 2001 From: Frederik Berg Date: Thu, 3 Jul 2025 18:48:49 +0200 Subject: [PATCH 2/3] ruff format --- detection_rules/rule.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 39c3ba5539a..b882f1b032d 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -935,7 +935,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: query_lower = data["query"].lower() # Combine both patterns using an OR operator and compile the regex - # The first part matches the metadata fields in the from clause by allowing one or + # The first part matches the metadata fields in the from clause by allowing one or # multiple indices and any order of the metadata fields # The second part matches the stats command with the by clause combined_pattern = re.compile( @@ -943,7 +943,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: r"(_id,\s*_version,\s*_index|_id,\s*_index,\s*_version|_version,\s*_id,\s*_index|" r"_version,\s*_index,\s*_id|_index,\s*_id,\s*_version|_index,\s*_version,\s*_id))" r"|(\bstats\b.*?\bby\b)", - re.DOTALL + re.DOTALL, ) # Ensure that non-aggregate queries have metadata @@ -959,8 +959,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: keep_pattern = re.compile(r"\|\s*keep\b", re.IGNORECASE | re.DOTALL) if not keep_pattern.search(query_lower): raise ValidationError( - f"Rule: {data['name']} does not contain a 'keep' command ->" - f" Add a 'keep' command to the query." + f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query." ) From c4c717b94a991123cc9ff414d00950811c32762c Mon Sep 17 00:00:00 2001 From: Frederik Berg <83548283+frederikb96@users.noreply.github.com> Date: Sat, 12 Jul 2025 03:10:43 +0200 Subject: [PATCH 3/3] Apply improved regex --- detection_rules/rule.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index b882f1b032d..ceec6ce98e8 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -940,8 +940,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: # The second part matches the stats command with the by clause combined_pattern = re.compile( r"(from\s+(?:\S+\s*,\s*)*\S+\s+metadata\s+" - r"(_id,\s*_version,\s*_index|_id,\s*_index,\s*_version|_version,\s*_id,\s*_index|" - r"_version,\s*_index,\s*_id|_index,\s*_id,\s*_version|_index,\s*_version,\s*_id))" + r"(?:_id|_version|_index)(?:,\s*(?:_id|_version|_index)){2})" r"|(\bstats\b.*?\bby\b)", re.DOTALL, )