Skip to content

Commit bf3071d

Browse files
[FR] Add white space checking for KQL parse (#3789)
* Add whitespace checking for KQL parse
* Add unit test for blank space check
* Bump patch version
* Add test cases for newline blank space
* Add additional unit tests
* Update to only walk tree once

---------

Co-authored-by: Terrance DeJesus <[email protected]>
1 parent 1dc3926 commit bf3071d

File tree

5 files changed

+72
-3
lines changed

5 files changed

+72
-3
lines changed

lib/kql/kql/parser.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from kql.errors import KqlParseError
1818
from .ast import * # noqa: F403
19+
from .utils import check_whitespace, collect_token_positions
1920

2021

2122
STRING_FIELDS = ("keyword", "text")
@@ -376,7 +377,13 @@ def lark_parse(text):
376377
walker = BaseKqlParser(text)
377378

378379
try:
379-
return lark_parser.parse(text)
380+
tree = lark_parser.parse(text)
381+
382+
# Check for whitespace around "and" and "or" tokens
383+
lines = text.split('\n')
384+
check_whitespace(collect_token_positions(tree, ["and", "or"]), lines)
385+
386+
return tree
380387
except UnexpectedEOF:
381388
raise KqlParseError("Unexpected EOF", len(walker.lines), len(walker.lines[-1].strip()), walker.lines[-1])
382389
except LarkError as exc:

lib/kql/kql/utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
# or more contributor license agreements. Licensed under the Elastic License
3+
# 2.0; you may not use this file except in compliance with the Elastic License
4+
# 2.0.
5+
6+
import re
7+
8+
from lark import (
9+
Token,
10+
Tree,
11+
)
12+
13+
from kql.errors import KqlParseError
14+
15+
16+
def check_whitespace(token_positions: list[tuple[int, int, str]], lines: list[str]) -> None:
    """Verify that every listed token is delimited by whitespace or a line boundary.

    Args:
        token_positions: ``(line, column, token)`` tuples. ``line`` and ``column``
            are 1-based; they are converted to 0-based indexes below.
        lines: The query text split into individual lines.

    Raises:
        KqlParseError: If a token is directly adjacent to a non-whitespace
            character on either side.
    """
    for line_num, column, token in token_positions:
        line = lines[line_num - 1]
        start = column - 1
        end = start + len(token)

        # The start and the end of a line both count as delimiters, so a token
        # that sits alone on its own line (e.g. '"a"\nor\n"b"') is accepted.
        delimited_before = start == 0 or (start > 0 and line[start - 1].isspace())
        delimited_after = end == len(line) or (end < len(line) and line[end].isspace())
        if delimited_before and delimited_after:
            continue

        raise KqlParseError(
            error_msg=f"Missing whitespace around '{token}' token",
            line=line_num,
            column=column,
            source=line,
            width=len(token),
            trailer=None
        )
43+
44+
45+
def collect_token_positions(tree: Tree, token_list: list[str]) -> list[tuple[int, int, str]]:
    """Collect the positions of the requested tokens found anywhere under ``tree``.

    Args:
        tree: Parse tree whose children are searched, including nested subtrees.
        token_list: Token values to look for; matching is case-insensitive.

    Returns:
        ``(line, column, value)`` tuples for each matching token, in the order
        the tokens appear in the tree (depth-first, left to right).
    """
    # Normalise the wanted values once instead of once per visited child.
    wanted = frozenset(name.lower() for name in token_list)

    token_positions = []
    # Explicit stack instead of recursion; children are pushed reversed so that
    # popping yields them in their original left-to-right order.
    pending = list(reversed(tree.children))
    while pending:
        node = pending.pop()
        if isinstance(node, Token):
            if node.value.lower() in wanted:
                token_positions.append((node.line, node.column, node.value))
        elif isinstance(node, Tree):
            pending.extend(reversed(node.children))
    return token_positions

lib/kql/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "detection-rules-kql"
3-
version = "0.1.8"
3+
version = "0.1.9"
44
description = "Kibana Query Language parser for Elastic Detection Rules"
55
license = {text = "Elastic License v2"}
66
keywords = ["Elastic", "sour", "Detection Rules", "Security", "Elasticsearch", "kql"]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "detection_rules"
3-
version = "1.3.16"
3+
version = "1.3.17"
44
description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine."
55
readme = "README.md"
66
requires-python = ">=3.12"

tests/kuery/test_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,12 @@ def test_optimization(self):
103103
"{'match': {'destination.ip': '169.254.169.254'}}]}}]}}"
104104
)
105105
self.assertEqual(dsl_str, good_case, "DSL string does not match the good case, optimization failed.")
106+
107+
def test_blank_space(self):
    """Check that 'and'/'or' must be whitespace-delimited, including across newlines."""
    # Each malformed query gets its own assertRaises context: once a call raises
    # inside a shared context, any later statements in that context never run.
    missing_whitespace = (
        '"Test-ServiceDaclPermission" or"Update-ExeFunctions"',
        '"Test-ServiceDaclPermission"and "Update-ExeFunctions"',
    )
    for query in missing_whitespace:
        with self.subTest(query=query), self.assertRaises(kql.KqlParseError):
            kql.lark_parse(query)

    # Well-formed queries must parse without raising: a space or a newline on
    # either side of the operator is enough, and "Or"/"And" inside quoted
    # values are not operators.
    well_formed = (
        '"Test-ServiceDaclPermission" or "Update-ExeFunctions"',
        '"Test-ServiceDaclPermission" \nor "Update-ExeFunctions"',
        '"Test-ServiceDaclPermission" or\n "Update-ExeFunctions"',
        '"Test-ServiceDaclPermissionOr" or\n "Update-ExeAndFunctions"',
    )
    for query in well_formed:
        with self.subTest(query=query):
            kql.lark_parse(query)

0 commit comments

Comments
 (0)