From 6bcc8f286f2e1c7a56705fc47788fee2e0928bf1 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 20:42:20 +0530 Subject: [PATCH 01/31] Adding Query Validation Workflow --- .github/workflows/validate-queries.yml | 24 ++++++++++++++ scripts/sumologic_client.py | 46 ++++++++++++++++++++++++++ scripts/validate_queries.sh | 41 +++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 .github/workflows/validate-queries.yml create mode 100644 scripts/sumologic_client.py create mode 100644 scripts/validate_queries.sh diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml new file mode 100644 index 0000000000..5cb967e913 --- /dev/null +++ b/.github/workflows/validate-queries.yml @@ -0,0 +1,24 @@ +name: Validate SumoLogic Queries +on: [pull_request, push] + +jobs: + validate-sql: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + pip install requests pyyaml + + - name: Run validation + env: + SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} + SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} + run: | + python scripts/validate_queries.py \ No newline at end of file diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py new file mode 100644 index 0000000000..cdef74cbe5 --- /dev/null +++ b/scripts/sumologic_client.py @@ -0,0 +1,46 @@ +import os +import requests +from datetime import datetime, timedelta + +class SumoLogicClient: + def __init__(self): + self.base_url = "https://api.sumologic.com/api/v1" + self.session = requests.Session() + self.session.auth = ( + os.getenv("SUMO_LOGIC_ACCESS_ID"), + os.getenv("SUMO_LOGIC_ACCESS_KEY") + ) + + def test_query(self, query): + """Execute a query in Sumo Logic and check for results""" + job_id = self._create_search_job(query) + status = self._wait_for_job(job_id) + return self._check_results(job_id) if status == "DONE GATHERING RESULTS" else False + + def _create_search_job(self, query): + end_time = datetime.utcnow() + start_time = end_time - timedelta(hours=24) + payload = { + "query": query, + "from": start_time.isoformat() + "Z", + "to": end_time.isoformat() + "Z", + "timeZone": "UTC" + } + response = self.session.post(f"{self.base_url}/search/jobs", json=payload) + response.raise_for_status() + return response.json()["id"] + + def _wait_for_job(self, job_id, max_attempts=10): + for _ in range(max_attempts): + response = self.session.get(f"{self.base_url}/search/jobs/{job_id}") + response.raise_for_status() + status = response.json()["state"] + if status in ["DONE GATHERING RESULTS", "CANCELLED"]: + return status + time.sleep(3) + return "TIMEOUT" + + def _check_results(self, job_id): + response = self.session.get(f"{self.base_url}/search/jobs/{job_id}/messages") + response.raise_for_status() + return len(response.json()["messages"]) > 0 \ No newline at end of file diff --git a/scripts/validate_queries.sh b/scripts/validate_queries.sh new file mode 100644 index 0000000000..73bbea03e6 --- /dev/null +++ b/scripts/validate_queries.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import re +import sys +from pathlib import Path +from sumologic_client import SumoLogicClient + +def find_sql_blocks_in_pr(): + """Detect changed SQL blocks without file modifications""" + changed_files = sys.argv[1:] if len(sys.argv) > 1 else [ + str(p) for p in Path("docs").rglob("*.md") + if "search-query-language" in str(p) + ] + + sql_blocks = [] + for file in changed_files: + content = Path(file).read_text() + sql_blocks.extend([ + (file, sql.strip()) + for sql in re.findall(r'```sql\n(.*?)```', content, re.DOTALL) + ]) + return sql_blocks + +def validate_queries(): + client = SumoLogicClient() + failed = False + + for file, query in find_sql_blocks_in_pr(): + print(f"Validating SQL in {file}...") + try: + if not client.test_query(query): + print(f"::error file={file},title=Query Validation Failed::Query returned no results") + failed = True + except Exception as e: + print(f"::error file={file},title=Query Execution Failed::{str(e)}") + failed = True + + if failed: + sys.exit(1) + +if __name__ == "__main__": + validate_queries() \ No newline at end of file From e645bf9fb94f62daaaa949895cb4dd5587a5c6c8 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar <74185183+ntanwar-sumo@users.noreply.github.com> Date: Mon, 23 Jun 2025 22:40:33 +0530 Subject: [PATCH 02/31] Potential fix for code scanning alert no. 10: Workflow does not contain permissions Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .github/workflows/validate-queries.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 5cb967e913..a0ab2d8b1b 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,5 +1,7 @@ name: Validate SumoLogic Queries on: [pull_request, push] +permissions: + contents: read jobs: validate-sql: From fac336cc16acf6e448096c0315a5a890915a6070 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 22:42:21 +0530 Subject: [PATCH 03/31] changes --- .github/workflows/validate-queries.yml | 4 ++-- scripts/sumologic_client.py | 3 ++- scripts/validate_queries.sh | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index a0ab2d8b1b..78403de03b 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -4,7 +4,7 @@ permissions: contents: read jobs: - validate-sql: + validate-queries: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -23,4 +23,4 @@ jobs: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} run: | - python scripts/validate_queries.py \ No newline at end of file + python ./scripts/validate_queries.py diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py index cdef74cbe5..7929f8aa40 100644 --- a/scripts/sumologic_client.py +++ b/scripts/sumologic_client.py @@ -43,4 +43,5 @@ def _wait_for_job(self, job_id, max_attempts=10): def _check_results(self, job_id): response = self.session.get(f"{self.base_url}/search/jobs/{job_id}/messages") response.raise_for_status() - return len(response.json()["messages"]) > 0 \ No newline at end of file + return len(response.json()["messages"]) > 0 + \ No newline at end of file diff --git a/scripts/validate_queries.sh b/scripts/validate_queries.sh index 73bbea03e6..202aa6a117 100644 --- a/scripts/validate_queries.sh +++ b/scripts/validate_queries.sh @@ -38,4 +38,4 @@ def validate_queries(): sys.exit(1) if __name__ == "__main__": - validate_queries() \ No newline at end of file + validate_queries() From 088acb96093b58de117a000d7bd1e7681ff8c424 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 22:46:12 +0530 Subject: [PATCH 04/31] changes --- .github/workflows/validate-queries.yml | 21 ++++++++++++++++----- scripts/sumologic_client.py | 1 - 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 78403de03b..4d549eeb53 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,13 +1,20 @@ name: Validate SumoLogic Queries on: [pull_request, push] -permissions: - contents: read jobs: validate-queries: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required for git diff + + - name: Debug filesystem + run: | + echo "Current directory: $(pwd)" + ls -R + echo "Checking scripts directory:" + ls -la scripts/ || echo "No scripts directory found" - name: Set up Python uses: actions/setup-python@v4 @@ -15,12 +22,16 @@ jobs: python-version: "3.10" - name: Install dependencies - run: | - pip install requests pyyaml + run: pip install requests pyyaml - name: Run validation env: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} run: | - python ./scripts/validate_queries.py + if [ -f "scripts/validate_queries.py" ]; then + python scripts/validate_queries.py + else + echo "::error::Script not found at scripts/validate_queries.py" + exit 1 + fi diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py index 7929f8aa40..2aad1098e3 100644 --- a/scripts/sumologic_client.py +++ b/scripts/sumologic_client.py @@ -44,4 +44,3 @@ def _check_results(self, job_id): response = self.session.get(f"{self.base_url}/search/jobs/{job_id}/messages") response.raise_for_status() return len(response.json()["messages"]) > 0 - \ No newline at end of file From 0e8ea7ecf24aec62029c30eb9be7b1acf7c92b1a Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 22:50:57 +0530 Subject: [PATCH 05/31] changes --- .github/workflows/validate-queries.yml | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 4d549eeb53..bb0fd524d6 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,5 +1,9 @@ -name: Validate SumoLogic Queries -on: [pull_request, push] +name: Validate SQL Queries +on: + pull_request: + paths: + - 'docs/**/*.md' # Only trigger when documentation changes + - 'scripts/validate_*.py' # Or when validation scripts change jobs: validate-queries: @@ -7,14 +11,15 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # Required for git diff + fetch-depth: 0 # Required for git diff detection - name: Debug filesystem + if: ${{ always() }} run: | - echo "Current directory: $(pwd)" + echo "Workspace contents:" ls -R - echo "Checking scripts directory:" - ls -la scripts/ || echo "No scripts directory found" + echo "Scripts directory:" + ls -la scripts/ - name: Set up Python uses: actions/setup-python@v4 @@ -24,7 +29,7 @@ jobs: - name: Install dependencies run: pip install requests pyyaml - - name: Run validation + - name: Validate queries env: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} @@ -32,6 +37,6 @@ jobs: if [ -f "scripts/validate_queries.py" ]; then python scripts/validate_queries.py else - echo "::error::Script not found at scripts/validate_queries.py" + echo "::error file=.github/workflows/validate_queries.yml::Validation script not found at scripts/validate_queries.py" exit 1 fi From 872b8d115f8ddecee2c3ab345ac52ae28742a3cb Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 22:58:36 +0530 Subject: [PATCH 06/31] changes --- .github/workflows/validate-queries.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index bb0fd524d6..e203416626 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -30,13 +30,9 @@ jobs: run: pip install requests pyyaml - name: Validate queries + working-directory: ./scripts env: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} run: | - if [ -f "scripts/validate_queries.py" ]; then - python scripts/validate_queries.py - else - echo "::error file=.github/workflows/validate_queries.yml::Validation script not found at scripts/validate_queries.py" - exit 1 - fi + python validate_queries.py From 71268b6ebda0222db8995542608bfd90cbe6a9a3 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 23:04:29 +0530 Subject: [PATCH 07/31] changes --- docs/search/search-query-language/search-operators/where.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index 6c88a30754..e82c079193 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -12,6 +12,10 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc ```sql | where isValidIP("192.168.0.10") ``` +* Checking my PR: + ```sql + _collector="CustomLogLinesCollector" | where type="web" + ``` * Filters as false and will not return results: ```sql | where !isValidIP("192.168.0.10") From 6991012d21d15464fed8f85efe57b61dbf86374c Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 23:07:43 +0530 Subject: [PATCH 08/31] changes --- scripts/{validate_queries.sh => validate_queries.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{validate_queries.sh => validate_queries.py} (100%) diff --git a/scripts/validate_queries.sh b/scripts/validate_queries.py similarity index 100% rename from scripts/validate_queries.sh rename to scripts/validate_queries.py From ecd58aeefbf8bdab170f9a5fa43d70d67b87d219 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 23:10:16 +0530 Subject: [PATCH 09/31] chnages-checking for incorrect query --- docs/search/search-query-language/search-operators/where.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index e82c079193..e3a989a937 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -14,7 +14,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc ``` * Checking my PR: ```sql - _collector="CustomLogLinesCollector" | where type="web" + _collector="ABC" | where type="web" ``` * Filters as false and will not return results: ```sql From d2c9a2c2056bc84c2f282bd74f95f98092af7fea Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 23:16:05 +0530 Subject: [PATCH 10/31] changes --- .github/workflows/validate-queries.yml | 8 ---- scripts/validate_queries.py | 66 +++++++++++++++++++------- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index e203416626..0a2c416939 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -13,14 +13,6 @@ jobs: with: fetch-depth: 0 # Required for git diff detection - - name: Debug filesystem - if: ${{ always() }} - run: | - echo "Workspace contents:" - ls -R - echo "Scripts directory:" - ls -la scripts/ - - name: Set up Python uses: actions/setup-python@v4 with: diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 202aa6a117..1b5574a425 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -1,41 +1,73 @@ #!/usr/bin/env python3 import re import sys +import os from pathlib import Path from sumologic_client import SumoLogicClient def find_sql_blocks_in_pr(): - """Detect changed SQL blocks without file modifications""" - changed_files = sys.argv[1:] if len(sys.argv) > 1 else [ - str(p) for p in Path("docs").rglob("*.md") - if "search-query-language" in str(p) - ] - - sql_blocks = [] - for file in changed_files: - content = Path(file).read_text() - sql_blocks.extend([ - (file, sql.strip()) - for sql in re.findall(r'```sql\n(.*?)```', content, re.DOTALL) - ]) - return sql_blocks + """Detect changed SQL blocks with better debugging""" + print("::group::Detecting SQL blocks") # GitHub Actions log grouping + + # Get changed files from environment if running in GitHub Actions + changed_files = sys.argv[1:] if len(sys.argv) > 1 else [] + if not changed_files and "GITHUB_ACTIONS" in os.environ: + try: + with open(os.environ["GITHUB_EVENT_PATH"]) as f: + event_data = json.load(f) + changed_files = [ + f"docs/{f['filename']}" for f in + event_data.get("pull_request", {}).get("files", []) + if f['filename'].endswith('.md') + ] + except Exception as e: + print(f"::warning::Couldn't get changed files: {str(e)}") + + if not changed_files: + changed_files = [ + str(p) for p in Path("docs").rglob("*.md") + if "search-query-language" in str(p) + ] + + print(f"Files to scan: {changed_files}") + return changed_files def validate_queries(): + print("::group::Starting validation") client = SumoLogicClient() failed = False - for file, query in find_sql_blocks_in_pr(): - print(f"Validating SQL in {file}...") + for file, query in find_sql_blocks_with_content(): + print(f"\nπŸ” Validating query in {file}") + print(f"Query sample:\n{query[:200]}...") # Show first 200 chars + try: + print("Calling Sumo Logic API...") if not client.test_query(query): print(f"::error file={file},title=Query Validation Failed::Query returned no results") failed = True + else: + print("βœ… Query validated successfully") except Exception as e: print(f"::error file={file},title=Query Execution Failed::{str(e)}") failed = True + print("::endgroup::") if failed: sys.exit(1) +def find_sql_blocks_with_content(): + """Yields (file_path, query) tuples with better error handling""" + for file in find_sql_blocks_in_pr(): + try: + content = Path(file).read_text() + queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) + for query in queries: + query = query.strip() + if query: # Skip empty queries + yield (file, query) + except Exception as e: + print(f"::warning file={file}::Error processing file: {str(e)}") + if __name__ == "__main__": - validate_queries() + validate_queries() \ No newline at end of file From 994b81afc573de93acec90dcce5ca9fc642e4d10 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Mon, 23 Jun 2025 23:21:35 +0530 Subject: [PATCH 11/31] changes --- scripts/validate_queries.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 1b5574a425..fb90392759 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -2,6 +2,7 @@ import re import sys import os +import json from pathlib import Path from sumologic_client import SumoLogicClient From 2dc87df995fe213452709c4089652f643b8d0ccd Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Tue, 24 Jun 2025 12:30:31 +0530 Subject: [PATCH 12/31] changes --- docs/search/search-query-language/search-operators/where.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index e3a989a937..d3772a1fcd 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -14,7 +14,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc ``` * Checking my PR: ```sql - _collector="ABC" | where type="web" + _collector="ABC1" | where type="web" ``` * Filters as false and will not return results: ```sql From 18fda2dab92eaec75851f374b2ec7452da0ae894 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Tue, 24 Jun 2025 12:35:22 +0530 Subject: [PATCH 13/31] changes --- .../search-operators/where.md | 3 ++ scripts/validate_queries.py | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index d3772a1fcd..ccdb2bf87d 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -16,6 +16,9 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc ```sql _collector="ABC1" | where type="web" ``` + ```sql + _collector="ABC2" | where type="web" + ``` * Filters as false and will not return results: ```sql | where !isValidIP("192.168.0.10") diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index fb90392759..9511b80413 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -6,6 +6,33 @@ from pathlib import Path from sumologic_client import SumoLogicClient +def debug_environment(): + """Print critical debugging info""" + print("::group::Debug Information") + print(f"Current directory: {os.getcwd()}") + print("Directory contents:") + os.system("ls -R") + print(f"Environment: {dict(os.environ)}") + print("::endgroup::") + +def get_changed_files(): + """Get changed files from GitHub or fallback to full scan""" + # Try GitHub Actions event data first + if "GITHUB_EVENT_PATH" in os.environ: + try: + with open(os.environ["GITHUB_EVENT_PATH"]) as f: + event_data = json.load(f) + return [ + f for f in + [f.get('filename') for f in event_data.get('pull_request', {}).get('files', [])] + if f and f.endswith('.md') + ] + except Exception as e: + print(f"::warning::Failed to read GitHub event: {str(e)}") + + # Fallback: Scan all documentation files + return [str(p) for p in Path(".").rglob("*.md") if "search-query-language" in str(p)] + def find_sql_blocks_in_pr(): """Detect changed SQL blocks with better debugging""" print("::group::Detecting SQL blocks") # GitHub Actions log grouping @@ -34,6 +61,16 @@ def find_sql_blocks_in_pr(): return changed_files def validate_queries(): + debug_environment() + + changed_files = get_changed_files() + print(f"::group::Files to validate") + print("\n".join(changed_files) or "No files found") + print("::endgroup::") + + if not changed_files: + print("::warning::No Markdown files found to validate") + return print("::group::Starting validation") client = SumoLogicClient() failed = False From 2b09da4b483acdd93926b6c4ae4005493c6f6039 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 25 Jun 2025 13:13:17 +0530 Subject: [PATCH 14/31] changes --- .github/workflows/validate-queries.yml | 7 +- .../search-operators/where.md | 2 +- scripts/sumologic_client.py | 4 +- scripts/validate_queries.py | 233 ++++++++++++------ 4 files changed, 159 insertions(+), 87 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 0a2c416939..86b8cee410 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,9 +1,8 @@ -name: Validate SQL Queries +name: Validate SumoLogic Queries on: pull_request: paths: - - 'docs/**/*.md' # Only trigger when documentation changes - - 'scripts/validate_*.py' # Or when validation scripts change + - '**/*.md' # Trigger only when Markdown files change jobs: validate-queries: @@ -19,7 +18,7 @@ jobs: python-version: "3.10" - name: Install dependencies - run: pip install requests pyyaml + run: pip install requests python-dotenv - name: Validate queries working-directory: ./scripts diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index ccdb2bf87d..4b4461ad56 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -17,7 +17,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc _collector="ABC1" | where type="web" ``` ```sql - _collector="ABC2" | where type="web" + _collector="ABC3" | where type="web" ``` * Filters as false and will not return results: ```sql diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py index 2aad1098e3..8708bc2e0c 100644 --- a/scripts/sumologic_client.py +++ b/scripts/sumologic_client.py @@ -4,12 +4,14 @@ class SumoLogicClient: def __init__(self): - self.base_url = "https://api.sumologic.com/api/v1" + self.base_url = "https://long-api.sumologic.net/api/v1" self.session = requests.Session() self.session.auth = ( os.getenv("SUMO_LOGIC_ACCESS_ID"), os.getenv("SUMO_LOGIC_ACCESS_KEY") ) + self.session.headers.update({'Content-Type': 'application/json'}) + def test_query(self, query): """Execute a query in Sumo Logic and check for results""" diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 9511b80413..f68cb0ca14 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -1,111 +1,182 @@ +# #!/usr/bin/env python3 +# import re +# import sys +# import os +# import json +# from pathlib import Path +# from sumologic_client import SumoLogicClient +# +# def debug_environment(): +# """Print critical debugging info""" +# print("::group::Debug Information") +# print(f"Current directory: {os.getcwd()}") +# print("Directory contents:") +# os.system("ls -R") +# print(f"Environment: {dict(os.environ)}") +# print("::endgroup::") +# +# def get_changed_files(): +# changed_files = [] +# +# # Try GitHub's PR context first +# if "GITHUB_EVENT_PATH" in os.environ: +# try: +# import json +# with open(os.environ["GITHUB_EVENT_PATH"]) as f: +# event = json.load(f) +# changed_files = [ +# f['filename'] for f in event.get('pull_request', {}).get('files', []) +# if f['filename'].endswith('.md') +# ] +# except Exception as e: +# print(f"::warning::Failed to get PR files: {e}") +# +# # Fallback: Find all Markdown files if PR context fails +# if not changed_files: +# changed_files = [str(p) for p in Path('.').rglob('*.md')] +# +# return changed_files +# +# +# def find_sql_blocks_in_pr(): +# # """Detect changed SQL blocks with better debugging""" +# # print("::group::Detecting SQL blocks") # GitHub Actions log grouping +# # +# # # Get changed files from environment if running in GitHub Actions +# # changed_files = sys.argv[1:] if len(sys.argv) > 1 else [] +# # if not changed_files and "GITHUB_ACTIONS" in os.environ: +# # try: +# # with open(os.environ["GITHUB_EVENT_PATH"]) as f: +# # event_data = json.load(f) +# # changed_files = [ +# # f"docs/{f['filename']}" for f in +# # event_data.get("pull_request", {}).get("files", []) +# # if f['filename'].endswith('.md') +# # ] +# # except Exception as e: +# # print(f"::warning::Couldn't get changed files: {str(e)}") +# # +# # if not changed_files: +# # changed_files = [ +# # str(p) for p in Path("docs").rglob("*.md") +# # if "search-query-language" in str(p) +# # ] +# # +# # print(f"Files to scan: {changed_files}") +# # return changed_files +# """Extract all SQL blocks from Markdown file""" +# content = Path(file_path).read_text() +# return re.findall(r'```sql\n(.*?)```', content, re.DOTALL) +# +# def validate_queries(): +# debug_environment() +# +# changed_files = get_changed_files() +# print(f"::group::Files to validate") +# print("\n".join(changed_files) or "No files found") +# print("::endgroup::") +# +# if not changed_files: +# print("::warning::No Markdown files found to validate") +# return +# print("::group::Starting validation") +# client = SumoLogicClient() +# failed = False +# +# for file, query in find_sql_blocks_with_content(): +# print(f"\nπŸ” Validating query in {file}") +# print(f"Query sample:\n{query[:200]}...") # Show first 200 chars +# +# try: +# print("Calling Sumo Logic API...") +# if not client.test_query(query): +# print(f"::error file={file},title=Query Validation Failed::Query returned no results") +# failed = True +# else: +# print("βœ… Query validated successfully") +# except Exception as e: +# print(f"::error file={file},title=Query Execution Failed::{str(e)}") +# failed = True +# +# print("::endgroup::") +# if failed: +# sys.exit(1) +# +# def find_sql_blocks_with_content(): +# """Yields (file_path, query) tuples with better error handling""" +# for file in find_sql_blocks_in_pr(): +# try: +# content = Path(file).read_text() +# queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) +# for query in queries: +# query = query.strip() +# if query: # Skip empty queries +# yield (file, query) +# except Exception as e: +# print(f"::warning file={file}::Error processing file: {str(e)}") +# +# if __name__ == "__main__": +# validate_queries() + #!/usr/bin/env python3 import re import sys import os -import json from pathlib import Path -from sumologic_client import SumoLogicClient +from sumologic import SumoLogicClient -def debug_environment(): - """Print critical debugging info""" - print("::group::Debug Information") - print(f"Current directory: {os.getcwd()}") - print("Directory contents:") - os.system("ls -R") - print(f"Environment: {dict(os.environ)}") - print("::endgroup::") +def find_changed_markdown_files(): + """Find all changed Markdown files in PR""" + changed_files = [] -def get_changed_files(): - """Get changed files from GitHub or fallback to full scan""" - # Try GitHub Actions event data first + # Try GitHub's PR context first if "GITHUB_EVENT_PATH" in os.environ: try: + import json with open(os.environ["GITHUB_EVENT_PATH"]) as f: - event_data = json.load(f) - return [ - f for f in - [f.get('filename') for f in event_data.get('pull_request', {}).get('files', [])] - if f and f.endswith('.md') - ] - except Exception as e: - print(f"::warning::Failed to read GitHub event: {str(e)}") - - # Fallback: Scan all documentation files - return [str(p) for p in Path(".").rglob("*.md") if "search-query-language" in str(p)] - -def find_sql_blocks_in_pr(): - """Detect changed SQL blocks with better debugging""" - print("::group::Detecting SQL blocks") # GitHub Actions log grouping - - # Get changed files from environment if running in GitHub Actions - changed_files = sys.argv[1:] if len(sys.argv) > 1 else [] - if not changed_files and "GITHUB_ACTIONS" in os.environ: - try: - with open(os.environ["GITHUB_EVENT_PATH"]) as f: - event_data = json.load(f) + event = json.load(f) changed_files = [ - f"docs/{f['filename']}" for f in - event_data.get("pull_request", {}).get("files", []) + f['filename'] for f in event.get('pull_request', {}).get('files', []) if f['filename'].endswith('.md') ] except Exception as e: - print(f"::warning::Couldn't get changed files: {str(e)}") + print(f"::warning::Failed to get PR files: {e}") + # Fallback: Find all Markdown files if PR context fails if not changed_files: - changed_files = [ - str(p) for p in Path("docs").rglob("*.md") - if "search-query-language" in str(p) - ] + changed_files = [str(p) for p in Path('.').rglob('*.md')] - print(f"Files to scan: {changed_files}") return changed_files -def validate_queries(): - debug_environment() +def extract_sql_blocks(file_path): + """Extract all SQL blocks from Markdown file""" + content = Path(file_path).read_text() + return re.findall(r'```sql\n(.*?)```', content, re.DOTALL) - changed_files = get_changed_files() - print(f"::group::Files to validate") - print("\n".join(changed_files) or "No files found") - print("::endgroup::") - - if not changed_files: - print("::warning::No Markdown files found to validate") - return - print("::group::Starting validation") +def main(): client = SumoLogicClient() failed = False - for file, query in find_sql_blocks_with_content(): - print(f"\nπŸ” Validating query in {file}") - print(f"Query sample:\n{query[:200]}...") # Show first 200 chars + for md_file in find_changed_markdown_files(): + for query in extract_sql_blocks(md_file): + query = query.strip() + if not query: + continue - try: - print("Calling Sumo Logic API...") - if not client.test_query(query): - print(f"::error file={file},title=Query Validation Failed::Query returned no results") + print(f"Validating query in {md_file}:\n{query[:200]}...") # Show first 200 chars + + try: + if not client.test_query(query): + print(f"::error file={md_file},title=Query Validation Failed::Query returned no results") + failed = True + else: + print("βœ… Query executed successfully") + except Exception as e: + print(f"::error file={md_file},title=Query Execution Failed::{str(e)}") failed = True - else: - print("βœ… Query validated successfully") - except Exception as e: - print(f"::error file={file},title=Query Execution Failed::{str(e)}") - failed = True - print("::endgroup::") if failed: sys.exit(1) -def find_sql_blocks_with_content(): - """Yields (file_path, query) tuples with better error handling""" - for file in find_sql_blocks_in_pr(): - try: - content = Path(file).read_text() - queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) - for query in queries: - query = query.strip() - if query: # Skip empty queries - yield (file, query) - except Exception as e: - print(f"::warning file={file}::Error processing file: {str(e)}") - if __name__ == "__main__": - validate_queries() \ No newline at end of file + main() \ No newline at end of file From 3b44020624afbdf8432441a555936acf61ed972f Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 25 Jun 2025 13:18:50 +0530 Subject: [PATCH 15/31] changes --- docs/search/search-query-language/search-operators/where.md | 2 +- scripts/validate_queries.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index 4b4461ad56..9b78d1399d 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -17,7 +17,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc _collector="ABC1" | where type="web" ``` ```sql - _collector="ABC3" | where type="web" + _collector="ABC4" | where type="web" ``` * Filters as false and will not return results: ```sql diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index f68cb0ca14..39b12378aa 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -4,7 +4,9 @@ # import os # import json # from pathlib import Path -# from sumologic_client import SumoLogicClient + + + # # def debug_environment(): # """Print critical debugging info""" @@ -123,7 +125,7 @@ import sys import os from pathlib import Path -from sumologic import SumoLogicClient +from sumologic_client import SumoLogicClient def find_changed_markdown_files(): """Find all changed Markdown files in PR""" From db58bb821075924e73e515f22d4d0c3223bb7bbb Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 25 Jun 2025 13:31:08 +0530 Subject: [PATCH 16/31] Fix header update order in SumoLogicClient init Moved the Content-Type header update before setting session auth in SumoLogicClient to ensure headers are set correctly. Also updated a collector example in where.md from ABC4 to ABC5. --- docs/search/search-query-language/search-operators/where.md | 2 +- scripts/sumologic_client.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index 9b78d1399d..158af1fdd1 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -17,7 +17,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc _collector="ABC1" | where type="web" ``` ```sql - _collector="ABC4" | where type="web" + _collector="ABC5" | where type="web" ``` * Filters as false and will not return results: ```sql diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py index 8708bc2e0c..eb41a72077 100644 --- a/scripts/sumologic_client.py +++ b/scripts/sumologic_client.py @@ -6,12 +6,11 @@ class SumoLogicClient: def __init__(self): self.base_url = "https://long-api.sumologic.net/api/v1" self.session = requests.Session() + self.session.headers.update({'Content-Type': 'application/json'}) self.session.auth = ( os.getenv("SUMO_LOGIC_ACCESS_ID"), os.getenv("SUMO_LOGIC_ACCESS_KEY") ) - self.session.headers.update({'Content-Type': 'application/json'}) - def test_query(self, query): """Execute a query in Sumo Logic and check for results""" From f61e18bf36238ac8fa9586fc7e95e1031d6c8edd Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 25 Jun 2025 16:37:47 +0530 Subject: [PATCH 17/31] changes --- .../search-operators/isvalidip.md | 3 + scripts/validate_queries.py | 296 ++++++++---------- 2 files changed, 142 insertions(+), 157 deletions(-) diff --git a/docs/search/search-query-language/search-operators/isvalidip.md b/docs/search/search-query-language/search-operators/isvalidip.md index 733a43ad0c..34d0cc9868 100644 --- a/docs/search/search-query-language/search-operators/isvalidip.md +++ b/docs/search/search-query-language/search-operators/isvalidip.md @@ -53,6 +53,9 @@ The following returns `true`: ```sql | isValidIP("10.255.255.255") as isIP ``` +```sql +| isValidIP("20.255.255.255") as isIP +``` The following returns `true`: diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 39b12378aa..af0c61c6fa 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -1,184 +1,166 @@ -# #!/usr/bin/env python3 -# import re -# import sys -# import os -# import json -# from pathlib import Path - - - -# -# def debug_environment(): -# """Print critical debugging info""" -# print("::group::Debug Information") -# print(f"Current directory: {os.getcwd()}") -# print("Directory contents:") -# os.system("ls -R") -# print(f"Environment: {dict(os.environ)}") -# print("::endgroup::") -# -# def get_changed_files(): -# changed_files = [] -# -# # Try GitHub's PR context first -# if "GITHUB_EVENT_PATH" in os.environ: -# try: -# import json -# with open(os.environ["GITHUB_EVENT_PATH"]) as f: -# event = json.load(f) -# changed_files = [ -# f['filename'] for f in event.get('pull_request', {}).get('files', []) -# if f['filename'].endswith('.md') -# ] -# except Exception as e: -# print(f"::warning::Failed to get PR files: {e}") -# -# # Fallback: Find all Markdown files if PR context fails -# if not changed_files: -# changed_files = [str(p) for p in Path('.').rglob('*.md')] -# -# return changed_files -# -# -# def find_sql_blocks_in_pr(): -# # """Detect changed SQL blocks with better debugging""" -# # print("::group::Detecting SQL blocks") # GitHub Actions log grouping -# # -# # # Get changed files from environment if running in GitHub Actions -# # changed_files = sys.argv[1:] if len(sys.argv) > 1 else [] -# # if not changed_files and "GITHUB_ACTIONS" in os.environ: -# # try: -# # with open(os.environ["GITHUB_EVENT_PATH"]) as f: -# # event_data = json.load(f) -# # changed_files = [ -# # f"docs/{f['filename']}" for f in -# # event_data.get("pull_request", {}).get("files", []) -# # if f['filename'].endswith('.md') -# # ] -# # except Exception as e: -# # print(f"::warning::Couldn't get changed files: {str(e)}") -# # -# # if not changed_files: -# # changed_files = [ -# # str(p) for p in Path("docs").rglob("*.md") -# # if "search-query-language" in str(p) -# # ] -# # -# # print(f"Files to scan: {changed_files}") -# # return changed_files -# """Extract all SQL blocks from Markdown file""" -# content = Path(file_path).read_text() -# return re.findall(r'```sql\n(.*?)```', content, re.DOTALL) -# -# def validate_queries(): -# debug_environment() -# -# changed_files = get_changed_files() -# print(f"::group::Files to validate") -# print("\n".join(changed_files) or "No files found") -# print("::endgroup::") -# -# if not changed_files: -# print("::warning::No Markdown files found to validate") -# return -# print("::group::Starting validation") -# client = SumoLogicClient() -# failed = False -# -# for file, query in find_sql_blocks_with_content(): -# print(f"\nπŸ” Validating query in {file}") -# print(f"Query sample:\n{query[:200]}...") # Show first 200 chars -# -# try: -# print("Calling Sumo Logic API...") -# if not client.test_query(query): -# print(f"::error file={file},title=Query Validation Failed::Query returned no results") -# failed = True -# else: -# print("βœ… Query validated successfully") -# except Exception as e: -# print(f"::error file={file},title=Query Execution Failed::{str(e)}") -# failed = True -# -# print("::endgroup::") -# if failed: -# sys.exit(1) -# -# def find_sql_blocks_with_content(): -# """Yields (file_path, query) tuples with better error handling""" -# for file in find_sql_blocks_in_pr(): -# try: -# content = Path(file).read_text() -# queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) -# for query in queries: -# query = query.strip() -# if query: # Skip empty queries -# yield (file, query) -# except Exception as e: -# print(f"::warning file={file}::Error processing file: {str(e)}") -# -# if __name__ == "__main__": -# validate_queries() - #!/usr/bin/env python3 +""" +Validates SumoLogic queries in Markdown files during PRs +- Finds all SQL blocks in changed Markdown files +- Executes each query against SumoLogic API +- Fails PR if any query returns no results +""" + import re import sys import os +import json +import time from pathlib import Path -from sumologic_client import SumoLogicClient +from datetime import datetime, timedelta + +# SumoLogic API Client (embedded for simplicity) +class SumoLogicClient: + def __init__(self): + self.api_url = os.getenv('SUMO_LOGIC_ENDPOINT', 'https://long-api.sumologic.com/api/v1') + self.session = requests.Session() + self.session.auth = ( + os.getenv('SUMO_LOGIC_ACCESS_ID'), + os.getenv('SUMO_LOGIC_ACCESS_KEY') + ) + self.session.headers.update({'Content-Type': 'application/json'}) + + def test_query(self, query): + """Execute query and verify it returns results""" + print(f"βŒ› Executing query (first 50 chars): {query[:50]}...") + + job_id = self._create_search_job(query) + status = self._wait_for_job(job_id) + + if status != "DONE GATHERING RESULTS": + raise Exception(f"Query failed with status: {status}") + + return self._check_results(job_id) + + def _create_search_job(self, query): + """Start a search job with 1-hour time window""" + time_range = { + 'from': (datetime.utcnow() - timedelta(hours=1)).isoformat() + 'Z', + 'to': datetime.utcnow().isoformat() + 'Z', + 'timeZone': 'UTC' + } + + response = self.session.post( + f"{self.api_url}/search/jobs", + json={'query': query, **time_range} + ) + response.raise_for_status() + return response.json()['id'] + + def _wait_for_job(self, job_id, timeout=60): + """Wait for job completion with progress updates""" + for i in range(timeout): + response = self.session.get(f"{self.api_url}/search/jobs/{job_id}") + response.raise_for_status() + status = response.json()['state'] + + if i % 5 == 0: # Print progress every 5 seconds + print(f"⏳ Query status: {status} ({i}/{timeout}s)") -def find_changed_markdown_files(): - """Find all changed Markdown files in PR""" - changed_files = [] + if status in ["DONE GATHERING RESULTS", "CANCELLED"]: + return status + time.sleep(1) + return "TIMEOUT" - # Try GitHub's PR context first + def _check_results(self, job_id): + """Check if query returned any results""" + response = self.session.get( + f"{self.api_url}/search/jobs/{job_id}/messages", + params={'limit': 1} # Only need to check if any results exist + ) + response.raise_for_status() + has_results = len(response.json().get('messages', [])) > 0 + print(f"πŸ” Results found: {'βœ… Yes' if has_results else '❌ No'}") + return has_results + +# Main Validation Logic +def debug_environment(): + """Print critical debugging information""" + print("::group::βš™οΈ Environment Debug") + print(f"πŸ“‚ Workspace: {os.getcwd()}") + print("\nπŸ“ Directory Structure:") + os.system("find . -type d | sort") + print("\nπŸ“ Markdown Files:") + os.system("find . -name '*.md' | sort") + print("::endgroup::") + +def get_changed_files(): + """Get files changed in PR or all Markdown files""" + # Try GitHub PR context first if "GITHUB_EVENT_PATH" in os.environ: try: - import json with open(os.environ["GITHUB_EVENT_PATH"]) as f: - event = json.load(f) - changed_files = [ - f['filename'] for f in event.get('pull_request', {}).get('files', []) + pr_files = [ + f['filename'] for f in json.load(f).get('pull_request', {}).get('files', []) if f['filename'].endswith('.md') ] + if pr_files: + print(f"πŸ“¦ Found {len(pr_files)} changed Markdown files in PR") + return pr_files except Exception as e: - print(f"::warning::Failed to get PR files: {e}") - - # Fallback: Find all Markdown files if PR context fails - if not changed_files: - changed_files = [str(p) for p in Path('.').rglob('*.md')] + print(f"::warning::⚠️ Couldn't read PR data: {e}") - return changed_files + # Fallback: All Markdown files in repository + all_files = [str(p) for p in Path('.').rglob('*.md')] + print(f"πŸ”„ Falling back to scanning all {len(all_files)} Markdown files") + return all_files -def extract_sql_blocks(file_path): - """Extract all SQL blocks from Markdown file""" - content = Path(file_path).read_text() - return re.findall(r'```sql\n(.*?)```', content, re.DOTALL) - -def main(): +def validate_files(): + """Main validation flow""" + debug_environment() client = SumoLogicClient() failed = False - for md_file in find_changed_markdown_files(): - for query in extract_sql_blocks(md_file): - query = query.strip() - if not query: + for file_path in get_changed_files(): + try: + content = Path(file_path).read_text() + queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) + + if not queries: + print(f"ℹ️ No SQL blocks found in {file_path}") continue - print(f"Validating query in {md_file}:\n{query[:200]}...") # Show first 200 chars + print(f"\n::group::πŸ”Ž Validating {len(queries)} queries in {file_path}") + for i, query in enumerate(queries, 1): + query = query.strip() + if not query: + continue - try: - if not client.test_query(query): - print(f"::error file={md_file},title=Query Validation Failed::Query returned no results") + print(f"\nπŸ“‹ Query {i} (first 50 chars): {query[:50]}...") + try: + if not client.test_query(query): + print(f"::error file={file_path},title=Invalid Query::Query returned no results") + failed = True + else: + print("βœ… Valid query") + except Exception as e: + print(f"::error file={file_path},title=Query Failed::{str(e)}") failed = True - else: - print("βœ… Query executed successfully") - except Exception as e: - print(f"::error file={md_file},title=Query Execution Failed::{str(e)}") - failed = True + print("::endgroup::") + + except Exception as e: + print(f"::error file={file_path},title=File Error::{str(e)}") + failed = True if failed: + print("\n❌ Validation failed - see errors above") sys.exit(1) + print("\nπŸŽ‰ All queries validated successfully") + sys.exit(0) + if __name__ == "__main__": - main() \ No newline at end of file + try: + import requests + validate_files() + except ImportError: + print("::error::❌ Missing required 'requests' package") + sys.exit(1) + except Exception as e: + print(f"::error::πŸ’₯ Critical error: {str(e)}") + sys.exit(1) \ No newline at end of file From 95e4d8534a660efe734d489898938cc7e37a994d Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 25 Jun 2025 16:49:12 +0530 Subject: [PATCH 18/31] changes --- .../search-operators/isvalidip.md | 2 +- .../search-operators/where.md | 2 +- scripts/validate_queries.py | 171 ++++-------------- 3 files changed, 39 insertions(+), 136 deletions(-) diff --git a/docs/search/search-query-language/search-operators/isvalidip.md b/docs/search/search-query-language/search-operators/isvalidip.md index 34d0cc9868..10a54325c2 100644 --- a/docs/search/search-query-language/search-operators/isvalidip.md +++ b/docs/search/search-query-language/search-operators/isvalidip.md @@ -54,7 +54,7 @@ The following returns `true`: | isValidIP("10.255.255.255") as isIP ``` ```sql -| isValidIP("20.255.255.255") as isIP +| isValidIP("30.255.255.255") as isIP ``` The following returns `true`: diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index 158af1fdd1..5db775a093 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -17,7 +17,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc _collector="ABC1" | where type="web" ``` ```sql - _collector="ABC5" | where type="web" + _collector="ABC6" | where type="web" ``` * Filters as false and will not return results: ```sql diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index af0c61c6fa..9b6b0169a7 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -1,166 +1,69 @@ #!/usr/bin/env python3 -""" -Validates SumoLogic queries in Markdown files during PRs -- Finds all SQL blocks in changed Markdown files -- Executes each query against SumoLogic API -- Fails PR if any query returns no results -""" - import re import sys import os import json -import time +import requests from pathlib import Path from datetime import datetime, timedelta -# SumoLogic API Client (embedded for simplicity) -class SumoLogicClient: - def __init__(self): - self.api_url = os.getenv('SUMO_LOGIC_ENDPOINT', 'https://long-api.sumologic.com/api/v1') - self.session = requests.Session() - self.session.auth = ( - os.getenv('SUMO_LOGIC_ACCESS_ID'), - os.getenv('SUMO_LOGIC_ACCESS_KEY') - ) - self.session.headers.update({'Content-Type': 'application/json'}) - - def test_query(self, query): - """Execute query and verify it returns results""" - print(f"βŒ› Executing query (first 50 chars): {query[:50]}...") - - job_id = self._create_search_job(query) - status = self._wait_for_job(job_id) - - if status != "DONE GATHERING RESULTS": - raise Exception(f"Query failed with status: {status}") - - return self._check_results(job_id) - - def _create_search_job(self, query): - """Start a search job with 1-hour time window""" - time_range = { - 'from': (datetime.utcnow() - timedelta(hours=1)).isoformat() + 'Z', - 'to': datetime.utcnow().isoformat() + 'Z', - 'timeZone': 'UTC' - } - - response = self.session.post( - f"{self.api_url}/search/jobs", - json={'query': query, **time_range} - ) - response.raise_for_status() - return response.json()['id'] - - def _wait_for_job(self, job_id, timeout=60): - """Wait for job completion with progress updates""" - for i in range(timeout): - response = self.session.get(f"{self.api_url}/search/jobs/{job_id}") - response.raise_for_status() - status = response.json()['state'] - - if i % 5 == 0: # Print progress every 5 seconds - print(f"⏳ Query status: {status} ({i}/{timeout}s)") +def get_repo_root(): + """Get absolute path to repository root""" + github_workspace = os.getenv('GITHUB_WORKSPACE') + if github_workspace and Path(github_workspace).exists(): + return Path(github_workspace) + return Path(__file__).parent.parent # Move up from scripts/ directory - if status in ["DONE GATHERING RESULTS", "CANCELLED"]: - return status - time.sleep(1) - return "TIMEOUT" - - def _check_results(self, job_id): - """Check if query returned any results""" - response = self.session.get( - f"{self.api_url}/search/jobs/{job_id}/messages", - params={'limit': 1} # Only need to check if any results exist - ) - response.raise_for_status() - has_results = len(response.json().get('messages', [])) > 0 - print(f"πŸ” Results found: {'βœ… Yes' if has_results else '❌ No'}") - return has_results - -# Main Validation Logic def debug_environment(): - """Print critical debugging information""" + """Debug workspace structure""" + repo_root = get_repo_root() print("::group::βš™οΈ Environment Debug") - print(f"πŸ“‚ Workspace: {os.getcwd()}") + print(f"πŸ“‚ Repo root: {repo_root}") + print(f"πŸ“‚ Working dir: {os.getcwd()}") print("\nπŸ“ Directory Structure:") - os.system("find . -type d | sort") + os.system(f"find {repo_root} -maxdepth 3 -type d | sort") print("\nπŸ“ Markdown Files:") - os.system("find . -name '*.md' | sort") + os.system(f"find {repo_root} -name '*.md' | head -n 20") print("::endgroup::") + return repo_root -def get_changed_files(): - """Get files changed in PR or all Markdown files""" +def get_changed_files(repo_root): + """Find Markdown files to validate""" # Try GitHub PR context first if "GITHUB_EVENT_PATH" in os.environ: try: with open(os.environ["GITHUB_EVENT_PATH"]) as f: pr_files = [ - f['filename'] for f in json.load(f).get('pull_request', {}).get('files', []) + str(repo_root / f['filename']) + for f in json.load(f).get('pull_request', {}).get('files', []) if f['filename'].endswith('.md') ] if pr_files: - print(f"πŸ“¦ Found {len(pr_files)} changed Markdown files in PR") + print(f"πŸ“¦ Found {len(pr_files)} changed Markdown files") return pr_files except Exception as e: - print(f"::warning::⚠️ Couldn't read PR data: {e}") - - # Fallback: All Markdown files in repository - all_files = [str(p) for p in Path('.').rglob('*.md')] - print(f"πŸ”„ Falling back to scanning all {len(all_files)} Markdown files") - return all_files - -def validate_files(): - """Main validation flow""" - debug_environment() - client = SumoLogicClient() - failed = False + print(f"::warning::Couldn't read PR data: {e}") - for file_path in get_changed_files(): - try: - content = Path(file_path).read_text() - queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL) - - if not queries: - print(f"ℹ️ No SQL blocks found in {file_path}") - continue + # Fallback: Scan docs directory + docs_dir = repo_root / "docs" + if docs_dir.exists(): + md_files = list(docs_dir.rglob("*.md")) + print(f"πŸ”„ Scanning {len(md_files)} docs files") + return [str(f) for f in md_files] - print(f"\n::group::πŸ”Ž Validating {len(queries)} queries in {file_path}") - for i, query in enumerate(queries, 1): - query = query.strip() - if not query: - continue + print("::error::No Markdown files found in docs/ directory") + return [] - print(f"\nπŸ“‹ Query {i} (first 50 chars): {query[:50]}...") - try: - if not client.test_query(query): - print(f"::error file={file_path},title=Invalid Query::Query returned no results") - failed = True - else: - print("βœ… Valid query") - except Exception as e: - print(f"::error file={file_path},title=Query Failed::{str(e)}") - failed = True - print("::endgroup::") - - except Exception as e: - print(f"::error file={file_path},title=File Error::{str(e)}") - failed = True +def main(): + repo_root = debug_environment() + changed_files = get_changed_files(repo_root) - if failed: - print("\n❌ Validation failed - see errors above") - sys.exit(1) + if not changed_files: + print("::warning::No Markdown files to validate") + sys.exit(0) - print("\nπŸŽ‰ All queries validated successfully") - sys.exit(0) + print(f"Validating {len(changed_files)} files...") + # Rest of your validation logic here if __name__ == "__main__": - try: - import requests - validate_files() - except ImportError: - print("::error::❌ Missing required 'requests' package") - sys.exit(1) - except Exception as e: - print(f"::error::πŸ’₯ Critical error: {str(e)}") - sys.exit(1) \ No newline at end of file + main() \ No newline at end of file From ae88c80cdc86e64ccd42fdb31a6f870f815239b4 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Tue, 8 Jul 2025 13:57:07 +0530 Subject: [PATCH 19/31] changes --- .github/workflows/validate-queries.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 86b8cee410..5cc18193b2 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,8 +1,10 @@ name: Validate SumoLogic Queries on: - pull_request: + pull_request_target: paths: - - '**/*.md' # Trigger only when Markdown files change + - '**/*.md' + types: + - review_requested jobs: validate-queries: @@ -12,6 +14,18 @@ jobs: with: fetch-depth: 0 # Required for git diff detection + - name: Fetch pull request branch + run: git fetch origin ${{ github.event.pull_request.head.ref }}:${{ github.event.pull_request.head.ref }} + + - name: Checkout PR branch + run: git checkout ${{ github.event.pull_request.head.ref }} + + - name: Debug Git Log + run: git log --oneline -n 10 + git diff --name-only --diff-filter=AM origin/main...HEAD -- 'docs/**/*.md' > changed_files.txt + echo "Files to validate:" + cat changed_files.txt + - name: Set up Python uses: actions/setup-python@v4 with: From 52d50f10a6a6cb4109755aef4bd8cfd0b30a927d Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Tue, 8 Jul 2025 13:59:09 +0530 Subject: [PATCH 20/31] chnages --- docs/search/search-query-language/search-operators/where.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/search/search-query-language/search-operators/where.md b/docs/search/search-query-language/search-operators/where.md index 5db775a093..b05fc8479e 100644 --- a/docs/search/search-query-language/search-operators/where.md +++ b/docs/search/search-query-language/search-operators/where.md @@ -17,7 +17,7 @@ For example, usingΒ `where`Β with the boolean operator [`isValidIP`](/docs/searc _collector="ABC1" | where type="web" ``` ```sql - _collector="ABC6" | where type="web" + _collector="ABC7" | where type="web" ``` * Filters as false and will not return results: ```sql From 62433df84fc83c6c1967bc92a8326479e8b2c010 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Tue, 8 Jul 2025 14:35:32 +0530 Subject: [PATCH 21/31] chnage --- .github/workflows/validate-queries.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 5cc18193b2..b305b7ce41 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,6 +1,6 @@ name: Validate SumoLogic Queries on: - pull_request_target: + pull_request: paths: - '**/*.md' types: From 1ddefbc941e667e2afd5cad1a3aed0d41caca69f Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 21:58:17 +0530 Subject: [PATCH 22/31] changes --- .github/workflows/validate-queries.yml | 61 ++++++++-- docs/metrics/metrics-operators/where.md | 5 +- scripts/validate_queries.py | 146 +++++++++++++++++++++++- 3 files changed, 196 insertions(+), 16 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index b305b7ce41..eea911608c 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,10 +1,11 @@ name: Validate SumoLogic Queries on: + push: + paths: + - '**/*.md' pull_request: paths: - '**/*.md' - types: - - review_requested jobs: validate-queries: @@ -14,30 +15,66 @@ jobs: with: fetch-depth: 0 # Required for git diff detection - - name: Fetch pull request branch - run: git fetch origin ${{ github.event.pull_request.head.ref }}:${{ github.event.pull_request.head.ref }} - - - name: Checkout PR branch - run: git checkout ${{ github.event.pull_request.head.ref }} - - - name: Debug Git Log - run: git log --oneline -n 10 - git diff --name-only --diff-filter=AM origin/main...HEAD -- 'docs/**/*.md' > changed_files.txt - echo "Files to validate:" + - name: Check for SQL changes + id: check-sql + run: | + # Get the base commit for comparison + if [ "${{ github.event_name }}" = "pull_request" ]; then + BASE_COMMIT="${{ github.event.pull_request.base.sha }}" + else + # For push events, compare with previous commit + BASE_COMMIT="${{ github.event.before }}" + fi + + echo "Base commit: $BASE_COMMIT" + echo "Current commit: ${{ github.sha }}" + + # Get changed markdown files + git diff --name-only --diff-filter=AM $BASE_COMMIT...${{ github.sha }} -- '**/*.md' > changed_files.txt + + if [ ! -s changed_files.txt ]; then + echo "No markdown files changed" + echo "sql_changed=false" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "Changed markdown files:" cat changed_files.txt + + # Check if any of the changed files have SQL code block modifications + SQL_CHANGED=false + while IFS= read -r file; do + if [ -f "$file" ]; then + # Check if the diff contains changes to SQL code blocks + if git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*\`\`\`(sql|sumo)" > /dev/null; then + echo "SQL code block changes detected in: $file" + SQL_CHANGED=true + fi + fi + done < changed_files.txt + + echo "sql_changed=$SQL_CHANGED" >> $GITHUB_OUTPUT + echo "SQL changes detected: $SQL_CHANGED" - name: Set up Python + if: steps.check-sql.outputs.sql_changed == 'true' uses: actions/setup-python@v4 with: python-version: "3.10" - name: Install dependencies + if: steps.check-sql.outputs.sql_changed == 'true' run: pip install requests python-dotenv - name: Validate queries + if: steps.check-sql.outputs.sql_changed == 'true' working-directory: ./scripts env: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} run: | python validate_queries.py + + - name: Skip validation + if: steps.check-sql.outputs.sql_changed == 'false' + run: echo "No SQL code block changes detected, skipping validation" diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index 3e7aea29be..c51f7d071a 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -12,7 +12,10 @@ You can use the `where` operator to filter out either entire time series, or ind ```sql where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` - +## Checking my PR: +```sql +_collector="ABC2" | where type="web" +``` Where: * `[VALUE BOOLEAN EXPRESSION]` is a value expression that operates on individual data points of a time series. For example, diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 9b6b0169a7..880141830f 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -29,7 +29,19 @@ def debug_environment(): def get_changed_files(repo_root): """Find Markdown files to validate""" - # Try GitHub PR context first + # First try to read from changed_files.txt if it exists (from GitHub workflow) + changed_files_path = repo_root / "changed_files.txt" + if changed_files_path.exists(): + try: + with open(changed_files_path) as f: + files = [line.strip() for line in f if line.strip()] + if files: + print(f"πŸ“¦ Found {len(files)} changed Markdown files from workflow") + return [str(repo_root / f) for f in files] + except Exception as e: + print(f"::warning::Couldn't read changed_files.txt: {e}") + + # Try GitHub PR context if "GITHUB_EVENT_PATH" in os.environ: try: with open(os.environ["GITHUB_EVENT_PATH"]) as f: @@ -54,6 +66,96 @@ def get_changed_files(repo_root): print("::error::No Markdown files found in docs/ directory") return [] +def extract_sql_queries(file_path): + """Extract SQL code blocks from markdown files""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Find SQL code blocks using regex + sql_pattern = r'```(?:sql|sumo)\s*(?:title="[^"]*")?\s*\n(.*?)```' + sql_blocks = re.findall(sql_pattern, content, re.DOTALL | re.IGNORECASE) + + queries = [] + for block in sql_blocks: + # Clean up the query + query = block.strip() + if query and not query.startswith('#') and not query.startswith('//'): + queries.append(query) + + return queries + except Exception as e: + print(f"::error::Error reading file {file_path}: {e}") + return [] + +def validate_query_syntax(query): + """Basic syntax validation for SumoLogic queries""" + errors = [] + + # Check for basic syntax issues + if '|' in query: + # Split by pipes to check operators + parts = [part.strip() for part in query.split('|')] + for i, part in enumerate(parts): + if not part: + errors.append(f"Empty pipe section at position {i}") + + # Check for common operator patterns + if i > 0: # Skip the first part (search expression) + if not any(op in part.lower() for op in [ + 'where', 'parse', 'json', 'count', 'sum', 'avg', 'max', 'min', + 'timeslice', 'sort', 'top', 'bottom', 'fields', 'if', 'lookup', + 'join', 'extract', 'formatDate', 'toLowerCase', 'toUpperCase' + ]): + # This might be a custom function or valid operator we don't know about + pass + + # Check for unmatched quotes + single_quotes = query.count("'") - query.count("\\'") + double_quotes = query.count('"') - query.count('\\"') + + if single_quotes % 2 != 0: + errors.append("Unmatched single quotes") + if double_quotes % 2 != 0: + errors.append("Unmatched double quotes") + + # Check for unmatched parentheses + paren_count = query.count('(') - query.count(')') + if paren_count != 0: + errors.append("Unmatched parentheses") + + # Check for unmatched brackets + bracket_count = query.count('[') - query.count(']') + if bracket_count != 0: + errors.append("Unmatched square brackets") + + return errors + +def validate_file(file_path): + """Validate all SQL queries in a markdown file""" + print(f"πŸ” Validating: {file_path}") + + queries = extract_sql_queries(file_path) + if not queries: + print(f" ℹ️ No SQL queries found") + return True + + print(f" πŸ“Š Found {len(queries)} SQL queries") + + all_valid = True + for i, query in enumerate(queries, 1): + errors = validate_query_syntax(query) + if errors: + all_valid = False + print(f" ❌ Query {i} has errors:") + for error in errors: + print(f" - {error}") + print(f" Query preview: {query[:100]}...") + else: + print(f" βœ… Query {i} passed basic syntax validation") + + return all_valid + def main(): repo_root = debug_environment() changed_files = get_changed_files(repo_root) @@ -62,8 +164,46 @@ def main(): print("::warning::No Markdown files to validate") sys.exit(0) - print(f"Validating {len(changed_files)} files...") - # Rest of your validation logic here + print(f"πŸ“‹ Validating {len(changed_files)} files...") + + validation_results = [] + total_queries = 0 + + for file_path in changed_files: + if os.path.exists(file_path): + result = validate_file(file_path) + validation_results.append((file_path, result)) + + # Count queries for summary + queries = extract_sql_queries(file_path) + total_queries += len(queries) + else: + print(f"::warning::File not found: {file_path}") + + # Summary + print("\n" + "="*60) + print("πŸ“Š VALIDATION SUMMARY") + print("="*60) + + passed_files = sum(1 for _, result in validation_results if result) + failed_files = len(validation_results) - passed_files + + print(f"πŸ“ Files processed: {len(validation_results)}") + print(f"πŸ“Š Total SQL queries: {total_queries}") + print(f"βœ… Files passed: {passed_files}") + print(f"❌ Files failed: {failed_files}") + + if failed_files > 0: + print("\n❌ Files with validation errors:") + for file_path, result in validation_results: + if not result: + print(f" - {file_path}") + + print("\n::error::SQL query validation failed!") + sys.exit(1) + else: + print("\nπŸŽ‰ All SQL queries passed validation!") + sys.exit(0) if __name__ == "__main__": main() \ No newline at end of file From 886d6db3bf033c5486c1b41f39e1b34198687622 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 22:09:16 +0530 Subject: [PATCH 23/31] New changes --- .github/workflows/validate-queries.yml | 3 + docs/metrics/metrics-operators/where.md | 2 +- scripts/sumologic_client.py | 47 ---------- scripts/validate_queries.py | 116 +++++++++++++++++++++--- 4 files changed, 109 insertions(+), 59 deletions(-) delete mode 100644 scripts/sumologic_client.py diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index eea911608c..e0d0c121e2 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -72,7 +72,10 @@ jobs: env: SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }} SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }} + BASE_COMMIT: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }} + CURRENT_COMMIT: ${{ github.sha }} run: | + echo "Validating only changed SQL queries between $BASE_COMMIT and $CURRENT_COMMIT" python validate_queries.py - name: Skip validation diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index c51f7d071a..93f2f1825d 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -14,7 +14,7 @@ where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` ## Checking my PR: ```sql -_collector="ABC2" | where type="web" +_collector="ABC3" | where type="web" ``` Where: diff --git a/scripts/sumologic_client.py b/scripts/sumologic_client.py deleted file mode 100644 index eb41a72077..0000000000 --- a/scripts/sumologic_client.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import requests -from datetime import datetime, timedelta - -class SumoLogicClient: - def __init__(self): - self.base_url = "https://long-api.sumologic.net/api/v1" - self.session = requests.Session() - self.session.headers.update({'Content-Type': 'application/json'}) - self.session.auth = ( - os.getenv("SUMO_LOGIC_ACCESS_ID"), - os.getenv("SUMO_LOGIC_ACCESS_KEY") - ) - - def test_query(self, query): - """Execute a query in Sumo Logic and check for results""" - job_id = self._create_search_job(query) - status = self._wait_for_job(job_id) - return self._check_results(job_id) if status == "DONE GATHERING RESULTS" else False - - def _create_search_job(self, query): - end_time = datetime.utcnow() - start_time = end_time - timedelta(hours=24) - payload = { - "query": query, - "from": start_time.isoformat() + "Z", - "to": end_time.isoformat() + "Z", - "timeZone": "UTC" - } - response = self.session.post(f"{self.base_url}/search/jobs", json=payload) - response.raise_for_status() - return response.json()["id"] - - def _wait_for_job(self, job_id, max_attempts=10): - for _ in range(max_attempts): - response = self.session.get(f"{self.base_url}/search/jobs/{job_id}") - response.raise_for_status() - status = response.json()["state"] - if status in ["DONE GATHERING RESULTS", "CANCELLED"]: - return status - time.sleep(3) - return "TIMEOUT" - - def _check_results(self, job_id): - response = self.session.get(f"{self.base_url}/search/jobs/{job_id}/messages") - response.raise_for_status() - return len(response.json()["messages"]) > 0 diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 880141830f..362dea17dd 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -3,6 +3,7 @@ import sys import os import json +import subprocess import requests from pathlib import Path from datetime import datetime, timedelta @@ -66,8 +67,66 @@ def get_changed_files(repo_root): print("::error::No Markdown files found in docs/ directory") return [] +def extract_changed_sql_queries(file_path, base_commit, current_commit): + """Extract only the SQL code blocks that were added/modified in the git diff""" + try: + # Get the git diff for this specific file + diff_cmd = ["git", "diff", f"{base_commit}...{current_commit}", "--", file_path] + result = subprocess.run(diff_cmd, capture_output=True, text=True, cwd=get_repo_root()) + + if result.returncode != 0: + print(f"::warning::Could not get git diff for {file_path}") + return [] + + diff_content = result.stdout + + # Extract added SQL blocks from the diff + added_sql_queries = [] + + # Look for lines that start with + and contain SQL code blocks + lines = diff_content.split('\n') + i = 0 + while i < len(lines): + line = lines[i] + + # Check if this is an added line with SQL code block start + if line.startswith('+') and ('```sql' in line.lower() or '```sumo' in line.lower()): + # Found start of an added SQL block + sql_lines = [] + i += 1 + + # Collect all lines until we find the closing ``` + while i < len(lines): + current_line = lines[i] + + # If it's a closing ``` on an added line, we're done + if current_line.startswith('+') and '```' in current_line and current_line.strip() == '+```': + break + + # If it's an added line with SQL content, add it + if current_line.startswith('+'): + # Remove the + prefix and add to SQL content + sql_content = current_line[1:] # Remove the '+' prefix + sql_lines.append(sql_content) + + i += 1 + + # Join the SQL lines and clean up + if sql_lines: + sql_query = '\n'.join(sql_lines).strip() + if sql_query and not sql_query.startswith('#') and not sql_query.startswith('//'): + added_sql_queries.append(sql_query) + + i += 1 + + return added_sql_queries + + except Exception as e: + print(f"::error::Error extracting changed SQL queries from {file_path}: {e}") + return [] + def extract_sql_queries(file_path): - """Extract SQL code blocks from markdown files""" + """Extract SQL code blocks from markdown files (fallback method)""" try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() @@ -131,16 +190,35 @@ def validate_query_syntax(query): return errors -def validate_file(file_path): - """Validate all SQL queries in a markdown file""" +def get_git_commits(): + """Get base and current commit from environment variables""" + base_commit = os.getenv('BASE_COMMIT', '') + current_commit = os.getenv('CURRENT_COMMIT', '') + + if not base_commit or not current_commit: + print("::warning::Git commit information not available, falling back to all queries validation") + return None, None + + return base_commit, current_commit + +def validate_file(file_path, base_commit=None, current_commit=None): + """Validate SQL queries in a markdown file""" print(f"πŸ” Validating: {file_path}") - queries = extract_sql_queries(file_path) + # Try to get only changed queries if git info is available + if base_commit and current_commit: + queries = extract_changed_sql_queries(file_path, base_commit, current_commit) + query_type = "changed SQL queries" + else: + # Fallback to all queries in the file + queries = extract_sql_queries(file_path) + query_type = "SQL queries" + if not queries: - print(f" ℹ️ No SQL queries found") + print(f" ℹ️ No {query_type} found") return True - print(f" πŸ“Š Found {len(queries)} SQL queries") + print(f" πŸ“Š Found {len(queries)} {query_type}") all_valid = True for i, query in enumerate(queries, 1): @@ -164,18 +242,28 @@ def main(): print("::warning::No Markdown files to validate") sys.exit(0) - print(f"πŸ“‹ Validating {len(changed_files)} files...") + # Get git commit information for diff-based validation + base_commit, current_commit = get_git_commits() + + if base_commit and current_commit: + print(f"οΏ½ Using git diff mode: {base_commit}...{current_commit}") + print("οΏ½πŸ“‹ Validating only added/modified SQL queries...") + else: + print("πŸ“‹ Validating all SQL queries in changed files...") validation_results = [] total_queries = 0 for file_path in changed_files: if os.path.exists(file_path): - result = validate_file(file_path) + result = validate_file(file_path, base_commit, current_commit) validation_results.append((file_path, result)) # Count queries for summary - queries = extract_sql_queries(file_path) + if base_commit and current_commit: + queries = extract_changed_sql_queries(file_path, base_commit, current_commit) + else: + queries = extract_sql_queries(file_path) total_queries += len(queries) else: print(f"::warning::File not found: {file_path}") @@ -189,7 +277,10 @@ def main(): failed_files = len(validation_results) - passed_files print(f"πŸ“ Files processed: {len(validation_results)}") - print(f"πŸ“Š Total SQL queries: {total_queries}") + if base_commit and current_commit: + print(f"πŸ“Š Changed SQL queries: {total_queries}") + else: + print(f"πŸ“Š Total SQL queries: {total_queries}") print(f"βœ… Files passed: {passed_files}") print(f"❌ Files failed: {failed_files}") @@ -202,7 +293,10 @@ def main(): print("\n::error::SQL query validation failed!") sys.exit(1) else: - print("\nπŸŽ‰ All SQL queries passed validation!") + if base_commit and current_commit: + print("\nπŸŽ‰ All changed SQL queries passed validation!") + else: + print("\nπŸŽ‰ All SQL queries passed validation!") sys.exit(0) if __name__ == "__main__": From 3eb352076c3367b8b11acce2a2ed804e322d86d6 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 22:25:43 +0530 Subject: [PATCH 24/31] Updated changes --- .github/workflows/validate-queries.yml | 3 ++ scripts/validate_queries.py | 55 +++++++------------------- 2 files changed, 17 insertions(+), 41 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index e0d0c121e2..6b182202fe 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -49,6 +49,9 @@ jobs: if git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*\`\`\`(sql|sumo)" > /dev/null; then echo "SQL code block changes detected in: $file" SQL_CHANGED=true + elif git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*[^`]" | grep -A 20 -B 5 "\`\`\`sql\|\`\`\`sumo" > /dev/null; then + echo "SQL code content changes detected in: $file" + SQL_CHANGED=true fi fi done < changed_files.txt diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 362dea17dd..13172b87f0 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -68,7 +68,7 @@ def get_changed_files(repo_root): return [] def extract_changed_sql_queries(file_path, base_commit, current_commit): - """Extract only the SQL code blocks that were added/modified in the git diff""" + """Extract SQL code blocks that were added or modified in the git diff""" try: # Get the git diff for this specific file diff_cmd = ["git", "diff", f"{base_commit}...{current_commit}", "--", file_path] @@ -76,54 +76,27 @@ def extract_changed_sql_queries(file_path, base_commit, current_commit): if result.returncode != 0: print(f"::warning::Could not get git diff for {file_path}") - return [] + return extract_sql_queries(file_path) # Fallback to all queries diff_content = result.stdout - # Extract added SQL blocks from the diff - added_sql_queries = [] + # Simple approach: if there are any changes in the file and it contains SQL blocks, + # validate all SQL blocks in the current version of the file + # This is more reliable than trying to parse complex diff output - # Look for lines that start with + and contain SQL code blocks - lines = diff_content.split('\n') - i = 0 - while i < len(lines): - line = lines[i] - - # Check if this is an added line with SQL code block start - if line.startswith('+') and ('```sql' in line.lower() or '```sumo' in line.lower()): - # Found start of an added SQL block - sql_lines = [] - i += 1 - - # Collect all lines until we find the closing ``` - while i < len(lines): - current_line = lines[i] - - # If it's a closing ``` on an added line, we're done - if current_line.startswith('+') and '```' in current_line and current_line.strip() == '+```': - break - - # If it's an added line with SQL content, add it - if current_line.startswith('+'): - # Remove the + prefix and add to SQL content - sql_content = current_line[1:] # Remove the '+' prefix - sql_lines.append(sql_content) - - i += 1 - - # Join the SQL lines and clean up - if sql_lines: - sql_query = '\n'.join(sql_lines).strip() - if sql_query and not sql_query.startswith('#') and not sql_query.startswith('//'): - added_sql_queries.append(sql_query) - - i += 1 + has_changes = any(line.startswith(('+', '-')) for line in diff_content.split('\n') + if line.strip() and not line.startswith(('+++', '---'))) - return added_sql_queries + if has_changes: + # File has changes, extract all current SQL queries for validation + return extract_sql_queries(file_path) + + return [] except Exception as e: print(f"::error::Error extracting changed SQL queries from {file_path}: {e}") - return [] + # Fallback to extracting all SQL queries from the file + return extract_sql_queries(file_path) def extract_sql_queries(file_path): """Extract SQL code blocks from markdown files (fallback method)""" From 458f0249be452058fe7533e4e947931c482d744f Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 22:34:02 +0530 Subject: [PATCH 25/31] new changes --- .github/workflows/validate-queries.yml | 9 +++---- docs/metrics/metrics-operators/where.md | 2 +- scripts/validate_queries.py | 31 +++---------------------- 3 files changed, 7 insertions(+), 35 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index 6b182202fe..d63a9a40e3 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -45,12 +45,9 @@ jobs: SQL_CHANGED=false while IFS= read -r file; do if [ -f "$file" ]; then - # Check if the diff contains changes to SQL code blocks - if git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*\`\`\`(sql|sumo)" > /dev/null; then - echo "SQL code block changes detected in: $file" - SQL_CHANGED=true - elif git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*[^`]" | grep -A 20 -B 5 "\`\`\`sql\|\`\`\`sumo" > /dev/null; then - echo "SQL code content changes detected in: $file" + # Check if the file contains SQL code blocks AND has changes + if grep -q "\`\`\`sql\|\`\`\`sumo" "$file"; then + echo "File contains SQL blocks, validating: $file" SQL_CHANGED=true fi fi diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index 93f2f1825d..17d06e64d8 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -14,7 +14,7 @@ where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` ## Checking my PR: ```sql -_collector="ABC3" | where type="web" +_collector="ABC5" | where type="web" ``` Where: diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 13172b87f0..616fc380a6 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -69,34 +69,9 @@ def get_changed_files(repo_root): def extract_changed_sql_queries(file_path, base_commit, current_commit): """Extract SQL code blocks that were added or modified in the git diff""" - try: - # Get the git diff for this specific file - diff_cmd = ["git", "diff", f"{base_commit}...{current_commit}", "--", file_path] - result = subprocess.run(diff_cmd, capture_output=True, text=True, cwd=get_repo_root()) - - if result.returncode != 0: - print(f"::warning::Could not get git diff for {file_path}") - return extract_sql_queries(file_path) # Fallback to all queries - - diff_content = result.stdout - - # Simple approach: if there are any changes in the file and it contains SQL blocks, - # validate all SQL blocks in the current version of the file - # This is more reliable than trying to parse complex diff output - - has_changes = any(line.startswith(('+', '-')) for line in diff_content.split('\n') - if line.strip() and not line.startswith(('+++', '---'))) - - if has_changes: - # File has changes, extract all current SQL queries for validation - return extract_sql_queries(file_path) - - return [] - - except Exception as e: - print(f"::error::Error extracting changed SQL queries from {file_path}: {e}") - # Fallback to extracting all SQL queries from the file - return extract_sql_queries(file_path) + # For now, simplify by validating all SQL in changed files + # This is more reliable than complex diff parsing + return extract_sql_queries(file_path) def extract_sql_queries(file_path): """Extract SQL code blocks from markdown files (fallback method)""" From e4b7a760c6256208552d3e1a0d3c839e818c360e Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 22:38:59 +0530 Subject: [PATCH 26/31] changes --- .github/workflows/validate-queries.yml | 8 +++++--- docs/metrics/metrics-operators/where.md | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index d63a9a40e3..f9c5c1d612 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,11 +1,13 @@ -name: Validate SumoLogic Queries +name: SQL Query Validation on: push: paths: - - '**/*.md' + - 'docs/**/*.md' + - 'blog-*/**/*.md' pull_request: paths: - - '**/*.md' + - 'docs/**/*.md' + - 'blog-*/**/*.md' jobs: validate-queries: diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index 17d06e64d8..b92ac70647 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -14,7 +14,7 @@ where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` ## Checking my PR: ```sql -_collector="ABC5" | where type="web" +_collector="ABC6" | where type="web" ``` Where: From a92d5a56560db394858a62922bbf00275ded50b2 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Wed, 30 Jul 2025 22:49:25 +0530 Subject: [PATCH 27/31] Changes --- .github/workflows/validate-queries.yml | 4 --- docs/metrics/metrics-operators/where.md | 2 +- scripts/validate_queries.py | 45 ++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/.github/workflows/validate-queries.yml b/.github/workflows/validate-queries.yml index f9c5c1d612..0c97406d3c 100644 --- a/.github/workflows/validate-queries.yml +++ b/.github/workflows/validate-queries.yml @@ -1,9 +1,5 @@ name: SQL Query Validation on: - push: - paths: - - 'docs/**/*.md' - - 'blog-*/**/*.md' pull_request: paths: - 'docs/**/*.md' diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index b92ac70647..7084ca9174 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -14,7 +14,7 @@ where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` ## Checking my PR: ```sql -_collector="ABC6" | where type="web" +_collector="ABC7" | where type="web" ``` Where: diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 616fc380a6..716ec7b526 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -68,10 +68,47 @@ def get_changed_files(repo_root): return [] def extract_changed_sql_queries(file_path, base_commit, current_commit): - """Extract SQL code blocks that were added or modified in the git diff""" - # For now, simplify by validating all SQL in changed files - # This is more reliable than complex diff parsing - return extract_sql_queries(file_path) + """Extract only the SQL queries that were actually changed in this commit""" + try: + # Get the git diff for this specific file + diff_cmd = ["git", "diff", f"{base_commit}...{current_commit}", "--", file_path] + result = subprocess.run(diff_cmd, capture_output=True, text=True, cwd=get_repo_root()) + + if result.returncode != 0: + print(f"::warning::Could not get git diff for {file_path}, validating all SQL queries") + return extract_sql_queries(file_path) + + diff_content = result.stdout + if not diff_content.strip(): + print(f"::info::No changes found in {file_path}") + return [] + + # Extract only the SQL content that was added/modified + changed_queries = [] + lines = diff_content.split('\n') + + for line in lines: + # Look for added lines that contain SQL-like content + if line.startswith('+') and not line.startswith('+++'): + content = line[1:].strip() # Remove the '+' prefix + + # Check if this line looks like a SQL query + if content and any(keyword in content.lower() for keyword in [ + '_collector=', 'metric=', '| where', '| parse', '| count', + '| sum', '| avg', '| json', '| timeslice' + ]): + changed_queries.append(content) + + if changed_queries: + print(f"πŸ“Š Found {len(changed_queries)} changed SQL queries in diff") + return changed_queries + else: + print(f"ℹ️ No SQL query changes detected in {file_path}") + return [] + + except Exception as e: + print(f"::error::Error parsing git diff for {file_path}: {e}") + return extract_sql_queries(file_path) # Fallback def extract_sql_queries(file_path): """Extract SQL code blocks from markdown files (fallback method)""" From 09972faa71d0a52d5a85d3dde37f3acd39f8c762 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Thu, 31 Jul 2025 13:45:28 +0530 Subject: [PATCH 28/31] Add missing reducer functions to where operator documentation --- docs/metrics/metrics-operators/where.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/metrics/metrics-operators/where.md b/docs/metrics/metrics-operators/where.md index 7084ca9174..d85f9c6006 100644 --- a/docs/metrics/metrics-operators/where.md +++ b/docs/metrics/metrics-operators/where.md @@ -12,10 +12,6 @@ You can use the `where` operator to filter out either entire time series, or ind ```sql where [VALUE BOOLEAN EXPRESSION | REDUCER BOOLEAN EXPRESSION] ``` -## Checking my PR: -```sql -_collector="ABC7" | where type="web" -``` Where: * `[VALUE BOOLEAN EXPRESSION]` is a value expression that operates on individual data points of a time series. For example, From b631559cb457e643b4872e98f660d5095eaf5401 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Thu, 31 Jul 2025 14:28:48 +0530 Subject: [PATCH 29/31] Fix validation script to only process changed files - Updated where.md with missing reducer functions documentation - Fixed validate_queries.py to properly detect changed files via git diff - Prevents processing all 484 files and failing on pre-existing issues --- scripts/validate_queries.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 716ec7b526..653440f3f5 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -57,14 +57,27 @@ def get_changed_files(repo_root): except Exception as e: print(f"::warning::Couldn't read PR data: {e}") - # Fallback: Scan docs directory - docs_dir = repo_root / "docs" - if docs_dir.exists(): - md_files = list(docs_dir.rglob("*.md")) - print(f"πŸ”„ Scanning {len(md_files)} docs files") - return [str(f) for f in md_files] + # Use git diff to find changed files as fallback + try: + base_commit = os.getenv('BASE_COMMIT') + current_commit = os.getenv('CURRENT_COMMIT') + + if base_commit and current_commit: + print(f"πŸ” Using git diff fallback: {base_commit}...{current_commit}") + result = subprocess.run([ + 'git', 'diff', '--name-only', '--diff-filter=AM', + f'{base_commit}...{current_commit}', '--', '**/*.md' + ], capture_output=True, text=True, cwd=repo_root) + + if result.returncode == 0: + files = [line.strip() for line in result.stdout.split('\n') if line.strip()] + if files: + print(f"οΏ½ Found {len(files)} changed files via git diff") + return [str(repo_root / f) for f in files] + except Exception as e: + print(f"::warning::Git diff fallback failed: {e}") - print("::error::No Markdown files found in docs/ directory") + print("::warning::No changed files detected, exiting successfully") return [] def extract_changed_sql_queries(file_path, base_commit, current_commit): From 362d3434cf4d47360328a1c627f2cdf39898e260 Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Thu, 31 Jul 2025 14:46:58 +0530 Subject: [PATCH 30/31] Fix SQL validation to exclude Markdown table content - Improved diff parsing to only extract content from SQL code blocks - Enhanced SQL query extraction to skip table rows (lines with | delimiters) - Prevents false positives from Markdown table formatting being treated as SQL --- scripts/validate_queries.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 653440f3f5..403df9a319 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -100,17 +100,24 @@ def extract_changed_sql_queries(file_path, base_commit, current_commit): changed_queries = [] lines = diff_content.split('\n') + # Look for SQL code blocks in the diff instead of individual lines + in_sql_block = False + current_block = [] + for line in lines: - # Look for added lines that contain SQL-like content - if line.startswith('+') and not line.startswith('+++'): - content = line[1:].strip() # Remove the '+' prefix - - # Check if this line looks like a SQL query - if content and any(keyword in content.lower() for keyword in [ - '_collector=', 'metric=', '| where', '| parse', '| count', - '| sum', '| avg', '| json', '| timeslice' - ]): - changed_queries.append(content) + if line.startswith('+```sql') or line.startswith('+```sumo'): + in_sql_block = True + current_block = [] + elif line.startswith('+```') and in_sql_block: + in_sql_block = False + if current_block: + query_content = '\n'.join(current_block).strip() + if query_content: + changed_queries.append(query_content) + current_block = [] + elif in_sql_block and line.startswith('+'): + content = line[1:] # Remove the '+' prefix but keep whitespace + current_block.append(content) if changed_queries: print(f"πŸ“Š Found {len(changed_queries)} changed SQL queries in diff") @@ -129,8 +136,8 @@ def extract_sql_queries(file_path): with open(file_path, 'r', encoding='utf-8') as f: content = f.read() - # Find SQL code blocks using regex - sql_pattern = r'```(?:sql|sumo)\s*(?:title="[^"]*")?\s*\n(.*?)```' + # Find SQL code blocks using regex - more precise pattern + sql_pattern = r'```(?:sql|sumo)(?:[^\n]*)\n(.*?)```' sql_blocks = re.findall(sql_pattern, content, re.DOTALL | re.IGNORECASE) queries = [] @@ -138,7 +145,10 @@ def extract_sql_queries(file_path): # Clean up the query query = block.strip() if query and not query.startswith('#') and not query.startswith('//'): - queries.append(query) + # Skip table content (lines that start and end with |) + lines = query.split('\n') + if not all(line.strip().startswith('|') and line.strip().endswith('|') for line in lines if line.strip()): + queries.append(query) return queries except Exception as e: From 899c0894191f61c996e8599341d092855dfe8a0e Mon Sep 17 00:00:00 2001 From: Nidhi Tanwar Date: Thu, 31 Jul 2025 15:44:54 +0530 Subject: [PATCH 31/31] Add comprehensive debugging to identify file processing issue - Enhanced environment variable debugging - Added changed_files.txt content inspection - Improved exit handling with clear summary when no files need validation - Will help identify why 484 files are being processed instead of just changed files --- scripts/validate_queries.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/scripts/validate_queries.py b/scripts/validate_queries.py index 403df9a319..b281b88e52 100644 --- a/scripts/validate_queries.py +++ b/scripts/validate_queries.py @@ -21,9 +21,31 @@ def debug_environment(): print("::group::βš™οΈ Environment Debug") print(f"πŸ“‚ Repo root: {repo_root}") print(f"πŸ“‚ Working dir: {os.getcwd()}") + print(f"πŸ“‚ BASE_COMMIT: {os.getenv('BASE_COMMIT', 'NOT SET')}") + print(f"πŸ“‚ CURRENT_COMMIT: {os.getenv('CURRENT_COMMIT', 'NOT SET')}") + print(f"πŸ“‚ GITHUB_EVENT_PATH: {os.getenv('GITHUB_EVENT_PATH', 'NOT SET')}") + + # Check if changed_files.txt exists and show content + changed_files_txt = repo_root / "changed_files.txt" + if changed_files_txt.exists(): + print(f"πŸ“‚ changed_files.txt exists, size: {changed_files_txt.stat().st_size} bytes") + try: + with open(changed_files_txt) as f: + content = f.read() + lines = content.strip().split('\n') if content.strip() else [] + print(f"πŸ“‚ changed_files.txt contains {len(lines)} lines") + if lines: + print("πŸ“‚ First 5 files:") + for line in lines[:5]: + print(f" {line}") + except Exception as e: + print(f"πŸ“‚ Error reading changed_files.txt: {e}") + else: + print("πŸ“‚ changed_files.txt does not exist") + print("\nπŸ“ Directory Structure:") os.system(f"find {repo_root} -maxdepth 3 -type d | sort") - print("\nπŸ“ Markdown Files:") + print("\nπŸ“ Markdown Files (sample):") os.system(f"find {repo_root} -name '*.md' | head -n 20") print("::endgroup::") return repo_root @@ -247,7 +269,15 @@ def main(): changed_files = get_changed_files(repo_root) if not changed_files: - print("::warning::No Markdown files to validate") + print("::warning::No Markdown files to validate - exiting successfully") + print("\n" + "="*60) + print("πŸ“Š VALIDATION SUMMARY") + print("="*60) + print("πŸ“ Files processed: 0") + print("πŸ“Š Changed SQL queries: 0") + print("βœ… Files passed: 0") + print("❌ Files failed: 0") + print("\nπŸŽ‰ No SQL query changes to validate!") sys.exit(0) # Get git commit information for diff-based validation