# Validates SQL/Sumo query code blocks in Markdown files touched by a pull request.
name: SQL Query Validation
on:
  pull_request:
    paths:
      - 'docs/**/*.md'
      - 'blog-*/**/*.md'

jobs:
  validate-queries:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Required for git diff detection

      - name: Check for SQL changes
        id: check-sql
        # Context values are handed to the script through the environment
        # instead of being expanded into the script text, and every expansion
        # below is quoted.
        env:
          BASE_COMMIT: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
          CURRENT_COMMIT: ${{ github.sha }}
        run: |
          echo "Base commit: $BASE_COMMIT"
          echo "Current commit: $CURRENT_COMMIT"

          # Get changed markdown files (added or modified only).
          # The list is written to the workspace root, where the Python
          # script looks for it.
          git diff --name-only --diff-filter=AM "$BASE_COMMIT...$CURRENT_COMMIT" -- '**/*.md' > changed_files.txt

          if [ ! -s changed_files.txt ]; then
            echo "No markdown files changed"
            echo "sql_changed=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "Changed markdown files:"
          cat changed_files.txt

          # Flag the run if any changed file contains a sql/sumo fenced block.
          # This only checks that such a block exists in the file; narrowing
          # down to the blocks that actually changed is left to the script.
          SQL_CHANGED=false
          while IFS= read -r file; do
            if [ -f "$file" ]; then
              if grep -q "\`\`\`sql\|\`\`\`sumo" "$file"; then
                echo "File contains SQL blocks, validating: $file"
                SQL_CHANGED=true
              fi
            fi
          done < changed_files.txt

          echo "sql_changed=$SQL_CHANGED" >> "$GITHUB_OUTPUT"
          echo "SQL changes detected: $SQL_CHANGED"

      - name: Set up Python
        if: steps.check-sql.outputs.sql_changed == 'true'
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install dependencies
        if: steps.check-sql.outputs.sql_changed == 'true'
        run: pip install requests python-dotenv

      - name: Validate queries
        if: steps.check-sql.outputs.sql_changed == 'true'
        working-directory: ./scripts
        env:
          SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }}
          SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }}
          BASE_COMMIT: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
          CURRENT_COMMIT: ${{ github.sha }}
        run: |
          echo "Validating only changed SQL queries between $BASE_COMMIT and $CURRENT_COMMIT"
          python validate_queries.py

      - name: Skip validation
        if: steps.check-sql.outputs.sql_changed == 'false'
        run: echo "No SQL code block changes detected, skipping validation"
def get_repo_root():
    """Return the path to the repository root.

    The ``GITHUB_WORKSPACE`` environment variable wins when it is set and
    points at an existing path. Otherwise the root is taken to be the parent
    of the directory containing this script.

    Returns:
        pathlib.Path: the workspace path as given in ``GITHUB_WORKSPACE``, or
        the absolute path two levels above this file.
    """
    github_workspace = os.getenv('GITHUB_WORKSPACE')
    if github_workspace:
        workspace = Path(github_workspace)
        if workspace.exists():
            return workspace
    # Resolve first: when __file__ is a bare relative name (script started
    # from its own directory), Path(__file__).parent.parent collapses to "."
    # and would point at the scripts directory instead of its parent.
    return Path(__file__).resolve().parent.parent  # Move up from scripts/ directory
def get_changed_files(repo_root):
    """Return the changed Markdown files to validate.

    Sources are tried in order and the first one that yields files wins:

    1. ``changed_files.txt`` inside ``repo_root`` (one relative path per line).
    2. The JSON document named by ``GITHUB_EVENT_PATH``, reading the
       ``pull_request.files[].filename`` entries that end in ``.md``.
    3. ``git diff --name-only --diff-filter=AM`` between the ``BASE_COMMIT``
       and ``CURRENT_COMMIT`` environment variables, run in ``repo_root``.

    Every failure is reported as a ``::warning::`` line and the next source is
    tried; this function does not raise for unreadable or malformed input.

    Args:
        repo_root: repository root (``pathlib.Path`` or path string).

    Returns:
        list[str]: paths joined onto ``repo_root``; empty when no source
        produced any file.
    """
    repo_root = Path(repo_root)

    # 1) File list prepared by the workflow.
    changed_files_path = repo_root / "changed_files.txt"
    if changed_files_path.exists():
        try:
            with open(changed_files_path, encoding='utf-8') as handle:
                files = [line.strip() for line in handle if line.strip()]
        except (OSError, ValueError) as e:  # ValueError covers decode errors
            print(f"::warning::Couldn't read changed_files.txt: {e}")
        else:
            if files:
                print(f"πŸ“¦ Found {len(files)} changed Markdown files from workflow")
                return [str(repo_root / name) for name in files]

    # 2) GitHub event payload.
    event_path = os.environ.get("GITHUB_EVENT_PATH")
    if event_path:
        try:
            with open(event_path, encoding='utf-8') as handle:
                payload = json.load(handle)
        except (OSError, ValueError) as e:  # ValueError covers JSONDecodeError
            print(f"::warning::Couldn't read PR data: {e}")
        else:
            # Walk the payload defensively: a missing key or an entry of an
            # unexpected shape is skipped instead of discarding every entry.
            pull_request = payload.get('pull_request') if isinstance(payload, dict) else None
            entries = pull_request.get('files') if isinstance(pull_request, dict) else None
            pr_files = [
                str(repo_root / entry['filename'])
                for entry in (entries if isinstance(entries, list) else [])
                if isinstance(entry, dict)
                and isinstance(entry.get('filename'), str)
                and entry['filename'].endswith('.md')
            ]
            if pr_files:
                print(f"πŸ“¦ Found {len(pr_files)} changed Markdown files")
                return pr_files

    # 3) git diff fallback.
    base_commit = os.getenv('BASE_COMMIT')
    current_commit = os.getenv('CURRENT_COMMIT')
    if base_commit and current_commit:
        print(f"πŸ” Using git diff fallback: {base_commit}...{current_commit}")
        try:
            result = subprocess.run([
                'git', 'diff', '--name-only', '--diff-filter=AM',
                f'{base_commit}...{current_commit}', '--', '**/*.md'
            ], capture_output=True, text=True, cwd=repo_root)
        except (OSError, subprocess.SubprocessError) as e:
            print(f"::warning::Git diff fallback failed: {e}")
        else:
            if result.returncode == 0:
                files = [line.strip() for line in result.stdout.split('\n') if line.strip()]
                if files:
                    print(f"οΏ½ Found {len(files)} changed files via git diff")
                    return [str(repo_root / name) for name in files]
            else:
                # Surface git's own message instead of failing silently.
                print(f"::warning::Git diff fallback failed: {result.stderr.strip()}")

    print("::warning::No changed files detected, exiting successfully")
    return []
def validate_query_syntax(query):
    """Run basic structural checks on a SumoLogic query string.

    The query is scanned once, character by character. Text inside single-
    or double-quoted strings is opaque: pipes, brackets and the other quote
    character found there are ignored, and a backslash escapes the character
    that follows it.

    Checks performed:

    * Empty pipe sections (``a | | b`` or a trailing ``|``). A query may
      start with ``|`` - documentation snippets usually show a bare operator
      such as ``| where ...`` - so an empty section at position 0 is allowed.
    * A quoted string that is never closed.
    * Parentheses / square brackets that are left open, or closed before
      they were opened (``)(``).

    Args:
        query: the query text to check.

    Returns:
        list[str]: human-readable error messages; empty when no problem was
        found.
    """
    errors = []

    sections = []      # text between top-level pipes
    current = []       # characters of the section being read
    open_quote = None  # quote character of the string we are inside, if any
    escaped = False    # True when the previous character was a backslash
    depth = {'(': 0, '[': 0}
    stray_closer = {'(': False, '[': False}
    opener_for = {')': '(', ']': '['}

    for ch in query:
        if escaped:
            escaped = False
        elif ch == '\\':
            escaped = True
        elif open_quote is not None:
            if ch == open_quote:
                open_quote = None
        elif ch in ('"', "'"):
            open_quote = ch
        elif ch == '|':
            sections.append(''.join(current))
            current = []
            continue
        elif ch in depth:
            depth[ch] += 1
        elif ch in opener_for:
            opener = opener_for[ch]
            if depth[opener]:
                depth[opener] -= 1
            else:
                # Closer without a matching opener; a plain count would let
                # ")(" slip through.
                stray_closer[opener] = True
        current.append(ch)
    sections.append(''.join(current))

    # More than one section means at least one top-level pipe was seen.
    if len(sections) > 1:
        for i, section in enumerate(sections):
            if i > 0 and not section.strip():
                errors.append(f"Empty pipe section at position {i}")

    # A string still open at the end of the query is an unmatched quote.
    if open_quote == "'":
        errors.append("Unmatched single quotes")
    if open_quote == '"':
        errors.append("Unmatched double quotes")

    if stray_closer['('] or depth['(']:
        errors.append("Unmatched parentheses")

    if stray_closer['['] or depth['[']:
        errors.append("Unmatched square brackets")

    return errors
def main():
    """Validate queries in the changed Markdown files and exit.

    Prints a per-file report followed by a summary, then terminates the
    process: exit status 0 when there was nothing to validate or every file
    passed, 1 when at least one file failed.
    """
    rule = "=" * 60

    def print_summary_header():
        print("\n" + rule)
        print("πŸ“Š VALIDATION SUMMARY")
        print(rule)

    repo_root = debug_environment()
    markdown_paths = get_changed_files(repo_root)

    # Nothing to do: report an all-zero summary and stop.
    if not markdown_paths:
        print("::warning::No Markdown files to validate - exiting successfully")
        print_summary_header()
        for line in (
            "πŸ“ Files processed: 0",
            "πŸ“Š Changed SQL queries: 0",
            "βœ… Files passed: 0",
            "❌ Files failed: 0",
            "\nπŸŽ‰ No SQL query changes to validate!",
        ):
            print(line)
        sys.exit(0)

    # Diff mode needs both commits; otherwise every query in a file is checked.
    base_commit, current_commit = get_git_commits()
    diff_mode = bool(base_commit and current_commit)

    if diff_mode:
        print(f"οΏ½ Using git diff mode: {base_commit}...{current_commit}")
        print("οΏ½πŸ“‹ Validating only added/modified SQL queries...")
    else:
        print("πŸ“‹ Validating all SQL queries in changed files...")

    outcomes = []    # (path, passed) per file that exists on disk
    query_total = 0

    for path in markdown_paths:
        if not os.path.exists(path):
            print(f"::warning::File not found: {path}")
            continue

        outcomes.append((path, validate_file(path, base_commit, current_commit)))

        # NOTE(review): the queries are extracted a second time here only to
        # count them for the summary, so the extraction messages for each
        # file appear twice in the log.
        if diff_mode:
            found = extract_changed_sql_queries(path, base_commit, current_commit)
        else:
            found = extract_sql_queries(path)
        query_total += len(found)

    # Summary
    print_summary_header()

    failing = [path for path, passed in outcomes if not passed]
    passed_count = len(outcomes) - len(failing)

    print(f"πŸ“ Files processed: {len(outcomes)}")
    if diff_mode:
        print(f"πŸ“Š Changed SQL queries: {query_total}")
    else:
        print(f"πŸ“Š Total SQL queries: {query_total}")
    print(f"βœ… Files passed: {passed_count}")
    print(f"❌ Files failed: {len(failing)}")

    if failing:
        print("\n❌ Files with validation errors:")
        for path in failing:
            print(f" - {path}")

        print("\n::error::SQL query validation failed!")
        sys.exit(1)

    if diff_mode:
        print("\nπŸŽ‰ All changed SQL queries passed validation!")
    else:
        print("\nπŸŽ‰ All SQL queries passed validation!")
    sys.exit(0)


if __name__ == "__main__":
    main()