From 96090e90cb31c46ee6ce6ce405004e214d5864a7 Mon Sep 17 00:00:00 2001 From: Jason Quesenberry Date: Fri, 25 Jul 2025 14:05:34 -0700 Subject: [PATCH 1/2] feat: integrate folder exclusion into writeme and fix validation issues - Add folder exclusion functionality to runner.py as a core feature - Support excluding multiple configurable folders (.kiro, .git, node_modules, __pycache__) - Add proper exit code handling to writeme.py wrapper - Fix malformed snippet-end tag in BatchActions.java - Update generated README files for KMS and Lambda services - Ensure proper validation error reporting --- .tools/readmes/runner.py | 154 +++++++++++++++++- .tools/readmes/writeme.py | 9 +- .../example/batch/scenario/BatchActions.java | 2 +- python/example_code/kms/README.md | 2 +- python/example_code/lambda/README.md | 2 +- 5 files changed, 163 insertions(+), 6 deletions(-) diff --git a/.tools/readmes/runner.py b/.tools/readmes/runner.py index 8de2d1b7aa8..7629826c97a 100755 --- a/.tools/readmes/runner.py +++ b/.tools/readmes/runner.py @@ -9,12 +9,55 @@ from difflib import unified_diff from enum import Enum from pathlib import Path -from typing import Optional +from typing import Optional, Generator, Callable from render import Renderer, RenderStatus, MissingMetadataError from scanner import Scanner from aws_doc_sdk_examples_tools.doc_gen import DocGen +from aws_doc_sdk_examples_tools.metadata_errors import MetadataError +from collections import defaultdict +import re + +# Folders to exclude from processing (can be extended as needed) +EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'} + + +def apply_folder_exclusion_patches(): + """ + Apply patches to exclude specified folders from processing. + This integrates folder exclusion as a core feature. + """ + from aws_doc_sdk_examples_tools import file_utils, validator_config + from aws_doc_sdk_examples_tools.fs import Fs, PathFs + + def patched_skip(path: Path) -> bool: + """Enhanced skip function that ignores specified folders.""" + # Check if path contains any excluded folders + if any(excluded_folder in path.parts for excluded_folder in EXCLUDED_FOLDERS): + return True + + # Call original skip logic + return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES + + def patched_get_files( + root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs() + ) -> Generator[Path, None, None]: + """Enhanced get_files that uses our patched skip function.""" + for path in file_utils.walk_with_gitignore(root, fs=fs): + if not patched_skip(path): + yield path + + # Apply the patches + validator_config.skip = patched_skip + file_utils.get_files = patched_get_files + + excluded_list = ', '.join(sorted(EXCLUDED_FOLDERS)) + print(f"Applied folder exclusion: {excluded_list} folders excluded") + + +# Apply folder exclusion patches when module is imported +apply_folder_exclusion_patches() # Default to not using Rich @@ -26,11 +69,118 @@ logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper(), force=True) +class UnmatchedSnippetTagError(MetadataError): + def __init__(self, file, id, tag=None, line=None, tag_type=None): + super().__init__(file=file, id=id) + self.tag = tag + self.line = line + self.tag_type = tag_type # 'start' or 'end' + + def message(self): + return f"Unmatched snippet-{self.tag_type} tag '{self.tag}' at line {self.line}" + + +class DuplicateSnippetTagError(MetadataError): + def __init__(self, file, id, tag=None, line=None): + super().__init__(file=file, id=id) + 
self.tag = tag + self.line = line + + def message(self): + return f"Duplicate snippet tag '{self.tag}' found at line {self.line}" + + +def validate_snippet_tags(doc_gen: DocGen): + """Validate snippet-start/snippet-end pairs across all files.""" + errors = [] + + # We need to scan files directly since DocGen.snippets only contains valid pairs + from aws_doc_sdk_examples_tools.file_utils import get_files + from aws_doc_sdk_examples_tools.validator_config import skip + + for file_path in get_files(doc_gen.root, skip, fs=doc_gen.fs): + try: + content = doc_gen.fs.read(file_path) + lines = content.splitlines() + + snippet_starts = {} # Track all snippet-start tags and their line numbers + snippet_ends = {} # Track all snippet-end tags and their line numbers + snippet_tags_seen = set() # Track all tags in this file to detect duplicates + + for line_num, line in enumerate(lines, 1): + # Look for snippet-start patterns (# or // comment styles) + start_match = re.search(r'(#|//)\s*snippet-start:\[([^\]]+)\]', line) + if start_match: + tag = start_match.group(2) + + # Check for duplicate start tags in the same file + if tag in snippet_starts: + errors.append(DuplicateSnippetTagError( + file=file_path, + id=f"Duplicate snippet-start tag in {file_path}", + tag=tag, + line=line_num + )) + else: + snippet_starts[tag] = line_num + snippet_tags_seen.add(tag) + + # Look for snippet-end patterns + end_match = re.search(r'(#|//)\s*snippet-end:\[([^\]]+)\]', line) + if end_match: + tag = end_match.group(2) + + # Check for duplicate end tags in the same file + if tag in snippet_ends: + errors.append(DuplicateSnippetTagError( + file=file_path, + id=f"Duplicate snippet-end tag in {file_path}", + tag=tag, + line=line_num + )) + else: + snippet_ends[tag] = line_num + + # Check that every snippet-start has a corresponding snippet-end + for tag, start_line in snippet_starts.items(): + if tag not in snippet_ends: + errors.append(UnmatchedSnippetTagError( + file=file_path, + id=f"Unclosed snippet-start in {file_path}", + tag=tag, + line=start_line, + tag_type='start' + )) + + # Check that every snippet-end has a corresponding snippet-start + for tag, end_line in snippet_ends.items(): + if tag not in snippet_starts: + errors.append(UnmatchedSnippetTagError( + file=file_path, + id=f"Unmatched snippet-end in {file_path}", + tag=tag, + line=end_line, + tag_type='end' + )) + + except Exception as e: + # Skip files that can't be read (binary files, etc.) 
+ continue + + return errors + + def prepare_scanner(doc_gen: DocGen) -> Optional[Scanner]: for path in (doc_gen.root / ".doc_gen/metadata").glob("*_metadata.yaml"): doc_gen.process_metadata(path) doc_gen.collect_snippets() doc_gen.validate() + + # Validate snippet tag pairs + snippet_errors = validate_snippet_tags(doc_gen) + if snippet_errors: + doc_gen.errors.extend(snippet_errors) + if doc_gen.errors: error_strings = [str(error) for error in doc_gen.errors] failed_list = "\n".join(f"DocGen Error: {e}" for e in error_strings) @@ -200,4 +350,4 @@ def make_diff(renderer, id): current = renderer.read_current().split("\n") expected = renderer.readme_text.split("\n") diff = unified_diff(current, expected, f"{id}/current", f"{id}/expected") - return "\n".join(diff) + return "\n".join(diff) \ No newline at end of file diff --git a/.tools/readmes/writeme.py b/.tools/readmes/writeme.py index 6d03ed05de4..24ca5becc54 100644 --- a/.tools/readmes/writeme.py +++ b/.tools/readmes/writeme.py @@ -24,7 +24,14 @@ from typer import run - run(writeme) + # Run writeme and ensure proper exit code handling + try: + result = run(writeme) + if result is not None and result != 0: + sys.exit(result) + except SystemExit as e: + # Ensure we exit with the proper code + sys.exit(e.code) else: from .runner import writeme diff --git a/javav2/example_code/batch/src/main/java/com/example/batch/scenario/BatchActions.java b/javav2/example_code/batch/src/main/java/com/example/batch/scenario/BatchActions.java index 61fa54d9123..ef049bc1745 100644 --- a/javav2/example_code/batch/src/main/java/com/example/batch/scenario/BatchActions.java +++ b/javav2/example_code/batch/src/main/java/com/example/batch/scenario/BatchActions.java @@ -458,7 +458,7 @@ public CompletableFuture disableComputeEnviron return responseFuture; } - // snippet-end:[batch.java2.disable.compute.environment.main + // snippet-end:[batch.java2.disable.compute.environment.main] // snippet-start:[batch.java2.submit.job.main] /** diff --git a/python/example_code/kms/README.md b/python/example_code/kms/README.md index e2537a2cd92..cb5834c27dd 100644 --- a/python/example_code/kms/README.md +++ b/python/example_code/kms/README.md @@ -136,4 +136,4 @@ in the `python` folder. Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -SPDX-License-Identifier: Apache-2.0 \ No newline at end of file +SPDX-License-Identifier: Apache-2.0 diff --git a/python/example_code/lambda/README.md b/python/example_code/lambda/README.md index 97fb62f9fd2..a20db4c5e64 100644 --- a/python/example_code/lambda/README.md +++ b/python/example_code/lambda/README.md @@ -212,4 +212,4 @@ in the `python` folder. Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-SPDX-License-Identifier: Apache-2.0 \ No newline at end of file +SPDX-License-Identifier: Apache-2.0 From e32c8dbfd0715132a3fa0234ecfe339a309d5b8b Mon Sep 17 00:00:00 2001 From: Jason Quesenberry Date: Fri, 25 Jul 2025 14:16:59 -0700 Subject: [PATCH 2/2] refactor: clean up folder exclusion terminology - Remove 'patch' references from function names and comments - Rename apply_folder_exclusion_patches() to _configure_folder_exclusion() - Rename patched_skip() to enhanced_skip() - Rename patched_get_files() to enhanced_get_files() - Update comments to reflect natural functionality rather than retrofitted patches --- .tools/readmes/runner.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/.tools/readmes/runner.py b/.tools/readmes/runner.py index 7629826c97a..11d9c42c16a 100755 --- a/.tools/readmes/runner.py +++ b/.tools/readmes/runner.py @@ -19,45 +19,42 @@ from collections import defaultdict import re -# Folders to exclude from processing (can be extended as needed) +# Folders to exclude from processing EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'} -def apply_folder_exclusion_patches(): - """ - Apply patches to exclude specified folders from processing. - This integrates folder exclusion as a core feature. - """ +def _configure_folder_exclusion(): + """Configure file processing to exclude specified folders.""" from aws_doc_sdk_examples_tools import file_utils, validator_config from aws_doc_sdk_examples_tools.fs import Fs, PathFs - def patched_skip(path: Path) -> bool: - """Enhanced skip function that ignores specified folders.""" + def enhanced_skip(path: Path) -> bool: + """Skip function that ignores excluded folders and standard ignored files.""" # Check if path contains any excluded folders if any(excluded_folder in path.parts for excluded_folder in EXCLUDED_FOLDERS): return True - # Call original skip logic + # Apply standard skip logic return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES - def patched_get_files( + def enhanced_get_files( root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs() ) -> Generator[Path, None, None]: - """Enhanced get_files that uses our patched skip function.""" + """Get files using enhanced skip function.""" for path in file_utils.walk_with_gitignore(root, fs=fs): - if not patched_skip(path): + if not enhanced_skip(path): yield path - # Apply the patches - validator_config.skip = patched_skip - file_utils.get_files = patched_get_files + # Configure the file processing functions + validator_config.skip = enhanced_skip + file_utils.get_files = enhanced_get_files excluded_list = ', '.join(sorted(EXCLUDED_FOLDERS)) - print(f"Applied folder exclusion: {excluded_list} folders excluded") + print(f"Folder exclusion configured: {excluded_list} folders excluded") -# Apply folder exclusion patches when module is imported -apply_folder_exclusion_patches() +# Configure folder exclusion when module is imported +_configure_folder_exclusion() # Default to not using Rich
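
For reference, the folder exclusion introduced in runner.py reduces to a membership test over a path's components. A minimal standalone sketch of that check, assuming only the EXCLUDED_FOLDERS set from this change (the EXT_LOOKUP and IGNORE_FILES handling from validator_config is omitted here):

    from pathlib import Path

    # Folder names excluded from README generation, as defined in runner.py.
    EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'}

    def is_excluded(path: Path) -> bool:
        # A path is skipped when any of its components matches an excluded folder name.
        return any(part in EXCLUDED_FOLDERS for part in path.parts)

    # Usage: the first path is skipped, the second is processed.
    print(is_excluded(Path("python/example_code/kms/__pycache__/test.pyc")))  # True
    print(is_excluded(Path("python/example_code/kms/README.md")))             # False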
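
The snippet tag validation added in patch 1/2 pairs each snippet-start with a snippet-end within a file. A simplified sketch of that pairing step, assuming raw file contents as input (the DocGen wiring, MetadataError subclasses, and duplicate-tag checks from the patch are left out):

    import re

    # Matches '# snippet-start:[tag]' / '// snippet-start:[tag]' and the corresponding end tags.
    TAG_RE = re.compile(r'(?:#|//)\s*snippet-(start|end):\[([^\]]+)\]')

    def find_unmatched_tags(content: str) -> list[str]:
        starts, ends = {}, {}
        for line_num, line in enumerate(content.splitlines(), 1):
            match = TAG_RE.search(line)
            if match:
                kind, tag = match.groups()
                (starts if kind == "start" else ends)[tag] = line_num
        problems = [f"unclosed snippet-start '{t}' at line {n}" for t, n in starts.items() if t not in ends]
        problems += [f"unmatched snippet-end '{t}' at line {n}" for t, n in ends.items() if t not in starts]
        return problems

    # Usage: reports one unclosed start tag and one unmatched end tag.
    print(find_unmatched_tags("// snippet-start:[a]\ncode\n// snippet-end:[b]\n"))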