diff --git a/bin/doc-tools.js b/bin/doc-tools.js index 69a2781..73e8556 100755 --- a/bin/doc-tools.js +++ b/bin/doc-tools.js @@ -1111,6 +1111,8 @@ automation } else { env.OUTPUT_JSON_DIR = path.resolve(outputDir, 'examples'); env.OUTPUT_AUTOGENERATED_DIR = path.resolve(outputDir); + // Set property files to go to properties subdirectory + env.OUTPUT_ASCIIDOC_DIR = path.resolve(outputDir, 'pages', 'properties'); } const r = spawnSync('make', args, { cwd, stdio: 'inherit', env }); diff --git a/package-lock.json b/package-lock.json index ff3b438..2691a93 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.10.0", + "version": "4.10.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.10.0", + "version": "4.10.1", "license": "ISC", "dependencies": { "@asciidoctor/tabs": "^1.0.0-beta.6", diff --git a/package.json b/package.json index d996772..2be1446 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.10.0", + "version": "4.10.1", "description": "Antora extensions and macros developed for Redpanda documentation.", "keywords": [ "antora", diff --git a/tools/property-extractor/Makefile b/tools/property-extractor/Makefile index 65018d7..3513478 100644 --- a/tools/property-extractor/Makefile +++ b/tools/property-extractor/Makefile @@ -111,10 +111,10 @@ generate-docs: node-deps @# Use the enhanced properties file (with overrides) for documentation generation if it exists @if [ -f "$(TOOL_ROOT)/gen/$(TAG)-properties.json" ]; then \ cd $(TOOL_ROOT) && \ - node generate-handlebars-docs.js "gen/$(TAG)-properties.json" "$(OUTPUT_AUTOGENERATED_DIR)"; \ + node generate-handlebars-docs.js "gen/$(TAG)-properties.json" "$(OUTPUT_ASCIIDOC_DIR)"; \ else \ cd $(TOOL_ROOT) && \ - node generate-handlebars-docs.js "gen/properties-output.json" "$(OUTPUT_AUTOGENERATED_DIR)"; \ + node generate-handlebars-docs.js "gen/properties-output.json" "$(OUTPUT_ASCIIDOC_DIR)"; \ fi @echo "📄 Copying properties JSON files to $(OUTPUT_JSON_DIR)…" @if [ -f "$(TOOL_ROOT)/gen/$(TAG)-properties.json" ]; then \ diff --git a/tools/property-extractor/generate-handlebars-docs.js b/tools/property-extractor/generate-handlebars-docs.js index a63294a..248f68c 100644 --- a/tools/property-extractor/generate-handlebars-docs.js +++ b/tools/property-extractor/generate-handlebars-docs.js @@ -60,7 +60,17 @@ NOTE: Some cluster properties require that you restart the cluster for any updat sectionTitle: 'Cluster configuration', groups: [ { - filter: (prop) => prop.config_scope === 'cluster' && !prop.is_deprecated + filter: (prop) => prop.config_scope === 'cluster' && !prop.is_deprecated && !( + prop.name && ( + prop.name.includes('cloud_storage') || + prop.name.includes('s3_') || + prop.name.includes('azure_') || + prop.name.includes('gcs_') || + prop.name.includes('archival_') || + prop.name.includes('remote_') || + prop.name.includes('tiered_') + ) + ) } ], filename: 'cluster-properties.adoc' @@ -269,7 +279,8 @@ function generateDeprecatedDocs(properties, outputDir) { }; const output = template(data); - const outputPath = path.join(outputDir, 'deprecated', 'partials', 'deprecated-properties.adoc'); + // Navigate back from pages/properties to reference, then into partials/deprecated + const outputPath = path.join(path.dirname(path.dirname(outputDir)), 'partials', 'deprecated', 'deprecated-properties.adoc'); fs.mkdirSync(path.dirname(outputPath), { recursive: true }); fs.writeFileSync(outputPath, output, 'utf8'); @@ -332,7 +343,7 @@ function generateAllDocs(inputFile, outputDir) { // Generate each type of documentation for (const [type, config] of Object.entries(PROPERTY_CONFIG)) { - const count = generatePropertyDocs(properties, config, path.join(outputDir, 'pages')); + const count = generatePropertyDocs(properties, config, outputDir); totalProperties += count; if (type === 'broker') totalBrokerProperties = count; @@ -342,7 +353,7 @@ function generateAllDocs(inputFile, outputDir) { } // Generate deprecated properties documentation - const deprecatedCount = generateDeprecatedDocs(properties, path.join(outputDir, 'pages')); + const deprecatedCount = generateDeprecatedDocs(properties, outputDir); // Generate summary file const allPropertiesContent = Object.keys(properties).sort().join('\n'); diff --git a/tools/property-extractor/property_extractor.py b/tools/property-extractor/property_extractor.py index 20edb70..cb62d92 100644 --- a/tools/property-extractor/property_extractor.py +++ b/tools/property-extractor/property_extractor.py @@ -117,6 +117,24 @@ def process_enterprise_value(enterprise_str): """ enterprise_str = enterprise_str.strip() + # Handle special SASL mechanism function names + if enterprise_str == "is_enterprise_sasl_mechanism": + # Dynamically look up enterprise SASL mechanisms from source + enterprise_mechanisms = get_enterprise_sasl_mechanisms() + if enterprise_mechanisms: + return enterprise_mechanisms + else: + # Fallback to known values if lookup fails + return ["GSSAPI", "OAUTHBEARER"] + elif enterprise_str == "is_enterprise_sasl_mechanisms_override": + # Get the enterprise mechanisms dynamically for a more accurate description + enterprise_mechanisms = get_enterprise_sasl_mechanisms() + if enterprise_mechanisms: + mechanism_list = ", ".join(enterprise_mechanisms) + return f"Any override containing enterprise mechanisms ({mechanism_list})." + else: + return "Any override containing enterprise mechanisms." + # FIRST: Handle std::vector initialization patterns (highest priority) # This must come before enum processing because vectors can contain enums # Tolerate optional whitespace around braces @@ -196,19 +214,15 @@ def process_enterprise_value(enterprise_str): def resolve_cpp_function_call(function_name): """ - Resolve certain small, known C++ zero-argument functions to their literal return values by searching Redpanda source files. + Resolve a small set of known zero-argument C++ functions to their literal string return values by scanning a local Redpanda source tree. - This function looks up predefined search patterns for well-known functions (currently a small set under `model::*`), locates a local Redpanda source tree from several commonly used paths, and scans the listed files (and, if needed, the broader model directory) for a regex match that captures the string returned by the function. If a match is found the captured string is returned; if the source tree cannot be found or no match is located the function returns None. + Searches predefined files and regex patterns for the specified fully-qualified function name (e.g., "model::kafka_audit_logging_topic") and returns the captured string if found; returns None when no match or when the Redpanda source tree cannot be located. Parameters: - function_name (str): Fully-qualified C++ function name to resolve (e.g., "model::kafka_audit_logging_topic"). + function_name (str): Fully-qualified C++ function name to resolve. Returns: - str or None: The resolved literal string returned by the C++ function, or None when unresolved (source not found or no matching pattern). - - Notes: - - The function performs filesystem I/O and regex-based source searching; it does not raise on read errors but logs and continues. - - Only a small, hard-coded set of function names/patterns is supported; unknown names immediately return None. + str or None: The literal string returned by the C++ function when resolved, or `None` if unresolved. """ # Map function names to likely search patterns and file locations search_patterns = { @@ -322,7 +336,174 @@ def resolve_cpp_function_call(function_name): return None +def resolve_constexpr_identifier(identifier): + """ + Resolve a constexpr identifier from Redpanda source code to its literal string value. + + Searches common Redpanda source locations for constexpr string or string_view definitions matching the given identifier and returns the literal if found. + + Parameters: + identifier (str): The identifier name to resolve (e.g., "scram"). + + Returns: + str or None: The resolved literal string value if found, otherwise `None`. + """ + # Try to find the Redpanda source directory + redpanda_source_paths = [ + 'tmp/redpanda', # Current directory + '../tmp/redpanda', # Parent directory + 'tools/property-extractor/tmp/redpanda', # From project root + os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda') + ] + + redpanda_source = None + for path in redpanda_source_paths: + if os.path.exists(path): + redpanda_source = path + break + + if not redpanda_source: + logger.debug(f"Could not find Redpanda source directory to resolve identifier: {identifier}") + return None + + # Pattern to match constexpr string_view definitions + # Matches: inline constexpr std::string_view scram{"SCRAM"}; + patterns = [ + rf'inline\s+constexpr\s+std::string_view\s+{re.escape(identifier)}\s*\{{\s*"([^"]+)"\s*\}}', + rf'constexpr\s+std::string_view\s+{re.escape(identifier)}\s*\{{\s*"([^"]+)"\s*\}}', + rf'inline\s+constexpr\s+auto\s+{re.escape(identifier)}\s*=\s*"([^"]+)"', + rf'constexpr\s+auto\s+{re.escape(identifier)}\s*=\s*"([^"]+)"', + rf'static\s+constexpr\s+std::string_view\s+{re.escape(identifier)}\s*\{{\s*"([^"]+)"\s*\}}', + rf'static\s+inline\s+constexpr\s+std::string_view\s+{re.escape(identifier)}\s*\{{\s*"([^"]+)"\s*\}}', + ] + + # Search recursively through the config directory and other common locations + search_dirs = [ + os.path.join(redpanda_source, 'src', 'v', 'config'), + os.path.join(redpanda_source, 'src', 'v', 'kafka'), + os.path.join(redpanda_source, 'src', 'v', 'security'), + os.path.join(redpanda_source, 'src', 'v', 'pandaproxy'), + ] + + for search_dir in search_dirs: + if not os.path.exists(search_dir): + continue + + # Walk through the directory recursively + for root, dirs, files in os.walk(search_dir): + for file in files: + # Check both .h and .cc files since definitions can be in either + if file.endswith(('.h', '.cc', '.hpp', '.cpp')): + file_path = os.path.join(root, file) + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Try each pattern + for pattern in patterns: + match = re.search(pattern, content, re.MULTILINE) + if match: + resolved_value = match.group(1) + logger.debug(f"Resolved identifier '{identifier}' -> '{resolved_value}' from {file_path}") + return resolved_value + + except (FileNotFoundError, PermissionError, OSError, UnicodeDecodeError) as e: + logger.debug(f"Error reading {file_path}: {e}") + continue + + logger.debug(f"Could not resolve identifier: {identifier}") + return None + + +def get_enterprise_sasl_mechanisms(): + """ + Locate and resolve enterprise SASL mechanisms declared in Redpanda's sasl_mechanisms.h. + + Searches known Redpanda source locations for an inline constexpr definition of enterprise_sasl_mechanisms, + extracts the identifiers, and resolves each identifier to its literal string value where possible; unresolved + identifiers are converted to an uppercase fallback. + + Returns: + list or None: List of enterprise SASL mechanism strings (e.g., ["GSSAPI", "OAUTHBEARER"]), + or `None` if the lookup fails. + """ + # Try to find the Redpanda source directory + redpanda_source_paths = [ + 'tmp/redpanda', # Current directory + '../tmp/redpanda', # Parent directory + 'tools/property-extractor/tmp/redpanda', # From project root + os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda') + ] + + redpanda_source = None + for path in redpanda_source_paths: + if os.path.exists(path): + redpanda_source = path + break + + if not redpanda_source: + logger.debug("Could not find Redpanda source directory to resolve enterprise SASL mechanisms") + return None + + # Look for the enterprise_sasl_mechanisms definition in sasl_mechanisms.h + sasl_mechanisms_file = os.path.join(redpanda_source, 'src', 'v', 'config', 'sasl_mechanisms.h') + + if not os.path.exists(sasl_mechanisms_file): + logger.debug(f"sasl_mechanisms.h not found at {sasl_mechanisms_file}") + return None + + try: + with open(sasl_mechanisms_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Pattern to match the enterprise_sasl_mechanisms array definition + # inline constexpr auto enterprise_sasl_mechanisms = std::to_array({gssapi, oauthbearer}); + pattern = r'inline\s+constexpr\s+auto\s+enterprise_sasl_mechanisms\s*=\s*std::to_array<[^>]+>\s*\(\s*\{\s*([^}]+)\s*\}\s*\)' + + match = re.search(pattern, content, re.MULTILINE | re.DOTALL) + if match: + # Extract the identifiers from the array (e.g., "gssapi, oauthbearer") + identifiers_str = match.group(1).strip() + + # Split by comma and clean up whitespace + identifiers = [id.strip() for id in identifiers_str.split(',') if id.strip()] + + # Resolve each identifier to its actual string value + mechanisms = [] + for identifier in identifiers: + resolved_value = resolve_constexpr_identifier(identifier) + if resolved_value: + mechanisms.append(resolved_value) + else: + logger.debug(f"Could not resolve SASL mechanism identifier: {identifier}") + # Fallback: use the identifier name in uppercase + mechanisms.append(identifier.upper()) + + if mechanisms: + logger.debug(f"Resolved enterprise SASL mechanisms: {mechanisms}") + return mechanisms + else: + logger.debug("Could not find enterprise_sasl_mechanisms definition in sasl_mechanisms.h") + return None + + except (OSError, UnicodeDecodeError, re.error) as e: + logger.debug(f"Error reading {sasl_mechanisms_file}: {e}") + return None + + def validate_paths(options): + """ + Validate that required file-system paths referenced by `options` exist and exit the process on failure. + + Checks: + - Verifies `options.path` exists; logs an error and exits with status code 1 if it does not. + - If `options.definitions` is provided, verifies that file exists; logs an error and exits with status code 1 if it does not. + + Parameters: + options: An object with at least the attributes: + - path (str): Path to the input source directory or file. + - definitions (Optional[str]): Path to the type definitions file (may be None or empty). + """ path = options.path if not os.path.exists(path): @@ -713,21 +894,16 @@ def add_config_scope(properties): def resolve_type_and_default(properties, definitions): """ - Resolve JSON Schema types and expand C++-style default values for all properties. + Normalize property types and expand C++-style default values into JSON-compatible Python structures. - This function: - - Resolves type references found in `properties` against `definitions` (supports "$ref" and direct type names) and normalizes property "type" to a JSON Schema primitive ("object", "string", "integer", "boolean", "array", "number") with sensible fallbacks. - - Expands C++ constructor/initializer syntax and common C++ patterns appearing in default values into JSON-compatible Python values (e.g., nested constructor calls -> dicts, initializer lists -> lists, `std::nullopt` -> None, enum-like tokens -> strings). - - Ensures array-typed properties (including one_or_many_property cases) have array defaults: single-object defaults are wrapped into a one-element list and "{}" string defaults become []. - - Updates array item type information when item types reference definitions. - - Applies a final pass to convert any remaining C++-patterned defaults and to transform any `enterprise_value` strings via process_enterprise_value. + This function resolves type references in each property against the provided definitions (supports "$ref" and direct type names), normalizes property "type" to a JSON Schema primitive when possible, expands C++ constructor/initializer and common C++ literal patterns found in "default" values into Python primitives/objects/lists, ensures array-typed properties have array defaults (including handling one_or_many_property cases), updates array item type information when item types reference definitions, and converts any `enterprise_value` strings via process_enterprise_value. Parameters: - properties (dict): Mapping of property names to property metadata dictionaries. Each property may include keys like "type", "default", "items", and "enterprise_value". - definitions (dict): Mapping of type names to JSON Schema definition dictionaries used to resolve $ref targets and to infer property shapes when expanding constructors. + properties (dict): Mapping of property names to metadata dictionaries. Relevant keys that may be modified include "type", "default", "items", and "enterprise_value". + definitions (dict): Mapping of definition names to JSON Schema definition dictionaries used to resolve $ref targets and to infer shapes for expanding constructor-style defaults. Returns: - dict: The same `properties` mapping after in-place normalization and expansion of types and defaults. + dict: The same `properties` mapping after in-place normalization and expansion of types, defaults, item types, and enterprise values. """ import ast import re @@ -742,20 +918,42 @@ def resolve_definition_type(defn): return defn def parse_constructor(s): - """Parse C++ constructor syntax into type name and arguments.""" + """ + Parse a C++-style constructor or initializer expression into its type name and argument list. + + Parses input forms such as `Type(arg1, arg2)`, `Type{arg1, arg2}`, or plain literals/enum-like tokens. For string literals the returned argument is a Python string value; for integer literals the returned argument is an int. Nested constructors and nested brace/paren groups are preserved as argument tokens. + + Parameters: + s (str): The C++ expression to parse. + + Returns: + tuple: + - type_name (str|None): The parsed type name for constructor forms, or `None` when `s` is a primitive literal or enum-like token. + - args (list): A list of argument tokens; tokens are raw strings for complex/nested arguments, Python `str` for quoted string literals, or `int` for integer literals. + """ s = s.strip() + original_s = s if s.startswith("{") and s.endswith("}"): s = s[1:-1].strip() + + # Try parentheses syntax first: type_name(args) match = re.match(r'([a-zA-Z0-9_:]+)\((.*)\)', s) - if not match: - # Primitive or enum - if s.startswith('"') and s.endswith('"'): - return None, [ast.literal_eval(s)] - try: - return None, [int(s)] - except Exception: - return None, [s] - type_name, arg_str = match.groups() + if match: + type_name, arg_str = match.groups() + else: + # Try curly brace syntax: type_name{args} + match = re.match(r'([a-zA-Z0-9_:]+)\{(.*)\}', s) + if match: + type_name, arg_str = match.groups() + else: + # Primitive or enum + if s.startswith('"') and s.endswith('"'): + return None, [ast.literal_eval(s)] + try: + return None, [int(s)] + except ValueError: + return None, [s] + args = [] depth = 0 current = '' @@ -768,9 +966,9 @@ def parse_constructor(s): args.append(current.strip()) current = '' else: - if c == '(' and not in_string: + if c in '({' and not in_string: depth += 1 - elif c == ')' and not in_string: + elif c in ')}' and not in_string: depth -= 1 current += c if current.strip(): @@ -779,13 +977,18 @@ def parse_constructor(s): def process_cpp_patterns(arg_str): """ - Process specific C++ patterns to user-friendly values. + Convert a C++-style expression string into a JSON-friendly literal representation. - Handles: - - net::unresolved_address("127.0.0.1", 9092) -> expands based on type definition + This function recognises common C++ patterns produced by the extractor and maps them to values suitable for JSON schema defaults and examples. Handled cases include: - std::nullopt -> null - - fips_mode_flag::disabled -> "disabled" - - model::kafka_audit_logging_topic() -> dynamically looked up from source + - zero-argument functions (e.g., model::kafka_audit_logging_topic()) resolved from source when possible + - enum tokens (e.g., fips_mode_flag::disabled -> "disabled") + - constexpr identifiers and simple string constructors resolved to their literal strings when available + - known default constructors and truncated type names mapped to sensible defaults (e.g., duration -> 0, path -> "") + - simple heuristics for unknown constructors and concatenated expressions + + Returns: + processed (str): A string representing the JSON-ready value (for example: '"value"', 'null', '0', or the original input when no mapping applied). """ arg_str = arg_str.strip() @@ -808,6 +1011,24 @@ def process_cpp_patterns(arg_str): enum_value = enum_match.group(1) return f'"{enum_value}"' + # Handle constexpr identifier resolution (such as scram -> "SCRAM") + # Check if this is a simple identifier that might be a constexpr variable + if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', arg_str): + resolved_value = resolve_constexpr_identifier(arg_str) + if resolved_value is not None: + return f'"{resolved_value}"' + + # Handle string constructor patterns like ss::sstring{identifier} + sstring_match = re.match(r'ss::sstring\{([a-zA-Z_][a-zA-Z0-9_]*)\}', arg_str) + if sstring_match: + identifier = sstring_match.group(1) + resolved_value = resolve_constexpr_identifier(identifier) + if resolved_value is not None: + return f'"{resolved_value}"' + else: + # Fallback to the identifier itself + return f'"{identifier}"' + # Handle default constructors and their default values # This handles cases where C++ default constructors are used but should map to specific values @@ -861,12 +1082,23 @@ def process_cpp_patterns(arg_str): def expand_default(type_name, default_str): """ - Expand C++ default values into structured JSON objects. + Convert a C++-style default initializer into a JSON-serializable Python value. - For array types with initializer list syntax like: - {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))} + This expands C++ constructor and initializer-list syntax into Python primitives, dictionaries, and lists suitable for JSON output. Supported transformations include: + - String constructors and quoted literals → Python str. + - Integer and boolean literals → Python int and bool. + - Object constructors (Type(arg1, arg2) or Type{...}) → dict mapping constructor arguments to the object's properties when a corresponding type definition exists. + - Nested constructors → nested dicts with their fields expanded. + - Array initializer lists (e.g., {Type(...), Type(...)}) → Python list with each element expanded. + - Special-case mappings for known type patterns (for example, an address-type constructor expanded into {"address", "port"} when the target type expects that shape). + If a default cannot be resolved or the type is an enum, the original input is returned unchanged; the string "null" is converted to None. If default_str is not a string, it is returned as-is. - This creates: [{address: "127.0.0.1", port: 9644}] + Parameters: + type_name (str): The resolved type name for the default value (e.g., "model::broker_endpoint" or a primitive type like "string"). + default_str (str | any): The C++ default expression to expand, or a non-string value already decoded. + + Returns: + The expanded Python representation of the default: a dict for objects, a list for arrays, a primitive (str/int/bool), None for null, or the original value/string when expansion is not possible. """ # Handle non-string defaults if not isinstance(default_str, str): @@ -883,6 +1115,19 @@ def expand_default(type_name, default_str): return ast.literal_eval(processed) else: return processed + + # Handle string type with constructor syntax (e.g., ss::sstring{scram}) + if type_name == "string" and ("{" in default_str or "(" in default_str): + tname, args = parse_constructor(default_str) + if tname and args: + # For string constructors, resolve the first argument and return it as the string value + first_arg = args[0] if args else "" + # Apply C++ pattern processing to resolve identifiers + processed_arg = process_cpp_patterns(first_arg) + if processed_arg.startswith('"') and processed_arg.endswith('"'): + return ast.literal_eval(processed_arg) # Remove quotes + else: + return processed_arg type_def = resolve_definition_type(definitions.get(type_name, {})) if "enum" in type_def: @@ -1619,4 +1864,4 @@ def generate_options(): sys.exit(1) if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/tools/property-extractor/transformers.py b/tools/property-extractor/transformers.py index 182b658..6efc5d2 100644 --- a/tools/property-extractor/transformers.py +++ b/tools/property-extractor/transformers.py @@ -12,12 +12,12 @@ # the centralized enterprise value processing logic without creating import cycles. def get_process_enterprise_value(): """ - Lazily import and return the centralized `process_enterprise_value` function from `property_extractor`. + Lazily load the centralized process_enterprise_value function from property_extractor. - Attempts to import `process_enterprise_value` and return it to avoid circular-import issues. If the import fails an error message is printed and None is returned. + Attempts to import and return the `process_enterprise_value` callable; logs an error and returns `None` if the import fails. Returns: - Callable or None: The `process_enterprise_value` callable when available, otherwise `None`. + The `process_enterprise_value` callable if available, `None` otherwise. """ try: from property_extractor import process_enterprise_value @@ -27,6 +27,23 @@ def get_process_enterprise_value(): return None +def get_resolve_constexpr_identifier(): + """ + Lazily import and return the `resolve_constexpr_identifier` function from `property_extractor`. + + Attempts to import `resolve_constexpr_identifier` and return it to avoid circular-import issues. + + Returns: + Callable or None: The `resolve_constexpr_identifier` callable when available, otherwise `None`. + """ + try: + from property_extractor import resolve_constexpr_identifier + return resolve_constexpr_identifier + except ImportError as e: + logger.exception("Cannot import resolve_constexpr_identifier from property_extractor: %s", e) + return None + + class BasicInfoTransformer: def accepts(self, info, file_pair): """ @@ -182,29 +199,59 @@ def accepts(self, info, file_pair): def get_cpp_type_from_declaration(self, declaration): """ - Extract the inner type from C++ property declarations. - - This method handles various C++ template types and extracts the core type T from: - - property -> T - - std::optional -> T - - std::vector -> T - - one_or_many_property -> T (Redpanda's flexible array type) + Extract the inner C++ type from wrapped declarations like `property`, `std::optional`, `std::vector`, or `one_or_many_property`. - For one_or_many_property, this is crucial because it allows the same property - to accept either a single value or an array of values in the configuration. - Examples: - - one_or_many_property -> model::broker_endpoint - - one_or_many_property -> endpoint_tls_config + Parses common wrapper templates and returns the unwrapped type name (for example, returns `model::broker_endpoint` from `one_or_many_property`). The returned type is intended for downstream mapping to JSON schema types and default value resolution. - The extracted type is then used to determine the JSON schema type and - for resolving default values from the definitions. + Returns: + raw_type (str): The extracted inner C++ type as a string, or a best-effort fragment of the declaration if a precise extraction cannot be performed. """ one_line_declaration = declaration.replace("\n", "").strip() - raw_type = ( - re.sub(r"^.*property<(.+)>.*", "\\1", one_line_declaration) - .split()[0] - .replace(",", "") - ) + + # Extract property template content with proper nesting handling + # This handles cases like property> + def extract_template_content(text, template_name): + """ + Extracts the inner contents of the first occurrence of a template with the given name, correctly handling nested angle brackets. + + Parameters: + text (str): The string to search for the template. + template_name (str): The template name (e.g., "std::vector" or "property"). + + Returns: + str or None: The substring inside the outermost angle brackets for the matched template (excluding the brackets), + or `None` if the template is not found or angle brackets are unbalanced. + """ + start_idx = text.find(f'{template_name}<') + if start_idx == -1: + return None + + start_idx += len(f'{template_name}<') + bracket_count = 1 + i = start_idx + + while i < len(text) and bracket_count > 0: + if text[i] == '<': + bracket_count += 1 + elif text[i] == '>': + bracket_count -= 1 + i += 1 + + if bracket_count == 0: + return text[start_idx:i-1] + return None + + # Extract the content from property<...> + property_content = extract_template_content(one_line_declaration, 'property') + if property_content: + raw_type = property_content.split()[0].replace(",", "") + else: + # Fallback to original regex for simpler cases + raw_type = ( + re.sub(r"^.*property<(.+)>.*", "\\1", one_line_declaration) + .split()[0] + .replace(",", "") + ) if self.OPTIONAL_PATTERN in raw_type: raw_type = re.sub(".*std::optional<(.+)>.*", "\\1", raw_type) @@ -223,6 +270,15 @@ def get_cpp_type_from_declaration(self, declaration): return raw_type def get_type_from_declaration(self, declaration): + """ + Map a C++ type declaration string to a simplified, user-facing type name. + + Parameters: + declaration (str): C++ type declaration or template expression from which the effective type will be derived. + + Returns: + str: A JSON-schema-friendly type name such as "integer", "number", "string", "string[]", or "boolean". If no mapping matches, returns the normalized/raw extracted C++ type. + """ raw_type = self.get_cpp_type_from_declaration(declaration) type_mapping = [ # (regex, type) ("^u(nsigned|int)", "integer"), @@ -240,9 +296,29 @@ def get_type_from_declaration(self, declaration): if re.search(m[0], raw_type): return m[1] + # Handle specific user-unfriendly C++ types with descriptive alternatives + # Map complex C++ config types to user-friendly JSON schema types + user_friendly_types = { + "config::sasl_mechanisms_override": "object", + } + + if raw_type in user_friendly_types: + return user_friendly_types[raw_type] + return raw_type def parse(self, property, info, file_pair): + """ + Set the property's "type" field to the JSON schema type derived from the C++ declaration. + + Parameters: + property (dict): Mutable property bag to be updated. + info (dict): Parsed property metadata; its "declaration" field is used to determine the type. + file_pair: Unused here; present for transformer interface compatibility. + + Returns: + property (dict): The same property bag with "type" set to the derived type string. + """ property["type"] = self.get_type_from_declaration(info["declaration"]) return property @@ -394,42 +470,170 @@ class FriendlyDefaultTransformer: # Class-level constants for pattern matching in default values ARRAY_PATTERN_STD_VECTOR = "std::vector" + SSTRING_CONSTRUCTOR_PATTERN = r'ss::sstring\{([a-zA-Z_][a-zA-Z0-9_]*)\}' + VECTOR_INITIALIZER_PATTERN = r'std::vector<[^>]+>\s*\{(.*)\}$' + CHRONO_PATTERN = r"std::chrono::(\w+)\(([^)]+)\)" + + def __init__(self): + """ + Initialize the transformer and set up a placeholder for a lazily-loaded resolver. + + Sets self._resolver to None to indicate the resolver has not been loaded yet. + """ + self._resolver = None def accepts(self, info, file_pair): + """ + Determine whether the transformer should run for the given property info by checking for a fourth parameter. + + Parameters: + info (dict): Parsed property metadata; expects a "params" list when present. + file_pair (tuple): Source/implementation file pair (unused by this check). + + Returns: + `true` if `info["params"]` exists and contains at least four items, `false` otherwise. + """ return info.get("params") and len(info["params"]) > 3 + def _get_resolver(self): + """ + Lazily load and cache the constexpr identifier resolver. + + Returns: + callable or None: The resolver function if available, or `None` if it could not be loaded. + """ + if self._resolver is None: + resolver = get_resolve_constexpr_identifier() + self._resolver = resolver if resolver else False + return self._resolver if self._resolver is not False else None + + def _resolve_identifier(self, identifier): + """ + Resolve a constexpr identifier to its corresponding string value. + + Parameters: + identifier (str): Identifier to resolve (for example, "scram" or "gssapi"). + + Returns: + str or None: The resolved string value if successful, `None` when the identifier is invalid or cannot be resolved. + """ + if not identifier or not isinstance(identifier, str): + logger.warning(f"Invalid identifier for resolution: {identifier}") + return None + + resolver = self._get_resolver() + if resolver: + try: + return resolver(identifier) + except (AttributeError, TypeError, ValueError) as e: + logger.debug(f"Failed to resolve identifier '{identifier}': {e}") + except Exception as e: + logger.exception(f"Unexpected error resolving identifier '{identifier}': {e}") + + return None + + def _process_sstring_constructor(self, item): + """ + Convert an ss::sstring{identifier} constructor string to its resolved value when possible. + + If the input matches the ss::sstring{...} pattern, attempts to resolve the enclosed identifier and returns the resolved string. If resolution fails, returns the raw identifier. If the input does not match the pattern or is falsy, returns it unchanged. + + Parameters: + item (str): The constructor expression or string to process. + + Returns: + str: The resolved string when resolution succeeds, the extracted identifier if resolution fails, or the original input if it does not match. + """ + if not item: + return item + + match = re.match(self.SSTRING_CONSTRUCTOR_PATTERN, item) + if not match: + return item + + identifier = match.group(1) + resolved = self._resolve_identifier(identifier) + + if resolved: + logger.debug(f"Resolved ss::sstring{{{identifier}}} -> '{resolved}'") + return resolved + + # Log warning but continue with original identifier + logger.warning(f"Could not resolve identifier '{identifier}' in ss::sstring constructor") + return identifier + + def _parse_vector_contents(self, contents): + """ + Parse a comma-separated std::vector initializer string into a list of cleaned, processed items. + + Parameters: + contents (str): The inner contents of a vector initializer (e.g. '\"a\", ss::sstring{ID}, \"b\"'); may be empty or None. + + Returns: + list: Ordered list of processed, unquoted items with empty entries omitted. + """ + if not contents: + return [] + + # Split by comma and process each item + raw_items = [contents] if ',' not in contents else contents.split(',') + + processed_items = [] + for item in raw_items: + item = item.strip(' "\'') + if item: # Skip empty items + processed_item = self._process_sstring_constructor(item) + processed_items.append(processed_item) + + return processed_items + def parse(self, property, info, file_pair): + """ + Convert a C++ default expression into a JSON-friendly value and store it on the property under the "default" key. + + Parameters: + property (dict): Property dictionary to modify; updated in place with a "default" entry. + info (dict): Parsed property information; the default expression is expected at info["params"][3]["value"]. + file_pair: File pair context (ignored by this function). + + Returns: + dict: The modified property dictionary with a normalized "default" value. + """ default = info["params"][3]["value"] + + # Handle null/empty defaults + if not default: + return property - # Transform std::nullopt into None. + # Transform std::nullopt into None if "std::nullopt" in default: property["default"] = None return property - # Transform std::numeric_limits expressions. + # Transform std::numeric_limits expressions if "std::numeric_limits" in default: property["default"] = "Maximum value" return property - # Transform std::chrono durations. + # Transform std::chrono durations if "std::chrono" in default: - m = re.search(r"std::chrono::(\w+)\(([^)]+)\)", default) - if m: - unit = m.group(1) - value = m.group(2).strip() + match = re.search(self.CHRONO_PATTERN, default) + if match: + unit = match.group(1) + value = match.group(2).strip() property["default"] = f"{value} {unit}" return property - # Transform std::vector defaults. + # Transform std::vector defaults if self.ARRAY_PATTERN_STD_VECTOR in default: - m = re.search(r'\{([^}]+)\}', default) - if m: - contents = m.group(1).strip() - items = [item.strip(' "\'') for item in contents.split(',')] + vector_match = re.search(self.VECTOR_INITIALIZER_PATTERN, default) + if vector_match: + contents = vector_match.group(1).strip() + items = self._parse_vector_contents(contents) property["default"] = items return property - # Otherwise, leave the default as-is. + # For all other cases, leave the default as-is property["default"] = default return property @@ -566,4 +770,4 @@ def parse(self, property, info, file_pair): key, value = item.split('=') meta_dict[key.strip().replace('.', '')] = value.strip() meta_dict['type'] = 'initializer_list' # Enforce required type - param['value'] = meta_dict + param['value'] = meta_dict \ No newline at end of file