diff --git a/go.mod b/go.mod
index 6745c56..1fae75c 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,5 @@
 module github.com/antchfx/xpath
 
-go 1.14
+go 1.18
+
+require pgregory.net/rapid v1.2.0 // test
diff --git a/property_test.go b/property_test.go
new file mode 100644
index 0000000..fb9975f
--- /dev/null
+++ b/property_test.go
@@ -0,0 +1,964 @@
+package xpath
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"testing"
+
+	"pgregory.net/rapid"
+)
+
+// antchfxResultToString renders the value returned by Evaluate as a string
+// that can be compared with the normalized xmllint output.
+//
+//   - *NodeIterator: every selected node is serialized and the pieces are
+//     joined with "\n". The iterator is re-created from a clone of its query
+//     and a copy of nav, so the caller's iterator is not consumed.
+//   - string, float64, bool: formatted with strconv.
+//   - anything else: a marker string naming the unhandled type.
+func antchfxResultToString(result interface{}, nav NodeNavigator) string {
+	switch v := result.(type) {
+	case *NodeIterator:
+		// Iterate over a fresh iterator so the original stays untouched.
+		iterCopy := &NodeIterator{query: v.query.Clone(), node: nav.Copy()}
+		var parts []string
+		for iterCopy.MoveNext() {
+			currentNodeNav := iterCopy.Current()
+			if tNav, ok := currentNodeNav.(*TNodeNavigator); ok {
+				// TNodeNavigator exposes the underlying node, so it can be
+				// serialized as an XML snippet.
+				parts = append(parts, serializeNodeToString(tNav.curr))
+			} else {
+				// Other navigators only offer Value(); this will not match
+				// xmllint's XML output.
+				parts = append(parts, currentNodeNav.Value())
+			}
+		}
+		return strings.Join(parts, "\n")
+	case string:
+		return v
+	case float64:
+		// Same formatting as the number branch of parseXmllintOutput.
+		return strconv.FormatFloat(v, 'f', -1, 64)
+	case bool:
+		return strconv.FormatBool(v)
+	case query:
+		// A raw query is not expected from Evaluate; report it by name.
+		return "unhandled_antchfx_type_query"
+	default:
+		return fmt.Sprintf("unhandled_antchfx_type_%T", result)
+	}
+}
+
+// parseXmllintOutput normalizes one xmllint run into a comparable string.
+//
+// Exit codes as documented at http://xmlsoft.org/xmllint.html:
+//
+//	 0: OK
+//	10: XPath evaluation returned no result (empty node set)
+//	11: error evaluating the XPath expression
+//	12: error building the context for XPath evaluation
+//
+// For exit code 10 the result is "false" when stderr reports a false Boolean
+// and "" otherwise. For exit code 0 a Boolean or Number reported on stderr
+// wins; otherwise the trimmed stdout is returned. Any other exit code yields
+// an error so the caller can decide how to treat the failure.
+func parseXmllintOutput(stdout string, stderr string, exitCode int) (string, error) {
+	const (
+		boolFalseMarker = "XPath expression evaluates to a Boolean : false"
+		boolTrueMarker  = "XPath expression evaluates to a Boolean : true"
+		numberMarker    = "XPath expression evaluates to a Number : "
+	)
+
+	if exitCode == 10 { // Empty result set
+		if strings.Contains(stderr, boolFalseMarker) {
+			return "false", nil
+		}
+		// Empty node-set or empty string.
+		return "", nil
+	}
+
+	if exitCode == 0 { // Success
+		if strings.Contains(stderr, boolFalseMarker) {
+			return "false", nil
+		}
+		if strings.Contains(stderr, boolTrueMarker) {
+			return "true", nil
+		}
+		// Take only the text that follows the marker, up to the end of that
+		// line. Splitting stderr on its first ':' would pick up unrelated
+		// text if any other colon-containing message precedes or follows it.
+		if _, rest, found := strings.Cut(stderr, numberMarker); found {
+			numStr, _, _ := strings.Cut(rest, "\n")
+			numStr = strings.TrimSpace(numStr)
+			if f, err := strconv.ParseFloat(numStr, 64); err == nil {
+				return strconv.FormatFloat(f, 'f', -1, 64), nil
+			}
+			return numStr, nil // Fall back to the raw text if it does not parse.
+		}
+		// No type hint on stderr: stdout carries the result (string or
+		// node-set XML). Return the trimmed stdout directly. We will compare
+		// this raw output.
+		return strings.TrimSpace(stdout), nil
+	}
+
+	// Exit code 11, 12 or anything else: report it. The caller decides whether
+	// this is consistent with what antchfx did.
+	return "", fmt.Errorf("xmllint failed or evaluation error (exit code %d). Stderr: %s", exitCode, stderr)
+}
+
+// checkXmllintAvailability skips the calling test when no xmllint binary can
+// be found in PATH.
+func checkXmllintAvailability(t *testing.T) {
+	t.Helper()
+	_, err := exec.LookPath("xmllint")
+	if err != nil {
+		t.Skip("xmllint command not found in PATH, skipping differential tests.")
+	}
+}
+
+// Limited set of tags for generation to increase match probability.
+var htmlTags = []string{"div", "p", "span", "a", "b", "i", "table", "tr", "td"}
+
+// Limited set of attribute names.
+var htmlAttrs = []string{"id", "class", "href", "title", "style"}
+
+// genTNode generates a random TNode tree resembling simple HTML.
+// It is a package-level variable so the generator can refer to itself when
+// drawing children; it is assigned in init().
+var genTNode *rapid.Generator[*TNode]
+
+func init() {
+	genTNode = rapid.Custom(func(t *rapid.T) *TNode {
+		// Element or text node. Depth is not bounded explicitly; recursion
+		// only continues through the "hasChildren" draw below.
+		isElement := rapid.Bool().Draw(t, "isElement")
+		if !isElement {
+			text := rapid.SampledFrom([]string{"", "foo", "bar"}).Draw(t, "textData")
+			return createNode(text, TextNode)
+		}
+
+		tag := rapid.SampledFrom(htmlTags).Draw(t, "tag")
+		node := createNode(tag, ElementNode)
+
+		// Add attributes sometimes.
+		if rapid.Bool().Draw(t, "hasAttrs") {
+			numAttrs := rapid.IntRange(0, 3).Draw(t, "numAttrs")
+			seen := make(map[string]bool, numAttrs)
+			for i := 0; i < numAttrs; i++ {
+				attrName := rapid.SampledFrom(htmlAttrs).Draw(t, fmt.Sprintf("attrName%d", i))
+				attrVal := rapid.SampledFrom([]string{"", "foo", "bar"}).Draw(t, fmt.Sprintf("attrVal%d", i))
+				// Keep attribute names unique per element. A repeated name is
+				// not well-formed XML, so (presumably, depending on how
+				// nodeToXMLString writes attributes - verify) xmllint would
+				// reject the document and the whole iteration would be wasted.
+				if seen[attrName] {
+					continue
+				}
+				seen[attrName] = true
+				node.addAttribute(attrName, attrVal)
+			}
+		}
+
+		// Add children sometimes.
+		if rapid.Bool().Draw(t, "hasChildren") {
+			numChildren := rapid.IntRange(1, 5).Draw(t, "numChildren")
+			for i := 0; i < numChildren; i++ {
+				child := genTNode.Draw(t, fmt.Sprintf("child%d", i))
+				node.AddChild(child)
+			}
+		}
+
+		return node
+	})
+}
+
+// AddChild appends an existing TNode as the last child of n and fixes up the
+// Parent, PrevSibling and NextSibling links. The child is not detached from
+// any previous parent first.
+func (n *TNode) AddChild(child *TNode) {
+	child.Parent = n
+	child.PrevSibling = n.LastChild
+	child.NextSibling = nil // child becomes the last sibling
+
+	if n.LastChild != nil {
+		n.LastChild.NextSibling = child
+	} else {
+		// This is the first child.
+		n.FirstChild = child
+	}
+	n.LastChild = child
+}
+
+// genStringLiteral generates a quoted XPath string literal. The content comes
+// from a fixed list that contains neither quote character, so no escaping is
+// needed for either quoting style.
+func genStringLiteral() *rapid.Generator[string] {
+	return rapid.Custom(func(t *rapid.T) string {
+		quote := rapid.SampledFrom([]string{"'", "\""}).Draw(t, "quote")
+		content := rapid.SampledFrom([]string{"", "foo", "bar", "test", "data"}).Draw(t, "content")
+		return quote + content + quote
+	})
+}
+
+// genNumberLiteral generates an XPath number literal: an integer in
+// [-10, 100] in decimal notation.
+func genNumberLiteral() *rapid.Generator[string] {
+	return rapid.Custom(func(t *rapid.T) string {
+		num := rapid.IntRange(-10, 100).Draw(t, "number")
+		return strconv.Itoa(num)
+	})
+}
+
+// Package-level variables so the two generators can refer to each other;
+// both are assigned in init() functions.
+var genRelativePathExpr *rapid.Generator[string]
+var genPredicateContent *rapid.Generator[string]
+
+func init() {
+	// genPredicateContent generates expressions suitable for inside [...].
+	// genRelativePathExpr is only read inside the closure, i.e. when a value
+	// is drawn, so it does not have to be assigned yet when this init() runs.
+	genPredicateContent = rapid.Custom(func(t *rapid.T) string {
+		return rapid.OneOf(
+			// Index predicates: [last()], [3]
+			rapid.Just("last()"),
+			genNumberLiteral(),
+			// Existence check, e.g. [element]
+			genRelativePathExpr,
+			// Simple comparison: path <op> literal
+			rapid.Custom(func(t *rapid.T) string {
+				lhsPath := rapid.OneOf(
+					rapid.Just("text()"),
+					rapid.Just("."),
+					rapid.Custom(func(t *rapid.T) string { return "@" + rapid.SampledFrom(htmlAttrs).Draw(t, "attrName") }),
+					rapid.SampledFrom(htmlTags), // Simple element name test
+				).Draw(t, "lhsPath")
+
+				op := rapid.SampledFrom([]string{"=", "!=", "<", "<=", ">", ">="}).Draw(t, "compOp")
+
+				// text() is only compared against string literals; every
+				// other left-hand side may get a string or a number.
+				var rhsLiteral string
+				if lhsPath == "text()" {
+					rhsLiteral = genStringLiteral().Draw(t, "rhsLiteralString")
+				} else {
+					rhsLiteral = rapid.OneOf(genStringLiteral(), genNumberLiteral()).Draw(t, "rhsLiteralMixed")
+				}
+
+				return fmt.Sprintf("%s %s %s", lhsPath, op, rhsLiteral)
+			}),
+			// Function call predicate: [contains(., 'foo')]
+			rapid.Custom(func(t *rapid.T) string {
+				funcName := rapid.SampledFrom([]string{"contains", "starts-with"}).Draw(t, "funcName")
+				arg1 := rapid.OneOf(
+					rapid.Just("."),
+					rapid.Just("text()"),
+					rapid.Custom(func(t *rapid.T) string { return "@" + rapid.SampledFrom(htmlAttrs).Draw(t, "attrName") }),
+				).Draw(t, "funcArg1")
+				arg2 := genStringLiteral().Draw(t, "funcArg2")
+				return fmt.Sprintf("%s(%s, %s)", funcName, arg1, arg2)
+			}),
+			// Not generated yet: position(), count(), boolean logic (and/or).
+		).Draw(t, "predicateContent")
+	})
+}
+
+// genPredicate generates a full predicate expression: '[' + content + ']'.
+func genPredicate() *rapid.Generator[string] {
+	return rapid.Custom(func(t *rapid.T) string {
+		content := genPredicateContent.Draw(t, "content")
+		return "[" + content + "]"
+	})
+}
+
+// genAxis generates a random XPath axis name. The namespace axis is left out
+// on purpose.
+func genAxis() *rapid.Generator[string] {
+	axes := []string{
+		"child", "descendant", "parent", "ancestor", "following-sibling",
+		"preceding-sibling", "following", "preceding", "attribute", "self",
+		"descendant-or-self", "ancestor-or-self",
+		// "namespace", // Deprecated and often unsupported
+	}
+	return rapid.SampledFrom(axes)
+}
+
+// genNodeTest generates a random XPath node test (name test or kind test).
+func genNodeTest() *rapid.Generator[string] {
+	// Name tests first, then the XPath 1.0 kind tests. element() and
+	// attribute() are XPath 2.0+ and processing-instruction() is left out.
+	nameTests := []*rapid.Generator[string]{
+		rapid.Just("*"),
+		rapid.SampledFrom(htmlTags),
+	}
+	kindTests := []*rapid.Generator[string]{
+		rapid.Just("node()"),
+		rapid.Just("text()"),
+		rapid.Just("comment()"),
+	}
+	return rapid.OneOf(append(nameTests, kindTests...)...)
+}
+
+// genStep generates one location step: an axis and node test (in full or
+// abbreviated syntax) followed by zero, one or two predicates.
+func genStep() *rapid.Generator[string] {
+	return rapid.Custom(func(t *rapid.T) string {
+		axis := genAxis().Draw(t, "axis")
+		test := genNodeTest().Draw(t, "nodeTest")
+
+		// Which abbreviation, if any, is legal for this axis/test pair.
+		childOK := axis == "child"
+		switch test {
+		case "attribute()", "comment()", "processing-instruction()":
+			childOK = false
+		}
+		attrOK := axis == "attribute"
+		switch test {
+		case "element()", "text()", "node()", "comment()", "processing-instruction()":
+			attrOK = false
+		}
+
+		abbreviate := rapid.Bool().Draw(t, "useAbbreviation")
+
+		var step string
+		switch {
+		case abbreviate && childOK:
+			// child::x  ->  x
+			step = test
+		case abbreviate && attrOK && (test == "attribute()" || test == "*"):
+			// attribute::*  ->  @*
+			step = "@*"
+		case abbreviate && attrOK:
+			// attribute::x  ->  @x
+			step = "@" + test
+		default:
+			// Full syntax when abbreviation was not drawn or is not applicable.
+			step = axis + "::" + test
+		}
+
+		if !rapid.Bool().Draw(t, "hasPredicates") {
+			return step
+		}
+
+		var out strings.Builder
+		out.WriteString(step)
+		count := rapid.IntRange(1, 2).Draw(t, "numPredicates")
+		for i := 0; i < count; i++ {
+			// Guard against the predicate generator not having been assigned.
+			if genPredicateContent == nil {
+				t.Fatalf("genPredicateContent is nil, initialization order issue?")
+			}
+			out.WriteString(genPredicate().Draw(t, fmt.Sprintf("predicate%d", i)))
+		}
+		return out.String()
+	})
+}
+
+// Assigns genRelativePathExpr: a relative path of one to three steps, all
+// joined by the same separator ("/" or "//").
+func init() {
+	genRelativePathExpr = rapid.Custom(func(t *rapid.T) string {
+		// The step count is drawn before any step.
+		steps := make([]string, rapid.IntRange(1, 3).Draw(t, "numSteps"))
+		for i := range steps {
+			steps[i] = genStep().Draw(t, fmt.Sprintf("step%d", i))
+		}
+		sep := rapid.SampledFrom([]string{"/", "//"}).Draw(t, "separator")
+		return strings.Join(steps, sep)
+	})
+}
+
+// generateFunctionArgs generates the argument string for a given XPath function name.
+func generateFunctionArgs(t *rapid.T, funcName string) string {
+	// stringArg draws an argument used where a string is expected: the
+	// context node, a relative path, or a string literal.
+	stringArg := func(label string) string {
+		return rapid.OneOf(rapid.Just("."), genRelativePathExpr, genStringLiteral()).Draw(t, label)
+	}
+	// nodeSetArg draws the context node or a relative path.
+	nodeSetArg := func(label string) string {
+		return rapid.OneOf(rapid.Just("."), genRelativePathExpr).Draw(t, label)
+	}
+
+	switch funcName {
+	// Zero or one node-set/path argument.
+	case "string", "boolean", "number", "name", "namespace-uri", "local-name", "normalize-space":
+		if rapid.Bool().Draw(t, "hasArg") {
+			return nodeSetArg("arg0")
+		}
+		return ""
+
+	// Exactly one node-set argument.
+	case "count", "sum":
+		return nodeSetArg("arg0")
+
+	// Two to four arguments, each a string literal or a path.
+	case "concat":
+		argList := make([]string, rapid.IntRange(2, 4).Draw(t, "numConcatArgs"))
+		for i := range argList {
+			argList[i] = rapid.OneOf(genStringLiteral(), genRelativePathExpr).Draw(t, fmt.Sprintf("concatArg%d", i))
+		}
+		return strings.Join(argList, ", ")
+
+	// (string, string-literal). "ends-with" is kept here although
+	// genSimpleFunctionCall currently leaves it out of its name list.
+	case "starts-with", "contains", "substring-before", "substring-after", "ends-with":
+		arg1 := stringArg("strArg1")
+		arg2 := genStringLiteral().Draw(t, "strArg2")
+		return arg1 + ", " + arg2
+
+	// (string, start[, length]). The start position is drawn from [1, 100];
+	// the optional length may be any generated number, including zero or a
+	// negative value.
+	case "substring":
+		numArgs := rapid.IntRange(2, 3).Draw(t, "numSubstringArgs")
+		arg1 := stringArg("strArg1")
+		arg2 := strconv.Itoa(rapid.IntRange(1, 100).Draw(t, "substringStartPos"))
+		if numArgs == 3 {
+			arg3 := genNumberLiteral().Draw(t, "numArg3")
+			return arg1 + ", " + arg2 + ", " + arg3
+		}
+		return arg1 + ", " + arg2
+
+	// Exactly one string argument (string-length is always given one here).
+	case "string-length", "lower-case":
+		return stringArg("strArg1")
+
+	// (string, string-literal, string-literal)
+	case "translate":
+		arg1 := stringArg("strArg1")
+		arg2 := genStringLiteral().Draw(t, "strArg2")
+		arg3 := genStringLiteral().Draw(t, "strArg3")
+		return arg1 + ", " + arg2 + ", " + arg3
+
+	// One boolean-ish argument: a path, true() or false().
+	case "not":
+		return rapid.OneOf(genRelativePathExpr, rapid.Just("true()"), rapid.Just("false()")).Draw(t, "boolArg1")
+
+	// No arguments. lang() is deliberately not handled.
+	case "true", "false", "position", "last":
+		return ""
+
+	// One numeric argument: a path or a number literal.
+	case "floor", "ceiling", "round":
+		return rapid.OneOf(genRelativePathExpr, genNumberLiteral()).Draw(t, "numArg1")
+
+	// (string, pattern) - the optional flags argument is never generated.
+	case "matches":
+		arg1 := stringArg("strArg1")
+		arg2 := genStringLiteral().Draw(t, "regexPattern")
+		return arg1 + ", " + arg2
+
+	// (string, pattern, replacement)
+	case "replace":
+		arg1 := stringArg("strArg1")
+		arg2 := genStringLiteral().Draw(t, "regexPattern")
+		arg3 := genStringLiteral().Draw(t, "replacementStr")
+		return arg1 + ", " + arg2 + ", " + arg3
+
+	// One path argument.
+	case "reverse":
+		return genRelativePathExpr.Draw(t, "pathArg1")
+
+	// (path, separator string-literal)
+	case "string-join":
+		arg1 := genRelativePathExpr.Draw(t, "pathArg1")
+		arg2 := genStringLiteral().Draw(t, "separatorStr")
+		return arg1 + ", " + arg2
+
+	default:
+		// Unknown function names get an empty argument list.
+		return ""
+	}
+}
+
+// applyKnownFunctionIssueFilters removes function calls that are known to
+// misbehave, so they do not mask other findings:
+//
+//   - "boolean()" and "number()": recorded as causing a nil pointer
+//     dereference in antchfx/xpath's Evaluate.
+//   - "local-name()" and "name()": on the document root xmllint returns ""
+//     while antchfx returns the name of the first child element.
+func applyKnownFunctionIssueFilters(gen *rapid.Generator[string]) *rapid.Generator[string] {
+	return gen.Filter(func(s string) bool {
+		switch s {
+		case "boolean()", "number()", "local-name()", "name()":
+			return false
+		default:
+			return true
+		}
+	})
+}
+
+// genSimpleFunctionCall generates calls to common XPath functions.
+func genSimpleFunctionCall() *rapid.Generator[string] {
+	return rapid.Custom(func(t *rapid.T) string {
+		// Function names taken from the list of supported functions in the README.
+		funcName := rapid.SampledFrom([]string{
+			// Core XPath 1.0
+			"boolean", "ceiling", "concat", "contains", "count", "false", "floor",
+			"last", "local-name", "name", "namespace-uri", "normalize-space",
+			"not", "number", "position", "round", "starts-with", "string",
+			"string-length", "substring", "substring-after", "substring-before",
+			"sum", "translate", "true",
+			// Added from README (potentially XPath 2.0+)
+			// "ends-with", // not supported by xmllint
+			"lower-case", "matches", "replace", "reverse", "string-join",
+			// lang() is explicitly marked as unsupported in the README.
+			// "lang",
+		}).Draw(t, "funcName")
+
+		args := generateFunctionArgs(t, funcName)
+
+		return fmt.Sprintf("%s(%s)", funcName, args)
+	})
+}
+
+// applyKnownPathIssueFilters removes path expressions with a known mismatch
+// between xmllint and antchfx. In every case xmllint selects the document
+// element (and, for the descendant forms, what is below it) while antchfx
+// returns an empty result for the test document structure:
+//
+//	/child::*
+//	/*
+//	/descendant::*
+//	/descendant::div
+//
+// NOTE(review): the original comment pointed at a placeholder URL and at an
+// example document whose markup is missing in this copy; link the real issue
+// here once it exists.
+// TODO: Remove as fixed
+func applyKnownPathIssueFilters(gen *rapid.Generator[string]) *rapid.Generator[string] {
+	return gen.Filter(func(s string) bool {
+		switch s {
+		case "/child::*", "/*", "/descendant::*", "/descendant::div":
+			return false
+		default:
+			return true
+		}
+	})
+}
+
+// genXPathExpr generates a top-level XPath expression: either a path
+// (absolute, "//"-prefixed or relative) or a single function call. Known
+// problematic expressions are filtered out.
+func genXPathExpr() *rapid.Generator[string] {
+	baseGen := rapid.OneOf(
+		// Option 1: path expression.
+		rapid.Custom(func(t *rapid.T) string {
+			start := rapid.SampledFrom([]string{"/", "//", ""}).Draw(t, "start")
+			if start == "" && rapid.Bool().Draw(t, "forceAbsolute") {
+				// Turn some of the relative draws into absolute paths.
+				start = "/"
+			}
+
+			if genRelativePathExpr == nil {
+				t.Fatalf("genRelativePathExpr is nil during genXPathExpr generation")
+			}
+			relativePath := genRelativePathExpr.Draw(t, "relativePath")
+
+			// Never emit a bare "/", "//" or an empty expression.
+			if relativePath == "" {
+				if start == "" {
+					return "."
+				}
+				relativePath = "node()"
+			}
+
+			return start + relativePath
+		}),
+		// Option 2: top-level function call.
+		applyKnownFunctionIssueFilters(genSimpleFunctionCall()),
+		// Not generated yet: bare literals, UnionExpr ('|') and operators
+		// (+, -, =, ...) at the top level.
+	)
+
+	// The path filters are applied to the combined generator.
+	return applyKnownPathIssueFilters(baseGen)
+}
+
+// setupStaticTestFile writes staticXMLContent to a new temporary file inside
+// tmpDir and returns the file's path. The file is closed exactly once on
+// every path; removing it is left to whoever owns tmpDir.
+func setupStaticTestFile(testingT *testing.T, tmpDir string) (string, error) {
+	testingT.Helper()
+
+	staticTmpFile, err := os.CreateTemp(tmpDir, "static-xpath-test-*.xml")
+	if err != nil {
+		return "", fmt.Errorf("failed to create static temp file: %w", err)
+	}
+
+	if _, err := staticTmpFile.WriteString(staticXMLContent); err != nil {
+		// Release the handle; the write error is the one worth reporting.
+		staticTmpFile.Close()
+		return "", fmt.Errorf("failed to write static temp file: %w", err)
+	}
+
+	// A failed Close can mean the data never reached the file.
+	if err := staticTmpFile.Close(); err != nil {
+		return "", fmt.Errorf("failed to close static temp file: %w", err)
+	}
+	return staticTmpFile.Name(), nil
+}
+
+// Static XML content for basic xmllint syntax validation.
+// NOTE(review): the element markup inside this literal appears to be missing
+// in this copy (only the text "foo" is left); restore it from the original
+// file before applying.
+const staticXMLContent = `
+
+ foo
+
+
+
+`
+
+// runXPathPropertyTestIteration performs a single iteration of the property test logic.
+// It generates inputs, runs xmllint, runs antchfx, and optionally compares results.
+// testingT is the main *testing.T, t is the rapid.T, tmpDir is for temp files.
+// staticTmpFilePath names the static XML file used for the initial syntax check.
+// compareResults determines if the final comparison step is performed.
+// Returns true if the iteration should continue (no fatal error), false otherwise.
+//
+// A failure to run xmllint at all (as opposed to xmllint exiting non-zero) is
+// reported as exit code -1 and handled like any other unexpected exit code.
+func runXPathPropertyTestIteration(testingT *testing.T, t *rapid.T, tmpDir, staticTmpFilePath string, compareResults bool) bool {
+	// 1. Generate random document and expression
+	rootNode := genTNode.Filter(func(n *TNode) bool { return n.Type == ElementNode }).Draw(t, "doc")
+	exprStr := genXPathExpr().Draw(t, "expr")
+	t.Logf("Testing expression: %q", exprStr)
+
+	// 2. Initial syntax check with xmllint on STATIC file
+	cmdStatic := exec.Command("xmllint", "--xpath", exprStr, staticTmpFilePath)
+	var xmllintStaticStderr bytes.Buffer
+	cmdStatic.Stderr = &xmllintStaticStderr
+	cmdStaticErr := cmdStatic.Run() // Only the error/exit code matters here.
+	exitCodeStatic := xmllintExitCode(cmdStaticErr)
+
+	if exitCodeStatic == 11 { // XPath syntax error according to xmllint
+		t.Logf("xmllint rejected expr %q syntactically (exit code 11), skipping.", exprStr)
+		_, antchfxCompileErr := Compile(exprStr)
+		if antchfxCompileErr == nil {
+			// xmllint rejects the syntax but antchfx compiles it: a failure.
+			testingT.Fatalf("xmllint rejected expr %q but antchfx compiled it.\nxmllint stderr:\n%s",
+				exprStr, xmllintStaticStderr.String())
+			return false // Stop test
+		}
+		return true // Skip this expression, continue test
+	}
+	// Any other failure of the static check, including xmllint not starting.
+	if exitCodeStatic != 0 && exitCodeStatic != 10 {
+		testingT.Errorf("xmllint failed unexpectedly on static file (exit code %d) for expr %q: Stderr: %s",
+			exitCodeStatic, exprStr, xmllintStaticStderr.String())
+		return true // Reported via Errorf, but continue test run
+	}
+
+	// 3. Serialize random document and write to temp file
+	xmlString := nodeToXMLString(rootNode)
+	randomTmpFile, err := os.CreateTemp(tmpDir, "random-xpath-test-*.xml")
+	if err != nil {
+		testingT.Fatalf("Failed to create random temp file: %v", err)
+		return false
+	}
+	randomTmpFilePath := randomTmpFile.Name()
+	defer os.Remove(randomTmpFilePath)
+
+	if _, err = randomTmpFile.WriteString(xmlString); err != nil {
+		randomTmpFile.Close() // release the handle before failing
+		testingT.Fatalf("Failed to write random temp file: %v", err)
+		return false
+	}
+	if err = randomTmpFile.Close(); err != nil {
+		testingT.Fatalf("Failed to close random temp file: %v", err)
+		return false
+	}
+
+	// 4. Run xmllint on the RANDOM document
+	cmdRandom := exec.Command("xmllint", "--xpath", exprStr, randomTmpFilePath)
+	var xmllintRandomStdout, xmllintRandomStderr bytes.Buffer
+	cmdRandom.Stdout = &xmllintRandomStdout
+	cmdRandom.Stderr = &xmllintRandomStderr
+	cmdRandomErr := cmdRandom.Run()
+	exitCodeRandom := xmllintExitCode(cmdRandomErr)
+	xmllintStderrStr := xmllintRandomStderr.String()
+
+	// Exit codes 0 (success), 10 (no result) and 11 (evaluation error) are
+	// processed further. Everything else - XML parse errors, context errors,
+	// xmllint not starting - skips this iteration.
+	if exitCodeRandom != 0 && exitCodeRandom != 10 && exitCodeRandom != 11 {
+		t.Logf("xmllint failed unexpectedly on random doc (exit code %d) for expr %q. Skipping.\nStderr: %s\nXML:\n%s",
+			exitCodeRandom, exprStr, xmllintStderrStr, xmlString)
+		return true // Continue test run, but skip this iteration
+	}
+
+	// For the equivalence test an empty xmllint result is not compared.
+	if compareResults && exitCodeRandom == 10 {
+		t.Logf("Equivalence test: xmllint returned exit code 10 (no result) for expr %q. Skipping comparison.\nXML:\n%s",
+			exprStr, xmlString)
+		return true // Continue test run
+	}
+
+	// 5. Compile and Evaluate with antchfx/xpath
+	antchfxExpr, antchfxCompileErr := Compile(exprStr)
+
+	// Handle compilation discrepancies
+	if antchfxCompileErr != nil {
+		if exitCodeRandom != 11 { // antchfx failed, xmllint did not report an error (11)
+			testingT.Fatalf("antchfx failed to compile expr %q which xmllint accepted/processed (exit %d):\nAntchfx Error: %v\nxmllint stderr:\n%s",
+				exprStr, exitCodeRandom, antchfxCompileErr, xmllintStderrStr)
+			return false
+		}
+		// Both failed (xmllint exit 11, antchfx compile error).
+		t.Logf("Both xmllint (exit 11) and antchfx failed to compile/evaluate expr %q. Antchfx err: %v", exprStr, antchfxCompileErr)
+		return true // Consistent failure, continue test run
+	}
+
+	// antchfx compiled (the error is nil here) but xmllint failed evaluation.
+	if exitCodeRandom == 11 {
+		testingT.Fatalf("xmllint failed evaluating expr %q (exit 11) but antchfx compiled it successfully.\nxmllint stderr:\n%s\nXML:\n%s",
+			exprStr, xmllintStderrStr, xmlString)
+		return false
+	}
+
+	// Evaluate antchfx, catching panics
+	var antchfxResult interface{}
+	var antchfxPanic interface{}
+	func() {
+		defer func() {
+			if r := recover(); r != nil {
+				antchfxPanic = r
+			}
+		}()
+		nav := createNavigator(rootNode)
+		antchfxResult = antchfxExpr.Evaluate(nav)
+	}()
+
+	// Handle panics - this is critical for the NoPanic test
+	if antchfxPanic != nil {
+		// Fail only when xmllint definitively succeeded (exit 0).
+		if exitCodeRandom == 0 {
+			testingT.Fatalf("Panic during antchfx Evaluate for expr %q (xmllint exit 0):\nPanic: %v\nXML:\n%s\nxmllint stdout:\n%s\nxmllint stderr:\n%s",
+				exprStr, antchfxPanic, xmlString, xmllintRandomStdout.String(), xmllintStderrStr)
+			return false // Stop test run on panic
+		}
+		// Panics alongside another xmllint exit code are only logged.
+		t.Logf("Antchfx panic occurred for expr %q, but xmllint exit code was %d (not 0). Panic: %v", exprStr, exitCodeRandom, antchfxPanic)
+	}
+
+	// If only checking for panics, we are done for this iteration.
+	if !compareResults {
+		return true // Continue test run
+	}
+
+	// --- Equivalence Check Logic ---
+	// This part only runs if compareResults is true
+
+	// Normalize antchfx result
+	navForToString := createNavigator(rootNode)
+	antchfxNormStr := antchfxResultToString(antchfxResult, navForToString)
+	t.Logf("Antchfx raw result (type %T): %s", antchfxResult, antchfxNormStr)
+
+	// Normalize xmllint result
+	xmllintNormStr, xmllintParseErr := parseXmllintOutput(xmllintRandomStdout.String(), xmllintStderrStr, exitCodeRandom)
+	if xmllintParseErr != nil {
+		testingT.Fatalf("Failed to parse xmllint output for expr %q: %v\nXML:\n%s", exprStr, xmllintParseErr, xmlString)
+		return false
+	}
+
+	// Compare normalized strings
+	if xmllintNormStr != antchfxNormStr {
+		testingT.Fatalf("Result mismatch for expr %q\n--- xmllint (exit %d) ---\n%s\n------\n--- antchfx (normalized) ---\n%s\n------\n--- antchfx (raw type %T, as string) ---\n%s\n------\n--- XML ---\n%s\n------\n--- xmllint stderr ---\n%s\n------",
+			exprStr, exitCodeRandom, xmllintNormStr, antchfxNormStr, antchfxResult, antchfxNormStr, xmlString, xmllintStderrStr)
+		return false
+	}
+
+	return true // Continue test run
+}
+
+// xmllintExitCode maps the error returned by (*exec.Cmd).Run to an exit code:
+// 0 for a nil error, the reported exit code for an *exec.ExitError, and -1 for
+// any other error, i.e. when the command produced no exit status at all.
+func xmllintExitCode(err error) int {
+	if err == nil {
+		return 0
+	}
+	if exitErr, ok := err.(*exec.ExitError); ok {
+		return exitErr.ExitCode()
+	}
+	return -1
+}
+
+// TestPropertyXPathNoPanic checks that antchfx/xpath does not panic when evaluating
+// XPath expressions that xmllint successfully processes (exit code 0 or 10).
+func TestPropertyXPathNoPanic(testingT *testing.T) {
+	checkXmllintAvailability(testingT)
+	testingT.Log("Starting TestPropertyXPathNoPanic...")
+
+	// Setup static file for syntax check
+	tmpDir := testingT.TempDir()
+	staticTmpFilePath, err := setupStaticTestFile(testingT, tmpDir)
+	if err != nil {
+		testingT.Fatalf("Setup failed: %v", err)
+	}
+	// No need to defer remove static file, tmpDir handles it.
+
+	rapid.Check(testingT, func(t *rapid.T) {
+		// Run the shared logic, but don't compare results.
+		// The helper function will fail the test immediately on panic.
+ if !runXPathPropertyTestIteration(testingT, t, tmpDir, staticTmpFilePath, false) { + // If the helper returned false, it means a fatal error occurred. + // rapid should stop further iterations. + t.FailNow() + } + }) // Removed CheckConfig option + testingT.Logf("TestPropertyXPathNoPanic finished.") +} + +// TestPropertyXPathEquivalence checks that antchfx/xpath evaluation results +// match xmllint results for the same XPath expression and document. +func TestPropertyXPathEquivalence(testingT *testing.T) { + checkXmllintAvailability(testingT) + testingT.Log("Starting TestPropertyXPathEquivalence...") + + // Setup static file for syntax check + tmpDir := testingT.TempDir() + staticTmpFilePath, err := setupStaticTestFile(testingT, tmpDir) + if err != nil { + testingT.Fatalf("Setup failed: %v", err) + } + + rapid.Check(testingT, func(t *rapid.T) { + // Run the shared logic, including result comparison. + if !runXPathPropertyTestIteration(testingT, t, tmpDir, staticTmpFilePath, true) { + // If the helper returned false, it means a fatal error occurred. + t.FailNow() + } + }) + testingT.Logf("TestPropertyXPathEquivalence finished.") +} + +// Helper function to serialize the TNode tree to a string suitable for xmllint. +// serializeNodeToString converts a single TNode and its descendants to an XML string snippet. +// Does NOT add XML declaration or a wrapping element. Indentation is basic. 
+func serializeNodeToString(n *TNode) string { + var sb strings.Builder + var printNode func(*TNode, int) + printNode = func(node *TNode, indent int) { + sb.WriteString(strings.Repeat(" ", indent)) // Basic indentation + switch node.Type { + case ElementNode: + // Ensure element names are XML-compatible (basic check) + tagName := node.Data + if tagName == "" { + tagName = "unknown" // Handle empty tag names if generator allows + } + sb.WriteString("<" + tagName) + // Keep track of added attribute names to avoid duplicates which xmllint might dislike + addedAttrs := make(map[string]bool) + for _, attr := range node.Attr { + // Ensure attr names are XML-compatible (basic check) and not duplicated + attrName := attr.Key + if attrName == "" || addedAttrs[attrName] { + continue // Skip empty or duplicate attribute names + } + addedAttrs[attrName] = true + // Use standard Go quoting which handles XML entities (&, <, >, ", ') + sb.WriteString(fmt.Sprintf(" %s=%q", attrName, attr.Value)) + } + if node.FirstChild == nil { + sb.WriteString("/>") // No newline for self-closing in snippet + } else { + sb.WriteString(">") + // No newline after opening tag in snippet? Or maybe yes? Let's omit for now. + for child := node.FirstChild; child != nil; child = child.NextSibling { + printNode(child, indent+1) // Indent children + } + sb.WriteString(strings.Repeat(" ", indent)) // Indent closing tag + sb.WriteString("") + } + case TextNode: + // Escape text content for XML + escapedData := escapeXMLText(node.Data) + sb.WriteString(escapedData) // No quotes or newline around text nodes in snippet + case CommentNode: + // Ensure comment data doesn't contain "--" + commentData := strings.ReplaceAll(node.Data, "--", "- -") + sb.WriteString(fmt.Sprintf("", commentData)) + // Ignore other node types for snippet serialization + default: + } + // Add a newline after each top-level node in the snippet for readability? 
+ // Let's try without first, aiming for compact output like xmllint often gives. + // sb.WriteString("\n") // Removed potential trailing newline + } + + // Start printing from the node itself at indent 0 + printNode(n, 0) + return sb.String() +} + +// Adds an XML declaration and wraps content in a single root. +// Uses serializeNodeToString for the core node serialization. +func nodeToXMLString(node *TNode) string { + var sb strings.Builder + sb.WriteString(`` + "\n") // XML declaration + sb.WriteString("\n") // Wrapper root element + + // Serialize the main node using the helper, assuming it adds necessary indentation/newlines internally + sb.WriteString(serializeNodeToString(node)) + sb.WriteString("\n") // Add a newline after the serialized node content + + sb.WriteString("\n") // Close wrapper root element + return sb.String() +} + +// escapeXMLText escapes characters problematic for XML text nodes. +func escapeXMLText(s string) string { + var buf bytes.Buffer + for _, r := range s { + switch r { + case '&': + buf.WriteString("&") + case '<': + buf.WriteString("<") + case '>': + buf.WriteString(">") + // Standard Go %q handles quotes, but they are allowed in text nodes. + // Only strictly need to escape &, <, > in text content. + default: + buf.WriteRune(r) + } + } + return buf.String() +}