Skip to content

Commit db584e2

Browse files
committed
fix(parsers): enable proper XPath array element access in JSON parser
The xpath_json parser previously failed to correctly parse JSON array elements using XPath expressions like //array[1]/field. This was because the underlying jsonquery library treats arrays as single container nodes with unnamed children, making it impossible to index into arrays directly. This change modifies the JSON document handler to: - Convert JSON to CBOR format internally - Use cborquery for parsing, which correctly handles array elements as named siblings that can be indexed with XPath expressions The fix enables users to access JSON array elements using intuitive XPath syntax like //nvme_namespaces[1]/capacity/bytes instead of requiring the workaround //*[local-name()='nvme_namespaces']/*[1]/capacity/bytes. Note: This is a breaking change for some XPath query patterns: - Array selection: Use //array instead of //array/* to select elements - String representation of objects/arrays differs from JSON format Fixes #18145 Signed-off-by: majiayu000 <1835304752@qq.com>
1 parent 67ee5c0 commit db584e2

File tree

14 files changed

+135
-49
lines changed

14 files changed

+135
-49
lines changed
Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,38 @@
11
package xpath
22

33
import (
4-
"reflect"
4+
"bytes"
5+
"encoding/json"
6+
"fmt"
57
"strconv"
6-
"strings"
78

8-
"github.com/antchfx/jsonquery"
99
path "github.com/antchfx/xpath"
10+
"github.com/fxamacker/cbor/v2"
11+
"github.com/srebhan/cborquery"
1012
)
1113

1214
type jsonDocument struct{}
1315

1416
func (*jsonDocument) Parse(buf []byte) (dataNode, error) {
15-
return jsonquery.Parse(strings.NewReader(string(buf)))
17+
// First parse JSON to an interface{}
18+
var data interface{}
19+
if err := json.Unmarshal(buf, &data); err != nil {
20+
return nil, fmt.Errorf("failed to parse JSON: %w", err)
21+
}
22+
23+
// Convert to CBOR to leverage cborquery's correct array handling
24+
cborData, err := cbor.Marshal(data)
25+
if err != nil {
26+
return nil, fmt.Errorf("failed to convert JSON to CBOR: %w", err)
27+
}
28+
29+
// Parse with cborquery which handles arrays correctly
30+
return cborquery.Parse(bytes.NewReader(cborData))
1631
}
1732

1833
func (*jsonDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) {
1934
// If this panics it's a programming error as we changed the document type while processing
20-
native, err := jsonquery.QueryAll(node.(*jsonquery.Node), expr)
35+
native, err := cborquery.QueryAll(node.(*cborquery.Node), expr)
2136
if err != nil {
2237
return nil, err
2338
}
@@ -31,15 +46,15 @@ func (*jsonDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) {
3146

3247
func (*jsonDocument) CreateXPathNavigator(node dataNode) path.NodeNavigator {
3348
// If this panics it's a programming error as we changed the document type while processing
34-
return jsonquery.CreateXPathNavigator(node.(*jsonquery.Node))
49+
return cborquery.CreateXPathNavigator(node.(*cborquery.Node))
3550
}
3651

3752
func (d *jsonDocument) GetNodePath(node, relativeTo dataNode, sep string) string {
3853
names := make([]string, 0)
3954

4055
// If these panic it's a programming error as we changed the document type while processing
41-
nativeNode := node.(*jsonquery.Node)
42-
nativeRelativeTo := relativeTo.(*jsonquery.Node)
56+
nativeNode := node.(*cborquery.Node)
57+
nativeRelativeTo := relativeTo.(*cborquery.Node)
4358

4459
// Climb up the tree and collect the node names
4560
n := nativeNode.Parent
@@ -64,40 +79,41 @@ func (d *jsonDocument) GetNodePath(node, relativeTo dataNode, sep string) string
6479

6580
func (d *jsonDocument) GetNodeName(node dataNode, sep string, withParent bool) string {
6681
// If this panics it's a programming error as we changed the document type while processing
67-
nativeNode := node.(*jsonquery.Node)
82+
nativeNode := node.(*cborquery.Node)
6883

69-
name := nativeNode.Data
84+
name := nativeNode.Name
7085

71-
// Check if the node is part of an array. If so, determine the index and
72-
// concatenate the parent name and the index.
73-
kind := reflect.Invalid
74-
if nativeNode.Parent != nil && nativeNode.Parent.Value() != nil {
75-
kind = reflect.TypeOf(nativeNode.Parent.Value()).Kind()
76-
}
77-
78-
switch kind {
79-
case reflect.Slice, reflect.Array:
80-
// Determine the index for array elements
81-
if name == "" && nativeNode.Parent != nil && withParent {
82-
name = nativeNode.Parent.Data + sep
86+
// In cborquery, array elements appear as siblings with the same name.
87+
// Check if this node is part of an array by looking for siblings with the same name.
88+
if nativeNode.Parent != nil && name != "" {
89+
idx, count := d.siblingIndex(nativeNode)
90+
if count > 1 {
91+
// This is an array element, append the index
92+
return name + sep + strconv.Itoa(idx)
8393
}
84-
return name + d.index(nativeNode)
8594
}
8695

8796
return name
8897
}
8998

9099
func (*jsonDocument) OutputXML(node dataNode) string {
91-
native := node.(*jsonquery.Node)
100+
native := node.(*cborquery.Node)
92101
return native.OutputXML()
93102
}
94103

95-
func (*jsonDocument) index(node *jsonquery.Node) string {
96-
idx := 0
97-
98-
for n := node; n.PrevSibling != nil; n = n.PrevSibling {
99-
idx++
104+
func (*jsonDocument) siblingIndex(node *cborquery.Node) (idx, count int) {
105+
if node.Parent == nil {
106+
return 0, 1
100107
}
101108

102-
return strconv.Itoa(idx)
109+
// Count siblings with the same name and find our index among them
110+
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
111+
if sibling.Name == node.Name {
112+
if sibling == node {
113+
idx = count
114+
}
115+
count++
116+
}
117+
}
118+
return idx, count
103119
}

plugins/parsers/xpath/msgpack_document.go

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,39 @@ package xpath
22

33
import (
44
"bytes"
5+
"encoding/json"
56
"fmt"
67

7-
"github.com/antchfx/jsonquery"
88
path "github.com/antchfx/xpath"
9+
"github.com/fxamacker/cbor/v2"
10+
"github.com/srebhan/cborquery"
911
"github.com/tinylib/msgp/msgp"
1012
)
1113

1214
type msgpackDocument jsonDocument
1315

1416
func (*msgpackDocument) Parse(buf []byte) (dataNode, error) {
15-
var json bytes.Buffer
17+
var jsonBuf bytes.Buffer
1618

17-
// Unmarshal the message-pack binary message to JSON and proceed with the jsonquery class
18-
if _, err := msgp.UnmarshalAsJSON(&json, buf); err != nil {
19+
// Unmarshal the message-pack binary message to JSON
20+
if _, err := msgp.UnmarshalAsJSON(&jsonBuf, buf); err != nil {
1921
return nil, fmt.Errorf("unmarshalling to json failed: %w", err)
2022
}
21-
return jsonquery.Parse(&json)
23+
24+
// Parse JSON to interface{}
25+
var data interface{}
26+
if err := json.Unmarshal(jsonBuf.Bytes(), &data); err != nil {
27+
return nil, fmt.Errorf("failed to parse JSON: %w", err)
28+
}
29+
30+
// Convert to CBOR to leverage cborquery's correct array handling
31+
cborData, err := cbor.Marshal(data)
32+
if err != nil {
33+
return nil, fmt.Errorf("failed to convert JSON to CBOR: %w", err)
34+
}
35+
36+
// Parse with cborquery which handles arrays correctly
37+
return cborquery.Parse(bytes.NewReader(cborData))
2238
}
2339

2440
func (d *msgpackDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) {

plugins/parsers/xpath/parser.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"strings"
1212
"time"
1313

14-
"github.com/antchfx/jsonquery"
1514
path "github.com/antchfx/xpath"
1615
"github.com/srebhan/cborquery"
1716
"github.com/srebhan/protobufquery"
@@ -493,8 +492,6 @@ func (p *Parser) executeQuery(doc, selected dataNode, query string) (r interface
493492
switch nn := current.(type) {
494493
case *cborquery.NodeNavigator:
495494
return nn.GetValue(), nil
496-
case *jsonquery.NodeNavigator:
497-
return nn.GetValue(), nil
498495
case *protobufquery.NodeNavigator:
499496
return nn.GetValue(), nil
500497
}
@@ -562,10 +559,18 @@ func (p *Parser) constructFieldName(root, node dataNode, name string, expand boo
562559

563560
// In case the name is empty we should determine the current node's name.
564561
// This involves array index expansion in case the parent of the node is
565-
// and array. If we expanded here, we should skip our parent as this is
566-
// already encoded in the name
562+
// an array. If we expanded here, we should skip our parent as this is
563+
// already encoded in the name.
567564
if name == "" {
568565
name = p.document.GetNodeName(node, "_", !expand)
566+
} else {
567+
// For non-empty names, check if this is an array element and append index.
568+
// GetNodeName returns the name with array index for array elements.
569+
nodeName := p.document.GetNodeName(node, "_", false)
570+
if nodeName != name && strings.Contains(nodeName, name+"_") {
571+
// The node name includes an array index (e.g., "cpus_0" vs "cpus")
572+
name = nodeName
573+
}
569574
}
570575

571576
// If name expansion is requested, construct a path between the current

plugins/parsers/xpath/parser_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1604,7 +1604,7 @@ const benchmarkDataJSON = `
16041604
`
16051605

16061606
var benchmarkConfigJSON = Config{
1607-
Selection: "data/*",
1607+
Selection: "//data",
16081608
Tags: map[string]string{
16091609
"tags_host": "tags_host",
16101610
"tags_sdkver": "tags_sdkver",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nvme,device=/dev/nvme1,model_name=Samsung\ SSD,serial_number=ABC123 ns1_capacity=960197124096i,ns1_id=1i,ns1_utilization=86638583808i,ns2_capacity=500107862016i,ns2_id=2i
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[[inputs.file]]
2+
files = ["./testcases/json_array_indexing/test.json"]
3+
data_format = "xpath_json"
4+
5+
xpath_native_types = true
6+
7+
[[inputs.file.xpath]]
8+
metric_name = "'nvme'"
9+
10+
[inputs.file.xpath.tags]
11+
device = "string(/device/name)"
12+
model_name = "string(/model_name)"
13+
serial_number = "string(/serial_number)"
14+
15+
[inputs.file.xpath.fields_int]
16+
# Test accessing array elements by index - this is the fix for issue #18145
17+
ns1_id = "number(//nvme_namespaces[1]/id)"
18+
ns1_capacity = "number(//nvme_namespaces[1]/capacity/bytes)"
19+
ns1_utilization = "number(//nvme_namespaces[1]/utilization/bytes)"
20+
ns2_id = "number(//nvme_namespaces[2]/id)"
21+
ns2_capacity = "number(//nvme_namespaces[2]/capacity/bytes)"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"device": {
3+
"name": "/dev/nvme1"
4+
},
5+
"model_name": "Samsung SSD",
6+
"serial_number": "ABC123",
7+
"nvme_namespaces": [
8+
{
9+
"id": 1,
10+
"capacity": {
11+
"bytes": 960197124096
12+
},
13+
"utilization": {
14+
"bytes": 86638583808
15+
}
16+
},
17+
{
18+
"id": 2,
19+
"capacity": {
20+
"bytes": 500107862016
21+
},
22+
"utilization": {
23+
"bytes": 42949672960
24+
}
25+
}
26+
]
27+
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
foo a="a string",b=3.1415,c=true,d="{\"d1\":1,\"d2\":\"foo\",\"d3\":true,\"d4\":null}",e="[\"master\",42,true]",timestamp=1690193829 1690193829000000000
1+
foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:<nil>]",e="master",e_0="master",e_1=42,e_2=true,timestamp=1690193829 1690193829000000000
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:<nil>]",e="[master 42 true]",timestamp=1690193829 1690193829000000000
1+
foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:<nil>]",e_0="master",e_1=42,e_2=true,timestamp=1690193829 1690193829000000000
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
foo a="a string",b="3.1415",c="true",d="{\"d1\":1,\"d2\":\"foo\",\"d3\":true,\"d4\":null}",e="[\"master\",42,true]",timestamp="1690193829" 1690193829000000000
1+
foo a="a string",b="3.1415",c="true",d="map[d1:1 d2:foo d3:true d4:<nil>]",e_0="master",e_1="42",e_2="true",timestamp="1.690193829e+09" 1690193829000000000

0 commit comments

Comments
 (0)