Skip to content

Commit 34a67e1

Browse files
craig[bot] and paulniziolek committed
Merge #151597
151597: sql: add LTREE key encoding r=paulniziolek a=paulniziolek

#### sql: add LTREE keyside encoding

This adds basic keyside encoding for LTREE, which is encoded similarly to a string array: ltrees have their own terminator and their labels are encoded via escaped bytes.

Informs: #44657
Epic: CRDB-148
Release note: None

Co-authored-by: Paul Niziolek <[email protected]>
2 parents 40e00eb + fe85207 commit 34a67e1

File tree

12 files changed

+267
-41
lines changed

12 files changed

+267
-41
lines changed

pkg/sql/catalog/colinfo/col_type_info.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -148,18 +148,14 @@ func ColumnTypeIsIndexable(t *types.T) bool {
148148
}
149149

150150
switch t.Family() {
151-
case types.TupleFamily, types.RefCursorFamily, types.JsonpathFamily, types.LTreeFamily:
152-
// TODO(paulniziolek): LTreeFamily should be supported in keyside encoding.
153-
// Temporarily, we disallow it, until implemented.
151+
case types.TupleFamily, types.RefCursorFamily, types.JsonpathFamily:
154152
return false
155153
}
156154

157155
// If the type is an array, check its content type as well.
158156
if unwrapped := t.ArrayContents(); unwrapped != nil {
159157
switch unwrapped.Family() {
160-
case types.TupleFamily, types.RefCursorFamily, types.JsonpathFamily, types.LTreeFamily:
161-
// TODO(paulniziolek): LTreeFamily should be supported in keyside encoding.
162-
// Temporarily, we disallow it, until implemented.
158+
case types.TupleFamily, types.RefCursorFamily, types.JsonpathFamily:
163159
return false
164160
}
165161
}
@@ -175,9 +171,7 @@ func ColumnTypeIsInvertedIndexable(t *types.T) bool {
175171
switch t.Family() {
176172
case types.ArrayFamily:
177173
switch t.ArrayContents().Family() {
178-
case types.RefCursorFamily, types.JsonpathFamily, types.LTreeFamily:
179-
// TODO(paulniziolek): LTreeFamily should be supported in keyside encoding.
180-
// Temporarily, we disallow it, until implemented.
174+
case types.RefCursorFamily, types.JsonpathFamily:
181175
return false
182176
default:
183177
return true
@@ -230,10 +224,6 @@ func MustBeValueEncoded(semanticType *types.T) bool {
230224
return true
231225
case types.PGVectorFamily:
232226
return true
233-
case types.LTreeFamily:
234-
// TODO(paulniziolek): LTreeFamily should be supported in keyside encoding.
235-
// Temporarily, we disallow it, until implemented.
236-
return true
237227
// NB: if you're adding a new type here, you probably also want to
238228
// include it into rowenc.mustUseValueEncodingForFingerprinting.
239229
}

pkg/sql/rowenc/encoded_datum.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,6 @@ func mustUseValueEncodingForFingerprinting(t *types.T) bool {
331331
// behavior can result in incorrect results in mixed version clusters).
332332
case types.JsonFamily, types.TSQueryFamily, types.TSVectorFamily, types.PGVectorFamily:
333333
return true
334-
case types.LTreeFamily:
335-
// TODO(paulniziolek): remove this once key encoding is added.
336-
return true
337334
case types.ArrayFamily:
338335
// Note that at time of this writing we don't support arrays of JSON
339336
// (tracked via #23468) nor of TSQuery / TSVector / PGVector types (tracked by

pkg/sql/rowenc/encoded_datum_test.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,6 @@ func TestEncDatumCompare(t *testing.T) {
218218
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily, types.VoidFamily,
219219
types.TSQueryFamily, types.TSVectorFamily, types.PGVectorFamily, types.TriggerFamily, types.JsonpathFamily:
220220
continue
221-
case types.LTreeFamily:
222-
// TODO(paulniziolek): Temporarily skip LTrees as they are
223-
// currently missing keyside indexing support.
224-
continue
225221
case types.CollatedStringFamily:
226222
typ = types.MakeCollatedString(types.String, *randgen.RandCollationLocale(rng))
227223
}

pkg/sql/rowenc/index_encoding_test.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -587,11 +587,6 @@ func TestEncodeContainingArrayInvertedIndexSpans(t *testing.T) {
587587
if typ.ArrayContents().Family() == types.JsonpathFamily {
588588
continue
589589
}
590-
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
591-
// currently missing keyside indexing support.
592-
if typ.ArrayContents().Family() == types.LTreeFamily {
593-
continue
594-
}
595590

596591
// Generate two random arrays and evaluate the result of `left @> right`.
597592
left := randgen.RandArray(rng, typ, 0 /* nullChance */)
@@ -734,12 +729,6 @@ func TestEncodeContainedArrayInvertedIndexSpans(t *testing.T) {
734729
for i := 0; i < 100; i++ {
735730
typ := randgen.RandArrayType(rng)
736731

737-
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
738-
// currently missing keyside indexing support.
739-
if typ.ArrayContents().Family() == types.LTreeFamily {
740-
continue
741-
}
742-
743732
// Generate two random arrays and evaluate the result of `left <@ right`.
744733
left := randgen.RandArray(rng, typ, 0 /* nullChance */)
745734
right := randgen.RandArray(rng, typ, 0 /* nullChance */)
@@ -984,11 +973,6 @@ func TestEncodeOverlapsArrayInvertedIndexSpans(t *testing.T) {
984973
if typ.ArrayContents().Family() == types.JsonpathFamily {
985974
continue
986975
}
987-
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
988-
// currently missing keyside indexing support.
989-
if typ.ArrayContents().Family() == types.LTreeFamily {
990-
continue
991-
}
992976

993977
// Generate two random arrays and evaluate the result of `left && right`.
994978
// Using 1/9th as the Null Chance to generate arrays with a small

pkg/sql/rowenc/keyside/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ go_library(
2323
"//pkg/util/encoding",
2424
"//pkg/util/ipaddr",
2525
"//pkg/util/json",
26+
"//pkg/util/ltree",
2627
"//pkg/util/timetz",
2728
"//pkg/util/timeutil/pgdate",
2829
"//pkg/util/uuid",

pkg/sql/rowenc/keyside/decode.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/cockroachdb/cockroach/pkg/util/encoding"
2121
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
2222
"github.com/cockroachdb/cockroach/pkg/util/json"
23+
"github.com/cockroachdb/cockroach/pkg/util/ltree"
2324
"github.com/cockroachdb/cockroach/pkg/util/timetz"
2425
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
2526
"github.com/cockroachdb/cockroach/pkg/util/uuid"
@@ -273,6 +274,17 @@ func Decode(
273274
rkey, i, err = encoding.DecodeVarintDescending(key)
274275
}
275276
return a.NewDOid(tree.MakeDOid(oid.Oid(i), valType)), rkey, err
277+
case types.LTreeFamily:
278+
var l ltree.T
279+
if dir == encoding.Ascending {
280+
rkey, l, err = encoding.DecodeLTreeAscending(key)
281+
} else {
282+
rkey, l, err = encoding.DecodeLTreeDescending(key)
283+
}
284+
if err != nil {
285+
return nil, nil, err
286+
}
287+
return tree.NewDLTree(l), rkey, err
276288
case types.EnumFamily:
277289
var r []byte
278290
if dir == encoding.Ascending {

pkg/sql/rowenc/keyside/encode.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ func Encode(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) {
181181
return append(b, []byte(*t)...), nil
182182
case *tree.DJSON:
183183
return encodeJSONKey(b, t, dir)
184+
case *tree.DLTree:
185+
if dir == encoding.Ascending {
186+
return encoding.EncodeLTreeAscending(b, t.LTree), nil
187+
}
188+
return encoding.EncodeLTreeDescending(b, t.LTree), nil
184189
}
185190
if buildutil.CrdbTestBuild {
186-
if _, isLTree := val.(*tree.DLTree); isLTree {
187-
// TODO(paulniziolek): remove this exception once key encoding is
188-
// added.
189-
return nil, errors.Newf("LTREE key encoding is not implemented yet")
190-
}
191191
return nil, errors.AssertionFailedf("unable to encode table key: %T", val)
192192
}
193193
return nil, errors.Errorf("unable to encode table key: %T", val)

pkg/util/encoding/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ go_test(
6161
"//pkg/util/ipaddr",
6262
"//pkg/util/json",
6363
"//pkg/util/log",
64+
"//pkg/util/ltree",
6465
"//pkg/util/randutil",
6566
"//pkg/util/timeofday",
6667
"//pkg/util/timetz",

pkg/util/encoding/encoding.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,20 @@ const (
136136
jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1
137137
jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1
138138

139+
// LTREE key encoding markers
140+
ltreeKeyMarker = jsonEmptyArrayKeyDescendingMarker + 1
141+
ltreeKeyDescendingMarker = ltreeKeyMarker + 1
142+
139143
// Terminators for JSON Key encoding.
140144
jsonKeyTerminator byte = 0x00
141145
jsonKeyDescendingTerminator byte = 0xFF
142146

147+
// Terminators for LTREE Key encoding.
148+
ltreeKeyTerminator byte = 0x00
149+
ltreeKeyDescendingTerminator byte = 0xFF
150+
ltreeLabelKeyTerminator byte = 0x01
151+
ltreeLabelKeyDescendingTerminator byte = 0xFE
152+
143153
// IntMin is chosen such that the range of int tags does not overlap the
144154
// ascii character set that is frequently used in testing.
145155
IntMin = 0x80 // 128
@@ -863,6 +873,20 @@ func getBytesLength(b []byte, e escapes) (int, error) {
863873
}
864874
}
865875

876+
// getLTreeLength finds the length of a ltree encoding.
877+
func getLTreeLength(b []byte, dir Direction) (int, error) {
878+
var i int
879+
if dir == Ascending {
880+
i = bytes.IndexByte(b, ltreeKeyTerminator)
881+
} else {
882+
i = bytes.IndexByte(b, ltreeKeyDescendingTerminator)
883+
}
884+
if i == -1 {
885+
return 0, errors.Errorf("did not find terminator")
886+
}
887+
return i + 1, nil
888+
}
889+
866890
// prettyPrintInvertedIndexKey returns a string representation of the path part of a JSON inverted
867891
// index.
868892
func prettyPrintInvertedIndexKey(b []byte) (string, []byte, error) {
@@ -1726,6 +1750,101 @@ func DecodeBitArrayDescending(b []byte) ([]byte, bitarray.BitArray, error) {
17261750
return b, ba, err
17271751
}
17281752

1753+
// EncodeLTreeAscending encodes a ltree.T value, appends it to the
1754+
// supplied buffer, and returns the final buffer. The encoding is guaranteed to
1755+
// be ordered such that if t1 < t2 then bytes.Compare will order them the same
1756+
// way after encoding.
1757+
//
1758+
// The encoding is in the below format:
1759+
// [ ltreeMarker ]
1760+
// for each label:
1761+
//
1762+
// [ label raw bytes ] [ ltreeLabelKeyTerminator ]
1763+
//
1764+
// [ ltreeKeyTerminator ]
1765+
func EncodeLTreeAscending(b []byte, d ltree.T) []byte {
1766+
b = append(b, ltreeKeyMarker)
1767+
d.ForEachLabel(func(i int, label string) {
1768+
b = append(b, []byte(label)...)
1769+
b = append(b, ltreeLabelKeyTerminator)
1770+
})
1771+
b = append(b, ltreeKeyTerminator)
1772+
return b
1773+
}
1774+
1775+
// EncodeLTreeDescending is the descending version of EncodeLTreeAscending.
1776+
func EncodeLTreeDescending(b []byte, d ltree.T) []byte {
1777+
b = append(b, ltreeKeyDescendingMarker)
1778+
d.ForEachLabel(func(i int, label string) {
1779+
n := len(b)
1780+
b = append(b, []byte(label)...)
1781+
onesComplement(b[n:])
1782+
b = append(b, ltreeLabelKeyDescendingTerminator)
1783+
})
1784+
b = append(b, ltreeKeyDescendingTerminator)
1785+
return b
1786+
}
1787+
1788+
// DecodeLTreeAscending decodes a ltree.T value which was encoded using
1789+
// EncodeLTreeAscending. The remainder of the input buffer and the
1790+
// decoded ltree.T are returned.
1791+
func DecodeLTreeAscending(b []byte) ([]byte, ltree.T, error) {
1792+
if PeekType(b) != LTree {
1793+
return nil, ltree.Empty, errors.Errorf("did not find marker %#x", b)
1794+
}
1795+
b = b[1:]
1796+
1797+
var labels []string
1798+
for {
1799+
if len(b) != 0 && b[0] == ltreeKeyTerminator {
1800+
b = b[1:]
1801+
break
1802+
}
1803+
i := bytes.IndexByte(b, ltreeLabelKeyTerminator)
1804+
if i == -1 {
1805+
return nil, ltree.Empty, errors.Errorf("malformed ltree encoding")
1806+
}
1807+
labels = append(labels, string(b[:i]))
1808+
b = b[i+1:]
1809+
}
1810+
l, err := ltree.ParseLTreeFromLabels(labels)
1811+
if err != nil {
1812+
return nil, ltree.Empty, err
1813+
}
1814+
return b, l, nil
1815+
}
1816+
1817+
// DecodeLTreeDescending is the descending version of DecodeLTreeAscending.
1818+
func DecodeLTreeDescending(b []byte) ([]byte, ltree.T, error) {
1819+
if PeekType(b) != LTreeDesc {
1820+
return nil, ltree.Empty, errors.Errorf("did not find marker %#x", b)
1821+
}
1822+
b = b[1:]
1823+
1824+
var labels []string
1825+
for {
1826+
if len(b) != 0 && b[0] == ltreeKeyDescendingTerminator {
1827+
b = b[1:]
1828+
break
1829+
}
1830+
i := bytes.IndexByte(b, ltreeLabelKeyDescendingTerminator)
1831+
if i == -1 {
1832+
return nil, ltree.Empty, errors.Errorf("malformed ltree encoding")
1833+
}
1834+
// Deep copying here is necessary to avoid modifying the input buffer slice.
1835+
var label []byte
1836+
label = append(label, b[:i]...)
1837+
onesComplement(label)
1838+
labels = append(labels, string(label))
1839+
b = b[i+1:]
1840+
}
1841+
l, err := ltree.ParseLTreeFromLabels(labels)
1842+
if err != nil {
1843+
return nil, ltree.Empty, err
1844+
}
1845+
return b, l, nil
1846+
}
1847+
17291848
// Type represents the type of a value encoded by
17301849
// Encode{Null,NotNull,Varint,Uvarint,Float,Bytes}.
17311850
//
@@ -1788,6 +1907,7 @@ const (
17881907
JsonEmptyArrayDesc Type = 43
17891908
PGVector Type = 44
17901909
LTree Type = 45
1910+
LTreeDesc Type = 46
17911911
)
17921912

17931913
// typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one
@@ -1890,6 +2010,10 @@ func slowPeekType(b []byte) Type {
18902010
return Decimal
18912011
case m == voidMarker:
18922012
return Void
2013+
case m == ltreeKeyMarker:
2014+
return LTree
2015+
case m == ltreeKeyDescendingMarker:
2016+
return LTreeDesc
18932017
}
18942018
}
18952019
return Unknown
@@ -2086,6 +2210,10 @@ func PeekLength(b []byte) (int, error) {
20862210
return 0, errors.Errorf("slice too short for float (%d)", len(b))
20872211
}
20882212
return 9, nil
2213+
case ltreeKeyMarker:
2214+
return getLTreeLength(b, Ascending)
2215+
case ltreeKeyDescendingMarker:
2216+
return getLTreeLength(b, Descending)
20892217
}
20902218
if m >= IntMin && m <= IntMax {
20912219
return getVarintLen(b)
@@ -2384,6 +2512,26 @@ func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) {
23842512
return b, "", err
23852513
}
23862514
return b, d.StringNanos(), nil
2515+
case LTree:
2516+
if dir == Descending {
2517+
return b, "", errors.Errorf("ascending ltree column dir but descending ltree encoding")
2518+
}
2519+
var l ltree.T
2520+
b, l, err = DecodeLTreeAscending(b)
2521+
if err != nil {
2522+
return b, "", err
2523+
}
2524+
return b, l.String(), nil
2525+
case LTreeDesc:
2526+
if dir == Ascending {
2527+
return b, "", errors.Errorf("descending ltree column dir but ascending ltree encoding")
2528+
}
2529+
var l ltree.T
2530+
b, l, err = DecodeLTreeDescending(b)
2531+
if err != nil {
2532+
return b, "", err
2533+
}
2534+
return b, l.String(), nil
23872535
default:
23882536
if len(b) >= 1 {
23892537
switch b[0] {

0 commit comments

Comments
 (0)