Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit fb0a625

Browse files
authored
[Backport 5.2] Search: fix delimiting escape sequences (#58935)
Search: fix delimiting escape sequences (#57877) For code insights, we wrap regex patterns with slashes when we're rebuilding the query string from the parsed query. However, we cannot just wrap a regex pattern in /.../ because the query scanning logic respects escape sequences, so anything that would be interpreted as an escape sequence by the query scanner would break the intent of the original regex. This fixes the StringHuman method by correctly escaping regex patterns when delimiting them with /.../. (cherry picked from commit 807c357)
1 parent da5f914 commit fb0a625

File tree

7 files changed

+70
-25
lines changed

7 files changed

+70
-25
lines changed

internal/insights/query/querybuilder/builder_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,16 @@ func TestPointDiffQuery(t *testing.T) {
889889
SearchQuery: BasicQuery(`content:"TEST" patternType:regexp`),
890890
},
891891
autogold.Expect(BasicQuery(`after:2022-01-01T01:01:00Z before:2022-02-01T01:01:00Z type:diff patterntype:regexp content:"TEST"`)),
892+
}, {
893+
// Test for #57877. Previously, a slash in a regex pattern would not be escaped when we wrapped it with slashes.
894+
"no mangle slashes",
895+
PointDiffQueryOpts{
896+
Before: before,
897+
After: &after,
898+
RepoList: []string{},
899+
SearchQuery: BasicQuery(`patterntype:regexp <tag>value</tag>`),
900+
},
901+
autogold.Expect(BasicQuery("after:2022-01-01T01:01:00Z before:2022-02-01T01:01:00Z type:diff patterntype:regexp /<tag>value<\\/tag>/")),
892902
},
893903
}
894904
for _, test := range tests {

internal/insights/query/querybuilder/regexp.go

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,7 @@ type PatternReplacer interface {
117117
HasCaptureGroups() bool
118118
}
119119

120-
var ptn = regexp.MustCompile(`[^\\]\/`)
121-
122120
func (r *regexpReplacer) replaceContent(replacement string) (BasicQuery, error) {
123-
if r.needsSlashEscape {
124-
replacement = strings.ReplaceAll(replacement, `/`, `\/`)
125-
}
126-
127121
modified := searchquery.MapPattern(r.original.ToQ(), func(patternValue string, negated bool, annotation searchquery.Annotation) searchquery.Node {
128122
return searchquery.Pattern{
129123
Value: replacement,
@@ -136,10 +130,9 @@ func (r *regexpReplacer) replaceContent(replacement string) (BasicQuery, error)
136130
}
137131

138132
type regexpReplacer struct {
139-
original searchquery.Plan
140-
pattern string
141-
groups []group
142-
needsSlashEscape bool
133+
original searchquery.Plan
134+
pattern string
135+
groups []group
143136
}
144137

145138
func (r *regexpReplacer) Replace(replacement string) (BasicQuery, error) {
@@ -188,19 +181,11 @@ func NewPatternReplacer(query BasicQuery, searchType searchquery.SearchType) (Pa
188181
return nil, UnsupportedPatternTypeErr
189182
}
190183

191-
needsSlashEscape := true
192184
pattern := patterns[0]
193185
if !pattern.Annotation.Labels.IsSet(searchquery.Regexp) {
194186
return nil, UnsupportedPatternTypeErr
195-
} else if !ptn.MatchString(pattern.Value) {
196-
// because regexp annotated patterns implicitly escape slashes in the regular expression we need to translate the pattern into
197-
// a compatible pattern with `patternType:standard`, ie. escape the slashes `/`. We need to do this _before_ the replacement
198-
// otherwise we may accidentally double escape in places we don't intend. However, if the string was already escaped we don't
199-
// want to re-escape because it will break the semantic of the query. This means the only time we _don't_ escape slashes
200-
// is if we detect a pattern that has an escaped slash.
201-
needsSlashEscape = false
202187
}
203188

204189
regexpGroups := findGroups(pattern.Value)
205-
return &regexpReplacer{original: plan, groups: regexpGroups, pattern: pattern.Value, needsSlashEscape: needsSlashEscape}, nil
190+
return &regexpReplacer{original: plan, groups: regexpGroups, pattern: pattern.Value}, nil
206191
}

internal/insights/query/querybuilder/regexp_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,25 +212,25 @@ func TestReplace_Valid(t *testing.T) {
212212
{
213213
query: `\/insi(g)ht[s]\/`,
214214
replacement: "ggg",
215-
want: autogold.Expect(BasicQuery(`/\/insi(?:ggg)ht[s]\//`)),
215+
want: autogold.Expect(BasicQuery("/\\\\\\/insi(?:ggg)ht[s]\\\\\\//")),
216216
searchType: query.SearchTypeRegex,
217217
},
218218
{
219219
query: `<title>(.*)</title>`,
220220
replacement: "findme",
221-
want: autogold.Expect(BasicQuery(`/<title>(?:findme)<\/title>/`)),
221+
want: autogold.Expect(BasicQuery("/<title>(?:findme)<\\/title>/")),
222222
searchType: query.SearchTypeRegex,
223223
},
224224
{
225225
query: `(/\w+/)`,
226226
replacement: `/sourcegraph/`,
227-
want: autogold.Expect(BasicQuery(`/(?:\/sourcegraph\/)/`)),
227+
want: autogold.Expect(BasicQuery("/(?:\\/sourcegraph\\/)/")),
228228
searchType: query.SearchTypeRegex,
229229
},
230230
{
231231
query: `/<title>(.*)<\/title>/`,
232232
replacement: "findme",
233-
want: autogold.Expect(BasicQuery(`/<title>(?:findme)<\/title>/`)),
233+
want: autogold.Expect(BasicQuery("/<title>(?:findme)<\\/title>/")),
234234
searchType: query.SearchTypeStandard,
235235
},
236236
}

internal/search/query/parser.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,25 @@ loop:
574574
return string(result), count, nil
575575
}
576576

577+
// Delimit inverts the process of ScanDelimited, escaping any special
578+
// characters or delimiters in s.
579+
//
580+
// NOTE: this does not provide a clean roundtrip with ScanDelimited because
581+
// ScanDelimited is lossy. We cannot know whether a backslash was passed
582+
// through because it was escaped or because its successor rune was not
583+
// escapable.
584+
func Delimit(s string, delimiter rune) string {
585+
ds := string(delimiter)
586+
delimitReplacer := strings.NewReplacer(
587+
"\n", "\\n",
588+
"\r", "\\r",
589+
"\t", "\\t",
590+
"\\", "\\\\",
591+
ds, "\\"+ds,
592+
)
593+
return ds + delimitReplacer.Replace(s) + ds
594+
}
595+
577596
// ScanField scans an optional '-' at the beginning of a string, and then scans
578597
// one or more alphabetic characters until it encounters a ':'. The prefix
579598
// string is checked against valid fields. If it is valid, the function returns

internal/search/query/parser_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,37 @@ func TestScanDelimited(t *testing.T) {
592592
_ = test(`a"`, '"')
593593
}
594594

595+
func TestDelimited(t *testing.T) {
596+
inputs := []string{
597+
"test",
598+
"test\nabc",
599+
"test\r\nabc",
600+
"test\a\fabc",
601+
"test\t\tabc",
602+
"'test'",
603+
"\"test\"",
604+
"\"/test/\"",
605+
"/test/",
606+
"/test\\/abc/",
607+
"\\\\",
608+
"\\",
609+
"\\/",
610+
}
611+
delimiters := []rune{'/', '"', '\''}
612+
613+
for _, input := range inputs {
614+
for _, delimiter := range delimiters {
615+
delimited := Delimit(input, delimiter)
616+
undelimited, _, err := ScanDelimited([]byte(delimited), false, delimiter)
617+
if err != nil {
618+
t.Fatal(err)
619+
}
620+
redelimited := Delimit(undelimited, delimiter)
621+
require.Equal(t, delimited, redelimited)
622+
}
623+
}
624+
}
625+
595626
func TestMergePatterns(t *testing.T) {
596627
test := func(input string) string {
597628
p := &parser{buf: []byte(input), heuristics: parensAsPatterns}

internal/search/query/printer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func stringHumanPattern(nodes []Node) string {
1717
v = strconv.Quote(v)
1818
}
1919
if n.Annotation.Labels.IsSet(Regexp) {
20-
v = fmt.Sprintf("/%s/", v)
20+
v = Delimit(v, '/')
2121
}
2222
if _, _, ok := ScanBalancedPattern([]byte(v)); !ok && !n.Annotation.Labels.IsSet(IsAlias) && n.Annotation.Labels.IsSet(Literal) {
2323
v = fmt.Sprintf(`content:%s`, strconv.Quote(v))
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"Input": "/abcd\\// patterntype:regexp",
3-
"Result": "patterntype:regexp /abcd//"
3+
"Result": "patterntype:regexp /abcd\\//"
44
}

0 commit comments

Comments
 (0)