Skip to content

Commit d90139d

Browse files
Scan HTTP headers (#18)
* Scan simple headers * WIP: interpolated headers * Fix silly bug with headers * Add a scanner fuzz test
1 parent 2db2e46 commit d90139d

File tree

7 files changed

+223
-21
lines changed

7 files changed

+223
-21
lines changed

Taskfile.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ tasks:
6969

7070
lint:
7171
desc: Run linting
72+
deps:
73+
- fmt
7274
sources:
7375
- "**/*.go"
7476
- .golangci.yml

internal/syntax/scanner/scanner.go

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ func scanEq(s *Scanner) scanFn {
429429
return scanStart
430430
}
431431

432-
// scanInterp scans an opening '{{' token.
432+
// scanInterp scans an entire interpolation of {{ <contents> }}.
433433
func scanInterp(s *Scanner) scanFn {
434434
// Absorb no more than 2 '{'
435435
count := 0
@@ -537,10 +537,99 @@ func scanURL(s *Scanner) scanFn {
537537

538538
s.emit(token.URL)
539539

540+
// Is the next thing headers?
541+
s.skip(unicode.IsSpace)
542+
543+
if isAlpha(s.peek()) {
544+
return scanHeaders
545+
}
546+
540547
// TODO(@FollowTheProcess): Handle HTTP version, headers, body etc.
541548
return scanStart
542549
}
543550

551+
// scanHeaders scans a series of HTTP headers, one per line, emitting
552+
// tokens as it goes.
553+
//
554+
// It stops when it sees the next character is not a valid ident character
555+
// and so could not be another header.
556+
func scanHeaders(s *Scanner) scanFn {
557+
s.takeWhile(isIdent)
558+
559+
// Header without a colon or value e.g. 'Content-Type'
560+
// this is unfinished so is an error, like an unterminated interpolation.
561+
if s.peek() == eof {
562+
s.error("unexpected eof")
563+
return nil
564+
}
565+
566+
s.emit(token.Header)
567+
568+
if s.peek() != ':' {
569+
s.errorf("expected ':', got %q", s.peek())
570+
return nil
571+
}
572+
573+
// Consume the ':' now we know it exists, and skip over any whitespace
574+
// on the same line until we get to the header value
575+
s.next()
576+
s.emit(token.Colon)
577+
s.skip(isLineSpace)
578+
579+
// for next := s.next(); next != '\n' && next != eof; next = s.next() {
580+
// if s.restHasPrefix("{{") {
581+
// // Emit whatever we've captured at this point as text (if there is anything)
582+
// // and go scan the interpolation
583+
// if s.start != s.pos {
584+
// // We have absorbed stuff
585+
// s.emit(token.Text)
586+
// }
587+
// scanInterp(s)
588+
// }
589+
// }
590+
591+
// Handle interpolation somewhere inside the header value
592+
// e.g. Authorization: Bearer {{ token }}
593+
for {
594+
if s.restHasPrefix("{{") {
595+
// Emit what we have captured up to this point (if there is anything) as Text and then
596+
// switch to scanning the interpolation
597+
if s.start != s.pos {
598+
// We have absorbed stuff, emit it
599+
s.emit(token.Text)
600+
}
601+
602+
scanInterp(s)
603+
}
604+
605+
// Scan any text on the same line
606+
next := s.peek()
607+
if next == '\n' || next == eof {
608+
break
609+
}
610+
611+
s.next()
612+
}
613+
614+
// If we absorbed any text, emit it.
615+
//
616+
// This could be empty because the entire header value could have just been an interp
617+
// e.g. X-Api-Key: {{ key }}
618+
if s.start != s.pos {
619+
s.emit(token.Text)
620+
}
621+
622+
// Now for the fun bit, call itself if there are more headers
623+
s.skip(unicode.IsSpace)
624+
625+
if isAlpha(s.peek()) {
626+
return scanHeaders
627+
}
628+
629+
// TODO(@FollowTheProcess): Handle request body
630+
return scanStart
631+
}
632+
544633
// isAlpha reports whether r is an alpha character.
545634
func isAlpha(r rune) bool {
546635
return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z')

internal/syntax/scanner/scanner_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,53 @@ func TestValid(t *testing.T) {
373373
}
374374
}
375375

376+
func FuzzScanner(f *testing.F) {
377+
// Get all the .http source from testdata for the corpus
378+
pattern := filepath.Join("testdata", "valid", "*.txtar")
379+
files, err := filepath.Glob(pattern)
380+
test.Ok(f, err)
381+
382+
for _, file := range files {
383+
archive, err := txtar.ParseFile(file)
384+
test.Ok(f, err)
385+
386+
src, ok := archive.Read("src.http")
387+
test.True(f, ok, test.Context("file %s does not contain 'src.http'", file))
388+
389+
f.Add(src)
390+
}
391+
392+
// Property: The scanner never panics or loops indefinitely, fuzz
393+
// by default will catch both of these
394+
f.Fuzz(func(t *testing.T, src string) {
395+
// Note: no ErrorHandler installed, because if we let the scanner report syntax
396+
// errors it would kill the fuzz test straight away e.g. on the first invalid
397+
// utf-8 char
398+
scanner := scanner.New("fuzz", []byte(src), nil)
399+
400+
for {
401+
tok := scanner.Scan()
402+
if tok.Is(token.EOF, token.Error) {
403+
break
404+
}
405+
406+
// Property: Positions must be positive integers
407+
test.True(t, tok.Start >= 0, test.Context("token start position (%d) was negative", tok.Start))
408+
test.True(t, tok.End >= 0, test.Context("token end position (%d) was negative", tok.End))
409+
410+
// Property: The kind must be one of the known kinds
411+
test.True(
412+
t,
413+
(tok.Kind >= token.EOF) && (tok.Kind <= token.MethodTrace),
414+
test.Context("token %s was not one of the pre-defined kinds", tok),
415+
)
416+
417+
// Property: End must be >= Start
418+
test.True(t, tok.End >= tok.Start, test.Context("token %s had invalid start and end positions", tok))
419+
}
420+
})
421+
}
422+
376423
// testFailHandler returns a [syntax.ErrorHandler] that handles scanning errors by failing
377424
// the enclosing test.
378425
func testFailHandler(tb testing.TB) syntax.ErrorHandler {
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
-- src.http --
2+
### Test
3+
GET https://api.somewhere.com/items/1
4+
Content-Type: application/json
5+
Accept: application/json
6+
X-Something-Else: yes
7+
-- tokens.txt --
8+
<Token::Separator start=0, end=3>
9+
<Token::Comment start=4, end=8>
10+
<Token::MethodGet start=9, end=12>
11+
<Token::URL start=13, end=46>
12+
<Token::Header start=47, end=59>
13+
<Token::Colon start=59, end=60>
14+
<Token::Text start=61, end=77>
15+
<Token::Header start=78, end=84>
16+
<Token::Colon start=84, end=85>
17+
<Token::Text start=86, end=102>
18+
<Token::Header start=103, end=119>
19+
<Token::Colon start=119, end=120>
20+
<Token::Text start=121, end=124>
21+
<Token::EOF start=125, end=125>
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
-- src.http --
2+
@token = shhh
3+
@json = application/json
4+
5+
### Test
6+
GET https://api.somewhere.com/items/1
7+
Content-Type: {{ json }}
8+
Authorization: Bearer {{ token }}
9+
Accept: {{ json }}
10+
-- tokens.txt --
11+
<Token::At start=0, end=1>
12+
<Token::Ident start=1, end=6>
13+
<Token::Eq start=7, end=8>
14+
<Token::Text start=9, end=13>
15+
<Token::At start=14, end=15>
16+
<Token::Ident start=15, end=19>
17+
<Token::Eq start=20, end=21>
18+
<Token::Text start=22, end=38>
19+
<Token::Separator start=40, end=43>
20+
<Token::Comment start=44, end=48>
21+
<Token::MethodGet start=49, end=52>
22+
<Token::URL start=53, end=86>
23+
<Token::Header start=87, end=99>
24+
<Token::Colon start=99, end=100>
25+
<Token::OpenInterp start=101, end=103>
26+
<Token::Ident start=104, end=108>
27+
<Token::CloseInterp start=109, end=111>
28+
<Token::Header start=112, end=125>
29+
<Token::Colon start=125, end=126>
30+
<Token::Text start=127, end=134>
31+
<Token::OpenInterp start=134, end=136>
32+
<Token::Ident start=137, end=142>
33+
<Token::CloseInterp start=143, end=145>
34+
<Token::Header start=146, end=152>
35+
<Token::Colon start=152, end=153>
36+
<Token::OpenInterp start=154, end=156>
37+
<Token::Ident start=157, end=161>
38+
<Token::CloseInterp start=162, end=164>
39+
<Token::EOF start=165, end=165>

internal/syntax/token/kind_string.go

Lines changed: 22 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/syntax/token/token.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ const (
1818
At // At
1919
Ident // Ident
2020
Eq // Eq
21+
Colon // Colon
2122
Text // Text
2223
URL // URL
24+
Header // Header
2325
OpenInterp // OpenInterp
2426
CloseInterp // CloseInterp
2527
Name // Name

0 commit comments

Comments
 (0)