Skip to content

Commit cf47d20

Browse files
Add a state stack to allow returning to previous states (#137)
* Add a state stack to allow returning to previous states
* Add a note
1 parent 71bdc69 commit cf47d20

File tree

1 file changed

+59
-42
lines changed

1 file changed

+59
-42
lines changed

internal/syntax/scanner/v2/scanner.go

Lines changed: 59 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ import (
3434
const (
	// eof is the sentinel rune signalling that the end of the input was reached.
	eof rune = -1

	// bufferSize is the token channel buffer size; benchmarks suggest this
	// value is the optimum.
	bufferSize = 32

	// stackSize is the size of the state stack, which should be plenty to
	// avoid a re-allocation.
	stackSize = 10
)
3839

3940
// stateFn represents the state of the scanner as a function that does the work
@@ -42,21 +43,30 @@ type stateFn func(*Scanner) stateFn
4243

4344
// Scanner is the http file scanner.
4445
type Scanner struct {
45-
tokens chan token.Token // Channel on which to emit scanned tokens.
46-
state stateFn // The scanner's current state
47-
name string // Name of the file
48-
diagnostics []syntax.Diagnostic // Diagnostics gathered during scanning
49-
src []byte // Raw source text
50-
start int // The start position of the current token
51-
pos int // Current scanner position in src (bytes, 0 indexed)
52-
line int // Current line number (1 indexed)
53-
currentLineOffset int // Offset at which the current line started, used for column calculation
46+
tokens chan token.Token // Channel on which to emit scanned tokens.
47+
state stateFn // The scanner's current state
48+
name string // Name of the file
49+
diagnostics []syntax.Diagnostic // Diagnostics gathered during scanning
50+
src []byte // Raw source text
51+
52+
// A stack of state functions used to maintain context.
53+
//
54+
// The idea is to reuse parts of the state machine in various places. For
55+
// example, interpolations can appear in multiple contexts, so we need to
56+
// know which state to return to when we're done with the '}}'.
57+
stack []stateFn
58+
59+
start int // The start position of the current token
60+
pos int // Current scanner position in src (bytes, 0 indexed)
61+
line int // Current line number (1 indexed)
62+
currentLineOffset int // Offset at which the current line started, used for column calculation
5463
}
5564

5665
// New returns a new [Scanner].
5766
func New(name string, src []byte) *Scanner {
5867
s := &Scanner{
5968
tokens: make(chan token.Token, bufferSize),
69+
stack: make([]stateFn, 0, stackSize),
6070
name: name,
6171
src: src,
6272
state: scanStart,
@@ -89,6 +99,29 @@ func (s *Scanner) Diagnostics() []syntax.Diagnostic {
8999
return s.diagnostics
90100
}
91101

102+
// pushState pushes a stateFn onto the stack so the scanner can
103+
// "remember" where it just came from.
104+
func (s *Scanner) pushState(state stateFn) {
105+
s.stack = append(s.stack, state)
106+
}
107+
108+
// popState pops a stateFn off the stack so the scanner can return
109+
// to where it just came from in certain contexts.
110+
func (s *Scanner) popState() stateFn {
111+
size := len(s.stack)
112+
113+
if size == 0 {
114+
// TODO(@FollowTheProcess): Could we be safer and return scanStart here?
115+
// Or do an error and return nil
116+
panic("pop from empty state stack")
117+
}
118+
119+
last := s.stack[size-1]
120+
s.stack = s.stack[:size-1]
121+
122+
return last
123+
}
124+
92125
// atEOF reports whether the scanner is at the end of the input.
93126
func (s *Scanner) atEOF() bool {
94127
return s.pos >= len(s.src)
@@ -299,10 +332,13 @@ func scanStart(s *Scanner) stateFn {
299332
// next() already emits an error for this
300333
return nil
301334
case '#':
335+
s.pushState(scanStart) // Come back here when we're done
302336
return scanHash
303337
case '/':
338+
s.pushState(scanStart)
304339
return scanSlash
305340
case '@':
341+
s.pushState(scanStart)
306342
return scanAt
307343
default:
308344
s.errorf("unexpected character: %q", char)
@@ -342,10 +378,10 @@ func scanAt(s *Scanner) stateFn {
342378
s.emit(token.At)
343379

344380
if isAlpha(s.peek()) {
345-
return scanIdent
381+
return scanGlobalVariable
346382
}
347383

348-
return scanStart
384+
return s.popState()
349385
}
350386

351387
// scanComment scans a line comment started by either a '#' or '//'.
@@ -357,7 +393,7 @@ func scanComment(s *Scanner) stateFn {
357393
s.takeUntil('\n', eof)
358394
s.emit(token.Comment)
359395

360-
return scanStart
396+
return s.popState()
361397
}
362398

363399
// scanSeparator scans a '###' request separator.
@@ -378,8 +414,8 @@ func scanSeparator(s *Scanner) stateFn {
378414
return scanRequest
379415
}
380416

381-
// scanIdent scans a continuous string of characters as an identifier.
382-
func scanIdent(s *Scanner) stateFn {
417+
// scanGlobalVariable scans a global variable declaration.
418+
func scanGlobalVariable(s *Scanner) stateFn {
383419
s.takeWhile(isIdent)
384420

385421
// Is it a keyword?
@@ -406,7 +442,7 @@ func scanIdent(s *Scanner) stateFn {
406442
return scanText
407443
}
408444

409-
return scanStart
445+
return s.popState()
410446
}
411447

412448
// scanOpenInterp scans an opening '{{' marking the beginning
@@ -450,13 +486,11 @@ func scanCloseInterp(s *Scanner) stateFn {
450486
return scanText
451487
}
452488

453-
// TODO(@FollowTheProcess): How can we get back to the right state here?
454-
//
455-
// We might not always be in start
456-
return scanStart
489+
// Go back to whatever state we were in before entering the interp
490+
return s.popState()
457491
}
458492

459-
// scanPrompt scans a global prompt statement.
493+
// scanPrompt scans a prompt statement.
460494
//
461495
// It assumes the '@prompt' has already been consumed.
462496
func scanPrompt(s *Scanner) stateFn {
@@ -472,7 +506,7 @@ func scanPrompt(s *Scanner) stateFn {
472506
s.emit(token.Text)
473507
}
474508

475-
return scanStart
509+
return s.popState()
476510
}
477511

478512
// scanText scans a continuous string of text.
@@ -500,8 +534,10 @@ func scanRequest(s *Scanner) stateFn {
500534
// next() already emits an error for this
501535
return nil
502536
case '#':
537+
s.pushState(scanRequest) // Come back here (not scanStart) when we're done
503538
return scanRequestHash
504539
case '/':
540+
s.pushState(scanRequest)
505541
return scanRequestSlash
506542
default:
507543
if isUpperAlpha(char) {
@@ -546,7 +582,7 @@ func scanRequestComment(s *Scanner) stateFn {
546582
s.takeUntil('\n', eof)
547583
s.emit(token.Comment)
548584

549-
return scanRequest
585+
return s.popState()
550586
}
551587

552588
// scanRequestVariable scans a request variable declaration.
@@ -563,7 +599,7 @@ func scanRequestVariable(s *Scanner) stateFn {
563599
s.skip(isLineSpace)
564600

565601
if kind == token.Prompt {
566-
return scanRequestPrompt
602+
return scanPrompt
567603
}
568604

569605
if s.take("=") {
@@ -579,25 +615,6 @@ func scanRequestVariable(s *Scanner) stateFn {
579615
return scanRequest
580616
}
581617

582-
// scanRequestPrompt scans a request prompt statement.
583-
//
584-
// It assumes the '@prompt' has already been consumed.
585-
func scanRequestPrompt(s *Scanner) stateFn {
586-
if isIdent(s.peek()) {
587-
s.takeWhile(isIdent)
588-
s.emit(token.Ident)
589-
}
590-
591-
s.skip(isLineSpace)
592-
593-
if isAlpha(s.peek()) {
594-
s.takeUntil('\n', eof)
595-
s.emit(token.Text)
596-
}
597-
598-
return scanRequest
599-
}
600-
601618
// scanMethod scans a HTTP method.
602619
func scanMethod(s *Scanner) stateFn {
603620
s.takeWhile(isUpperAlpha)

0 commit comments

Comments
 (0)