Skip to content

Commit 43c6b46

Browse files
better comments
1 parent 25f155d commit 43c6b46

File tree

6 files changed

+17
-12
lines changed

6 files changed

+17
-12
lines changed

Validator/Derive/Enter.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def enter (x: Expr) (res: List IfExpr := []): List IfExpr :=
77
match x with
88
| Expr.emptyset => res
99
| Expr.epsilon => res
10-
| Expr.tree p y => (IfExpr.mk p y Expr.emptyset) :: res
10+
| Expr.tree pred childrenExpr => (IfExpr.mk pred childrenExpr Expr.emptyset) :: res
1111
| Expr.or y z => enter y (enter z res)
1212
| Expr.concat y z =>
1313
if Expr.nullable y

Validator/Learning/Cegex/Cegex.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
inductive Cegex where
33
| emptyset
44
| epsilon
5+
-- NEW: matched operator
56
| matched (c: Char)
67
| char (c: Char)
78
| or (y z: Cegex)

Validator/Learning/Parser.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def deriveLeave [ValidateM m] (xs: List Expr) (cs: List Expr): m (List Expr) :=
2727
Leave.DeriveLeave.deriveLeave xs (List.map Expr.nullable cs)
2828

2929
def deriveValue [ValidateM m] (xs: List Expr): m (List Expr) := do
30-
deriveLeave xs (<- deriveEnter xs)
30+
deriveEnter xs >>= deriveLeave xs
3131

3232
-- TODO: Is it possible to have a Parser type that can be proved to be of the correct shape, and have not expection throwing
3333
-- for example: can you prove that your Parser will never return an Hint.leave after returning a Hint.field.

Validator/Learning/Regex/RegexCapture.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def neutralize (x: Regex): Regex :=
1212
match x with
1313
| Regex.emptyset => Regex.emptyset
1414
| Regex.epsilon => Regex.epsilon
15+
-- only char changes to emptyset
1516
| Regex.char _ => Regex.emptyset
1617
| Regex.or y z => Regex.smartOr (neutralize y) (neutralize z)
1718
| Regex.concat y z => Regex.concat (neutralize y) (neutralize z)

Validator/Learning/Tegex/Tegex.lean

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,21 @@ import Validator.Parser.ParseTree
55
inductive Tegex where
66
| emptyset
77
| epsilon
8+
-- Cegex.matched is extended to trees
89
| matched (tok: Token) (childExpr: Tegex)
910
| tree (tok: Token) (childExpr: Tegex)
1011
| or (y z: Tegex)
1112
| concat (y z: Tegex)
1213
| star (y: Tegex)
13-
-- group is the only new operator compared to a Tegex without capturing groups.
14+
-- group is a copy of Cegex.group without the captured string.
1415
| group (id: Nat) (x: Tegex)
1516
deriving DecidableEq, Ord, Repr, Hashable
1617

1718
def Tegex.nullable (x: Tegex): Bool :=
1819
match x with
1920
| Tegex.emptyset => false
2021
| Tegex.epsilon => true
22+
-- matched is technically the same as epsilon, except that it stores the matched token and childExpr.
2123
| Tegex.matched _ _ => true
2224
| Tegex.tree _ _ => false
2325
| Tegex.or y z => nullable y || nullable z

Validator/Learning/Tegex/TegexCapture.lean

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ import Validator.Learning.Tegex.Tegex
88

99
namespace TegexCapture
1010

11-
-- neutralize replaces all chars with emptyset.
12-
-- This way the expression will stay nullable and not change based on derivative input.
13-
-- This makes it possible to keep all the capture groups inside for later extraction.
11+
-- neutralize replaces all tree operators with emptyset.
12+
-- It is used when deriving concat.
13+
-- This way we do not lose capture information on nullable expressions.
1414
def neutralize (x: Tegex): Tegex :=
1515
match x with
1616
| Tegex.emptyset => Tegex.emptyset
@@ -26,6 +26,7 @@ partial def derive (x: Tegex) (tree: ParseTree): Tegex :=
2626
match x with
2727
| Tegex.emptyset => Tegex.emptyset
2828
| Tegex.epsilon => Tegex.emptyset
29+
-- remember matched is just epsilon, so has the same derivative.
2930
| Tegex.matched _ _ => Tegex.emptyset
3031
| Tegex.tree tok' childExpr =>
3132
match tree with
@@ -43,12 +44,11 @@ partial def derive (x: Tegex) (tree: ParseTree): Tegex :=
4344
then Tegex.smartOr
4445
(Tegex.smartConcat (derive y tree) z)
4546
-- A difference from the usual derive algorithm:
46-
-- Instead of (derive z tree), we write:
47+
-- To preserve the capture information in the nullable expression y,
48+
-- instead of (derive z tree), we write:
4749
(Tegex.smartConcat (neutralize y) (derive z tree))
4850
else Tegex.concat (derive y tree) z
4951
| Tegex.star y => Tegex.smartConcat (derive y tree) x
50-
-- group is the new operator compared to Expr.
51-
-- We store the input tree in the expression.
5252
| Tegex.group n y =>
5353
Tegex.group n (derive y tree)
5454

@@ -57,7 +57,7 @@ def extract (x: Tegex): List ParseTree :=
5757
-- should never be encountered, since emptyset is not nullable.
5858
| Tegex.emptyset => []
5959
| Tegex.epsilon => []
60-
-- should never be encountered, since char is not nullable.
60+
-- should never be encountered, since tree is not nullable.
6161
| Tegex.tree _ _ => []
6262
| Tegex.matched tok childExpr => [ParseTree.mk tok (extract childExpr)]
6363
| Tegex.or y z =>
@@ -70,7 +70,7 @@ def extract (x: Tegex): List ParseTree :=
7070
-- Groups under a star are ignored.
7171
-- Recursively extracting under the star causes empty captures to be reported, which we do not want under POSIX semantics.
7272
| Tegex.star _ => []
73-
-- ignore group, this group will be extracted later by extractGroups.
73+
-- Ignore group, this group will be extracted later by extractGroups.
7474
| Tegex.group _ y => extract y
7575

7676
def extractGroups (x: Tegex): List (Nat × List ParseTree) :=
@@ -80,6 +80,7 @@ def extractGroups (x: Tegex): List (Nat × List ParseTree) :=
8080
| Tegex.epsilon => []
8181
-- should never be encountered, since tree is not nullable.
8282
| Tegex.tree _ _ => []
83+
-- There may be groups in the childExpr that needs to be extracted.
8384
| Tegex.matched _ childExpr => extractGroups childExpr
8485
| Tegex.or y z =>
8586
-- Under POSIX semantics, we prefer matching the left alternative.
@@ -91,7 +92,7 @@ def extractGroups (x: Tegex): List (Nat × List ParseTree) :=
9192
-- Groups under a star are ignored.
9293
-- Recursively extracting under the star causes empty captures to be reported, which we do not want under POSIX semantics.
9394
| Tegex.star _ => []
94-
-- extract the string
95+
-- extract the forest for the single group id
9596
| Tegex.group id y => (id, extract y) :: extractGroups y
9697

9798
def captures (x: Tegex) (forest: List ParseTree): Option (List (Nat × List ParseTree)) :=

0 commit comments

Comments
 (0)