Skip to content

Commit 1c2518b

Browse files
committed
Implement SLR parse table generation
1 parent ea878e2 commit 1c2518b

File tree

6 files changed

+199
-187
lines changed

6 files changed

+199
-187
lines changed

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/CharacterClass.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
public class CharacterClass extends Symbol implements ICharacterClass {
1818

1919
private static final long serialVersionUID = 1619024888383357090L;
20+
public static final CharacterClass EMPTY = new CharacterClass(CharacterClassFactory.EMPTY_CHARACTER_CLASS);
21+
public static final CharacterClass FULL = new CharacterClass(CharacterClassFactory.FULL_RANGE);
2022
private ICharacterClass cc;
2123

2224
// private final BitSet bs;
@@ -98,10 +100,6 @@ public IStrategoTerm toStateAterm(ITermFactory tf) {
98100

99101
}
100102

101-
public static CharacterClass getFullCharacterClass() {
102-
return new CharacterClass(CharacterClassFactory.FULL_RANGE);
103-
}
104-
105103
public boolean isEmptyCC() {
106104
if(cc == null) {
107105
return true;
@@ -164,9 +162,7 @@ public static CharacterClass intersection(CharacterClass cc1, CharacterClass cc2
164162
}
165163

166164
public CharacterClass difference(CharacterClass cc2) {
167-
CharacterClass result = new CharacterClass(ParseTableIO.getCharacterClassFactory().difference(cc, cc2.cc));
168-
169-
return result;
165+
return new CharacterClass(ParseTableIO.getCharacterClassFactory().difference(cc, cc2.cc));
170166
}
171167

172168
@Deprecated public BitSet getBitSet() {

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/IProduction.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import org.metaborg.sdf2table.deepconflicts.Context;
88
import org.spoofax.interpreter.terms.IStrategoTerm;
9-
import org.spoofax.interpreter.terms.ITermFactory;
109

1110
import com.google.common.collect.SetMultimap;
1211

@@ -18,19 +17,18 @@ public interface IProduction {
1817

1918
// To calculate Deep Priority Conflicts
2019
int leftRecursivePosition();
20+
2121
int rightRecursivePosition();
22+
2223
void calculateRecursion(NormGrammar grammar);
2324

2425
@Override int hashCode();
26+
2527
@Override boolean equals(Object obj);
2628

2729
IStrategoTerm toAterm(SetMultimap<IProduction, IAttribute> prod_attrs);
28-
IStrategoTerm toSDF3Aterm(SetMultimap<IProduction, IAttribute> prod_attrs,
29-
Map<Set<Context>, Integer> ctx_vals, Integer ctx_val);
30-
31-
// TODO: FIRST AND FOLLOW SETS OF PRODUCTIONS
32-
// void calculateDependencies(NormGrammar g);
33-
//
34-
// TableSet firstSet();
35-
// TableSet followSet();
30+
31+
IStrategoTerm toSDF3Aterm(SetMultimap<IProduction, IAttribute> prod_attrs, Map<Set<Context>, Integer> ctx_vals,
32+
Integer ctx_val);
33+
3634
}

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ public abstract class Symbol implements Serializable {
1919
protected List<CharacterClass[]> followRestrictionsLookahead;
2020

2121
private boolean nullable = false;
22+
// TODO: it would probably be cleaner if these fields were plain ICharacterClass instances instead.
23+
private CharacterClass first;
24+
private CharacterClass follow;
2225

2326
public abstract String name();
2427

@@ -30,6 +33,22 @@ public void setNullable(boolean nullable) {
3033
this.nullable = nullable;
3134
}
3235

36+
/**
 * Returns the FIRST set currently stored on this symbol.
 *
 * NOTE(review): the backing field has no visible initializer, so this is presumably {@code null} until
 * {@code setFirst} has been called -- confirm against the constructors.
 *
 * @return the stored FIRST character class
 */
public CharacterClass getFirst() {
37+
return first;
38+
}
39+
40+
/**
 * Replaces the FIRST set stored on this symbol. The previous value is discarded, not merged.
 *
 * @param first
 *            the character class to store as this symbol's FIRST set
 */
public void setFirst(CharacterClass first) {
41+
this.first = first;
42+
}
43+
44+
/**
 * Returns the FOLLOW set currently stored on this symbol.
 *
 * NOTE(review): the backing field has no visible initializer, so this is presumably {@code null} until
 * {@code setFollow} has been called -- confirm against the constructors.
 *
 * @return the stored FOLLOW character class
 */
public CharacterClass getFollow() {
45+
return follow;
46+
}
47+
48+
/**
 * Replaces the FOLLOW set stored on this symbol. The previous value is discarded, not merged.
 *
 * @param follow
 *            the character class to store as this symbol's FOLLOW set
 */
public void setFollow(CharacterClass follow) {
49+
this.follow = follow;
50+
}
51+
3352
/**
 * Uses the symbol's {@link #name()} as its string representation.
 */
@Override public String toString() {
3453
return name();
3554
}
@@ -55,7 +74,7 @@ public void addFollowRestrictionsLookahead(List<CharacterClass[]> frlList) {
5574
if(currentFRL.length != 2) {
5675
continue;
5776
}
58-
77+
5978
CharacterClass intersection = CharacterClass.intersection(currentFRL[0], frl[0]);
6079
boolean equals = intersection.equals(currentFRL[0]);
6180
if(equals) {

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java

Lines changed: 156 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,22 @@
11
package org.metaborg.sdf2table.parsetable;
22

33
import java.io.Serializable;
4-
import java.util.List;
5-
import java.util.Map;
6-
import java.util.Queue;
7-
import java.util.Set;
4+
import java.util.*;
85

96
import org.metaborg.parsetable.IParseTable;
107
import org.metaborg.parsetable.IState;
11-
import org.metaborg.sdf2table.deepconflicts.Context;
12-
import org.metaborg.sdf2table.deepconflicts.ContextPosition;
13-
import org.metaborg.sdf2table.deepconflicts.ContextType;
14-
import org.metaborg.sdf2table.deepconflicts.ContextualProduction;
15-
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
16-
import org.metaborg.sdf2table.deepconflicts.DeepConflictsAnalyzer;
17-
import org.metaborg.sdf2table.grammar.GeneralAttribute;
18-
import org.metaborg.sdf2table.grammar.IPriority;
19-
import org.metaborg.sdf2table.grammar.IProduction;
20-
import org.metaborg.sdf2table.grammar.NormGrammar;
21-
import org.metaborg.sdf2table.grammar.Priority;
22-
import org.metaborg.sdf2table.grammar.Symbol;
23-
24-
import com.google.common.collect.BiMap;
25-
import com.google.common.collect.HashBiMap;
26-
import com.google.common.collect.HashMultimap;
27-
import com.google.common.collect.Lists;
28-
import com.google.common.collect.Maps;
29-
import com.google.common.collect.Queues;
30-
import com.google.common.collect.SetMultimap;
31-
import com.google.common.collect.Sets;
8+
import org.metaborg.sdf2table.deepconflicts.*;
9+
import org.metaborg.sdf2table.grammar.*;
10+
11+
import com.google.common.collect.*;
3212

3313
public class ParseTable implements IParseTable, Serializable {
3414

3515
private static final long serialVersionUID = -1845408435423897026L;
3616

3717
public static final int FIRST_PRODUCTION_LABEL = 257;
3818
public static final int INITIAL_STATE_NUMBER = 0;
39-
public static final int VERSION_NUMBER = 6;
19+
public static final int VERSION_NUMBER = 7;
4020

4121
// private static final ILogger logger = LoggerUtils.logger(ParseTable.class);
4222
private NormGrammar grammar;
@@ -46,10 +26,6 @@ public class ParseTable implements IParseTable, Serializable {
4626

4727
private IProduction initialProduction;
4828

49-
// FIXME Currently generating an LR(0) table, compute first/follow sets to generate SLR(1)
50-
// create first/follow sets by calculating dependencies and using Tarjan's algorithm
51-
// see http://compilers.iecc.com/comparch/article/01-04-079
52-
5329
// deep priority conflict resolution is left to parse time
5430
private final boolean dataDependent;
5531

@@ -94,7 +70,7 @@ public ParseTable(NormGrammar grammar, boolean dynamic, boolean dataDependent, b
9470

9571
// calculate deep priority conflicts based on current priorities
9672
// and generate contextual productions
97-
if (solveDeepConflicts) {
73+
if(solveDeepConflicts) {
9874
final DeepConflictsAnalyzer analysis = DeepConflictsAnalyzer.fromParseTable(this);
9975
analysis.patchParseTable();
10076

@@ -103,6 +79,10 @@ public ParseTable(NormGrammar grammar, boolean dynamic, boolean dataDependent, b
10379

10480
createJSGLRParseTableProductions(productionLabels);
10581

82+
// calculate FIRST-set and FOLLOW-set
83+
calculateFirst();
84+
calculateFollow();
85+
10686
// create states if the table should not be generated dynamically
10787
initialProduction = grammar.getInitialProduction();
10888

@@ -143,6 +123,145 @@ private void calculateNullable() {
143123
} while(markedNullable);
144124
}
145125

126+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
127+
private void calculateFirst() {
128+
SetMultimap<Symbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
129+
Set<Symbol> symbols = grammar.getSymbols();
130+
SetMultimap<Symbol, Symbol> containsTheFirstOf = HashMultimap.create();
131+
132+
for(Symbol s : symbols) {
133+
s.setFirst(CharacterClass.EMPTY);
134+
}
135+
136+
for(Symbol s : symbols) {
137+
// CharacterClass symbols are their own FIRST set.
138+
if(s instanceof CharacterClass) {
139+
s.setFirst((CharacterClass) s);
140+
continue;
141+
}
142+
143+
for(IProduction p : symbolProductionsMapping.get(s)) {
144+
// Direct contributions:
145+
// If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
146+
for(Symbol rhs : p.rightHand()) {
147+
// Then, a is in FIRST(A).
148+
if(rhs instanceof CharacterClass) {
149+
s.setFirst((CharacterClass) rhs);
150+
assert !rhs.isNullable();
151+
break;
152+
}
153+
154+
// Indirect contributions: calculate contains-the-FIRSTs-of
155+
// If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
156+
// Then, A contains-the-FIRSTs-of B
157+
containsTheFirstOf.put(s, rhs);
158+
159+
if(!rhs.isNullable())
160+
break;
161+
}
162+
}
163+
}
164+
165+
// Indirect contributions: Tarjan's algorithm for strongly connected components
166+
final int DONE = symbols.size();
167+
final Map<Symbol, Integer> low = new HashMap<>();
168+
final Stack<Symbol> stack = new Stack<>();
169+
for(Symbol v : symbols) {
170+
if(low.get(v) == null /* CLEAN */)
171+
traverseFirst(v, containsTheFirstOf, DONE, low, stack);
172+
}
173+
}
174+
175+
private void traverseFirst(Symbol v, SetMultimap<Symbol, Symbol> containsTheFirstOf, int DONE,
176+
Map<Symbol, Integer> low, Stack<Symbol> stack) {
177+
stack.push(v);
178+
int top1 = stack.size() - 1;
179+
low.put(v, top1);
180+
for(Symbol w : containsTheFirstOf.get(v)) {
181+
if(low.get(w) == null /* CLEAN */) {
182+
traverseFirst(w, containsTheFirstOf, DONE, low, stack);
183+
}
184+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
185+
v.setFirst(CharacterClass.union(v.getFirst(), w.getFirst())); // union!
186+
if(low.get(w) < low.get(v))
187+
low.put(v, low.get(w));
188+
}
189+
if(low.get(v) == top1) // v is the root of this SCC
190+
while(stack.size() - 1 >= top1) {
191+
Symbol w = stack.pop();
192+
w.setFirst(v.getFirst()); // distribute!
193+
low.put(w, DONE);
194+
}
195+
}
196+
197+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
198+
// and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
199+
private void calculateFollow() {
200+
SetMultimap<Symbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
201+
Set<Symbol> symbols = grammar.getSymbols();
202+
SetMultimap<Symbol, Symbol> containsTheFirstOf = HashMultimap.create();
203+
SetMultimap<Symbol, Symbol> containsTheFollowOf = HashMultimap.create();
204+
205+
for(Symbol s : symbols) {
206+
s.setFollow(CharacterClass.EMPTY);
207+
}
208+
209+
for(Symbol s : symbols) {
210+
for(IProduction p : symbolProductionsMapping.get(s)) {
211+
List<Symbol> rightHand = p.rightHand();
212+
for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
213+
Symbol symbolI = rightHand.get(i);
214+
215+
// If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
216+
for(int j = i + 1; j < rightHandSize; j++) {
217+
// If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
218+
Symbol symbolJ = rightHand.get(j);
219+
containsTheFirstOf.put(symbolI, symbolJ);
220+
if(!symbolJ.isNullable())
221+
break;
222+
}
223+
224+
// If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
225+
containsTheFollowOf.put(symbolI, s);
226+
}
227+
}
228+
}
229+
230+
// Indirect contributions: Tarjan's algorithm for strongly connected components
231+
final int DONE = symbols.size();
232+
final Map<Symbol, Integer> low = new HashMap<>();
233+
final Stack<Symbol> stack = new Stack<>();
234+
for(Symbol v : symbols) {
235+
if(low.get(v) == null /* CLEAN */)
236+
traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
237+
}
238+
}
239+
240+
private void traverseFollow(Symbol v, SetMultimap<Symbol, Symbol> containsTheFirstOf,
241+
SetMultimap<Symbol, Symbol> containsTheFollowOf, int DONE, Map<Symbol, Integer> low, Stack<Symbol> stack) {
242+
stack.push(v);
243+
int top1 = stack.size() - 1;
244+
low.put(v, top1);
245+
for(Symbol w : containsTheFirstOf.get(v)) {
246+
v.setFollow(CharacterClass.union(v.getFollow(), w.getFirst())); // union!
247+
}
248+
for(Symbol w : containsTheFollowOf.get(v)) {
249+
if(low.get(w) == null /* CLEAN */) {
250+
traverseFollow(w, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
251+
}
252+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
253+
v.setFollow(CharacterClass.union(v.getFollow(), w.getFollow())); // union!
254+
if(low.get(w) < low.get(v))
255+
low.put(v, low.get(w));
256+
}
257+
if(low.get(v) == top1) // v is the root of this SCC
258+
while(stack.size() - 1 >= top1) {
259+
Symbol w = stack.pop();
260+
w.setFollow(v.getFollow()); // distribute!
261+
low.put(w, DONE);
262+
}
263+
}
264+
146265
private void calculateRecursion() {
147266
// direct and indirect left recursion :
148267
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -411,28 +530,6 @@ private boolean mutuallyRecursive(IPriority p) {
411530
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().leftHand()).contains(p.lower().leftHand());
412531
}
413532

414-
/*
415-
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
416-
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
417-
*
418-
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
419-
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
420-
* first_components); } } }
421-
*
422-
*
423-
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
424-
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
425-
*
426-
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
427-
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
428-
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
429-
* d.index); } }
430-
*
431-
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
432-
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
433-
* while(t != v); components.add(component); } }
434-
*/
435-
436533
private void createLabels() {
437534
BiMap<IProduction, Integer> labels = HashBiMap.create();
438535

@@ -450,7 +547,7 @@ private void updateLabelsContextualProductions() {
450547
deriveContextualProductions();
451548

452549
for(IProduction p : grammar.getUniqueProductionMapping().values()) {
453-
if (grammar.getProdContextualProdMapping().containsKey(p)) {
550+
if(grammar.getProdContextualProdMapping().containsKey(p)) {
454551
labels.inverse().put(labels.get(p), grammar.getProdContextualProdMapping().get(p));
455552
}
456553
}
@@ -472,7 +569,7 @@ private void updateLabelsContextualProductions() {
472569
}
473570

474571
for(IProduction p : grammar.getUniqueProductionMapping().values()) {
475-
if (grammar.getProdContextualProdMapping().containsKey(p)) {
572+
if(grammar.getProdContextualProdMapping().containsKey(p)) {
476573
labels.inverse().put(labels.get(p), grammar.getProdContextualProdMapping().get(p));
477574
}
478575
}
@@ -520,8 +617,10 @@ private void deriveContextualProductions() {
520617
int labelP = productionLabels.get(p);
521618

522619
// generate new productions for deep contexts
523-
Context deepLeft_ctx = new Context(labelP, ContextType.DEEP, ContextPosition.LEFTMOST, false, leftmostContextsMapping, rightmostContextsMapping);
524-
Context deepRight_ctx = new Context(labelP, ContextType.DEEP, ContextPosition.RIGHTMOST, false, leftmostContextsMapping, rightmostContextsMapping);
620+
Context deepLeft_ctx = new Context(labelP, ContextType.DEEP, ContextPosition.LEFTMOST, false,
621+
leftmostContextsMapping, rightmostContextsMapping);
622+
Context deepRight_ctx = new Context(labelP, ContextType.DEEP, ContextPosition.RIGHTMOST, false,
623+
leftmostContextsMapping, rightmostContextsMapping);
525624
if(ctx_s.getContexts().contains(deepLeft_ctx) || ctx_s.getContexts().contains(deepRight_ctx)) {
526625
continue;
527626
}

0 commit comments

Comments
 (0)