Skip to content

Commit 421845e

Browse files
committed
feat: Implement a SQL front-end (tokenizer and pratt-based parser)
1 parent 2ef9dbd commit 421845e

21 files changed

+1370
-0
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package co.clflushopt.glint.sql;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
/**
7+
* SQL keywords.
8+
*
9+
*/
10+
public enum Keyword implements TokenType {
11+
SCHEMA, DATABASE, TABLE, COLUMN, VIEW, INDEX, TRIGGER, PROCEDURE, TABLESPACE, FUNCTION,
12+
SEQUENCE, CURSOR, FROM, TO, OF, IF, ON, FOR, WHILE, DO, NO, BY, WITH, WITHOUT, TRUE, FALSE,
13+
TEMPORARY, TEMP, COMMENT, CREATE, REPLACE, BEFORE, AFTER, INSTEAD, EACH, ROW, STATEMENT,
14+
EXECUTE, BITMAP, NOSORT, REVERSE, COMPILE, ALTER, ADD, MODIFY, RENAME, ENABLE, DISABLE,
15+
VALIDATE, USER, IDENTIFIED, TRUNCATE, DROP, CASCADE, INSERT, INTO, VALUES, UPDATE, SET, DELETE,
16+
SELECT, DISTINCT, AS, CASE, WHEN, ELSE, THEN, END, LEFT, RIGHT, FULL, INNER, OUTER, CROSS, JOIN,
17+
USE, USING, NATURAL, WHERE, ORDER, ASC, DESC, GROUP, HAVING, UNION, DECLARE, GRANT, FETCH,
18+
REVOKE, CLOSE, CAST, NEW, ESCAPE, LOCK, SOME, LEAVE, ITERATE, REPEAT, UNTIL, OPEN, OUT, INOUT,
19+
OVER, ADVISE, SIBLINGS, LOOP, EXPLAIN, DEFAULT, EXCEPT, INTERSECT, MINUS, PASSWORD, LOCAL,
20+
GLOBAL, STORAGE, DATA, COALESCE, CHAR, CHARACTER, VARYING, VARCHAR, VARCHAR2, INTEGER, INT,
21+
SMALLINT, DECIMAL, DEC, NUMERIC, FLOAT, REAL, DOUBLE, PRECISION, DATE, TIME, INTERVAL, BOOLEAN,
22+
BLOB, AND, OR, XOR, IS, NOT, NULL, IN, BETWEEN, LIKE, ANY, ALL, EXISTS, AVG, MAX, MIN, SUM,
23+
COUNT, GREATEST, LEAST, ROUND, TRUNC, POSITION, EXTRACT, LENGTH, CHAR_LENGTH, SUBSTRING, SUBSTR,
24+
INSTR, INITCAP, UPPER, LOWER, TRIM, LTRIM, RTRIM, BOTH, LEADING, TRAILING, TRANSLATE, CONVERT,
25+
LPAD, RPAD, DECODE, NVL, CONSTRAINT, UNIQUE, PRIMARY, FOREIGN, KEY, CHECK, REFERENCES;
26+
27+
/**
28+
* Keyword lookups.
29+
*
30+
*/
31+
private static final Map<String, Keyword> KEYWORDS;
32+
33+
static {
34+
KEYWORDS = new HashMap<>();
35+
for (Keyword keyword : values()) {
36+
KEYWORDS.put(keyword.name(), keyword);
37+
}
38+
}
39+
40+
public static Keyword textOf(String text) {
41+
return KEYWORDS.get(text.toUpperCase());
42+
}
43+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package co.clflushopt.glint.sql;
2+
3+
/**
4+
* Literals for (long, double, string datatypes).
5+
*
6+
*/
7+
public enum Literal implements TokenType {
8+
LONG, DOUBLE, STRING, IDENTIFIER;
9+
10+
/**
11+
* Check if it's a digit or a decimal
12+
*
13+
* @param ch
14+
* @return
15+
*/
16+
public static boolean isNumber(char ch) {
17+
return Character.isDigit(ch) || '.' == ch;
18+
}
19+
20+
/**
21+
* Check if it's an identifier.
22+
*
23+
* @param ch
24+
* @return
25+
*/
26+
public static boolean isIdentifier(char ch) {
27+
return Character.isLetter(ch);
28+
}
29+
30+
/**
31+
* Check if we are within a keyword space.
32+
*
33+
* @param ch
34+
* @return
35+
*/
36+
public static boolean isIdentifierPart(char ch) {
37+
return Character.isLetter(ch) || Character.isDigit(ch) || ch == '_';
38+
}
39+
40+
/**
41+
* Check if we are within a literal space.
42+
*
43+
* @param ch
44+
* @return
45+
*/
46+
public static boolean isCharsStart(char ch) {
47+
return '\'' == ch || '"' == ch;
48+
}
49+
}
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
package co.clflushopt.glint.sql;
2+
3+
import java.util.ArrayList;
4+
import java.util.Arrays;
5+
import java.util.Collections;
6+
import java.util.List;
7+
import java.util.logging.Logger;
8+
9+
public class Parser {
10+
private static final Logger logger = Logger.getLogger(Parser.class.getSimpleName());
11+
12+
private final TokenStream tokens;
13+
14+
/**
 * Creates a parser that reads from the given token stream.
 *
 * @param tokens the token stream to parse; must not be {@code null}
 * @throws NullPointerException if {@code tokens} is {@code null}
 */
public Parser(TokenStream tokens) {
    // Fail fast here instead of with an unrelated NullPointerException at the
    // first peek()/next() call deep inside a parse.
    this.tokens = java.util.Objects.requireNonNull(tokens, "tokens");
}
17+
18+
public int nextPrecedence() {
19+
Token token = tokens.peek();
20+
if (token == null) {
21+
return 0;
22+
}
23+
24+
int precedence;
25+
TokenType type = token.getType();
26+
27+
if (type == Keyword.AS || type == Keyword.ASC || type == Keyword.DESC) {
28+
precedence = 10;
29+
} else if (type == Keyword.OR) {
30+
precedence = 20;
31+
} else if (type == Keyword.AND) {
32+
precedence = 30;
33+
} else if (type == Symbol.LT || type == Symbol.LT_EQ || type == Symbol.EQ
34+
|| type == Symbol.BANG_EQ || type == Symbol.GT_EQ || type == Symbol.GT) {
35+
precedence = 40;
36+
} else if (type == Symbol.PLUS || type == Symbol.SUB) {
37+
precedence = 50;
38+
} else if (type == Symbol.STAR || type == Symbol.SLASH) {
39+
precedence = 60;
40+
} else if (type == Symbol.LEFT_PAREN) {
41+
precedence = 70;
42+
} else {
43+
precedence = 0;
44+
}
45+
46+
logger.fine("nextPrecedence(" + token + ") returning " + precedence);
47+
return precedence;
48+
}
49+
50+
public SqlExpression parsePrefix() {
51+
logger.fine("parsePrefix() next token = " + tokens.peek());
52+
Token token = tokens.next();
53+
if (token == null) {
54+
return null;
55+
}
56+
57+
SqlExpression expr;
58+
TokenType type = token.getType();
59+
60+
if (type == Keyword.SELECT) {
61+
expr = parseSelect();
62+
} else if (type == Keyword.CAST) {
63+
expr = parseCast();
64+
} else if (type == Keyword.MAX) {
65+
expr = new SqlIdentifier(token.getText());
66+
} else if (type == Keyword.INT || type == Keyword.DOUBLE) {
67+
expr = new SqlIdentifier(token.getText());
68+
} else if (type == Literal.IDENTIFIER) {
69+
expr = new SqlIdentifier(token.getText());
70+
} else if (type == Literal.STRING) {
71+
expr = new SqlString(token.getText());
72+
} else if (type == Literal.LONG) {
73+
expr = new SqlLong(Long.parseLong(token.getText()));
74+
} else if (type == Literal.DOUBLE) {
75+
expr = new SqlDouble(Double.parseDouble(token.getText()));
76+
} else {
77+
throw new IllegalStateException("Unexpected token " + token);
78+
}
79+
80+
logger.fine("parsePrefix() returning " + expr);
81+
return expr;
82+
}
83+
84+
public SqlExpression parseInfix(SqlExpression left, int precedence) {
85+
logger.fine("parseInfix() next token = " + tokens.peek());
86+
Token token = tokens.peek();
87+
if (token == null) {
88+
throw new IllegalStateException("Unexpected end of input");
89+
}
90+
91+
SqlExpression expr;
92+
TokenType type = token.getType();
93+
94+
if (type == Symbol.PLUS || type == Symbol.SUB || type == Symbol.STAR || type == Symbol.SLASH
95+
|| type == Symbol.EQ || type == Symbol.GT || type == Symbol.LT) {
96+
tokens.next(); // consume the token
97+
expr = new SqlBinaryExpression(left, token.getText(), parse(precedence));
98+
} else if (type == Keyword.AS) {
99+
tokens.next(); // consume the token
100+
expr = new SqlAlias(left, parseIdentifier());
101+
} else if (type == Keyword.AND || type == Keyword.OR) {
102+
tokens.next(); // consume the token
103+
expr = new SqlBinaryExpression(left, token.getText(), parse(precedence));
104+
} else if (type == Keyword.ASC || type == Keyword.DESC) {
105+
tokens.next();
106+
expr = new SqlSort(left, type == Keyword.ASC);
107+
} else if (type == Symbol.LEFT_PAREN) {
108+
if (left instanceof SqlIdentifier) {
109+
tokens.next(); // consume the token
110+
List<SqlExpression> args = parseExprList();
111+
Token next = tokens.next();
112+
if (next == null || next.getType() != Symbol.RIGHT_PAREN) {
113+
throw new IllegalStateException("Expected right parenthesis");
114+
}
115+
expr = new SqlFunction(((SqlIdentifier) left).getId(), args);
116+
} else {
117+
throw new IllegalStateException("Unexpected LPAREN");
118+
}
119+
} else {
120+
throw new IllegalStateException("Unexpected infix token " + token);
121+
}
122+
123+
logger.fine("parseInfix() returning " + expr);
124+
return expr;
125+
}
126+
127+
private List<SqlExpression> parseOrder() {
128+
List<SqlExpression> sortList = new ArrayList<>();
129+
SqlExpression sort = parseExpr();
130+
131+
while (sort != null) {
132+
if (sort instanceof SqlIdentifier) {
133+
sort = new SqlSort(sort, true);
134+
} else if (!(sort instanceof SqlSort)) {
135+
throw new IllegalStateException(
136+
"Unexpected expression " + sort + " after order by.");
137+
}
138+
139+
sortList.add((SqlSort) sort);
140+
141+
if (tokens.peek() != null && tokens.peek().getType() == Symbol.COMMA) {
142+
tokens.next();
143+
} else {
144+
break;
145+
}
146+
sort = parseExpr();
147+
}
148+
return sortList;
149+
}
150+
151+
private SqlCast parseCast() {
152+
if (!tokens.consumeTokenType(Symbol.LEFT_PAREN)) {
153+
throw new IllegalStateException("Expected left parenthesis");
154+
}
155+
156+
SqlExpression expr = parseExpr();
157+
if (expr == null) {
158+
throw new RuntimeException("Expected expression in CAST");
159+
}
160+
161+
if (!(expr instanceof SqlAlias)) {
162+
throw new IllegalStateException("Expected AS in CAST");
163+
}
164+
165+
SqlAlias alias = (SqlAlias) expr;
166+
167+
if (!tokens.consumeTokenType(Symbol.RIGHT_PAREN)) {
168+
throw new IllegalStateException("Expected right parenthesis");
169+
}
170+
171+
return new SqlCast(alias.getExpr(), alias.getAlias());
172+
}
173+
174+
private SqlSelect parseSelect() {
175+
List<SqlExpression> projection = parseExprList();
176+
177+
if (!tokens.consumeKeyword("FROM")) {
178+
throw new IllegalStateException("Expected FROM keyword, found " + tokens.peek());
179+
}
180+
181+
SqlExpression tableExpr = parseExpr();
182+
if (!(tableExpr instanceof SqlIdentifier)) {
183+
throw new IllegalStateException("Expected table name");
184+
}
185+
SqlIdentifier table = (SqlIdentifier) tableExpr;
186+
187+
// parse optional WHERE clause
188+
SqlExpression filterExpr = null;
189+
if (tokens.consumeKeyword("WHERE")) {
190+
filterExpr = parseExpr();
191+
}
192+
193+
// parse optional GROUP BY clause
194+
List<SqlExpression> groupBy = Collections.emptyList();
195+
if (tokens.consumeKeywords(Arrays.asList("GROUP", "BY"))) {
196+
groupBy = parseExprList();
197+
}
198+
199+
// parse optional HAVING clause
200+
SqlExpression havingExpr = null;
201+
if (tokens.consumeKeyword("HAVING")) {
202+
havingExpr = parseExpr();
203+
}
204+
205+
// parse optional ORDER BY clause
206+
List<SqlExpression> orderBy = Collections.emptyList();
207+
if (tokens.consumeKeywords(Arrays.asList("ORDER", "BY"))) {
208+
orderBy = parseOrder();
209+
}
210+
211+
return new SqlSelect(projection, filterExpr, groupBy, orderBy, havingExpr, table.getId());
212+
}
213+
214+
private List<SqlExpression> parseExprList() {
215+
logger.fine("parseExprList()");
216+
List<SqlExpression> list = new ArrayList<>();
217+
SqlExpression expr = parseExpr();
218+
219+
while (expr != null) {
220+
list.add(expr);
221+
222+
if (tokens.peek() != null && tokens.peek().getType() == Symbol.COMMA) {
223+
tokens.next();
224+
} else {
225+
break;
226+
}
227+
expr = parseExpr();
228+
}
229+
230+
logger.fine("parseExprList() returning " + list);
231+
return list;
232+
}
233+
234+
private SqlExpression parseExpr() {
235+
return parse(0);
236+
}
237+
238+
private SqlIdentifier parseIdentifier() {
239+
SqlExpression expr = parseExpr();
240+
if (expr == null) {
241+
throw new RuntimeException("Expected identifier, found EOF");
242+
}
243+
if (!(expr instanceof SqlIdentifier)) {
244+
throw new RuntimeException("Expected identifier, found " + expr);
245+
}
246+
return (SqlIdentifier) expr;
247+
}
248+
249+
protected SqlExpression parse(int precedence) {
250+
SqlExpression left = parsePrefix();
251+
if (left == null) {
252+
return null;
253+
}
254+
255+
while (precedence < nextPrecedence()) {
256+
left = parseInfix(left, precedence);
257+
}
258+
259+
return left;
260+
}
261+
}

0 commit comments

Comments
 (0)