Skip to content

Commit 4b4f5a1

Browse files
committed
Expand require_semicolon_stmt_delimiter parser option & tests
- Adds a corresponding `supports_statements_without_semicolon_delimiter` function to the `Dialect` trait.
- Semicolon statement delimiters are optional in SQL Server, so the function returns `true` for that dialect.
- For the implementation, `RETURN` parsing needs to be tightened up to avoid ambiguity, and tests that formerly asserted "end of statement" may now need to assert "an SQL statement" instead.
- A new `assert_err_parse_statements` helper splits the dialects based on their semicolon requirements and asserts the expected error message for each group.
1 parent 4921846 commit 4b4f5a1

File tree

7 files changed

+646
-136
lines changed

7 files changed

+646
-136
lines changed

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,11 @@ pub trait Dialect: Debug + Any {
11361136
fn supports_notnull_operator(&self) -> bool {
11371137
false
11381138
}
1139+
1140+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
1141+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1142+
false
1143+
}
11391144
}
11401145

11411146
/// This represents the operators for which precedence must be defined

src/dialect/mssql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Dialect for MsSqlDialect {
6767
}
6868

6969
fn supports_connect_by(&self) -> bool {
70-
true
70+
false
7171
}
7272

7373
fn supports_eq_alias_assignment(&self) -> bool {
@@ -123,6 +123,10 @@ impl Dialect for MsSqlDialect {
123123
true
124124
}
125125

126+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
127+
true
128+
}
129+
126130
/// See <https://learn.microsoft.com/en-us/sql/relational-databases/security/authentication-access/server-level-roles>
127131
fn get_reserved_grantees_types(&self) -> &[GranteesType] {
128132
&[GranteesType::Public]
@@ -280,6 +284,9 @@ impl MsSqlDialect {
280284
) -> Result<Vec<Statement>, ParserError> {
281285
let mut stmts = Vec::new();
282286
loop {
287+
while let Token::SemiColon = parser.peek_token_ref().token {
288+
parser.advance_token();
289+
}
283290
if let Token::EOF = parser.peek_token_ref().token {
284291
break;
285292
}

src/keywords.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10721072
Keyword::ANTI,
10731073
Keyword::SEMI,
10741074
Keyword::RETURNING,
1075+
Keyword::RETURN,
10751076
Keyword::ASOF,
10761077
Keyword::MATCH_CONDITION,
10771078
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1126,6 +1127,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
11261127
Keyword::CLUSTER,
11271128
Keyword::DISTRIBUTE,
11281129
Keyword::RETURNING,
1130+
Keyword::RETURN,
11291131
// Reserved only as a column alias in the `SELECT` clause
11301132
Keyword::FROM,
11311133
Keyword::INTO,
@@ -1140,6 +1142,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
11401142
Keyword::LIMIT,
11411143
Keyword::HAVING,
11421144
Keyword::WHERE,
1145+
Keyword::RETURN,
11431146
];
11441147

11451148
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1150,4 +1153,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
11501153
Keyword::INTERVAL,
11511154
Keyword::STRUCT,
11521155
Keyword::TRIM,
1156+
Keyword::RETURN,
11531157
];

src/parser/mod.rs

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,22 @@ impl ParserOptions {
266266
self.unescape = unescape;
267267
self
268268
}
269+
270+
/// Set if semicolon statement delimiters are required.
271+
///
272+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
273+
///
274+
/// ```sql
275+
/// SELECT 1
276+
/// SELECT 2
277+
/// ```
278+
pub fn with_require_semicolon_stmt_delimiter(
279+
mut self,
280+
require_semicolon_stmt_delimiter: bool,
281+
) -> Self {
282+
self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter;
283+
self
284+
}
269285
}
270286

271287
#[derive(Copy, Clone)]
@@ -362,7 +378,11 @@ impl<'a> Parser<'a> {
362378
state: ParserState::Normal,
363379
dialect,
364380
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
365-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
381+
options: ParserOptions::new()
382+
.with_trailing_commas(dialect.supports_trailing_commas())
383+
.with_require_semicolon_stmt_delimiter(
384+
!dialect.supports_statements_without_semicolon_delimiter(),
385+
),
366386
}
367387
}
368388

@@ -485,10 +505,10 @@ impl<'a> Parser<'a> {
485505
match self.peek_token().token {
486506
Token::EOF => break,
487507

488-
// end of statement
489-
Token::Word(word) => {
490-
if expecting_statement_delimiter && word.keyword == Keyword::END {
491-
break;
508+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
509+
Token::Whitespace(Whitespace::Newline) => {
510+
if !self.options.require_semicolon_stmt_delimiter {
511+
expecting_statement_delimiter = false;
492512
}
493513
}
494514
_ => {}
@@ -500,7 +520,7 @@ impl<'a> Parser<'a> {
500520

501521
let statement = self.parse_statement()?;
502522
stmts.push(statement);
503-
expecting_statement_delimiter = true;
523+
expecting_statement_delimiter = self.options.require_semicolon_stmt_delimiter;
504524
}
505525
Ok(stmts)
506526
}
@@ -4558,6 +4578,9 @@ impl<'a> Parser<'a> {
45584578
) -> Result<Vec<Statement>, ParserError> {
45594579
let mut values = vec![];
45604580
loop {
4581+
// ignore empty statements (between successive statement delimiters)
4582+
while self.consume_token(&Token::SemiColon) {}
4583+
45614584
match &self.peek_nth_token_ref(0).token {
45624585
Token::EOF => break,
45634586
Token::Word(w) => {
@@ -4569,7 +4592,13 @@ impl<'a> Parser<'a> {
45694592
}
45704593

45714594
values.push(self.parse_statement()?);
4572-
self.expect_token(&Token::SemiColon)?;
4595+
4596+
if self.options.require_semicolon_stmt_delimiter {
4597+
self.expect_token(&Token::SemiColon)?;
4598+
}
4599+
4600+
// ignore empty statements (between successive statement delimiters)
4601+
while self.consume_token(&Token::SemiColon) {}
45734602
}
45744603
Ok(values)
45754604
}
@@ -16464,7 +16493,28 @@ impl<'a> Parser<'a> {
1646416493

1646516494
/// Parse [Statement::Return]
1646616495
fn parse_return(&mut self) -> Result<Statement, ParserError> {
16467-
match self.maybe_parse(|p| p.parse_expr())? {
16496+
let rs = self.maybe_parse(|p| {
16497+
let expr = p.parse_expr()?;
16498+
16499+
match &expr {
16500+
Expr::Value(_)
16501+
| Expr::Function(_)
16502+
| Expr::UnaryOp { .. }
16503+
| Expr::BinaryOp { .. }
16504+
| Expr::Case { .. }
16505+
| Expr::Cast { .. }
16506+
| Expr::Convert { .. }
16507+
| Expr::Subquery(_) => Ok(expr),
16508+
// todo: how to restrict to variables?
16509+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
16510+
_ => parser_err!(
16511+
"Non-returnable expression found following RETURN",
16512+
p.peek_token().span.start
16513+
),
16514+
}
16515+
})?;
16516+
16517+
match rs {
1646816518
Some(expr) => Ok(Statement::Return(ReturnStatement {
1646916519
value: Some(ReturnStatementValue::Expr(expr)),
1647016520
})),

src/test_utils.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#[cfg(not(feature = "std"))]
2626
use alloc::{
2727
boxed::Box,
28+
format,
2829
string::{String, ToString},
2930
vec,
3031
vec::Vec,
@@ -186,6 +187,37 @@ impl TestedDialects {
186187
statements
187188
}
188189

190+
/// The same as [`statements_parse_to`], but semicolons are stripped from the SQL text before it is parsed.
191+
pub fn statements_without_semicolons_parse_to(
192+
&self,
193+
sql: &str,
194+
canonical: &str,
195+
) -> Vec<Statement> {
196+
let sql_without_semicolons = sql
197+
.replace("; ", " ")
198+
.replace(" ;", " ")
199+
.replace(";\n", "\n")
200+
.replace("\n;", "\n")
201+
.replace(";", " ");
202+
let statements = self
203+
.parse_sql_statements(&sql_without_semicolons)
204+
.expect(&sql_without_semicolons);
205+
if !canonical.is_empty() && sql != canonical {
206+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
207+
} else {
208+
assert_eq!(
209+
sql,
210+
statements
211+
.iter()
212+
// note: account for format_statement_list manually inserted semicolons
213+
.map(|s| s.to_string().trim_end_matches(";").to_string())
214+
.collect::<Vec<_>>()
215+
.join("; ")
216+
);
217+
}
218+
statements
219+
}
220+
189221
/// Ensures that `sql` parses as an [`Expr`], and that
190222
/// re-serializing the parse result produces canonical
191223
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -318,6 +350,43 @@ where
318350
all_dialects_where(|d| !except(d))
319351
}
320352

353+
/// Returns all dialects that don't support statements without semicolon delimiters.
354+
/// (i.e. dialects that require semicolon delimiters.)
355+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
356+
let tested_dialects =
357+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
358+
assert_ne!(tested_dialects.dialects.len(), 0);
359+
tested_dialects
360+
}
361+
362+
/// Returns all dialects that do support statements without semicolon delimiters.
363+
/// (i.e. dialects not requiring semicolon delimiters.)
364+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
365+
let tested_dialects =
366+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
367+
assert_ne!(tested_dialects.dialects.len(), 0);
368+
tested_dialects
369+
}
370+
371+
/// Asserts an error for `parse_sql_statements`:
372+
/// - "end of statement" for dialects that require semicolon delimiters
373+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
374+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
375+
assert_eq!(
376+
ParserError::ParserError(format!("Expected: end of statement, found: {found}")),
377+
all_dialects_requiring_semicolon_statement_delimiter()
378+
.parse_sql_statements(sql)
379+
.unwrap_err()
380+
);
381+
382+
assert_eq!(
383+
ParserError::ParserError(format!("Expected: an SQL statement, found: {found}")),
384+
all_dialects_not_requiring_semicolon_statement_delimiter()
385+
.parse_sql_statements(sql)
386+
.unwrap_err()
387+
);
388+
}
389+
321390
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
322391
assert_eq!(
323392
expected,

0 commit comments

Comments
 (0)