From 4b4f5a1113cc860ee36a81a409466e7eceeaa68b Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 16 Jul 2025 14:45:51 -0400 Subject: [PATCH 1/8] Expand `require_semicolon_stmt_delimiter` parser option & tests - a corresponding `supports_statements_without_semicolon_delimiter` Dialect trait function - this is optional for SQL Server, so it's set to `true` for that dialect - for the implementation, `RETURN` parsing needs to be tightened up to avoid ambiguity & tests that formerly asserted "end of statement" now maybe need to assert "an SQL statement" - a new `assert_err_parse_statements` splits the dialects based on semicolon requirements & asserts the expected error message accordingly --- src/dialect/mod.rs | 5 + src/dialect/mssql.rs | 9 +- src/keywords.rs | 4 + src/parser/mod.rs | 66 ++++++- src/test_utils.rs | 69 ++++++++ tests/sqlparser_common.rs | 273 +++++++++++++++++------------ tests/sqlparser_mssql.rs | 356 ++++++++++++++++++++++++++++++++++++-- 7 files changed, 646 insertions(+), 136 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c78b00033..56ae5a095 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1136,6 +1136,11 @@ pub trait Dialect: Debug + Any { fn supports_notnull_operator(&self) -> bool { false } + + /// Returns true if the dialect supports parsing statements without a semicolon delimiter. 
+ fn supports_statements_without_semicolon_delimiter(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 36bd222b8..93a810bc3 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -67,7 +67,7 @@ impl Dialect for MsSqlDialect { } fn supports_connect_by(&self) -> bool { - true + false } fn supports_eq_alias_assignment(&self) -> bool { @@ -123,6 +123,10 @@ impl Dialect for MsSqlDialect { true } + fn supports_statements_without_semicolon_delimiter(&self) -> bool { + true + } + /// See fn get_reserved_grantees_types(&self) -> &[GranteesType] { &[GranteesType::Public] @@ -280,6 +284,9 @@ impl MsSqlDialect { ) -> Result, ParserError> { let mut stmts = Vec::new(); loop { + while let Token::SemiColon = parser.peek_token_ref().token { + parser.advance_token(); + } if let Token::EOF = parser.peek_token_ref().token { break; } diff --git a/src/keywords.rs b/src/keywords.rs index 7781939bc..7655d03a6 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1072,6 +1072,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::ANTI, Keyword::SEMI, Keyword::RETURNING, + Keyword::RETURN, Keyword::ASOF, Keyword::MATCH_CONDITION, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) @@ -1126,6 +1127,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::CLUSTER, Keyword::DISTRIBUTE, Keyword::RETURNING, + Keyword::RETURN, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, Keyword::INTO, @@ -1140,6 +1142,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ Keyword::LIMIT, Keyword::HAVING, Keyword::WHERE, + Keyword::RETURN, ]; /// Global list of reserved keywords that cannot be parsed as identifiers @@ -1150,4 +1153,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[ Keyword::INTERVAL, Keyword::STRUCT, Keyword::TRIM, + Keyword::RETURN, ]; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 
8d5a55da0..df35c8677 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -266,6 +266,22 @@ impl ParserOptions { self.unescape = unescape; self } + + /// Set if semicolon statement delimiters are required. + /// + /// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse. + /// + /// ```sql + /// SELECT 1 + /// SELECT 2 + /// ``` + pub fn with_require_semicolon_stmt_delimiter( + mut self, + require_semicolon_stmt_delimiter: bool, + ) -> Self { + self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter; + self + } } #[derive(Copy, Clone)] @@ -362,7 +378,11 @@ impl<'a> Parser<'a> { state: ParserState::Normal, dialect, recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH), - options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()), + options: ParserOptions::new() + .with_trailing_commas(dialect.supports_trailing_commas()) + .with_require_semicolon_stmt_delimiter( + !dialect.supports_statements_without_semicolon_delimiter(), + ), } } @@ -485,10 +505,10 @@ impl<'a> Parser<'a> { match self.peek_token().token { Token::EOF => break, - // end of statement - Token::Word(word) => { - if expecting_statement_delimiter && word.keyword == Keyword::END { - break; + // don't expect a semicolon statement delimiter after a newline when not otherwise required + Token::Whitespace(Whitespace::Newline) => { + if !self.options.require_semicolon_stmt_delimiter { + expecting_statement_delimiter = false; } } _ => {} @@ -500,7 +520,7 @@ impl<'a> Parser<'a> { let statement = self.parse_statement()?; stmts.push(statement); - expecting_statement_delimiter = true; + expecting_statement_delimiter = self.options.require_semicolon_stmt_delimiter; } Ok(stmts) } @@ -4558,6 +4578,9 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { let mut values = vec![]; loop { + // ignore empty statements (between successive statement delimiters) + while self.consume_token(&Token::SemiColon) {} + match 
&self.peek_nth_token_ref(0).token { Token::EOF => break, Token::Word(w) => { @@ -4569,7 +4592,13 @@ impl<'a> Parser<'a> { } values.push(self.parse_statement()?); - self.expect_token(&Token::SemiColon)?; + + if self.options.require_semicolon_stmt_delimiter { + self.expect_token(&Token::SemiColon)?; + } + + // ignore empty statements (between successive statement delimiters) + while self.consume_token(&Token::SemiColon) {} } Ok(values) } @@ -16464,7 +16493,28 @@ impl<'a> Parser<'a> { /// Parse [Statement::Return] fn parse_return(&mut self) -> Result { - match self.maybe_parse(|p| p.parse_expr())? { + let rs = self.maybe_parse(|p| { + let expr = p.parse_expr()?; + + match &expr { + Expr::Value(_) + | Expr::Function(_) + | Expr::UnaryOp { .. } + | Expr::BinaryOp { .. } + | Expr::Case { .. } + | Expr::Cast { .. } + | Expr::Convert { .. } + | Expr::Subquery(_) => Ok(expr), + // todo: how to restrict to variables? + Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr), + _ => parser_err!( + "Non-returnable expression found following RETURN", + p.peek_token().span.start + ), + } + })?; + + match rs { Some(expr) => Ok(Statement::Return(ReturnStatement { value: Some(ReturnStatementValue::Expr(expr)), })), diff --git a/src/test_utils.rs b/src/test_utils.rs index 654f2723e..393fed526 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -25,6 +25,7 @@ #[cfg(not(feature = "std"))] use alloc::{ boxed::Box, + format, string::{String, ToString}, vec, vec::Vec, @@ -186,6 +187,37 @@ impl TestedDialects { statements } + /// The same as [`statements_parse_to`] but it will strip semicolons from the SQL text. 
+ pub fn statements_without_semicolons_parse_to( + &self, + sql: &str, + canonical: &str, + ) -> Vec { + let sql_without_semicolons = sql + .replace("; ", " ") + .replace(" ;", " ") + .replace(";\n", "\n") + .replace("\n;", "\n") + .replace(";", " "); + let statements = self + .parse_sql_statements(&sql_without_semicolons) + .expect(&sql_without_semicolons); + if !canonical.is_empty() && sql != canonical { + assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements); + } else { + assert_eq!( + sql, + statements + .iter() + // note: account for format_statement_list manually inserted semicolons + .map(|s| s.to_string().trim_end_matches(";").to_string()) + .collect::>() + .join("; ") + ); + } + statements + } + /// Ensures that `sql` parses as an [`Expr`], and that /// re-serializing the parse result produces canonical pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr { @@ -318,6 +350,43 @@ where all_dialects_where(|d| !except(d)) } +/// Returns all dialects that don't support statements without semicolon delimiters. +/// (i.e. dialects that require semicolon delimiters.) +pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects { + let tested_dialects = + all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter()); + assert_ne!(tested_dialects.dialects.len(), 0); + tested_dialects +} + +/// Returns all dialects that do support statements without semicolon delimiters. +/// (i.e. dialects not requiring semicolon delimiters.) +pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects { + let tested_dialects = + all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter()); + assert_ne!(tested_dialects.dialects.len(), 0); + tested_dialects +} + +/// Asserts an error for `parse_sql_statements`: +/// - "end of statement" for dialects that require semicolon delimiters +/// - "an SQL statement" for dialects that don't require semicolon delimiters. 
+pub fn assert_err_parse_statements(sql: &str, found: &str) { + assert_eq!( + ParserError::ParserError(format!("Expected: end of statement, found: {found}")), + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(sql) + .unwrap_err() + ); + + assert_eq!( + ParserError::ParserError(format!("Expected: an SQL statement, found: {found}")), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(sql) + .unwrap_err() + ); +} + pub fn assert_eq_vec(expected: &[&str], actual: &[T]) { assert_eq!( expected, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5d8284a46..009b6abd8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -55,6 +55,9 @@ use sqlparser::ast::DateTimeField::Seconds; use sqlparser::ast::Expr::{Identifier, UnaryOp}; use sqlparser::ast::Value::Number; use sqlparser::test_utils::all_dialects_except; +use sqlparser::test_utils::all_dialects_not_requiring_semicolon_statement_delimiter; +use sqlparser::test_utils::all_dialects_requiring_semicolon_statement_delimiter; +use sqlparser::test_utils::assert_err_parse_statements; #[test] fn parse_numeric_literal_underscore() { @@ -272,20 +275,39 @@ fn parse_insert_default_values() { "INSERT INTO test_table DEFAULT VALUES (some_column)"; assert_eq!( ParserError::ParserError("Expected: end of statement, found: (".to_string()), - parse_sql_statements(insert_with_default_values_and_hive_after_columns).unwrap_err() + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(insert_with_default_values_and_hive_after_columns) + .unwrap_err() ); - - let insert_with_default_values_and_hive_partition = - "INSERT INTO test_table DEFAULT VALUES PARTITION (some_column)"; assert_eq!( - ParserError::ParserError("Expected: end of statement, found: PARTITION".to_string()), - parse_sql_statements(insert_with_default_values_and_hive_partition).unwrap_err() + ParserError::ParserError( + "Expected: SELECT, VALUES, or a 
subquery in the query body, found: some_column" + .to_string() + ), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(insert_with_default_values_and_hive_after_columns) + .unwrap_err() + ); + + assert_err_parse_statements( + "INSERT INTO test_table DEFAULT VALUES PARTITION (some_column)", + "PARTITION", ); let insert_with_default_values_and_values_list = "INSERT INTO test_table DEFAULT VALUES (1)"; assert_eq!( ParserError::ParserError("Expected: end of statement, found: (".to_string()), - parse_sql_statements(insert_with_default_values_and_values_list).unwrap_err() + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(insert_with_default_values_and_values_list) + .unwrap_err() + ); + assert_eq!( + ParserError::ParserError( + "Expected: SELECT, VALUES, or a subquery in the query body, found: 1".to_string() + ), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(insert_with_default_values_and_values_list) + .unwrap_err() ); } @@ -414,11 +436,7 @@ fn parse_update() { ); let sql = "UPDATE t SET a = 1 extrabadstuff"; - let res = parse_sql_statements(sql); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: extrabadstuff".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements(sql, "extrabadstuff"); } #[test] @@ -916,9 +934,7 @@ fn parse_limit() { #[test] fn parse_invalid_limit_by() { - all_dialects() - .parse_sql_statements("SELECT * FROM user BY name") - .expect_err("BY without LIMIT"); + assert_err_parse_statements("SELECT * FROM user BY name", "name"); } #[test] @@ -1088,11 +1104,7 @@ fn parse_select_into() { // Do not allow aliases here let sql = "SELECT * INTO table0 asdf FROM table1"; - let result = parse_sql_statements(sql); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: asdf".to_string()), - result.unwrap_err() - ) + assert_err_parse_statements(sql, "asdf"); } #[test] @@ -1128,11 +1140,7 @@ fn 
parse_select_wildcard() { ); let sql = "SELECT * + * FROM foo;"; - let result = parse_sql_statements(sql); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: +".to_string()), - result.unwrap_err(), - ); + assert_err_parse_statements(sql, "+"); } #[test] @@ -1342,11 +1350,7 @@ fn parse_not() { #[test] fn parse_invalid_infix_not() { - let res = parse_sql_statements("SELECT c FROM t WHERE c NOT ("); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), - res.unwrap_err(), - ); + assert_err_parse_statements("SELECT c FROM t WHERE c NOT (", "NOT"); } #[test] @@ -4774,10 +4778,7 @@ fn parse_rename_table() { _ => unreachable!(), }; - assert_eq!( - parse_sql_statements("RENAME TABLE old_table TO new_table a").unwrap_err(), - ParserError::ParserError("Expected: end of statement, found: a".to_string()) - ); + assert_err_parse_statements("RENAME TABLE old_table TO new_table a", "a"); assert_eq!( parse_sql_statements("RENAME TABLE1 old_table TO new_table a").unwrap_err(), @@ -5084,13 +5085,22 @@ fn parse_alter_table_alter_column_type() { "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" )); - let dialects = all_dialects_except(|d| d.supports_alter_column_type_using()); - let res = dialects.parse_sql_statements(&format!( - "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" - )); + let sql = "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"; + let dialects = all_dialects_where(|d| { + !d.supports_alter_column_type_using() + && !d.supports_statements_without_semicolon_delimiter() + }); assert_eq!( - ParserError::ParserError("Expected: end of statement, found: USING".to_string()), - res.unwrap_err() + ParserError::ParserError("Expected: an SQL statement, found: {".to_string()), + dialects.parse_sql_statements(sql).unwrap_err() + ); + + let dialects = all_dialects_where(|d| { + !d.supports_alter_column_type_using() && 
d.supports_statements_without_semicolon_delimiter() + }); + assert_eq!( + ParserError::ParserError("Expected: an SQL statement, found: {".to_string()), + dialects.parse_sql_statements(sql).unwrap_err() ); } @@ -5118,11 +5128,7 @@ fn parse_alter_table_drop_constraint() { } } - let res = parse_sql_statements("ALTER TABLE tab DROP CONSTRAINT is_active TEXT"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: TEXT".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements("ALTER TABLE tab DROP CONSTRAINT is_active TEXT", "TEXT"); } #[test] @@ -5329,10 +5335,13 @@ fn parse_explain_query_plan() { // missing PLAN keyword should return error assert_eq!( ParserError::ParserError("Expected: end of statement, found: SELECT".to_string()), - all_dialects() + all_dialects_requiring_semicolon_statement_delimiter() .parse_sql_statements("EXPLAIN QUERY SELECT sqrt(id) FROM foo") .unwrap_err() ); + assert!(all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements("EXPLAIN QUERY SELECT sqrt(id) FROM foo") + .is_ok()); } #[test] @@ -6068,16 +6077,22 @@ fn parse_interval_all() { expr_from_projection(only(&select.projection)), ); - let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: SECOND".to_string()), - result.unwrap_err(), - ); + assert_err_parse_statements("SELECT INTERVAL '1' SECOND TO SECOND", "SECOND"); - let result = parse_sql_statements("SELECT INTERVAL '10' HOUR (1) TO HOUR (2)"); + let incorrect_hour_interval = "SELECT INTERVAL '10' HOUR (1) TO HOUR (2)"; assert_eq!( ParserError::ParserError("Expected: end of statement, found: (".to_string()), - result.unwrap_err(), + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(incorrect_hour_interval) + .unwrap_err(), + ); + assert_eq!( + ParserError::ParserError( + "Expected: SELECT, VALUES, or a subquery in the query body, found: 2".to_string() 
+ ), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(incorrect_hour_interval) + .unwrap_err(), ); verified_only_select("SELECT INTERVAL '1' YEAR"); @@ -7700,11 +7715,17 @@ fn parse_multiple_statements() { // Check that extra semicolon at the end is stripped by normalization: one_statement_parses_to(&(sql1.to_owned() + ";"), sql1); // Check that forgetting the semicolon results in an error: - let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest)); + // (if configured as required by the dialect) + let sql = sql1.to_owned() + " " + sql2_kw + sql2_rest; assert_eq!( ParserError::ParserError("Expected: end of statement, found: ".to_string() + sql2_kw), - res.unwrap_err() + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(&sql) + .unwrap_err() ); + assert!(all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(&sql) + .is_ok()); } test_with("SELECT foo", "SELECT", " bar"); // ensure that SELECT/WITH is not parsed as a table or column alias if ';' @@ -8408,11 +8429,7 @@ fn parse_drop_user() { #[test] fn parse_invalid_subquery_without_parens() { - let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: 1".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz", "1"); } #[test] @@ -8638,10 +8655,17 @@ fn lateral_derived() { chk(true); let sql = "SELECT * FROM LATERAL UNNEST ([10,20,30]) as numbers WITH OFFSET;"; - let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: end of statement, found: WITH".to_string()), - res.unwrap_err() + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(sql) + .unwrap_err() + ); + assert_eq!( + ParserError::ParserError("Expected: AS, found: ;".to_string()), + all_dialects_not_requiring_semicolon_statement_delimiter() 
+ .parse_sql_statements(sql) + .unwrap_err() ); let sql = "SELECT * FROM a LEFT JOIN LATERAL (b CROSS JOIN c)"; @@ -8774,11 +8798,7 @@ fn parse_start_transaction() { res.unwrap_err() ); - let res = dialects.parse_sql_statements("START TRANSACTION BAD"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements("START TRANSACTION BAD", "BAD"); let res = dialects.parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( @@ -10152,23 +10172,9 @@ fn parse_offset_and_limit() { verified_stmt("SELECT foo FROM bar OFFSET 2"); // Can't repeat OFFSET / LIMIT - let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), - res.unwrap_err() - ); - - let res = parse_sql_statements("SELECT foo FROM bar LIMIT 2 LIMIT 2"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: LIMIT".to_string()), - res.unwrap_err() - ); - - let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 LIMIT 2 OFFSET 2"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2", "OFFSET"); + assert_err_parse_statements("SELECT foo FROM bar LIMIT 2 LIMIT 2", "LIMIT"); + assert_err_parse_statements("SELECT foo FROM bar OFFSET 2 LIMIT 2 OFFSET 2", "OFFSET"); } #[test] @@ -10636,11 +10642,7 @@ fn parse_uncache_table() { } ); - let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); - assert_eq!( - ParserError::ParserError("Expected: end of statement, found: foo".to_string()), - res.unwrap_err() - ); + assert_err_parse_statements("UNCACHE TABLE 'table_name' foo", "foo"); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( @@ -13182,13 +13184,7 @@ fn test_drop_policy() { "sql parser error: Expected: ON, found: 
EOF" ); // Wrong option name - assert_eq!( - all_dialects() - .parse_sql_statements("DROP POLICY my_policy ON my_table WRONG") - .unwrap_err() - .to_string(), - "sql parser error: Expected: end of statement, found: WRONG" - ); + assert_err_parse_statements("DROP POLICY my_policy ON my_table WRONG", "WRONG"); } #[test] @@ -13229,18 +13225,27 @@ fn test_alter_policy() { verified_stmt("ALTER POLICY my_policy ON my_table"); // mixing RENAME and APPLY expressions + let sql = "ALTER POLICY old_policy ON my_table TO public RENAME TO new_policy"; assert_eq!( - parse_sql_statements("ALTER POLICY old_policy ON my_table TO public RENAME TO new_policy") + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Expected: end of statement, found: RENAME" + "sql parser error: Expected: end of statement, found: RENAME".to_string(), ); assert_eq!( - parse_sql_statements("ALTER POLICY old_policy ON my_table RENAME TO new_policy TO public") + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Expected: end of statement, found: TO" + "sql parser error: Expected: KEYWORD `TABLE` after RENAME, found: TO".to_string(), + ); + + assert_err_parse_statements( + "ALTER POLICY old_policy ON my_table RENAME TO new_policy TO public", + "TO", ); + // missing TO in RENAME TO assert_eq!( parse_sql_statements("ALTER POLICY old_policy ON my_table RENAME") @@ -13438,14 +13443,9 @@ fn test_alter_connector() { } // Wrong option name - assert_eq!( - dialects - .parse_sql_statements( - "ALTER CONNECTOR my_connector SET WRONG 'jdbc:mysql://localhost:3306/mydb'" - ) - .unwrap_err() - .to_string(), - "sql parser error: Expected: end of statement, found: WRONG" + assert_err_parse_statements( + "ALTER CONNECTOR my_connector SET WRONG 'jdbc:mysql://localhost:3306/mydb'", + "WRONG", ); } @@ -14189,12 +14189,25 @@ fn parse_create_table_select() { 
r#"CREATE TABLE foo (baz INT, name STRING) AS SELECT bar, oth_name FROM test.table_a"#; let _ = dialects.one_statement_parses_to(sql_2, expected); - let dialects = all_dialects_where(|d| !d.supports_create_table_select()); + let err_dialects = all_dialects_where(|d| { + !d.supports_create_table_select() && !d.supports_statements_without_semicolon_delimiter() + }); + let multi_statement_dialects = all_dialects_where(|d| { + !d.supports_create_table_select() && d.supports_statements_without_semicolon_delimiter() + }); for sql in [sql_1, sql_2] { assert_eq!( - dialects.parse_sql_statements(sql).unwrap_err(), + err_dialects.parse_sql_statements(sql).unwrap_err(), ParserError::ParserError("Expected: end of statement, found: SELECT".to_string()) ); + + assert_eq!( + multi_statement_dialects + .parse_sql_statements(sql) + .unwrap() + .len(), + 2 + ); } } @@ -14483,7 +14496,17 @@ fn parse_update_from_before_select() { "UPDATE t1 FROM (SELECT name, id FROM t1 GROUP BY id) AS t2 SET name = t2.name FROM (SELECT name from t2) AS t2"; assert_eq!( ParserError::ParserError("Expected: end of statement, found: FROM".to_string()), - parse_sql_statements(query).unwrap_err() + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(query) + .unwrap_err() + ); + assert_eq!( + ParserError::ParserError( + "Expected: SELECT, VALUES, or a subquery in the query body, found: FROM".to_string() + ), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(query) + .unwrap_err() ); } #[test] @@ -15897,6 +15920,34 @@ fn parse_return() { assert_eq!(stmt, Statement::Return(ReturnStatement { value: None })); let _ = all_dialects().verified_stmt("RETURN 1"); + let _ = all_dialects().verified_stmt("RETURN -1"); + let _ = all_dialects_where(|d| d.is_identifier_start('@')).verified_stmt("RETURN @my_var"); + let _ = all_dialects().verified_stmt("RETURN CAST(1 AS INT)"); + let _ = all_dialects().verified_stmt("RETURN dbo.my_func()"); + let _ = 
all_dialects().verified_stmt("RETURN (SELECT 1)"); + let _ = all_dialects().verified_stmt("RETURN CASE 1 WHEN 1 THEN 2 END"); + + let _ = all_dialects_where(|d| { + d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + }) + .verified_stmt("RETURN CONVERT(1, INT)"); + + let _ = all_dialects_except(|d| { + d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + || d.is::() + }) + .verified_stmt("RETURN CONVERT(1, INT)"); } #[test] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 50c6448d9..f5208736a 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -34,6 +34,9 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MsSqlDialect}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; +#[cfg(test)] +use pretty_assertions::assert_eq; + #[test] fn parse_mssql_identifiers() { let sql = "SELECT @@version, _foo$123 FROM ##temp"; @@ -196,6 +199,10 @@ fn parse_mssql_create_procedure() { let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN SELECT [foo], CASE WHEN [foo] IS NULL THEN 'empty' ELSE 'notempty' END AS [foo]; END"); // Multiple statements let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10; END"); + // early return + let _ = ms().verified_stmt( + "CREATE PROCEDURE [foo] AS BEGIN IF 1 = 0 RETURN;; DECLARE @x INT; RETURN @x; END", + ); } #[test] @@ -246,6 +253,7 @@ fn parse_create_function() { remote_connection: None, }), ); + let _ = ms().statements_without_semicolons_parse_to(return_expression_function, ""); let multi_statement_function = "\ CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ @@ -257,6 +265,7 @@ fn parse_create_function() { END\ "; let _ = ms().verified_stmt(multi_statement_function); + let _ = ms().statements_without_semicolons_parse_to(multi_statement_function, ""); let multi_statement_function_without_as = 
multi_statement_function.replace(" AS", ""); let _ = ms().one_statement_parses_to( @@ -277,6 +286,7 @@ fn parse_create_function() { END\ "; let _ = ms().verified_stmt(create_function_with_conditional); + let _ = ms().statements_without_semicolons_parse_to(create_function_with_conditional, ""); let create_or_alter_function = "\ CREATE OR ALTER FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ @@ -288,6 +298,7 @@ fn parse_create_function() { END\ "; let _ = ms().verified_stmt(create_or_alter_function); + let _ = ms().statements_without_semicolons_parse_to(create_or_alter_function, ""); let create_function_with_return_expression = "\ CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ @@ -298,6 +309,7 @@ fn parse_create_function() { END\ "; let _ = ms().verified_stmt(create_function_with_return_expression); + let _ = ms().statements_without_semicolons_parse_to(create_function_with_return_expression, ""); let create_inline_table_value_function = "\ CREATE FUNCTION some_inline_tvf(@foo INT, @bar VARCHAR(256)) \ @@ -1567,6 +1579,7 @@ fn test_mssql_cursor() { DEALLOCATE Employee_Cursor\ "; let _ = ms().statements_parse_to(full_cursor_usage, ""); + let _ = ms().statements_without_semicolons_parse_to(full_cursor_usage, ""); } #[test] @@ -2181,7 +2194,7 @@ fn parse_mssql_if_else() { // Multiple statements let stmts = ms() - .parse_sql_statements("DECLARE @A INT; IF 1=1 BEGIN SET @A = 1 END ELSE SET @A = 2") + .parse_sql_statements("DECLARE @A INT; IF 1=1 BEGIN SET @A = 1; END ELSE SET @A = 2;") .unwrap(); match &stmts[..] { [Statement::Declare { .. 
}, Statement::If(stmt)] => { @@ -2196,11 +2209,11 @@ fn parse_mssql_if_else() { #[test] fn test_mssql_if_else_span() { - let sql = "IF 1 = 1 SELECT '1' ELSE SELECT '2'"; + let sql = "IF 1 = 1 SELECT '1'; ELSE SELECT '2';"; let mut parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); assert_eq!( parser.parse_statement().unwrap().span(), - Span::new(Location::new(1, 1), Location::new(1, sql.len() as u64 + 1)) + Span::new(Location::new(1, 1), Location::new(1, sql.len() as u64)) ); } @@ -2223,7 +2236,7 @@ fn test_mssql_if_else_multiline_span() { #[test] fn test_mssql_if_statements_span() { // Simple statements - let mut sql = "IF 1 = 1 SELECT '1' ELSE SELECT '2'"; + let mut sql = "IF 1 = 1 SELECT '1'; ELSE SELECT '2'"; let mut parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); match parser.parse_statement().unwrap() { Statement::If(IfStatement { @@ -2237,14 +2250,15 @@ fn test_mssql_if_statements_span() { ); assert_eq!( else_block.span(), - Span::new(Location::new(1, 21), Location::new(1, 36)) + Span::new(Location::new(1, 22), Location::new(1, 37)) ); } stmt => panic!("Unexpected statement: {stmt:?}"), } + let _ = ms().statements_without_semicolons_parse_to(sql, ""); // Blocks - sql = "IF 1 = 1 BEGIN SET @A = 1; END ELSE BEGIN SET @A = 2 END"; + sql = "IF 1 = 1 BEGIN SET @A = 1; END ELSE BEGIN SET @A = 2; END"; parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); match parser.parse_statement().unwrap() { Statement::If(IfStatement { @@ -2258,11 +2272,12 @@ fn test_mssql_if_statements_span() { ); assert_eq!( else_block.span(), - Span::new(Location::new(1, 32), Location::new(1, 57)) + Span::new(Location::new(1, 32), Location::new(1, 58)) ); } stmt => panic!("Unexpected statement: {stmt:?}"), } + let _ = ms().statements_without_semicolons_parse_to(sql, ""); } #[test] @@ -2421,6 +2436,7 @@ fn parse_create_trigger() { END\ "; let _ = ms().verified_stmt(multi_statement_trigger); + let _ = 
ms().statements_without_semicolons_parse_to(multi_statement_trigger, ""); let create_trigger_with_return = "\ CREATE TRIGGER some_trigger ON some_table FOR INSERT \ @@ -2430,15 +2446,7 @@ fn parse_create_trigger() { END\ "; let _ = ms().verified_stmt(create_trigger_with_return); - - let create_trigger_with_return = "\ - CREATE TRIGGER some_trigger ON some_table FOR INSERT \ - AS \ - BEGIN \ - RETURN; \ - END\ - "; - let _ = ms().verified_stmt(create_trigger_with_return); + let _ = ms().statements_without_semicolons_parse_to(create_trigger_with_return, ""); let create_trigger_with_conditional = "\ CREATE TRIGGER some_trigger ON some_table FOR INSERT \ @@ -2452,6 +2460,7 @@ fn parse_create_trigger() { END\ "; let _ = ms().verified_stmt(create_trigger_with_conditional); + let _ = ms().statements_without_semicolons_parse_to(create_trigger_with_conditional, ""); } #[test] @@ -2507,3 +2516,318 @@ DECLARE @Y AS NVARCHAR(MAX)='y' assert_eq!(stmts.len(), 2); assert!(stmts.iter().all(|s| matches!(s, Statement::Declare { .. 
}))); } + +#[test] +fn test_supports_statements_without_semicolon_delimiter() { + use sqlparser::ast::Ident; + + use sqlparser::tokenizer::Location; + + fn parse_n_statements(n: usize, sql: &str) -> Vec { + let dialect = MsSqlDialect {}; + let parser = Parser::new(&dialect) + .with_options(ParserOptions::default().with_require_semicolon_stmt_delimiter(false)); + let stmts = parser + .try_with_sql(sql) + .unwrap() + .parse_statements() + .unwrap(); + assert_eq!(stmts.len(), n); + stmts + } + + let multiple_statements = "SELECT 1 SELECT 2"; + assert_eq!( + parse_n_statements(2, multiple_statements), + vec![ + Statement::Query(Box::new(Query { + with: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + order_by: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![SelectItem::UnnamedExpr(Expr::Value( + (Value::Number("1".parse().unwrap(), false)).with_empty_span() + ))], + exclude: None, + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }))), + })), + Statement::Query(Box::new(Query { + with: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + order_by: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![SelectItem::UnnamedExpr(Expr::Value( + (Value::Number("2".parse().unwrap(), false)).with_empty_span() + 
))], + exclude: None, + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard + }))), + })), + ] + ); + + let udf = "CREATE OR ALTER FUNCTION utc_now() + RETURNS SMALLDATETIME \ + AS \ + BEGIN \ + RETURN GETUTCDATE() + END \ + "; + assert_eq!( + parse_n_statements(1, udf)[0], + Statement::CreateFunction(CreateFunction { + or_alter: true, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![sqlparser::ast::Ident::with_span( + Span::new(Location::new(1, 26), Location::new(1, 33)), + "utc_now" + )]), + args: Some(vec![]), + return_type: Some(sqlparser::ast::DataType::Custom( + ObjectName(vec![sqlparser::ast::ObjectNamePart::Identifier(Ident { + value: "SMALLDATETIME".to_string(), + quote_style: None, + span: Span { + start: Location::new(2, 17), + end: Location::new(2, 30) + }, + })]), + vec![] + )), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(TokenWithSpan { + token: Token::Word(Word { + value: "BEGIN".to_string(), + quote_style: None, + keyword: Keyword::BEGIN + }), + span: Span::new(Location::new(2, 47), Location::new(2, 57)), + }), + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Function(Function { + name: ObjectName::from(vec![Ident::new("GETUTCDATE")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }))), + })], + end_token: AttachedToken(TokenWithSpan { + token: 
Token::Word(Word { + value: "END".to_string(), + quote_style: None, + keyword: Keyword::END + }), + span: Span::new(Location::new(3, 9), Location::new(3, 12)), + }) + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None + }) + ); + + let sp = "CREATE OR ALTER PROCEDURE example_sp \ + AS \ + IF USER_NAME() = 'X' \ + RETURN \ + IF 1 = 2 \ + RETURN (SELECT 1) \ + \ + RETURN CONVERT(INT, 123) \ + "; + assert_eq!( + parse_n_statements(1, sp)[0], + Statement::CreateProcedure { + or_alter: true, + name: ObjectName::from(vec![Ident::new("example_sp")]), + params: Some(vec![]), + language: None, + body: ConditionalStatements::Sequence { + statements: vec![ + Statement::If(IfStatement { + if_block: ConditionalStatementBlock { + start_token: AttachedToken::empty(), + condition: Some(Expr::BinaryOp { + left: Box::new(Expr::Function(Function { + name: ObjectName::from(vec![Ident::new("USER_NAME")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + })), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString("X".to_string()), + span: Span::new(Location::new(1, 58), Location::new(1, 61)), + })), + }), + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![Statement::Return(ReturnStatement { + value: None + })], + }, + }, + elseif_blocks: vec![], + else_block: None, + end_token: None, + }), + Statement::If(IfStatement { + if_block: ConditionalStatementBlock { + start_token: AttachedToken::empty(), + condition: Some(Expr::BinaryOp { + left: Box::new(Expr::Value(number("1").with_span(Span::new( + Location::new(1, 73), + Location::new(1, 74) + )))), + op: 
BinaryOperator::Eq, + right: Box::new(Expr::Value(number("2").with_span(Span::new( + Location::new(1, 76), + Location::new(1, 77) + )))), + }), + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Subquery( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![SelectItem::UnnamedExpr( + Expr::Value(number("1").with_span(Span::new( + Location::new(1, 93), + Location::new(1, 94) + ))) + ),], + exclude: None, + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![],), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }),)), + order_by: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + }), + ))), + })], + }, + }, + elseif_blocks: vec![], + else_block: None, + end_token: None, + }), + Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Convert { + is_try: false, + expr: Box::new(Expr::Value( + number("123").with_span(Span::new( + Location::new(1, 89), + Location::new(1, 92) + )) + )), + data_type: Some(DataType::Int(None)), + charset: None, + target_before_value: true, + styles: vec![], + })), + }), + ], + }, + } + ); +} From 7c173554168ded4ab7f231ce0ada5b3a95cdedc9 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 7 May 2025 15:09:56 -0400 Subject: [PATCH 2/8] Reduce ambiguity for consecutive statements - at least all of select/insert/update/delete (plus exec) can be added --- src/keywords.rs 
| 5 +++++ tests/sqlparser_common.rs | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/keywords.rs b/src/keywords.rs index 7655d03a6..1ba3d5a8c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1098,6 +1098,11 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::TABLESAMPLE, Keyword::FROM, Keyword::OPEN, + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + Keyword::EXEC, + Keyword::EXECUTE, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 009b6abd8..e8e3e262a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -668,6 +668,23 @@ fn parse_select_with_table_alias() { ); } +#[test] +fn parse_consecutive_queries() { + let select_then_exec = "SELECT * FROM deleted; EXECUTE my_sp 'some', 'params'"; + let _ = all_dialects() + .parse_sql_statements(select_then_exec) + .unwrap(); + let _ = all_dialects_not_requiring_semicolon_statement_delimiter() + .statements_without_semicolons_parse_to(select_then_exec, ""); + + let select_then_update = "SELECT 1 FROM x; UPDATE y SET z = 1"; + let _ = all_dialects() + .parse_sql_statements(select_then_update) + .unwrap(); + let _ = all_dialects_not_requiring_semicolon_statement_delimiter() + .statements_without_semicolons_parse_to(select_then_update, ""); +} + #[test] fn parse_analyze() { verified_stmt("ANALYZE TABLE test_table"); From b83c24553fc88e02995f4f545ced55482203886a Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Thu, 8 May 2025 13:44:56 -0400 Subject: [PATCH 3/8] Disallow keywords as table aliases for parsing statements without semicolons --- src/dialect/mod.rs | 10 ++++++++-- tests/sqlparser_common.rs | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 56ae5a095..fccff0754 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1036,8 +1036,14 @@ pub trait Dialect: 
Debug + Any { /// Returns true if the specified keyword should be parsed as a table factor alias. /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided /// to enable looking ahead if needed. - fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { - explicit || self.is_table_alias(kw, parser) + /// + /// When the dialect supports statements without semicolon delimiter, actual keywords aren't parsed as aliases. + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { + if self.supports_statements_without_semicolon_delimiter() { + kw == &Keyword::NoKeyword + } else { + explicit || self.is_table_alias(kw, _parser) + } } /// Returns true if this dialect supports querying historical table data diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e8e3e262a..f66af43b9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -668,6 +668,28 @@ fn parse_select_with_table_alias() { ); } +#[test] +fn parse_select_with_table_alias_keyword() { + // note: DECLARE isn't included in RESERVED_FOR_TABLE_ALIAS + let table_alias_non_reserved_keyword = "SELECT a FROM lineitem DECLARE"; + let statements = all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements(table_alias_non_reserved_keyword) + .unwrap(); + assert_eq!(1, statements.len()); + assert_eq!( + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements(table_alias_non_reserved_keyword) + .unwrap_err() + ); + + let table_alias_quoted_keyword = "SELECT a FROM lineitem \"DECLARE\""; + let statements = all_dialects() + .parse_sql_statements(table_alias_quoted_keyword) + .unwrap(); + assert_eq!(1, statements.len()); +} + #[test] fn parse_consecutive_queries() { let select_then_exec = "SELECT * FROM deleted; EXECUTE my_sp 'some', 'params'"; @@ -951,7 +973,18 @@ fn 
parse_limit() { #[test] fn parse_invalid_limit_by() { - assert_err_parse_statements("SELECT * FROM user BY name", "name"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: name".to_string()), + all_dialects_requiring_semicolon_statement_delimiter() + .parse_sql_statements("SELECT * FROM user BY name") + .unwrap_err() + ); + assert_eq!( + ParserError::ParserError("Expected: an SQL statement, found: BY".to_string()), + all_dialects_not_requiring_semicolon_statement_delimiter() + .parse_sql_statements("SELECT * FROM user BY name") + .unwrap_err() + ); } #[test] @@ -11082,7 +11115,9 @@ fn parse_select_table_with_index_hints() { // Test that dialects that don't support table hints will keep parsing the USE as table alias let sql = "SELECT * FROM T USE LIMIT 1"; - let unsupported_dialects = all_dialects_where(|d| !d.supports_table_hints()); + let unsupported_dialects = all_dialects_where(|d| { + !d.supports_table_hints() && !d.supports_statements_without_semicolon_delimiter() + }); let select = unsupported_dialects .verified_only_select_with_canonical(sql, "SELECT * FROM T AS USE LIMIT 1"); assert_eq!( From 53d7930be07d1d2400a266cc7e7e78722d2aa5b1 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Tue, 20 May 2025 18:06:38 -0400 Subject: [PATCH 4/8] Enable parsing comma lists without semicolons --- src/parser/mod.rs | 12 ++++ tests/sqlparser_mssql.rs | 115 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index df35c8677..6d6e40b2c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4561,6 +4561,18 @@ impl<'a> Parser<'a> { return Ok(vec![]); } + if end_token == Token::SemiColon + && self + .dialect + .supports_statements_without_semicolon_delimiter() + { + if let Token::Word(ref kw) = self.peek_token().token { + if kw.keyword != Keyword::NoKeyword { + return Ok(vec![]); + } + } + } + if self.options.trailing_commas && self.peek_tokens() == 
[Token::Comma, end_token] { let _ = self.consume_token(&Token::Comma); return Ok(vec![]); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f5208736a..abe383169 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2520,7 +2520,6 @@ DECLARE @Y AS NVARCHAR(MAX)='y' #[test] fn test_supports_statements_without_semicolon_delimiter() { use sqlparser::ast::Ident; - use sqlparser::tokenizer::Location; fn parse_n_statements(n: usize, sql: &str) -> Vec { @@ -2830,4 +2829,118 @@ fn test_supports_statements_without_semicolon_delimiter() { }, } ); + + let exec_then_update = "\ + EXEC my_sp \ + UPDATE my_table SET col = 1 \ + "; + assert_eq!( + parse_n_statements(2, exec_then_update), + vec![ + Statement::Execute { + name: Some(ObjectName::from(vec![Ident::new("my_sp")])), + parameters: vec![], + has_parentheses: false, + immediate: false, + into: vec![], + using: vec![], + output: false, + default: false, + }, + Statement::Update { + table: TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("my_table")]), + alias: None, + with_hints: vec![], + args: None, + version: None, + with_ordinality: false, + partitions: vec![], + json_path: None, + sample: None, + index_hints: vec![] + }, + joins: vec![], + }, + assignments: vec![Assignment { + value: Expr::Value( + number("1") + .with_span(Span::new(Location::new(3, 16), Location::new(3, 17))) + ), + target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new("col")])), + },], + selection: None, + returning: None, + from: None, + or: None + }, + ] + ); + + let exec_params_then_update = "\ + EXEC my_sp 1, 2 \ + UPDATE my_table SET col = 1 \ + "; + assert_eq!( + parse_n_statements(2, exec_params_then_update), + vec![ + Statement::Execute { + name: Some(ObjectName::from(vec![Ident::with_span( + Span::new(Location::new(1, 6), Location::new(1, 11)), + "my_sp" + )])), + parameters: vec![ + Expr::Value( + number("1") + 
.with_span(Span::new(Location::new(1, 12), Location::new(1, 13))) + ), + Expr::Value( + number("2") + .with_span(Span::new(Location::new(1, 15), Location::new(1, 17))) + ), + ], + has_parentheses: false, + immediate: false, + into: vec![], + using: vec![], + output: false, + default: false, + }, + Statement::Update { + table: TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::with_span( + Span::new(Location::new(1, 24), Location::new(1, 32)), + "my_table" + )]), + alias: None, + with_hints: vec![], + args: None, + version: None, + with_ordinality: false, + partitions: vec![], + json_path: None, + sample: None, + index_hints: vec![] + }, + joins: vec![], + }, + assignments: vec![Assignment { + value: Expr::Value( + number("1") + .with_span(Span::new(Location::new(3, 16), Location::new(3, 17))) + ), + target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::with_span( + Span::new(Location::new(1, 37), Location::new(1, 40)), + "col" + )])), + },], + selection: None, + returning: None, + from: None, + or: None + }, + ] + ); } From 0026bb7f0975879a1235cf97be525cb8fcebc27a Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Jul 2025 17:10:58 -0400 Subject: [PATCH 5/8] Add more descriptive docs comment --- src/dialect/mod.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fccff0754..ad2a26802 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1144,6 +1144,13 @@ pub trait Dialect: Debug + Any { } /// Returns true if the dialect supports parsing statements without a semicolon delimiter. + /// + /// If this returns `true`, the following SQL will parse.
If it returns `false`, the SQL will not parse + /// + /// ```sql + /// SELECT 1 + /// SELECT 2 + /// ``` fn supports_statements_without_semicolon_delimiter(&self) -> bool { false } From 75cb98eceec15502e5fb61d8b6e4f0e8dbd1cba3 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Jul 2025 17:28:30 -0400 Subject: [PATCH 6/8] Only check the parser option - the dialect's original support informs the parser option, but the parser behavior itself should just check its own options --- src/parser/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6d6e40b2c..802935425 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4561,11 +4561,7 @@ impl<'a> Parser<'a> { return Ok(vec![]); } - if end_token == Token::SemiColon - && self - .dialect - .supports_statements_without_semicolon_delimiter() - { + if end_token == Token::SemiColon && !self.options.require_semicolon_stmt_delimiter { if let Token::Word(ref kw) = self.peek_token().token { if kw.keyword != Keyword::NoKeyword { return Ok(vec![]); From bbd32ed192fd88565de85da6c851d2795330c2eb Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Jul 2025 17:50:57 -0400 Subject: [PATCH 7/8] Simplify semicolon replacement for input string --- src/test_utils.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 393fed526..74dca1098 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -193,12 +193,7 @@ impl TestedDialects { sql: &str, canonical: &str, ) -> Vec { - let sql_without_semicolons = sql - .replace("; ", " ") - .replace(" ;", " ") - .replace(";\n", "\n") - .replace("\n;", "\n") - .replace(";", " "); + let sql_without_semicolons = sql.replace(";", " "); let statements = self .parse_sql_statements(&sql_without_semicolons) .expect(&sql_without_semicolons); From 4bde5b478db8a5b475ea5de9c0df07bb80a7d357 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Jul 2025 17:58:38 -0400
Subject: [PATCH 8/8] Restore former "end of statement" logic --- src/parser/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 802935425..a225d3ac2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -504,7 +504,12 @@ impl<'a> Parser<'a> { match self.peek_token().token { Token::EOF => break, - + // end of statement + Token::Word(word) => { + if expecting_statement_delimiter && word.keyword == Keyword::END { + break; + } + } // don't expect a semicolon statement delimiter after a newline when not otherwise required Token::Whitespace(Whitespace::Newline) => { if !self.options.require_semicolon_stmt_delimiter {