Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ast/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//! (commonly referred to as Data Definition Language, or DDL)

#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::String, vec, vec::Vec};
use alloc::{boxed::Box, format, string::String, vec::Vec};
use core::fmt::{self, Display, Write};

#[cfg(feature = "serde")]
Expand Down
303 changes: 298 additions & 5 deletions src/ast/dml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
// under the License.

#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec::Vec};
use alloc::{
boxed::Box,
format,
string::{String, ToString},
vec::Vec,
};

use core::fmt::{self, Display};
#[cfg(feature = "serde")]
Expand All @@ -27,10 +32,11 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::display_utils::{indented_list, Indent, SpaceOrNewline};

use super::{
display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause,
Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert,
OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins,
UpdateTableFromKind,
display_comma_separated, display_separated, helpers::attached_token::AttachedToken,
query::InputFormatClause, Assignment, CopyLegacyCsvOption, CopyLegacyOption, CopyOption,
CopySource, CopyTarget, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName,
OnInsert, OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject,
TableWithJoins, UpdateTableFromKind,
};

/// INSERT statement.
Expand Down Expand Up @@ -310,3 +316,290 @@ impl Display for Update {
Ok(())
}
}

/// CSV formatting settings derived from the options of a COPY statement.
///
/// Bundles the delimiter, quote, escape and NULL marker that are used when
/// the inline data rows of a COPY statement are written out. The values are
/// collected from both modern [`CopyOption`] and legacy [`CopyLegacyOption`]
/// lists; anything not specified keeps the value from the `Default` impl.
///
/// NOTE(review): presumably the same settings are meant to drive parsing of
/// the inline rows as well — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvFormatOptions {
/// The character separating fields within a row (default: tab).
pub(crate) delimiter: char,
/// The character used to enclose fields that need quoting (default: `"`).
pub(crate) quote: char,
/// The escape character (default: `\`).
pub(crate) escape: char,
/// The string written for NULL values (default: `\N`, i.e. a single
/// backslash followed by `N`).
pub(crate) null_symbol: String,
}

impl Default for CsvFormatOptions {
    /// Returns the settings used when a COPY statement specifies no
    /// formatting options: tab delimiter, `"` quote, `\` escape and `\N`
    /// (backslash followed by `N`) as the NULL marker.
    fn default() -> Self {
        let null_symbol = String::from("\\N");
        Self {
            null_symbol,
            escape: '\\',
            quote: '"',
            delimiter: '\t',
        }
    }
}

impl CsvFormatOptions {
    /// Builds the effective CSV settings from a COPY statement's option lists.
    ///
    /// Starts from [`CsvFormatOptions::default`] and applies `options` first
    /// and `legacy_options` second, each in list order. A later entry
    /// therefore overrides an earlier one, and a legacy option overrides a
    /// modern option that sets the same field.
    ///
    /// # Arguments
    ///
    /// * `options` - Modern COPY options (PostgreSQL 9.0+)
    /// * `legacy_options` - Legacy COPY options (pre-PostgreSQL 9.0)
    ///
    /// # Returns
    ///
    /// A `CsvFormatOptions` instance with the extracted settings, using the
    /// defaults for any setting that no option specifies.
    pub(crate) fn from_copy_options(
        options: &[CopyOption],
        legacy_options: &[CopyLegacyOption],
    ) -> Self {
        let mut csv_options = Self::default();

        // Modern (parenthesised) options.
        for option in options {
            match option {
                CopyOption::Delimiter(c) => csv_options.delimiter = *c,
                CopyOption::Quote(c) => csv_options.quote = *c,
                CopyOption::Escape(c) => csv_options.escape = *c,
                CopyOption::Null(null) => csv_options.null_symbol = null.clone(),
                // These options don't affect CSV formatting. They are listed
                // explicitly (no `_` arm) so that adding a variant forces a
                // decision here.
                CopyOption::Format(_)
                | CopyOption::Freeze(_)
                | CopyOption::Header(_)
                | CopyOption::ForceQuote(_)
                | CopyOption::ForceNotNull(_)
                | CopyOption::ForceNull(_)
                | CopyOption::Encoding(_) => {}
            }
        }

        // Legacy options are applied last and therefore win over modern ones.
        for option in legacy_options {
            match option {
                CopyLegacyOption::Delimiter(c) => csv_options.delimiter = *c,
                CopyLegacyOption::Null(null) => csv_options.null_symbol = null.clone(),
                CopyLegacyOption::Csv(csv_opts) => {
                    for csv_option in csv_opts {
                        match csv_option {
                            CopyLegacyCsvOption::Quote(c) => csv_options.quote = *c,
                            CopyLegacyCsvOption::Escape(c) => csv_options.escape = *c,
                            // These CSV options don't affect CSV formatting
                            CopyLegacyCsvOption::Header
                            | CopyLegacyCsvOption::ForceQuote(_)
                            | CopyLegacyCsvOption::ForceNotNull(_) => {}
                        }
                    }
                }
                // These legacy options don't affect CSV formatting
                CopyLegacyOption::AcceptAnyDate
                | CopyLegacyOption::AcceptInvChars(_)
                | CopyLegacyOption::AddQuotes
                | CopyLegacyOption::AllowOverwrite
                | CopyLegacyOption::Binary
                | CopyLegacyOption::BlankAsNull
                | CopyLegacyOption::Bzip2
                | CopyLegacyOption::CleanPath
                | CopyLegacyOption::CompUpdate { .. }
                | CopyLegacyOption::DateFormat(_)
                | CopyLegacyOption::EmptyAsNull
                | CopyLegacyOption::Encrypted { .. }
                | CopyLegacyOption::Escape
                | CopyLegacyOption::Extension(_)
                | CopyLegacyOption::FixedWidth(_)
                | CopyLegacyOption::Gzip
                | CopyLegacyOption::Header
                | CopyLegacyOption::IamRole(_)
                | CopyLegacyOption::IgnoreHeader(_)
                | CopyLegacyOption::Json
                | CopyLegacyOption::Manifest { .. }
                | CopyLegacyOption::MaxFileSize(_)
                | CopyLegacyOption::Parallel(_)
                | CopyLegacyOption::Parquet
                | CopyLegacyOption::PartitionBy(_)
                | CopyLegacyOption::Region(_)
                | CopyLegacyOption::RemoveQuotes
                | CopyLegacyOption::RowGroupSize(_)
                | CopyLegacyOption::StatUpdate(_)
                | CopyLegacyOption::TimeFormat(_)
                | CopyLegacyOption::TruncateColumns
                | CopyLegacyOption::Zstd => {}
            }
        }

        csv_options
    }

    /// Format a single CSV field, adding quotes and escaping if necessary.
    ///
    /// Formatting rules:
    /// - `None` (NULL) is written as the configured `null_symbol`, verbatim
    ///   and never quoted.
    /// - A non-NULL value is enclosed in the quote character when it contains
    ///   the delimiter, the quote character, `\n` or `\r`, **or when it is
    ///   equal to `null_symbol`** — otherwise it would be indistinguishable
    ///   from NULL in the output.
    /// - Inside a quoted field, every quote character and every escape
    ///   character is written twice.
    /// - Any other value is written unchanged.
    ///
    /// # Arguments
    ///
    /// * `f` - The formatter to write to
    /// * `field` - The field value to format, or `None` for NULL
    ///
    /// # Returns
    ///
    /// A `fmt::Result` indicating success or failure of the write operation.
    fn format_csv_field(&self, f: &mut fmt::Formatter, field: Option<&str>) -> fmt::Result {
        // NULL must stay recognisable as NULL, so it bypasses quoting entirely.
        let value = match field {
            Some(value) => value,
            None => return f.write_str(&self.null_symbol),
        };

        let has_special_char = value
            .contains(|c: char| c == self.delimiter || c == self.quote || c == '\n' || c == '\r');
        // A real value that happens to spell the NULL marker has to be quoted
        // to keep it distinct from an actual NULL.
        let needs_quoting = has_special_char || value == self.null_symbol.as_str();

        if !needs_quoting {
            return f.write_str(value);
        }

        write!(f, "{}", self.quote)?;
        for ch in value.chars() {
            if ch == self.quote || ch == self.escape {
                // NOTE(review): PostgreSQL prefixes an embedded quote with the
                // ESCAPE character; doubling is equivalent only when escape ==
                // quote — confirm this matches what the parser accepts.
                write!(f, "{}{}", ch, ch)?;
            } else {
                write!(f, "{}", ch)?;
            }
        }
        write!(f, "{}", self.quote)
    }
}

/// COPY statement.
///
/// Represents a PostgreSQL COPY statement for bulk data transfer between
/// a file and a table. The statement can copy data FROM a file to a table
/// or TO a file from a table or query.
///
/// # Syntax
///
/// ```sql
/// COPY table_name [(column_list)] FROM { 'filename' | STDIN | PROGRAM 'command' }
/// COPY { table_name [(column_list)] | (query) } TO { 'filename' | STDOUT | PROGRAM 'command' }
/// ```
///
/// # Display
///
/// The `Display` implementation writes the statement itself and, when
/// [`Copy::values`] is non-empty, follows it with `;`, one line per data row
/// and a terminating `\.` line.
///
/// # Examples
///
/// ```
/// # use sqlparser::ast::{Copy, CopySource, CopyTarget, ObjectName};
/// # use sqlparser::dialect::PostgreSqlDialect;
/// # use sqlparser::parser::Parser;
/// let sql = "COPY users FROM 'data.csv'";
/// let dialect = PostgreSqlDialect {};
/// let ast = Parser::parse_sql(&dialect, sql).unwrap();
/// ```
///
/// See [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-copy.html)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Copy {
/// The source of 'COPY TO', or the target of 'COPY FROM'.
/// Either a table name with an optional column list, or a parenthesised
/// query (per the syntax above, a query is valid for COPY TO only).
pub source: CopySource,
/// Direction of the copy operation.
/// - `true` for COPY TO (table/query to file)
/// - `false` for COPY FROM (file to table)
pub to: bool,
/// The target of 'COPY TO', or the source of 'COPY FROM'.
/// Can be a file, STDIN, STDOUT, or a PROGRAM command.
pub target: CopyTarget,
/// Modern COPY options (PostgreSQL 9.0+), specified within parentheses
/// and displayed as a comma-separated list.
/// Examples: FORMAT, DELIMITER, NULL, HEADER, QUOTE, ESCAPE, etc.
pub options: Vec<CopyOption>,
/// Legacy COPY options (pre-PostgreSQL 9.0), specified without parentheses
/// and displayed separated by single spaces.
/// Also used by AWS Redshift extensions like IAM_ROLE, MANIFEST, etc.
pub legacy_options: Vec<CopyLegacyOption>,
/// Inline data rows for COPY FROM STDIN statements.
/// Each row is a vector of optional strings (`None` represents NULL).
/// Expected to be populated only when copying from STDIN with inline
/// data; an empty vector means no data section is displayed.
pub values: Vec<Vec<Option<String>>>,
}

impl Display for Copy {
    /// Writes the COPY statement as SQL text.
    ///
    /// The output is `COPY <source> TO|FROM <target>`, followed by the
    /// parenthesised modern options and the space-separated legacy options
    /// when present. If the statement carries inline data rows, the statement
    /// is terminated with `;` and the rows follow, one per line, formatted
    /// with the CSV settings derived from the statement's options and closed
    /// by a `\.` line.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("COPY")?;

        match &self.source {
            CopySource::Table {
                table_name,
                columns,
            } => {
                write!(f, " {table_name}")?;
                if !columns.is_empty() {
                    write!(f, " ({})", display_comma_separated(columns))?;
                }
            }
            CopySource::Query(query) => write!(f, " ({query})")?,
        }

        let direction = if self.to { "TO" } else { "FROM" };
        write!(f, " {direction} {}", self.target)?;

        if !self.options.is_empty() {
            write!(f, " ({})", display_comma_separated(&self.options))?;
        }
        if !self.legacy_options.is_empty() {
            write!(f, " {}", display_separated(&self.legacy_options, " "))?;
        }

        // Without inline rows the statement text is all there is to print.
        if self.values.is_empty() {
            return Ok(());
        }

        writeln!(f, ";")?;

        let csv_options = CsvFormatOptions::from_copy_options(&self.options, &self.legacy_options);

        for row in &self.values {
            let mut fields = row.iter();
            // The first field has no leading delimiter; every later one does.
            if let Some(first) = fields.next() {
                csv_options.format_csv_field(f, first.as_deref())?;
                for field in fields {
                    write!(f, "{}", csv_options.delimiter)?;
                    csv_options.format_csv_field(f, field.as_deref())?;
                }
            }
            writeln!(f)?;
        }

        // End-of-data marker.
        f.write_str("\\.")
    }
}
Loading