diff --git a/keyvalues-parser/fuzz/Cargo.toml b/keyvalues-parser/fuzz/Cargo.toml index 0fdffab..eb965f5 100644 --- a/keyvalues-parser/fuzz/Cargo.toml +++ b/keyvalues-parser/fuzz/Cargo.toml @@ -24,3 +24,10 @@ name = "parse" path = "fuzz_targets/parse.rs" test = false doc = false + +[[bin]] +name = "error_invariants" +path = "fuzz_targets/error_invariants.rs" +test = false +doc = false +bench = false diff --git a/keyvalues-parser/fuzz/fuzz_targets/error_invariants.rs b/keyvalues-parser/fuzz/fuzz_targets/error_invariants.rs new file mode 100644 index 0000000..dcae1a5 --- /dev/null +++ b/keyvalues-parser/fuzz/fuzz_targets/error_invariants.rs @@ -0,0 +1,16 @@ +#![no_main] + +use keyvalues_parser::Vdf; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|text: &str| { + if let Err(err) = Vdf::parse(text) { + // Lots of fiddly logic in displaying that can panic + err.to_string(); + + // The error snippet should match the original text sliced using the error span + let from_orig = err.index_span().slice(text); + let from_snippet = err.error_snippet(); + assert_eq!(from_orig, from_snippet); + } +}); diff --git a/keyvalues-parser/src/error.rs b/keyvalues-parser/src/error.rs index 5a95fa0..51f8750 100644 --- a/keyvalues-parser/src/error.rs +++ b/keyvalues-parser/src/error.rs @@ -1,32 +1,27 @@ //! All error information for parsing and rendering -use std::fmt; +use std::{ + fmt, + ops::{RangeFrom, RangeInclusive}, +}; -/// Just a type alias for `Result` with a [`Error`] -pub type Result = std::result::Result; +/// An alias for `Result` with an [`RenderError`] +pub type RenderResult = std::result::Result; -// TODO: Swap out the `EscapedParseError` and `RawParseError` for an opaque `Error::Parse` variant -// that handles displaying the error -// TODO: should this whole thing be overhauled (future me here: yes) -// TODO: split the `Error` into a separate parse and render error - -/// All possible errors when parsing or rendering VDF text -/// -/// Currently the two variants are parse errors which currently only occurs when `pest` encounters +/// Errors encountered while rendering VDF text #[derive(Clone, Debug, PartialEq, Eq)] -pub enum Error { +pub enum RenderError { RenderError(fmt::Error), RawRenderError { invalid_char: char }, - Todo, } -impl From for Error { +impl From for RenderError { fn from(e: fmt::Error) -> Self { Self::RenderError(e) } } -impl fmt::Display for Error { +impl fmt::Display for RenderError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::RenderError(e) => write!(f, "Failed rendering input Error: {e}"), @@ -34,9 +29,401 @@ impl fmt::Display for Error { f, "Encountered invalid character in raw string: {invalid_char:?}" ), - Self::Todo => write!(f, "TODO"), } } } -impl std::error::Error for Error {} +impl std::error::Error for RenderError {} + +/// An alias for `Result` with a [`ParseError`] +pub type ParseResult = std::result::Result; + +#[derive(Clone, Debug)] +pub struct ParseError { + // TODO: move resolving the error into here and just pass in the original string instead of + // having all of these be `pub(crate)` and constructing the error outside of this module + pub(crate) inner: ParseErrorInner, + pub(crate) index_span: Span, + pub(crate) display_span: Span, + pub(crate) lines: String, + pub(crate) lines_start: usize, +} + +impl ParseError { + pub fn inner(&self) -> ParseErrorInner { + self.inner + } + + /// The span indicating where the error is in the original text + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::Vdf; + /// let vdf_text = "key value >:V extra bytes"; + /// let err = Vdf::parse(vdf_text).unwrap_err(); + /// let error_snippet = err.index_span().slice(vdf_text); + /// assert_eq!(error_snippet, ">:V extra bytes"); + /// ``` + pub fn index_span(&self) -> Span { + self.index_span.clone() + } + + pub fn line_col_span(&self) -> Span { + self.display_span.clone() + } + + pub fn lines(&self) -> &str { + &self.lines + } + + pub fn error_snippet(&self) -> &str { + let (mut start, end) = self.index_span.clone().into_inner(); + start -= self.lines_start; + match end { + Some(mut end) => { + end -= self.lines_start; + &self.lines[start..=end] + } + None => &self.lines[start..], + } + } +} + +// TODO: we could avoid virtually all of the allocations done in here +// TODO: could use loooots of wrappers to clean up the display code +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { + inner, + display_span, + lines, + .. + } = self; + let (display_start, display_end) = display_span.clone().into_inner(); + + writeln!(f, "error: {inner}")?; + writeln!(f, "at: {display_span}")?; + + let mut lines_iter = lines.lines().zip(display_start.line..).peekable(); + while let Some((line, line_idx)) = lines_iter.next() { + let line = line.replace('\n', " ").replace('\r', " "); + let line_idx_str = line_idx.to_string(); + writeln!(f, "{line_idx_str} | {line}")?; + + let on_start_line = line_idx == display_start.line; + let num_before = if on_start_line { + display_start.col.saturating_sub(1) + } else { + 0 + }; + let padding_before = " ".repeat(num_before); + + let (num_after, append_extra_arrow) = if let Some(display_end) = display_end { + let num_after = line.len().checked_sub(display_end.col).unwrap(); + (num_after, false) + } else { + let is_last_line = lines_iter.peek().is_none(); + (0, is_last_line) + }; + + let num_arrows = line.len().checked_sub(num_before + num_after).unwrap() + + append_extra_arrow as usize; + let arrows = "^".repeat(num_arrows); + + let blank_idx = " ".repeat(line_idx_str.len()); + + writeln!(f, "{blank_idx} | {padding_before}{arrows}")?; + } + + Ok(()) + } +} + +impl std::error::Error for ParseError {} + +/// Errors encountered while parsing VDF text +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseErrorInner { + /// Indicates that there were significant bytes found after the top-level pair + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("key value >:V extra bytes").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::LingeringBytes); + /// # print!("{err}"); + /// let expected = r#" + /// error: Found bytes after the top-level pair + /// at: 1:11 to the end of input + /// 1 | key value >:V extra bytes + /// | ^^^^^^^^^^^^^^^^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + LingeringBytes, + /// There was required whitespace that wasn't present + /// + /// There are very few places where whitespace is strictly _required_ + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("#baseBAD").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::ExpectedWhitespace); + /// # print!("{err}"); + /// let expected = r#" + /// error: Expected whitespace + /// at: 1:6 + /// 1 | #baseBAD + /// | ^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + ExpectedWhitespace, + /// The required top-level key-value pair was missing + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("#base robot_standard.pop").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::ExpectedNewlineAfterMacro); + /// # print!("{err}"); + /// let expected = r#" + /// error: Expected newline after macro definition + /// at: 1:25 to the end of input + /// 1 | #base robot_standard.pop + /// | ^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + ExpectedNewlineAfterMacro, + /// Encountered the end of input while parsing a string + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("key \"incomplete ...").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::EoiParsingQuotedString); + /// # print!("{err}"); + /// let expected = r#" + /// error: Encountered the end of input while parsing a quoted string + /// at: 1:5 to the end of input + /// 1 | key "incomplete ... + /// | ^^^^^^^^^^^^^^^^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + EoiParsingQuotedString, + EoiExpectedMacroPath, + InvalidMacroPath, + // TODO: remove this error variant in favor of a MissingTopLevelPair and + // ExpectedPairKeyOrMapClose + EoiExpectedPairKey, + InvalidPairKey, + EoiExpectedPairValue, + InvalidPairValue, + /// Encountered an invalid escape character in a quoted string + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse(r#"key "invalid -> \u""#).unwrap_err(); + /// # print!("{err}"); + /// let expected = r#" + /// error: Invalid escaped string character \u + /// at: 1:17 to 1:18 + /// 1 | key "invalid -> \u" + /// | ^^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + InvalidEscapedCharacter { + invalid: char, + }, + /// Encountered the end of input while parsing a map + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("key {\n foo {}").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::EoiParsingMap); + /// # print!("{err}"); + /// let expected = r#" + /// error: Encountered the end of input while parsing a map + /// at: 1:5 to the end of input + /// 1 | key { + /// | ^ + /// 2 | foo {} + /// | ^^^^^^^^^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + EoiParsingMap, + /// Encountered an invalid control character while parsing a comment + /// + /// # Example + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("// \0 is invalid").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::CommentControlCharacter); + /// # print!("{err}"); + /// let expected = " + /// error: Encountered an invalid control character while parsing a comment + /// at: 1:4 + /// 1 | // \0 is invalid + /// | ^ + /// ".trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + /// + /// ``` + /// # use keyvalues_parser::{Vdf, error::ParseErrorInner}; + /// let err = Vdf::parse("// Only valid before a newline -> \r uh oh").unwrap_err(); + /// assert_eq!(err.inner(), ParseErrorInner::CommentControlCharacter); + /// # print!("{err}"); + /// let expected = r#" + /// error: Encountered an invalid control character while parsing a comment + /// at: 1:35 + /// 1 | // Only valid before a newline -> uh oh + /// | ^ + /// "#.trim_start(); + /// assert_eq!(err.to_string(), expected); + /// ``` + // TODO: pretty up the display of `\r` akin to how we did in sd? + // TODO: store the invalid character + CommentControlCharacter, +} + +pub enum Component { + MacroPath, + PairKey, + PairValue, +} + +impl fmt::Display for ParseErrorInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::LingeringBytes => f.write_str("Found bytes after the top-level pair"), + Self::ExpectedWhitespace => f.write_str("Expected whitespace"), + Self::ExpectedNewlineAfterMacro => { + f.write_str("Expected newline after macro definition") + } + Self::EoiExpectedMacroPath => { + f.write_str("Found the end of input while looking for a macro path") + } + Self::InvalidMacroPath => { + f.write_str("Encountered an invalid character while parsing a macro path") + } + Self::EoiExpectedPairKey => { + f.write_str("Encountered the end of input while looking for a pair's key") + } + Self::InvalidPairKey => { + f.write_str("Encountered an invalid character while looking for a pair's key") + } + Self::EoiExpectedPairValue => { + f.write_str("Encountered the end of input while looking for a pair's key") + } + Self::InvalidPairValue => { + f.write_str("Encountered an invalid character while looking for a pair's key") + } + Self::EoiParsingQuotedString => { + f.write_str("Encountered the end of input while parsing a quoted string") + } + Self::InvalidEscapedCharacter { invalid } => { + write!(f, "Invalid escaped string character \\{invalid}") + } + Self::EoiParsingMap => f.write_str("Encountered the end of input while parsing a map"), + Self::CommentControlCharacter => { + f.write_str("Encountered an invalid control character while parsing a comment") + } + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct LineCol { + pub line: usize, + pub col: usize, +} + +impl Default for LineCol { + fn default() -> Self { + Self { line: 1, col: 1 } + } +} +impl fmt::Display for LineCol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { line, col } = self; + write!(f, "{line}:{col}") + } +} + +// TODO: Hide internals so that we can make changes without breaking the public API later +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Span { + Inclusive(RangeInclusive), + ToEoi(RangeFrom), +} + +impl Span { + pub fn new(start: T, maybe_end: Option) -> Self { + match maybe_end { + Some(end) => Self::Inclusive(start..=end), + None => Self::ToEoi(start..), + } + } + + pub fn into_inner(self) -> (T, Option) { + match self { + Self::Inclusive(r) => { + let (start, end) = r.into_inner(); + (start, Some(end)) + } + Self::ToEoi(r) => (r.start, None), + } + } +} + +impl Span { + pub fn slice<'span, 'text>(&'span self, s: &'text str) -> &'text str { + match self.to_owned() { + Self::Inclusive(r) => &s[r], + Self::ToEoi(r) => &s[r], + } + } +} + +impl From> for Span { + fn from(r: RangeInclusive) -> Self { + Self::Inclusive(r) + } +} + +impl From> for Span { + fn from(r: RangeFrom) -> Self { + Self::ToEoi(r) + } +} + +impl fmt::Display for Span { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Inclusive(r) => { + if r.start() == r.end() { + write!(f, "{}", r.start()) + } else { + write!(f, "{} to {}", r.start(), r.end()) + } + } + Self::ToEoi(r) => write!(f, "{} to the end of input", r.start), + } + } +} diff --git a/keyvalues-parser/src/lib.rs b/keyvalues-parser/src/lib.rs index 0fc9094..ae01abd 100644 --- a/keyvalues-parser/src/lib.rs +++ b/keyvalues-parser/src/lib.rs @@ -65,7 +65,7 @@ pub type Key<'text> = Cow<'text, str>; /// // "Inner Key" "Inner Value" /// // } /// println!("{}", parsed); -/// # Ok::<(), keyvalues_parser::error::Error>(()) +/// # Ok::<(), keyvalues_parser::error::ParseError>(()) /// ``` #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Vdf<'text> { diff --git a/keyvalues-parser/src/text/mod.rs b/keyvalues-parser/src/text/mod.rs index 969a513..d0b9383 100644 --- a/keyvalues-parser/src/text/mod.rs +++ b/keyvalues-parser/src/text/mod.rs @@ -1,2 +1,3 @@ +// TODO: remove the `text` module since we're only focused on text atm pub mod parse; pub mod render; diff --git a/keyvalues-parser/src/text/parse.rs b/keyvalues-parser/src/text/parse.rs index b841b14..aeb177d 100644 --- a/keyvalues-parser/src/text/parse.rs +++ b/keyvalues-parser/src/text/parse.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, str::Chars}; use crate::{ - error::{Error, Result}, + error::{LineCol, ParseError, ParseErrorInner, ParseResult, Span}, Key, Obj, PartialVdf, Value, Vdf, }; @@ -9,35 +9,35 @@ use crate::{ impl<'a> PartialVdf<'a> { /// Attempts to parse VDF text to a [`PartialVdf`] - pub fn parse(s: &'a str) -> Result { + pub fn parse(s: &'a str) -> ParseResult { escaped_parse(s) } - pub fn parse_raw(s: &'a str) -> Result { + pub fn parse_raw(s: &'a str) -> ParseResult { raw_parse(s) } } impl<'a> Vdf<'a> { /// Attempts to parse VDF text to a [`Vdf`] - pub fn parse(s: &'a str) -> Result { + pub fn parse(s: &'a str) -> ParseResult { Ok(Vdf::from(PartialVdf::parse(s)?)) } - pub fn parse_raw(s: &'a str) -> Result { + pub fn parse_raw(s: &'a str) -> ParseResult { Ok(Vdf::from(PartialVdf::parse_raw(s)?)) } } -pub fn raw_parse(s: &str) -> Result { +pub fn raw_parse(s: &str) -> ParseResult { parse_(s, false) } -pub fn escaped_parse(s: &str) -> Result { +pub fn escaped_parse(s: &str) -> ParseResult { parse_(s, true) } -pub fn parse_(s: &str, escape_chars: bool) -> Result { +pub fn parse_(s: &str, escape_chars: bool) -> ParseResult { let mut chars = CharIter::new(s); let bases = parse_macros(&mut chars)?; @@ -51,13 +51,13 @@ pub fn parse_(s: &str, escape_chars: bool) -> Result { eat_comments_whitespace_and_newlines(&mut chars)?; if chars.peek().is_some() { - Err(Error::Todo) // Lingering bytes + Err(chars.err(ParseErrorInner::LingeringBytes, chars.index()..)) } else { Ok(PartialVdf { bases, key, value }) } } -fn parse_macros<'text>(chars: &mut CharIter<'text>) -> Result> { +fn parse_macros<'text>(chars: &mut CharIter<'text>) -> ParseResult> { let mut macros = Vec::new(); loop { eat_comments_whitespace_and_newlines(chars)?; @@ -73,13 +73,19 @@ fn parse_macros<'text>(chars: &mut CharIter<'text>) -> Result> { fn parse_maybe_macro<'text>( chars: &mut CharIter<'text>, macros: &mut Vec<&'text str>, -) -> Result> { +) -> ParseResult> { + // FIXME: this should also support `#include` too if !chars.next_n_if_eq(['#', 'b', 'a', 's', 'e']) { return Ok(None); } if !eat_whitespace(chars) { - return Err(Error::Todo); + let start_idx = chars.index(); + let err_span: Span<_> = match chars.next() { + None => (start_idx..).into(), + Some(_) => (start_idx..=start_idx).into(), + }; + return Err(chars.err(ParseErrorInner::ExpectedWhitespace, err_span)); } let macro_ = parse_quoted_raw_or_unquoted_string(chars)?; @@ -90,32 +96,59 @@ fn parse_maybe_macro<'text>( if eat_newlines(chars) { Ok(Some(())) } else { - Err(Error::Todo) + Err(chars.err(ParseErrorInner::ExpectedNewlineAfterMacro, chars.index()..)) } } -fn parse_quoted_raw_or_unquoted_string<'text>(chars: &mut CharIter<'text>) -> Result<&'text str> { +fn parse_quoted_raw_or_unquoted_string<'text>( + chars: &mut CharIter<'text>, +) -> ParseResult<&'text str> { if chars.peek() == Some('"') { parse_quoted_raw_string(chars) } else { - parse_unquoted_string(chars) + parse_unquoted_string(chars).map_err(|(kind, span)| { + let kind = match kind { + InvalidUnquotedString::Eoi => ParseErrorInner::EoiExpectedMacroPath, + InvalidUnquotedString::InvalidChar => ParseErrorInner::InvalidMacroPath, + }; + chars.err(kind, span) + }) } } // TODO: error on `\r` or `\n` in quoted str (wait no i think that's valid) -fn parse_quoted_raw_string<'text>(chars: &mut CharIter<'text>) -> Result<&'text str> { +fn parse_quoted_raw_string<'text>(chars: &mut CharIter<'text>) -> ParseResult<&'text str> { assert!(chars.ensure_next('"')); let start_idx = chars.index(); - while chars.next().ok_or(Error::Todo)? != '"' {} + while chars + .next() + .ok_or_else(|| chars.err(ParseErrorInner::EoiParsingQuotedString, start_idx..))? + != '"' + {} let end_idx = chars.index() - '"'.len_utf8(); Ok(&chars.original_str()[start_idx..end_idx]) } -fn parse_unquoted_string<'text>(chars: &mut CharIter<'text>) -> Result<&'text str> { +// Unquoted strings are often used as the fallthrough for various alternations. This can lead to +// confusing error messages when treating them with a global error type, so instead we force a +// vague local error that gets translated and bubbled up depending on where it's called +enum InvalidUnquotedString { + Eoi, + InvalidChar, +} + +fn parse_unquoted_string<'text>( + chars: &mut CharIter<'text>, +) -> Result<&'text str, (InvalidUnquotedString, Span)> { let start_idx = chars.index(); - match chars.next().ok_or(Error::Todo)? { - '"' | '{' | '}' | ' ' | '\t' | '\r' | '\n' => return Err(Error::Todo), + match chars + .next() + .ok_or((InvalidUnquotedString::Eoi, (0..).into()))? + { + '"' | '{' | '}' | ' ' | '\t' | '\r' | '\n' => { + return Err((InvalidUnquotedString::InvalidChar, (0..).into())); + } _ => {} } @@ -134,31 +167,38 @@ fn parse_unquoted_string<'text>(chars: &mut CharIter<'text>) -> Result<&'text st } } -fn parse_pair<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> Result> { - let key = parse_string(chars, escape_chars)?; +fn parse_pair<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> ParseResult> { + let key = if chars.peek() == Some('"') { + parse_quoted_string(chars, escape_chars) + } else { + match parse_unquoted_string(chars) { + Ok(s) => Ok(Cow::Borrowed(s)), + Err((kind, span)) => { + let kind = match kind { + InvalidUnquotedString::Eoi => ParseErrorInner::EoiExpectedPairKey, + InvalidUnquotedString::InvalidChar => ParseErrorInner::InvalidPairKey, + }; + Err(chars.err(kind, span)) + } + } + }?; eat_comments_whitespace_and_newlines(chars)?; let value = parse_value(chars, escape_chars)?; Ok(Vdf { key, value }) } -fn parse_string<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> Result> { - if chars.peek() == Some('"') { - parse_quoted_string(chars, escape_chars) - } else { - let s = parse_unquoted_string(chars)?; - Ok(Cow::Borrowed(s)) - } -} - fn parse_quoted_string<'text>( chars: &mut CharIter<'text>, escape_chars: bool, -) -> Result> { +) -> ParseResult> { + let str_start_index = chars.index(); assert!(chars.ensure_next('"')); let start_idx = chars.index(); loop { - let peeked = chars.peek().ok_or(Error::Todo)?; + let peeked = chars + .peek() + .ok_or_else(|| chars.err(ParseErrorInner::EoiParsingQuotedString, str_start_index..))?; // We only care about potential escaped characters if `escape_chars` is set. Otherwise we // only break on " for a quoted string if peeked == '"' || (peeked == '\\' && escape_chars) { @@ -171,23 +211,40 @@ fn parse_quoted_string<'text>( let text_chunk = &chars.original_str()[start_idx..end_idx]; // If our string contains escaped characters then it has to be a `Cow::Owned` otherwise it can // be `Cow::Borrowed` - let key = if chars.peek().ok_or(Error::Todo)? == '"' { + let key = if chars + .peek() + .ok_or_else(|| chars.err(ParseErrorInner::EoiParsingQuotedString, str_start_index..))? + == '"' + { assert!(chars.ensure_next('"')); Cow::Borrowed(text_chunk) } else { assert!(chars.peek().unwrap() == '\\'); let mut escaped = text_chunk.to_owned(); loop { - let ch = chars.next().ok_or(Error::Todo)?; + let ch = chars.next().ok_or_else(|| { + chars.err(ParseErrorInner::EoiParsingQuotedString, str_start_index..) + })?; match ch { '"' => break Cow::Owned(escaped), - '\\' => match chars.next().ok_or(Error::Todo)? { + '\\' => match chars.next().ok_or_else(|| { + chars.err(ParseErrorInner::EoiParsingQuotedString, str_start_index..) + })? { 'n' => escaped.push('\n'), 'r' => escaped.push('\r'), 't' => escaped.push('\t'), '\\' => escaped.push('\\'), '\"' => escaped.push('\"'), - _ => return Err(Error::Todo), + invalid => { + // Backtrack to reconstruct the error span since this is a hot loop and we + // don't want to track a span for every character + let err_span_end = chars.index() - invalid.len_utf8(); + let err_span_start = err_span_end - '\\'.len_utf8(); + return Err(chars.err( + ParseErrorInner::InvalidEscapedCharacter { invalid }, + err_span_start..=err_span_end, + )); + } }, regular => escaped.push(regular), } @@ -197,27 +254,48 @@ fn parse_quoted_string<'text>( Ok(key) } -fn parse_value<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> Result> { +fn parse_value<'text>( + chars: &mut CharIter<'text>, + escape_chars: bool, +) -> ParseResult> { let value = match chars.peek() { Some('{') => { let obj = parse_obj(chars, escape_chars)?; Value::Obj(obj) } - _ => { - let s = parse_string(chars, escape_chars)?; + Some('"') => { + let s = parse_quoted_string(chars, escape_chars)?; Value::Str(s) } + _ => match parse_unquoted_string(chars) { + Ok(s) => Value::Str(Cow::Borrowed(s)), + Err((kind, span)) => { + let kind = match kind { + InvalidUnquotedString::Eoi => ParseErrorInner::EoiExpectedPairValue, + InvalidUnquotedString::InvalidChar => ParseErrorInner::InvalidPairValue, + }; + return Err(chars.err(kind, span)); + } + }, }; Ok(value) } -fn parse_obj<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> Result> { +fn parse_obj<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> ParseResult> { + let err_span_start = chars.index(); assert!(chars.ensure_next('{')); eat_comments_whitespace_and_newlines(chars)?; let mut obj = Obj::new(); - while chars.peek().ok_or(Error::Todo)? != '}' { + while chars + .peek() + // TODO: Switch error to Expected pair key or end of map + .ok_or_else(|| chars.err(ParseErrorInner::EoiParsingMap, err_span_start..))? + != '}' + { + // TODO: assert that the error isn't eoi parsing key because that should be represented in + // the error above let Vdf { key, value } = parse_pair(chars, escape_chars)?; obj.0.entry(key).or_default().push(value); @@ -228,7 +306,7 @@ fn parse_obj<'text>(chars: &mut CharIter<'text>, escape_chars: bool) -> Result) -> Result { +fn eat_comments_whitespace_and_newlines(chars: &mut CharIter<'_>) -> ParseResult { let mut ate = false; while eat_whitespace_and_newlines(chars) || eat_comments(chars)? { ate = true; @@ -237,7 +315,7 @@ fn eat_comments_whitespace_and_newlines(chars: &mut CharIter<'_>) -> Result) -> Result { +fn eat_comments_and_whitespace(chars: &mut CharIter<'_>) -> ParseResult { let mut ate = false; while eat_comments(chars)? || eat_whitespace(chars) { ate = true; @@ -256,23 +334,33 @@ fn eat_whitespace_and_newlines(chars: &mut CharIter<'_>) -> bool { } // All characters other than some control characters are permitted -fn eat_comments(chars: &mut CharIter<'_>) -> Result { +fn eat_comments(chars: &mut CharIter<'_>) -> ParseResult { if !chars.next_n_if_eq(['/', '/']) { Ok(false) } else { loop { match chars.peek() { Some('\r') => { + let err_index = chars.index(); chars.unwrap_next(); match chars.next() { Some('\n') => break, - _ => return Err(Error::Todo), + _ => { + return Err(chars.err( + ParseErrorInner::CommentControlCharacter, + err_index..=err_index, + )) + } } } None | Some('\n') => break, // Various control characters Some('\u{00}'..='\u{08}' | '\u{0A}'..='\u{1F}' | '\u{7F}') => { - return Err(Error::Todo) + let err_index = chars.index(); + return Err(chars.err( + ParseErrorInner::CommentControlCharacter, + err_index..=err_index, + )); } _ => _ = chars.unwrap_next(), } @@ -396,6 +484,109 @@ impl<'text> CharIter<'text> { } arr } + + /// Emit an error given its kind and span + /// + /// We don't keep track of line/col during parsing to keep the happy path fast. Instead we track + /// indices into the original string and only translate them to line/col and resolve the full + /// lines that the error lies on when we go to emit the error while we still have access to the + /// original text + #[must_use] + fn err(&self, inner: ParseErrorInner, index_span: impl Into>) -> ParseError { + let index_span = index_span.into(); + + let (start, end) = index_span.clone().into_inner(); + + // TODO: switch this to be peekable, so that we can peek instead of breaking so eagerly? + // Hopefully that removes the need to manually track `last_index` and `last_c` + let mut chars = self.original_str().char_indices(); + let mut line_col = LineCol::default(); + let mut current_line_start = 0; + + // Resolve the line/col for the start of the span and find the start of the line + let mut last_index = 0; + let mut last_c = '\0'; + let (error_lines_start, error_span_start) = loop { + let Some((i, c)) = chars.next() else { + break (current_line_start, line_col); + }; + last_index = i; + last_c = c; + + if i >= start { + break (current_line_start, line_col); + } + + if c == '\n' { + current_line_start = i + '\n'.len_utf8(); + line_col.col = 1; + line_col.line += 1; + } else { + line_col.col += 1; + } + }; + + let maybe_end = end.map(|end| { + assert!(start <= end, "No backwards error span"); + // Resolve the line/col for the end of the span and maybe we happened to be at the end of + // the final line + let (maybe_error_lines_end, error_span_end) = if last_index >= end { + let maybe_error_lines_end = (last_c == '\n').then_some(current_line_start); + (maybe_error_lines_end, line_col) + } else { + loop { + let Some((i, c)) = chars.next() else { + break (None, line_col); + }; + last_index = i; + + if c == '\n' { + current_line_start = i + '\n'.len_utf8(); + line_col.col = 1; + line_col.line += 1; + } else { + line_col.col += 1; + } + + if i >= end { + let maybe_error_lines_end = (c == '\n').then_some(current_line_start); + break (maybe_error_lines_end, line_col); + } + } + }; + + // Find the end of the last error line + let error_lines_end = maybe_error_lines_end.unwrap_or_else(|| loop { + match chars.next() { + Some((i, '\n')) => break i, + Some((i, _)) => last_index = i, + // FIXME if we run up to the EOI trying to find the end of the error line then + // we should have it be `None` instead of the final index + None => break last_index, + } + }); + + (error_lines_end, error_span_end) + }); + let (error_lines_end, error_span_end) = match maybe_end { + Some((lines, span)) => (Some(lines), Some(span)), + None => (None, None), + }; + + let lines_start = error_lines_start; + let display_span = Span::new(error_span_start, error_span_end); + let lines = Span::new(error_lines_start, error_lines_end) + .slice(self.original_str()) + .to_owned(); + + ParseError { + inner, + index_span, + display_span, + lines, + lines_start, + } + } } impl Iterator for CharIter<'_> { diff --git a/keyvalues-parser/src/text/render.rs b/keyvalues-parser/src/text/render.rs index 1a3b4fd..d2fdec1 100644 --- a/keyvalues-parser/src/text/render.rs +++ b/keyvalues-parser/src/text/render.rs @@ -1,6 +1,9 @@ use std::fmt::{self, Write}; -use crate::{error::Error, Obj, PartialVdf, Value, Vdf}; +use crate::{ + error::{RenderError, RenderResult}, + Obj, PartialVdf, Value, Vdf, +}; fn multiple_char(c: char, amount: usize) -> String { std::iter::repeat(c).take(amount).collect() @@ -83,13 +86,13 @@ impl fmt::Display for PartialVdf<'_> { impl PartialVdf<'_> { // TODO: do we really want to return a crate error here? It will always be a formatting error - pub fn render(&self, writer: &mut impl Write) -> crate::error::Result<()> { + pub fn render(&self, writer: &mut impl Write) -> RenderResult<()> { self._render(writer, RenderType::Raw).map_err(Into::into) } - pub fn render_raw(&self, writer: &mut impl Write) -> crate::error::Result<()> { + pub fn render_raw(&self, writer: &mut impl Write) -> RenderResult<()> { match self.find_invalid_raw_char() { - Some(invalid_char) => Err(Error::RawRenderError { invalid_char }), + Some(invalid_char) => Err(RenderError::RawRenderError { invalid_char }), None => self._render(writer, RenderType::Raw).map_err(Into::into), } } @@ -118,13 +121,13 @@ impl fmt::Display for Vdf<'_> { } impl Vdf<'_> { - pub fn render(&self, writer: &mut impl Write) -> crate::error::Result<()> { + pub fn render(&self, writer: &mut impl Write) -> RenderResult<()> { write!(writer, "{}", self).map_err(Into::into) } - pub fn render_raw(&self, writer: &mut impl Write) -> crate::error::Result<()> { + pub fn render_raw(&self, writer: &mut impl Write) -> RenderResult<()> { match self.find_invalid_raw_char() { - Some(invalid_char) => Err(Error::RawRenderError { invalid_char }), + Some(invalid_char) => Err(RenderError::RawRenderError { invalid_char }), None => self .write_indented(writer, 0, RenderType::Raw) .map_err(Into::into), diff --git a/keyvalues-parser/tests/regressions/fuzzer.rs b/keyvalues-parser/tests/regressions/fuzzer.rs index 9fd207e..89aef46 100644 --- a/keyvalues-parser/tests/regressions/fuzzer.rs +++ b/keyvalues-parser/tests/regressions/fuzzer.rs @@ -1,31 +1,12 @@ use keyvalues_parser::Vdf; -use pretty_assertions::assert_eq; - -type BoxedResult = Result>; - -// Mimics the behavior of the parse fuzzer test for regressions testing -fn parse_valid(contents: &str) -> BoxedResult<()> { - let parsed = Vdf::parse(contents).expect("Input has to be valid here"); - let vdf_text = parsed.to_string(); - let reparsed = Vdf::parse(&vdf_text)?; - assert_eq!(parsed, reparsed); - - Ok(()) -} - -// Checks that we return an error instead of panicking or hanging -fn parse_invalid(contents: &str) -> BoxedResult<()> { - Vdf::parse(contents).unwrap_err(); - Ok(()) -} macro_rules! gen_fuzzer_tests { ( $test_fn:ident, $( ( $name:ident, $input:expr ) ),* $(,)? ) => { $( #[test] - fn $name() -> BoxedResult<()> { - let contents = $input; - $test_fn(contents) + fn $name() { + let vdf_text = $input; + $test_fn(vdf_text) } )* }; @@ -34,6 +15,14 @@ macro_rules! gen_fuzzer_tests { mod valid { use super::*; + // Mimics the behavior of the parse fuzzer test for regressions testing + fn parse_valid(vdf_text: &str) { + let parsed = Vdf::parse(vdf_text).expect("Input has to be valid here"); + let vdf_text = parsed.to_string(); + let reparsed = Vdf::parse(&vdf_text).expect("Roundtrip should still parse"); + pretty_assertions::assert_eq!(parsed, reparsed); + } + gen_fuzzer_tests!( parse_valid, (unqoted_backslash_key, r#"\ """#), @@ -44,6 +33,11 @@ mod valid { mod invalid { use super::*; + // Checks that we return an error instead of panicking or hanging + fn parse_invalid(vdf_text: &str) { + Vdf::parse(vdf_text).unwrap_err(); + } + gen_fuzzer_tests!( parse_invalid, (empty, ""), @@ -52,4 +46,25 @@ mod invalid { (macrolike_key_then_str, "#base no_vdf"), (trailing_bytes, "foo {}\n\ntrailing bytes"), ); + + fn error_invariants(vdf_text: &str) { + let err = Vdf::parse(vdf_text).unwrap_err(); + + // Lots of fiddly logic in displaying that can panic + err.to_string(); + + // The error snippet should match the original text sliced using the error span + let from_orig = err.index_span().slice(vdf_text); + let from_snippet = err.error_snippet(); + assert_eq!(from_orig, from_snippet); + } + + gen_fuzzer_tests!( + error_invariants, + // FIXME: I think this needs to allow for the error line ending to be optional even when + // the error ending isn't. The end of the line span should run up to EOI and trying to use + // `RangeInclusive` to represent that runs into issues with multi-byte chars + (newline_in_error_span_before_eoi, "\"\\\n"), + (newline_as_invalid_escaped_char, "\"\\\n\""), + ); } diff --git a/keyvalues-serde/src/error.rs b/keyvalues-serde/src/error.rs index dabbb7c..6bad2ee 100644 --- a/keyvalues-serde/src/error.rs +++ b/keyvalues-serde/src/error.rs @@ -3,7 +3,7 @@ // TODO: figure this out before the next breaking release #![allow(clippy::large_enum_variant)] -use keyvalues_parser::error::Error as ParserError; +use keyvalues_parser::error::ParseError; use serde::{de, ser}; use std::{ @@ -18,7 +18,7 @@ pub type Result = std::result::Result; #[derive(Debug)] pub enum Error { Message(String), - Parse(ParserError), + Parse(ParseError), Io(io::Error), NonFiniteFloat(f32), EofWhileParsingAny, @@ -64,8 +64,8 @@ impl From for Error { } } -impl From for Error { - fn from(e: ParserError) -> Self { +impl From for Error { + fn from(e: ParseError) -> Self { Self::Parse(e) } }