diff --git a/harper-core/src/linting/confident.rs b/harper-core/src/linting/confident.rs index 7150131a0..4204c6cd4 100644 --- a/harper-core/src/linting/confident.rs +++ b/harper-core/src/linting/confident.rs @@ -1,6 +1,6 @@ use crate::{ Token, - patterns::{OwnedPatternExt, Pattern, SequencePattern, Word}, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -11,16 +11,8 @@ pub struct Confident { impl Default for Confident { fn default() -> Self { - let pattern = SequencePattern::default() - .then( - (|tok: &Token, _source: &[char]| tok.kind.is_verb() || tok.kind.is_determiner()) - .or(Box::new(Word::new("very"))), - ) - .then_whitespace() - .t_aco("confidant"); - Self { - pattern: Box::new(pattern), + pattern: Box::new(seq![Verb | Det | "very", WS, "confidant"]), } } } diff --git a/harper-core/src/linting/despite_of.rs b/harper-core/src/linting/despite_of.rs index 90ed3d9f8..eaa9cfded 100644 --- a/harper-core/src/linting/despite_of.rs +++ b/harper-core/src/linting/despite_of.rs @@ -1,6 +1,6 @@ use crate::{ Token, TokenStringExt, - patterns::{Pattern, SequencePattern}, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -11,12 +11,8 @@ pub struct DespiteOf { impl Default for DespiteOf { fn default() -> Self { - let pattern = SequencePattern::aco("despite") - .then_whitespace() - .then_exact_word("of"); - Self { - pattern: Box::new(pattern), + pattern: Box::new(seq!["despite", WS, exact("of")]), } } } diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index e5b1291e6..4c0a82a2b 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -1,6 +1,6 @@ use crate::{ - Lrc, Token, TokenStringExt, - patterns::{EitherPattern, Pattern, SequencePattern, WordSet}, + Token, TokenStringExt, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -21,43 +21,11 @@ impl Default for ModalOf { words.add(&format!("{}n't", word)); }); - let modal_of = Lrc::new( - SequencePattern::default() - .then(words) - .then_whitespace() - .t_aco("of"), - ); - - let ws_course = Lrc::new(SequencePattern::default().then_whitespace().t_aco("course")); - - let modal_of_course = Lrc::new( - SequencePattern::default() - .then(modal_of.clone()) - .then(ws_course.clone()), - ); - - let anyword_might_of = Lrc::new( - SequencePattern::default() - .then_any_word() - .then_whitespace() - .t_aco("might") - .then_whitespace() - .t_aco("of"), - ); - - let anyword_might_of_course = Lrc::new( - SequencePattern::default() - .then(anyword_might_of.clone()) - .then(ws_course.clone()), - ); - Self { - pattern: Box::new(EitherPattern::new(vec![ - Box::new(anyword_might_of_course), - Box::new(modal_of_course), - Box::new(anyword_might_of), - Box::new(modal_of), - ])), + pattern: Box::new(choice![ + seq![AnyWord, WS, "might", WS, "of", not_next![WS, "course"]], + seq![words, WS, "of", not_next![WS, "course"]], + ]), } } } @@ -69,20 +37,7 @@ impl PatternLinter for ModalOf { fn match_to_lint(&self, matched_toks: &[Token], source_chars: &[char]) -> Option { let modal_index = match matched_toks.len() { - // Without context, always an error from the start - 3 => 0, 5 => { - // False positives: modal _ of _ course / adj. _ might _ of / art. _ might _ of - let w3_text = matched_toks - .last() - .unwrap() - .span - .get_content(source_chars) - .iter() - .collect::(); - if w3_text.as_str() != "of" { - return None; - } let w1_kind = &matched_toks.first().unwrap().kind; // the might of something, great might of something if w1_kind.is_adjective() || w1_kind.is_determiner() { @@ -91,9 +46,8 @@ impl PatternLinter for ModalOf { // not a false positive, skip context before 2 } - // False positive: _ might _ of _ course - 7 => return None, - _ => unreachable!(), + // Without context, always an error from the start + _ => 0, }; let span_modal_of = matched_toks[modal_index..modal_index + 3].span().unwrap(); diff --git a/harper-core/src/patterns/any_pattern.rs b/harper-core/src/patterns/any_pattern.rs index 44063572a..0c4367e9c 100644 --- a/harper-core/src/patterns/any_pattern.rs +++ b/harper-core/src/patterns/any_pattern.rs @@ -3,6 +3,7 @@ use crate::Token; use super::Pattern; /// A [`Pattern`] that will match any single token. +#[derive(Clone, Copy)] pub struct AnyPattern; impl Pattern for AnyPattern { diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index 2b857fe01..91d6e1f5f 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -16,6 +16,7 @@ mod indefinite_article; mod inflection_of_be; mod invert; mod naive_pattern_group; +pub mod new_syntax_experiment; mod nominal_phrase; mod pattern_map; mod repeating_pattern; @@ -144,13 +145,18 @@ where } #[cfg(feature = "concurrent")] -impl Pattern for F -where - F: Fn(&Token, &[char]) -> bool, - F: Send + Sync, -{ +pub trait SinlgeTokenPattern: Send + Sync + 'static { + fn matches_token(&self, token: &Token, source: &[char]) -> bool; +} +#[cfg(not(feature = "concurrent"))] +pub trait SinlgeTokenPattern: 'static { + fn matches_token(&self, token: &Token, source: &[char]) -> bool; +} + +impl Pattern for P { fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - if self(tokens.first()?, source) { + let t = tokens.first()?; + if self.matches_token(t, source) { Some(1) } else { None @@ -158,17 +164,25 @@ where } } +#[cfg(feature = "concurrent")] +impl SinlgeTokenPattern for F +where + F: Fn(&Token, &[char]) -> bool, + F: Send + Sync + 'static, +{ + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self(token, source) + } +} + #[cfg(not(feature = "concurrent"))] -impl Pattern for F +impl SinlgeTokenPattern for F where F: Fn(&Token, &[char]) -> bool, + F: 'static, { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - if self(tokens.first()?, source) { - Some(1) - } else { - None - } + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self(token, source) } } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs new file mode 100644 index 000000000..74e3a7108 --- /dev/null +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -0,0 +1,408 @@ +use crate::{CharString, Token}; + +use super::{Pattern, SinlgeTokenPattern, WhitespacePattern, Word}; + +pub trait IntoPattern { + type Output: Pattern + 'static; + fn into_pattern(self) -> Self::Output; + + fn into_pattern_boxed(self) -> Box + where + Self: Sized, + { + Box::new(self.into_pattern()) + } +} +impl IntoPattern for T { + type Output = T; + fn into_pattern(self) -> Self::Output { + self + } +} +impl IntoPattern for &'static str { + type Output = Word; + fn into_pattern(self) -> Self::Output { + Word::new(self) + } +} + +pub struct Sequence { + patterns: Vec>, +} +impl Sequence { + pub fn new(patterns: Vec>) -> Self { + Self { patterns } + } +} +impl Pattern for Sequence { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut tok_cursor = 0; + + for pat in &self.patterns { + let match_length = pat.matches(&tokens[tok_cursor..], source)?; + tok_cursor += match_length; + } + + Some(tok_cursor) + } +} + +pub struct Choice { + patterns: Vec>, +} +impl Choice { + pub fn new(patterns: Vec>) -> Self { + Self { patterns } + } +} +impl Pattern for Choice { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut longest: Option = None; + + for pattern in self.patterns.iter() { + let Some(match_len) = pattern.matches(tokens, source) else { + continue; + }; + + longest = Some(longest.unwrap_or(0).max(match_len)); + } + + longest + } +} + +struct ExactWord { + word: CharString, +} +impl SinlgeTokenPattern for ExactWord { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + if !token.kind.is_word() { + return false; + } + if token.span.len() != self.word.len() { + return false; + } + + let chars = token.span.get_content(source); + chars == self.word.as_slice() + } +} +pub fn exact(word: &str) -> impl Pattern { + ExactWord { + word: word.chars().collect(), + } +} + +struct Not

(P); +impl Pattern for Not

{ + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self.0.matches(tokens, source).is_some() { + None + } else { + Some(0) + } + } +} +pub fn not(pattern: impl Pattern) -> impl Pattern { + Not(pattern) +} + +struct Ahead

(P); +impl Pattern for Ahead

{ + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self.0.matches(tokens, source).is_some() { + Some(0) + } else { + None + } + } +} +pub fn ahead(pattern: impl Pattern) -> impl Pattern { + Ahead(pattern) +} + +/// Matches any single token, regardless of its kind. +#[derive(Clone, Copy)] +pub struct AnyToken; +impl SinlgeTokenPattern for AnyToken { + fn matches_token(&self, _token: &Token, _source: &[char]) -> bool { + true + } +} + +/// Matches any word token. +#[derive(Clone, Copy)] +pub struct AnyWord; +impl SinlgeTokenPattern for AnyWord { + fn matches_token(&self, token: &Token, _source: &[char]) -> bool { + token.kind.is_word() + } +} + +pub const WS: WhitespacePattern = WhitespacePattern; + +pub mod predicates { + use crate::{ + Token, TokenKind, + patterns::{SinlgeTokenPattern, Word}, + }; + + pub trait IntoSingleTokenPattern { + type Output: SinlgeTokenPattern; + fn into_single_token_pattern(self) -> Self::Output; + } + impl IntoSingleTokenPattern for T { + type Output = T; + fn into_single_token_pattern(self) -> Self::Output { + self + } + } + impl IntoSingleTokenPattern for &'static str { + type Output = Word; + fn into_single_token_pattern(self) -> Self::Output { + Word::new(self) + } + } + + #[derive(Clone, Copy)] + pub struct Not(P); + impl SinlgeTokenPattern for Not

{ + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + !self.0.matches_token(token, source) + } + } + impl std::ops::Not for Not

{ + type Output = P; + fn not(self) -> Self::Output { + self.0 + } + } + impl std::ops::BitAnd for Not

{ + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl std::ops::BitOr for Not

{ + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + #[derive(Clone, Copy)] + pub struct And(P1, P2); + impl SinlgeTokenPattern for And { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self.0.matches_token(token, source) && self.1.matches_token(token, source) + } + } + impl std::ops::Not for And { + type Output = Not; + fn not(self) -> Self::Output { + Not(self) + } + } + impl + std::ops::BitAnd for And + { + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl + std::ops::BitOr for And + { + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + #[derive(Clone, Copy)] + pub struct Or(P1, P2); + impl SinlgeTokenPattern for Or { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self.0.matches_token(token, source) || self.1.matches_token(token, source) + } + } + impl + std::ops::BitAnd for Or + { + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl + std::ops::BitOr for Or + { + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + macro_rules! add_operators { + ($name:ident) => { + impl std::ops::Not for $name { + type Output = Not<$name>; + fn not(self) -> Self::Output { + Not(self) + } + } + impl std::ops::BitAnd for $name { + type Output = And<$name, R::Output>; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl std::ops::BitOr for $name { + type Output = Or<$name, R::Output>; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + }; + } + macro_rules! create_predicte { + ($name:ident, $fn:expr) => { + #[derive(Clone, Copy)] + pub struct $name; + impl SinlgeTokenPattern for $name { + fn matches_token(&self, token: &Token, _: &[char]) -> bool { + $fn(&token.kind) + } + } + + add_operators!($name); + }; + } + + create_predicte!(Noun, TokenKind::is_noun); + create_predicte!(NounPl, TokenKind::is_plural_noun); + create_predicte!(Pronoun, TokenKind::is_pronoun); + create_predicte!(PronounPl, TokenKind::is_plural_pronoun); + create_predicte!(Nominal, TokenKind::is_nominal); + create_predicte!(NominalPl, TokenKind::is_plural_nominal); + + create_predicte!(Verb, TokenKind::is_verb); + create_predicte!(AuxVerb, TokenKind::is_auxiliary_verb); + create_predicte!(LinkingVerb, TokenKind::is_linking_verb); + create_predicte!(Adj, TokenKind::is_adjective); + create_predicte!(Adverb, TokenKind::is_adverb); + create_predicte!(Det, TokenKind::is_determiner); + create_predicte!(Prep, TokenKind::is_preposition); + + create_predicte!(Common, TokenKind::is_common_word); + create_predicte!(Homograph, TokenKind::is_likely_homograph); + + #[derive(Clone, Copy)] + pub struct Punct; + + macro_rules! define_punct { + ($name:ident, $char:literal, $fn:expr) => { + impl Punct<$char> { + pub const $name: Punct<$char> = Self; + } + impl SinlgeTokenPattern for Punct<$char> { + fn matches_token(&self, token: &Token, _source: &[char]) -> bool { + $fn(&token.kind) + } + } + }; + } + + define_punct!(HYPHEN, '-', TokenKind::is_hyphen); + define_punct!(COMMA, ',', TokenKind::is_comma); + define_punct!(QUOTE, '"', TokenKind::is_quote); + define_punct!(APOS, '\'', TokenKind::is_apostrophe); + define_punct!(PERIOD, '.', TokenKind::is_period); + define_punct!(AT, '@', TokenKind::is_at); + + fn foo() { + let a = (Adj | Noun | Det) & !Verb & Punct::COMMA; + let b = Verb | Det | "very"; + } +} + +pub mod prelude { + + pub use super::super::{Pattern, WordSet}; + pub use super::predicates::{ + Adj, Adverb, AuxVerb, Common, Det, Homograph, LinkingVerb, Nominal, NominalPl, Noun, + NounPl, Prep, Pronoun, PronounPl, Punct, Verb, + }; + pub use super::{AnyToken, AnyWord, Choice, IntoPattern, Sequence, WS, exact}; + + /// Matches a sequence of patterns. + /// + /// This is the same as concatenating the patterns together. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let confession = seq!["I", WS, "love", WS, "you"]; + /// ``` + macro_rules! seq { + ($item:expr $(,)?) => { + IntoPattern::into_pattern($item) + }; + ($($item:expr),* $(,)?) => { + Sequence::new(vec![$(IntoPattern::into_pattern_boxed($item)),*]) + }; + } + /// Matches any of the given patterns. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let fav_animal = choice!["dog", "cat", seq!["black", WS, "bear"]]; + /// ``` + macro_rules! choice { + ($($item:literal),+ $(,)?) => { + WordSet::new(&[$($item),*]) + }; + ($($item:expr),* $(,)?) => { + Choice::new(vec![$(IntoPattern::into_pattern_boxed($item)),*]) + }; + } + + /// An assertion that matches the given sequence of patterns, but does NOT + /// consume any tokens. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let love = seq!["I", WS, "love", next![WS, "you"]]; + /// ``` + macro_rules! next { + ($($item:expr),* $(,)?) => { + crate::patterns::new_syntax_experiment::ahead(seq![$($item),*]) + }; + } + /// An assertion that matches anything but the given sequence of patterns. + /// No tokens are consumed. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let love_no_ego = seq!["I", WS, "love", not_next![WS, "myself"]]; + /// ``` + macro_rules! not_next { + ($($item:expr),* $(,)?) => { + crate::patterns::new_syntax_experiment::not( + crate::patterns::new_syntax_experiment::ahead( + seq![$($item),*] + ) + ) + }; + } + + pub(crate) use {choice, next, not_next, seq}; +} diff --git a/harper-core/src/patterns/whitespace_pattern.rs b/harper-core/src/patterns/whitespace_pattern.rs index 69cb92113..722ece32a 100644 --- a/harper-core/src/patterns/whitespace_pattern.rs +++ b/harper-core/src/patterns/whitespace_pattern.rs @@ -1,5 +1,6 @@ use super::Pattern; +#[derive(Clone, Copy)] pub struct WhitespacePattern; impl Pattern for WhitespacePattern { diff --git a/harper-core/src/patterns/word.rs b/harper-core/src/patterns/word.rs index 6ae5b5145..ea740e3fb 100644 --- a/harper-core/src/patterns/word.rs +++ b/harper-core/src/patterns/word.rs @@ -1,4 +1,4 @@ -use super::Pattern; +use super::SinlgeTokenPattern; use crate::{CharString, Token}; @@ -36,17 +36,16 @@ impl Word { } } -impl Pattern for Word { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - let tok = tokens.first()?; - if !tok.kind.is_word() { - return None; +impl SinlgeTokenPattern for Word { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + if !token.kind.is_word() { + return false; } - if tok.span.len() != self.word.len() { - return None; + if token.span.len() != self.word.len() { + return false; } - let chars = tok.span.get_content(source); + let chars = token.span.get_content(source); let eq = if self.exact { chars == self.word.as_slice() } else { @@ -56,7 +55,7 @@ impl Pattern for Word { .all(|(a, b)| a.eq_ignore_ascii_case(b)) }; - if eq { Some(1) } else { None } + eq } }