diff --git a/.dune-prelude b/.dune-prelude index 57eed36e..68969579 100755 --- a/.dune-prelude +++ b/.dune-prelude @@ -1,19 +1,19 @@ -# _ -# /\ | | -# / \ __| | __ _ _ __ ___ -# / /\ \ / _` |/ _` | '_ ` _ \ -# / ____ \ (_| | (_| | | | | | | -# /_/ __\_\__,_|\__,_|_| |_| |_| _ _ _ -# | \/ | | __ \ (_) | ( ) -# | \ / | ___| | | | __ _ _ __ _ ___| |/ ___ +# _ +# /\ | | +# / \ __| | __ _ _ __ ___ +# / /\ \ / _` |/ _` | '_ ` _ \ +# / ____ \ (_| | (_| | | | | | | +# /_/ __\_\__,_|\__,_|_| |_| |_| _ _ _ +# | \/ | | __ \ (_) | ( ) +# | \ / | ___| | | | __ _ _ __ _ ___| |/ ___ # | |\/| |/ __| | | |/ _` | '_ \| |/ _ \ | / __| # | | | | (__| |__| | (_| | | | | | __/ | \__ \ # |_|__|_|\___|_____/ \__,_|_|_|_|_|\___|_| |___/ -# | __ \ | | | | -# | |__) | __ ___| |_ _ __| | ___ -# | ___/ '__/ _ \ | | | |/ _` |/ _ \ -# | | | | | __/ | |_| | (_| | __/ -# |_| |_| \___|_|\__,_|\__,_|\___| +# | __ \ | | | | +# | |__) | __ ___| |_ _ __| | ___ +# | ___/ '__/ _ \ | | | |/ _` |/ _ \ +# | | | | | __/ | |_| | (_| | __/ +# |_| |_| \___|_|\__,_|\__,_|\___| # # Overview: # Splash screen: @@ -39,7 +39,7 @@ let open = fs@read; let concat = x -> y -> (str y) + (str x); let ssh = addr -> user -> { - 'ssh -X (str user) + "@" + (str addr); + ssh -X (str user) + "@" + (str addr); }; let scp = addr -> user -> src -> dst -> { @@ -81,15 +81,15 @@ let MOON-PHASES = [ ]; let WEATHER-ASCII = { - Unknown = " .-. - __) - ( - `-’ + Unknown = " .-. + __) + ( + `-’ •", - Sunny = " \\ / - .-. - ― ( ) ― - `-’ + Sunny = " \\ / + .-. + ― ( ) ― + `-’ / \\ ", PartlyCloudy =" \\ / _ /\"\".-. @@ -177,7 +177,7 @@ let PLANTS = [" _ _ (_\\_) (__<_{} (_/_) - |\\ | + |\\ | \\\\| /| \\|// |/ @@ -279,7 +279,7 @@ let get-weather-ascii = desc -> { else WEATHER-ASCII@Unknown }; -let reload-weather-api = _ ~> { +let reload-weather-api = _ ~> { let weather-api = units -> city -> { # Confirm that the API key is not None if WEATHER-API-KEY == None { diff --git a/.gitignore b/.gitignore index 3aa22b9f..19ac95dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -/target +/target* history.txt *.cast diff --git a/CHANGELOG.md b/CHANGELOG.md index 0480f21d..5d083bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,27 @@ # Changelog -All notable changes to this project will be documented in this file. +## [0.2.0-symbol] -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +This branch fixes symbol handling: + +- Allow operators without surrounding spaces, +such as `let a=2+3;`, `a>3`, and `let add=x->x+1`. + +A space is still needed to distinguish a negative number from an operator, +as in `let a=2+ -3`. + +- Allow arguments in commands, +such as `ls -l --color=auto /tmp`: + + short args: `-c` + + long args: `--chars` + + paths: `./dir`, `/dir`, or `..` + + Unfortunately, a bare `/` is not supported yet, since it may be used as an operator someday. + + A bare `.` is ignored and defaults to the current working directory. + +- Allow `:` when defining a dict: +`let dict={x:1,y:2}`, as well as the old form +`let dict={x=1,y=2}` ## [Unreleased] diff --git a/Cargo.lock b/Cargo.lock index 306d4544..6f957e9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing.
-version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -333,7 +333,7 @@ checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" [[package]] name = "dune" -version = "0.2.0-alpha" +version = "0.2.0-symbol" dependencies = [ "chess-engine", "chrono", diff --git a/Cargo.toml b/Cargo.toml index c1ec1f76..c094068e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dune" -version = "0.2.0-alpha" +version = "0.2.0-symbol" authors = ["Adam McDaniel "] edition = "2021" license = "Apache-2.0" diff --git a/src/.default-dune-prelude b/src/.default-dune-prelude index 18bf3d20..74820b1e 100644 --- a/src/.default-dune-prelude +++ b/src/.default-dune-prelude @@ -1,16 +1,16 @@ -# _____ _ -# | __ \ ( ) -# | | | |_ _ _ __ ___|/ ___ -# | | | | | | | '_ \ / _ \ / __| -# | |__| | |_| | | | | __/ \__ \ -# |_____/ \__,_|_| |_|\___| |___/ -# | __ \ / _| | | | -# | | | | ___| |_ __ _ _ _| | |_ -# | | | |/ _ \ _/ _` | | | | | __| -# | |__| | __/ || (_| | |_| | | |_ -# |_____/ \___|_| \__,_|\__,_|_|\__| -# | __ \ | | | | -# | |__) | __ ___| |_ _ __| | ___ +# _____ _ +# | __ \ ( ) +# | | | |_ _ _ __ ___|/ ___ +# | | | | | | | '_ \ / _ \ / __| +# | |__| | |_| | | | | __/ \__ \ +# |_____/ \__,_|_| |_|\___| |___/ +# | __ \ / _| | | | +# | | | | ___| |_ __ _ _ _| | |_ +# | | | |/ _ \ _/ _` | | | | | __| +# | |__| | __/ || (_| | |_| | | |_ +# |_____/ \___|_| \__,_|\__,_|_|\__| +# | __ \ | | | | +# | |__) | __ ___| |_ _ __| | ___ # | ___/ '__/ _ \ | | | |/ _` |/ _ \ # | | | | | __/ | |_| | (_| | __/ # |_| |_| \___|_|\__,_|\__,_|\___| @@ -39,7 +39,7 @@ let open = fs@read; let concat = x -> y -> (str y) + (str x); let ssh = addr -> user -> { - 'ssh -X (str user) + "@" + (str addr); + ssh -X (str user) + "@" + (str addr); }; let scp = addr -> user -> src -> dst -> { @@ -81,15 +81,15 @@ let MOON-PHASES = [ ]; let WEATHER-ASCII = { - Unknown = " .-. - __) - ( - `-’ + Unknown = " .-. + __) + ( + `-’ •", - Sunny = " \\ / - .-. - ― ( ) ― - `-’ + Sunny = " \\ / + .-. + ― ( ) ― + `-’ / \\ ", PartlyCloudy =" \\ / _ /\"\".-. @@ -177,7 +177,7 @@ let PLANTS = [" _ _ (_\\_) (__<_{} (_/_) - |\\ | + |\\ | \\\\| /| \\|// |/ @@ -279,7 +279,7 @@ let get-weather-ascii = desc -> { else WEATHER-ASCII@Unknown }; -let reload-weather-api = _ ~> { +let reload-weather-api = _ ~> { let weather-api = units -> city -> { # Confirm that the API key is not None if WEATHER-API-KEY == None { @@ -755,4 +755,4 @@ if WEATHER-API-KEY == None { echo (fmt@italics (fmt@red "Please set your `openweathermap.org` API key and your city with the `set-weather-api-key` macro in your .dune-prelude or .dune-secrets file!")); echo (fmt@green "You can remove these messages by editing your .dune-prelude!") -} \ No newline at end of file +} diff --git a/src/bin.rs b/src/bin.rs index c85931be..3e77da81 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -218,6 +218,11 @@ fn syntax_highlight(line: &str) -> String { is_colored = true; result.push_str(k); } + (TokenKind::StringRaw, s) => { + result.push_str("\x1b[38;5;203m"); + is_colored = true; + result.push_str(s); + } (TokenKind::StringLiteral, s) => { result.push_str("\x1b[38;5;208m"); is_colored = true; @@ -546,14 +551,14 @@ fn run_file(path: PathBuf, env: &mut Environment) -> Result { fn main() -> Result<(), Error> { let matches = App::new( r#" - 888 - 888 - 888 - .d88888 888 888 88888b. .d88b. - d88" 888 888 888 888 "88b d8P Y8b - 888 888 888 888 888 888 88888888 - Y88b 888 Y88b 888 888 888 Y8b. - "Y88888 "Y88888 888 888 "Y8888 + 888 + 888 + 888 + .d88888 888 888 88888b. .d88b. 
+ d88" 888 888 888 888 "88b d8P Y8b + 888 888 888 888 888 888 88888888 + Y88b 888 Y88b 888 888 888 Y8b. + "Y88888 "Y88888 888 888 "Y8888 "#, ) .author(crate_authors!()) diff --git a/src/binary/init/operator_module.rs b/src/binary/init/operator_module.rs index f12eef47..fecd304e 100644 --- a/src/binary/init/operator_module.rs +++ b/src/binary/init/operator_module.rs @@ -95,10 +95,10 @@ pub fn get(env: &mut Environment) -> Expression { ); tmp.define( - "//", + "/", curry( Expression::builtin( - "//", + "/", |args, env| { let mut result = args[0].clone().eval(env)?; for arg in &args[1..] { diff --git a/src/error.rs b/src/error.rs index 27d69b8a..d0cc1278 100644 --- a/src/error.rs +++ b/src/error.rs @@ -18,6 +18,8 @@ pub enum Error { ProgramNotFound(String), SyntaxError(Str, SyntaxError), CustomError(String), + Redeclaration(String), + UndeclaredVariable(String), } impl Error { @@ -58,6 +60,8 @@ impl Error { Self::PermissionDenied(..) => Self::ERROR_CODE_CUSTOM_ERROR, Self::ProgramNotFound(..) => Self::ERROR_CODE_CUSTOM_ERROR, Self::SyntaxError(..) => Self::ERROR_CODE_CUSTOM_ERROR, + Self::Redeclaration(..) => Self::ERROR_CODE_CUSTOM_ERROR, + Self::UndeclaredVariable(..) => Self::ERROR_CODE_CUSTOM_ERROR, } } } @@ -65,6 +69,12 @@ impl Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Self::Redeclaration(name) => { + write!(f, "redeclaration of {:?}", name) + } + Self::UndeclaredVariable(name) => { + write!(f, "undeclared var: {:?}", name) + } Self::CannotApply(expr, args) => { write!(f, "cannot apply `{:?}` to the arguments {:?}", expr, args) } diff --git a/src/expr.rs b/src/expr.rs index 639924e6..29d8665c 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -110,6 +110,8 @@ pub enum Expression { Map(BTreeMap), None, + Del(String), // 新增删除操作 + Declare(String, Box), // Assign an expression to a variable Assign(String, Box), @@ -204,7 +206,9 @@ impl fmt::Debug for Expression { .join("; ") ), - Self::Assign(name, expr) => write!(f, "let {} = {:?}", name, expr), + Self::Del(name) => write!(f, "del {}", name), + Self::Declare(name, expr) => write!(f, "let {} = {:?}", name, expr), + Self::Assign(name, expr) => write!(f, "{} = {:?}", name, expr), Self::If(cond, true_expr, false_expr) => { write!(f, "if {:?} {:?} else {:?}", cond, true_expr, false_expr) } @@ -333,7 +337,9 @@ impl fmt::Display for Expression { .join("; ") ), - Self::Assign(name, expr) => write!(f, "let {} = {:?}", name, expr), + Self::Del(name) => write!(f, "del {}", name), + Self::Declare(name, expr) => write!(f, "let {} = {:?}", name, expr), + Self::Assign(name, expr) => write!(f, "{} = {:?}", name, expr), Self::If(cond, true_expr, false_expr) => { write!(f, "if {:?} {:?} else {:?}", cond, true_expr, false_expr) } @@ -423,7 +429,8 @@ impl Expression { | Self::Bytes(_) | Self::String(_) | Self::Boolean(_) - | Self::Builtin(_) => vec![], + | Self::Builtin(_) + | Self::Del(_) => vec![], Self::For(_, list, body) => { let mut result = vec![]; @@ -451,6 +458,7 @@ impl Expression { Self::Lambda(_, body, _) => body.get_used_symbols(), Self::Macro(_, body) => body.get_used_symbols(), + Self::Declare(_, expr) => expr.get_used_symbols(), Self::Assign(_, expr) => expr.get_used_symbols(), Self::If(cond, t, e) => { let mut result = vec![]; @@ -491,8 +499,25 @@ impl Expression { None => Self::Symbol(name.clone()), }) } - + Self::Del(name) => { + env.undefine(&name); + return Ok(Self::None); + } + // 处理变量声明(仅允许未定义变量) + Self::Declare(name, expr) => { + // TODO: redefine is rejected. 
but why is the error never reported? + if env.is_defined(&name) { + return Err(Error::Redeclaration(name)); + } + let value = expr.eval_mut(env, depth + 1)?; + env.define(&name, value); // newly added declare handling + return Ok(Self::None); + } Self::Assign(name, expr) => { + // TODO: enable check while in strict mode. + // if !env.is_defined(&name) { + // return Err(Error::UndeclaredVariable(name)); + // } let x = expr.eval_mut(env, depth + 1)?; env.define(&name, x); return Ok(Self::None); } diff --git a/src/parser.rs b/src/parser.rs index 32f8567f..60b0eaa9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -175,7 +175,7 @@ fn is_symbol_like(kind: TokenKind) -> bool { kind, TokenKind::Symbol | TokenKind::Keyword - | TokenKind::Operator + // | TokenKind::Operator // to allow ++ and -- to be overloaded | TokenKind::BooleanLiteral | TokenKind::FloatLiteral | TokenKind::IntegerLiteral @@ -292,13 +292,13 @@ fn parse_none(input: Tokens<'_>) -> IResult, (), SyntaxError> { } } -fn parse_quote(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { - let (input, _) = text("'")(input)?; - - map(parse_expression_prec_two, |x| { - Expression::Quote(Box::new(x)) - })(input) -} +// fn parse_quote(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { +// let (input, _) = text("'")(input)?; +// map(parse_expression_prec_two, |x| { +// dbg!(x.clone()); +// Expression::Quote(Box::new(x)) +// })(input) +// } fn parse_not(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { let (input, _) = text("!")(input)?; @@ -308,6 +308,28 @@ fn parse_not(input: Tokens<'_>) -> IResult, Expression, SyntaxError> })(input) } +#[inline] +fn parse_string_raw(input: Tokens<'_>) -> IResult, String, SyntaxError> { + let (input, expr) = kind(TokenKind::StringRaw)(input)?; + let raw_str = expr.to_str(input.str); + + // check for the leading and trailing single quotes + if raw_str.len() >= 2 { + // compute the inner sub-range directly via the StrSlice + let start = expr.start() + 1; + let end = expr.end() - 1; + let content = input.str.get(start..end); // take the part between the quotes + Ok((input, content.to_str(input.str).to_string())) + } else { + Err(SyntaxError::unrecoverable( + expr, + "raw string enclosed in single quotes", + Some(raw_str.to_string()), + Some("raw strings must be surrounded by single quotes"), + )) + } +} + #[inline] fn parse_string(input: Tokens<'_>) -> IResult, String, SyntaxError> { let (input, string) = kind(TokenKind::StringLiteral)(input)?; @@ -316,28 +338,91 @@ fn parse_string(input: Tokens<'_>) -> IResult, String, SyntaxError> { snailquote::unescape(string.to_str(input.str)).unwrap(), )) } - -fn parse_assign(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { +// New: lazy-assignment parsing +fn parse_lazy_assign(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { + let (input, _) = text("let")(input)?; + let (input, symbol) = parse_symbol(input)?; + let (input, _) = text(":=")(input)?; // use `:=` as the lazy-assignment operator + let (input, expr) = parse_expression(input)?; + dbg!(&expr); + Ok(( + input, + Expression::Assign(symbol, Box::new(Expression::Quote(Box::new(expr)))), + )) +} +// New: parse_assignment function +fn parse_assignment(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { + let (input, symbol) = parse_symbol(input)?; + let (input, _) = text("=")(input)?; + let (input, expr) = parse_expression(input)?; + Ok((input, Expression::Assign(symbol, Box::new(expr)))) +} +// allow declaring multiple variables fn parse_declare(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { let (input, _) = text("let")(input)?; - let (input, symbol) = alt((parse_symbol, parse_operator))(input).map_err(|_| { + // parse a comma-separated list of symbols + let (input, symbols) = separated_list0(text(","),
alt((parse_symbol, parse_operator)))(input) + .map_err(|_| { SyntaxError::unrecoverable( input.get_str_slice(), - "symbol", + "symbol list", None, - Some("try using a valid symbol such as `x` in `let x = 5`"), + Some("try: `let x, y = 1, 2`"), ) })?; - let (input, _) = text("=")(input).map_err(|_| { + + // parse the `=` and the comma-separated list of values + let (input, exprs) = opt(preceded( + text("="), + separated_list0(text(","), parse_expression), + ))(input)?; + + // build the right-hand-side declarations + let assignments = match exprs { + Some(e) if e.len() == symbols.len() => (0..symbols.len()) + .map(|i| Expression::Declare(symbols[i].clone(), Box::new(e[i].clone()))) + .collect(), + Some(e) if e.len() == 1 => (0..symbols.len()) + .map(|i| Expression::Declare(symbols[i].clone(), Box::new(e[0].clone()))) + .collect(), + Some(e) => { + return Err(SyntaxError::unrecoverable( + input.get_str_slice(), + "matching values count", + Some(format!( + "got {} variables but {} values", + symbols.len(), + e.len() + )), + Some("ensure each variable has a corresponding value"), + )) + } + None => vec![], // Expression::None, // a declaration without an initializer is allowed + // TODO: require initialization in strict mode. + // None => + // return Err(SyntaxError::unrecoverable( + // input.get_str_slice(), + // "initialization value", + // None, + // Some("multi-variable declaration requires initialization") + // )) + }; + Ok((input, Expression::Do(assignments))) + + // Ok((input, Expression::Declare(symbols, Box::new(expr)))) +} +fn parse_del(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { + let (input, _) = text("del")(input)?; + let (input, symbol) = parse_symbol(input).map_err(|_| { SyntaxError::unrecoverable( input.get_str_slice(), - "`=`", - None, - Some("let expressions must use an `=` sign"), + "symbol", + Some("no symbol".into()), + Some("you can only `del` a symbol"), ) })?; - let (input, expr) = parse_expression(input)?; - Ok((input, Expression::Assign(symbol, Box::new(expr)))) + Ok((input, Expression::Del(symbol))) } fn parse_group(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { @@ -377,7 +462,7 @@ fn parse_map(input: Tokens<'_>) -> IResult, Expression, SyntaxError> text(","), separated_pair( alt((parse_symbol, parse_operator)), - text("="), + alt((text("="), text(":"))), // also allow `:` parse_expression, ), )(input)?; @@ -615,9 +700,12 @@ fn parse_expression(input: Tokens<'_>) -> IResult, Expression, Syntax fn parse_expression_prec_seven(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { no_terminating_punctuation(input)?; alt(( + parse_del, parse_for_loop, parse_if, - parse_assign, + parse_lazy_assign, + parse_declare, + parse_assignment, parse_callable, parse_apply, parse_apply_operator, @@ -774,7 +862,7 @@ fn parse_expression_prec_three(input: Tokens<'_>) -> IResult, Express let (input, mut head) = expr_parser(input)?; let (input, mut list) = - many0(pair(alt((text("*"), text("//"), text("%"))), expr_parser))(input)?; + many0(pair(alt((text("*"), text("/"), text("%"))), expr_parser))(input)?; if list.is_empty() { return Ok((input, head)); @@ -819,7 +907,7 @@ fn parse_expression_prec_two(input: Tokens<'_>) -> IResult, Expressio fn parse_expression_prec_one(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { alt(( parse_group, - parse_quote, + // parse_quote, parse_map, parse_block, parse_list, @@ -827,6 +915,7 @@ fn parse_expression_prec_one(input: Tokens<'_>) -> IResult, Expressio map(parse_none, |_| Expression::None), map(parse_float, Expression::Float), map(parse_integer, Expression::Integer), + map(parse_string_raw, Expression::String), map(parse_string,
Expression::String), map(parse_symbol, Expression::Symbol), ))(input) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c0941937..3f9572a9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -44,13 +44,15 @@ fn parse_token(input: Input) -> TokenizationResult<'_, (Token, Diagnostic)> { } else { Ok(alt(( map_valid_token(long_operator, TokenKind::Operator), + map_valid_token(argument_symbol, TokenKind::StringLiteral), // argument first, to allow args such as = - + map_valid_token(custome_operator, TokenKind::Operator), // before short_operator map_valid_token(any_punctuation, TokenKind::Punctuation), map_valid_token(any_keyword, TokenKind::Keyword), - map_valid_token(short_operator, TokenKind::Operator), map_valid_token(bool_literal, TokenKind::BooleanLiteral), map_valid_token(comment, TokenKind::Comment), string_literal, number_literal, + map_valid_token(short_operator, TokenKind::Operator), // after number, to avoid clashing with -4 map_valid_token(symbol, TokenKind::Symbol), map_valid_token(whitespace, TokenKind::Whitespace), ))(input) } @@ -75,31 +77,32 @@ fn map_valid_token( fn any_punctuation(input: Input<'_>) -> TokenizationResult<'_> { alt(( + keyword_tag(":="), punctuation_tag("("), punctuation_tag(")"), punctuation_tag("["), punctuation_tag("]"), punctuation_tag("{"), punctuation_tag("}"), - punctuation_tag("\'"), + // punctuation_tag("\'"), punctuation_tag(","), punctuation_tag(";"), - punctuation_tag("="), - keyword_tag("->"), // `->foo` is also a valid symbol - keyword_tag("~>"), // `~>foo` is also a valid symbol + // punctuation_tag("="), + punctuation_tag(":"), + punctuation_tag("->"), // `->foo` is also a valid symbol + punctuation_tag("~>"), // `~>foo` is also a valid symbol ))(input) } fn long_operator(input: Input<'_>) -> TokenizationResult<'_> { alt(( keyword_tag("to"), - keyword_tag("=="), - keyword_tag("!="), - keyword_tag(">="), - keyword_tag("<="), + operator_tag("=="), // to allow a==b + operator_tag("!="), + operator_tag(">="), + operator_tag("<="), keyword_tag("&&"), keyword_tag("||"), - keyword_tag("//"), keyword_tag("<<"), keyword_tag(">>"), keyword_tag(">>>"), @@ -108,12 +111,17 @@ fn short_operator(input: Input<'_>) -> TokenizationResult<'_> { alt(( + operator_tag("++"), // for operator-overload use + operator_tag("--"), // for operator-overload use + operator_tag("**"), // for operator-overload use keyword_tag("<"), keyword_tag(">"), - keyword_tag("+"), - keyword_tag("-"), - keyword_tag("*"), - keyword_tag("%"), + operator_tag("+"), // to allow a) -> TokenizationResult<'_> { keyword_tag("for"), keyword_tag("if"), keyword_tag("in"), + keyword_tag("del"), ))(input) } +// custom operator for operator overloading, such as `_*+`; must be surrounded by spaces. +fn custome_operator(input: Input<'_>) -> TokenizationResult<'_> { + if input.starts_with("_") { + // the previous character must be whitespace or the start of the line + if input.previous_char().map_or(true, |c| c.is_whitespace()) { + let places = input.chars().take_while(char::is_ascii_punctuation).count(); + if places > 1 { + return Ok(input.split_at(places)); + } + } + } + Err(NOT_FOUND) +} +// parse arguments such as `ls -l --color=auto ./` +fn argument_symbol(input: Input<'_>) -> TokenizationResult<'_> { + // must begin with -, . or / + let mut it = input.chars(); + let first_char = it.next().ok_or(NOT_FOUND)?; + if !matches!(first_char, '-' | '.'
| '/') { + return Err(NOT_FOUND); + } + // must be followed by a letter or digit + let next_char = it.next().ok_or(NOT_FOUND)?; + dbg!(first_char, next_char); + let valid = match (first_char, next_char) { + ('-', '-') => it.next().ok_or(NOT_FOUND)?.is_ascii_alphabetic(), + ('-', c) => c.is_ascii_alphabetic(), + ('/', c) => c.is_ascii_alphanumeric(), + ('.', '/') => true, + ('.', '.') => true, + _ => false, + }; + if valid { + // the previous character must be whitespace + let prev_char = input.previous_char().ok_or(NOT_FOUND)?; + if prev_char.is_ascii_whitespace() { + let len = input + .chars() + .take_while(|&c| !c.is_whitespace()) + .map(char::len_utf8) + .sum(); + + dbg!(len); + return Ok(input.split_at(len)); + } + } + Err(NOT_FOUND) +} +// fn string_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { +// // parse the opening quote +// let (rest_after_start_quote, start_quote_range) = punctuation_tag("\"")(input)?; +// // parse the string body +// let (rest_after_content, diagnostics) = parse_string_inner(rest_after_start_quote)?; +// // parse the closing quote or handle EOF +// let (rest_after_end_quote, end_quote_range) = alt(( +// map(punctuation_tag("\""), |(rest, range)| (rest, range)), +// map(eof, |_| (input.split_empty(), input.split_empty())), +// ))(rest_after_content)?; + +// // compute where the content starts and ends +// let content_start = start_quote_range.end(); +// let content_end = end_quote_range.start(); + +// // build the content range, making sure it is valid +// let content_range = if content_start <= content_end { +// // use input's methods to split out the content range +// let (_, range) = input.split_at(content_start); +// let (_, range) = range.split_at(content_end - content_start); +// range +// } else { +// // handle the unterminated case by taking everything up to the end of input +// let (_, range) = input.split_at(content_start); +// range +// }; + +// // create the token +// let token = Token::new(TokenKind::StringLiteral, content_range); +// Ok((rest_after_end_quote, (token, diagnostics))) +// } fn string_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { - let (rest, _) = punctuation_tag("\"")(input)?; - let (rest, diagnostics) = parse_string_inner(rest)?; - let (rest, _) = alt((punctuation_tag("\""), map(eof, |_| input.split_empty())))(rest)?; + // 1. parse the opening quote + let (rest_after_start, start_quote_range) = + alt((punctuation_tag("\""), punctuation_tag("'")))(input)?; + let quote_char = start_quote_range.to_str(input.as_original_str()); + + // 2. parse the string body (with escape handling) + let is_double = quote_char == "\""; + let (rest_after_content, diagnostics) = parse_string_inner(rest_after_start, is_double)?; + + // 3. parse the closing quote (or EOF) + let (rest_after_end, _) = alt(( + punctuation_tag(quote_char), + map(eof, |_| input.split_empty()), + ))(rest_after_content)?; + // 4. split + let (_, content_range) = input.split_until(rest_after_end); + // 4. compute the content range + // let content_start = start_quote_range.end(); + // let content_end = end_quote_range.start(); + // let (_, content_range) = rest_after_start.split_until(rest_after_content); + + // // 5. handle unterminated strings (when end_quote_range is empty) + // let content_range = if content_start < content_end { + // content_range + // } else { + // // if unterminated, take everything up to the end of input + // let (_, full_range) = input.split_until(rest_after_start); + // full_range + // }; + + // 6.
pick the TokenKind based on the quote character + let kind = if is_double { + TokenKind::StringLiteral + } else { + TokenKind::StringRaw + }; - let (rest, range) = input.split_until(rest); - let token = Token::new(TokenKind::StringLiteral, range); - Ok((rest, (token, diagnostics))) + let token = Token::new(kind, content_range); + Ok((rest_after_end, (token, diagnostics))) } +// fn string_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { +// let (rest, quote_range) = alt((punctuation_tag("\""), punctuation_tag("'")))(input)?; +// let quote_char = quote_range.to_str(input.as_original_str()); +// let is_double_quote = quote_char == "\""; +// let (rest, diagnostics) = parse_string_inner(rest, is_double_quote)?; + +// let places = rest.chars().take_while(|&c| c != quote_char).count(); +// let (_, mut range) = input.split_at(1); +// if places > 1 { +// (_, range) = input.split_at(places - 1); +// } +// // choose a different TokenKind based on the quote character +// let kind = if quote_char == "\"" { +// TokenKind::StringLiteral +// } else { +// TokenKind::StringRaw +// }; +// // strip the quotes +// let token = Token::new(kind, range); +// Ok((rest, (token, diagnostics))) +// } fn number_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { + // check whether a leading `-` is valid (preceded by whitespace or at the start of the line) + let is_negative = input.starts_with("-"); + if is_negative { + // the previous character must be whitespace or the start of the line + if !input.previous_char().map_or(false, |c| c.is_whitespace()) { + return Err(NOT_FOUND); // a non-space character precedes, so do not parse this as a negative number + } + } + // skip sign let (rest, _) = input.strip_prefix("-").unwrap_or_else(|| input.split_at(0)); @@ -216,7 +366,7 @@ fn comment(input: Input<'_>) -> TokenizationResult<'_> { let len = input .chars() .take_while(|&c| !matches!(c, '\r' | '\n')) - .map(|c| c.len_utf8()) + .map(char::len_utf8) .sum(); Ok(input.split_at(len)) @@ -225,21 +375,32 @@ fn comment(input: Input<'_>) -> TokenizationResult<'_> { } } -fn parse_string_inner(input: Input<'_>) -> TokenizationResult<'_, Diagnostic> { +fn parse_string_inner( + input: Input<'_>, + is_double_quote: bool, +) -> TokenizationResult<'_, Diagnostic> { let mut rest = input; let mut errors = Vec::new(); - - loop { - match rest.chars().next() { - Some('"') | None => break, - Some('\\') => { - let (r, diagnostic) = parse_escape(rest)?; - rest = r; - if let Diagnostic::InvalidStringEscapes(ranges) = diagnostic { - errors.push(ranges[0]); + if is_double_quote { + loop { + match rest.chars().next() { + Some('"') | None => break, + Some('\\') => { + let (r, diagnostic) = parse_escape(rest)?; + rest = r; + if let Diagnostic::InvalidStringEscapes(ranges) = diagnostic { + errors.push(ranges[0]); + } } + Some(ch) => rest = rest.split_at(ch.len_utf8()).0, + } + } + } else { + loop { + match rest.chars().next() { + Some('\'') | None => break, + Some(ch) => rest = rest.split_at(ch.len_utf8()).0, } - Some(ch) => rest = rest.split_at(ch.len_utf8()).0, } } @@ -331,6 +492,15 @@ fn keyword_tag(keyword: &str) -> impl '_ + Fn(Input<'_>) -> TokenizationResult<' .ok_or(NOT_FOUND) } } +/// This parser ensures that the word is *not* immediately followed by punctuation. +fn operator_tag(keyword: &str) -> impl '_ + Fn(Input<'_>) -> TokenizationResult<'_> { + move |input: Input<'_>| { + input + .strip_prefix(keyword) + .filter(|(rest, _)| !rest.starts_with(|c: char| c.is_ascii_punctuation())) + .ok_or(NOT_FOUND) + } +} /// Parses a word that is allowed to be immediately followed by symbol characters.
/// @@ -344,9 +514,10 @@ fn punctuation_tag(punct: &str) -> impl '_ + Fn(Input<'_>) -> TokenizationResult fn is_symbol_char(c: char) -> bool { macro_rules! special_char_pattern { () => { - '_' | '+' | '-' | '.' | '~' | '\\' | '/' | '?' | - '&' | '<' | '>' | '$' | '%' | '#' | '^' | ':' - }; + '_' | '.' | '~' | '\\' | '?' | '&' | '#' | '^' | '$' + }; // removed + - / % > < so operators can be written without spaces, e.g. a+1 + // removed : so it can be used in dict literals + // kept $ as a variable prefix, compatible with bash } static ASCII_SYMBOL_CHARS: [bool; 128] = { @@ -388,6 +559,7 @@ pub(crate) fn parse_tokens(mut input: Input<'_>) -> (Vec, Vec if !input.is_empty() { diagnostics.push(Diagnostic::NotTokenized(input.as_str_slice())) } + dbg!(input, &tokens); (tokens, diagnostics) } diff --git a/src/tokens.rs b/src/tokens.rs index 339cbfa9..0db2a35c 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -70,6 +70,21 @@ impl<'a> Input<'a> { None } } + // New helper: get the character immediately before the current offset + pub fn previous_char(&self) -> Option { + if self.offset == 0 { + return None; // already at the start of the string + } + + let s = &self.str[..self.offset]; + let mut iter = s.char_indices().rev(); + + // find the start of the previous character + match iter.next() { + Some((idx, _)) => s[idx..].chars().next(), + None => None, + } + } } impl nom::InputLength for Input<'_> { @@ -100,6 +115,7 @@ pub enum TokenKind { Operator, Keyword, StringLiteral, + StringRaw, IntegerLiteral, FloatLiteral, BooleanLiteral,
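The bare-argument rule added by `argument_symbol` is the heart of the new `ls -l --color=auto /tmp` support, so a small sketch may help when reviewing it. The function below restates the acceptance check over a plain &str; the name `looks_like_argument` and the (prev, word) signature are hypothetical and are not part of the crate, which works on its `Input` type and returns a token split rather than a bool.

// Sketch of the argument-acceptance rule, assuming the behaviour visible in the diff.
fn looks_like_argument(prev: Option<char>, word: &str) -> bool {
    // As in the diff, an argument is only recognized when the previous
    // character exists and is ASCII whitespace.
    if !prev.map_or(false, |c| c.is_ascii_whitespace()) {
        return false;
    }
    let mut chars = word.chars();
    let (first, second) = match (chars.next(), chars.next()) {
        (Some(f), Some(s)) => (f, s),
        _ => return false,
    };
    match (first, second) {
        // long flags such as `--color=auto` must continue with a letter
        ('-', '-') => chars.next().map_or(false, |c| c.is_ascii_alphabetic()),
        // short flags such as `-l`
        ('-', c) => c.is_ascii_alphabetic(),
        // absolute paths such as `/tmp`
        ('/', c) => c.is_ascii_alphanumeric(),
        // relative paths such as `./dir` and `..`
        ('.', '/') | ('.', '.') => true,
        _ => false,
    }
}

fn main() {
    assert!(looks_like_argument(Some(' '), "--color=auto"));
    assert!(looks_like_argument(Some(' '), "-l"));
    assert!(looks_like_argument(Some(' '), "./dir"));
    assert!(!looks_like_argument(Some('a'), "-l")); // `a-l` stays an expression
    assert!(!looks_like_argument(Some(' '), "/"));  // a bare `/` is not an argument
}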
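Likewise, the reworked string_literal / parse_string_inner pair distinguishes two scanning modes: double-quoted bodies honour backslash escapes, while single-quoted bodies are taken verbatim and become TokenKind::StringRaw. The helper below is a self-contained restatement over &str under those assumptions; `scan_string_body` is a hypothetical name, and the real code tracks ranges and diagnostics instead of a plain length.

// Sketch: length of a string body after the opening quote, per quoting mode.
fn scan_string_body(src: &str, double_quoted: bool) -> usize {
    let mut len = 0;
    let mut chars = src.chars();
    while let Some(c) = chars.next() {
        match c {
            '"' if double_quoted => break,
            '\'' if !double_quoted => break,
            '\\' if double_quoted => {
                // the escaped character is part of the body
                len += c.len_utf8();
                if let Some(esc) = chars.next() {
                    len += esc.len_utf8();
                }
            }
            _ => len += c.len_utf8(),
        }
    }
    len
}

fn main() {
    // body of "a\"b" is four chars: a \ " b
    assert_eq!(scan_string_body("a\\\"b\"rest", true), 4);
    // in a raw string the backslash is literal and `"` does not terminate
    assert_eq!(scan_string_body("a\\\"b'rest", false), 4);
}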