From 7d53bc7bef2ead50d436d16a012ff402c89b324f Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 15:31:54 +0300 Subject: [PATCH 01/13] ast: Use TypedVariable in places where types are required by the grammar --- src/ast/mod.rs | 25 +++++++++++++++++++++++-- src/generator/c.rs | 14 ++++++++++++-- src/generator/js.rs | 2 +- src/generator/qbe.rs | 32 +++++++++----------------------- src/parser/rules.rs | 10 +++++----- 5 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9fcd9df7..e324cc42 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -54,7 +54,7 @@ impl Module { #[derive(Debug, Clone)] pub struct Function { pub name: String, - pub arguments: Vec, + pub arguments: Vec, pub body: Statement, pub ret_type: Option, } @@ -62,7 +62,7 @@ pub struct Function { #[derive(Debug, Clone)] pub struct StructDef { pub name: String, - pub fields: Vec, + pub fields: Vec, pub methods: Vec, } @@ -78,6 +78,27 @@ impl AsRef for Variable { } } +impl From for Variable { + fn from(typed: TypedVariable) -> Self { + Self { + name: typed.name, + ty: Some(typed.ty), + } + } +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct TypedVariable { + pub name: String, + pub ty: Type, +} + +impl AsRef for TypedVariable { + fn as_ref(&self) -> &Self { + self + } +} + #[derive(Debug, Eq, PartialEq, Clone)] pub enum Statement { /// (Statements, Scoped variables) diff --git a/src/generator/c.rs b/src/generator/c.rs index 04189eea..dbe8a8c4 100644 --- a/src/generator/c.rs +++ b/src/generator/c.rs @@ -52,7 +52,11 @@ pub fn generate_struct(def: StructDef) -> String { def.fields.iter().for_each(|f| { // int counter; - buf += &format!("{} {};\n", generate_type(Either::Left(f.clone())), f.name,); + buf += &format!( + "{} {};\n", + generate_type(Either::Left(f.clone().into())), + f.name + ); }); // }; @@ -103,7 +107,13 @@ fn generate_function_signature(func: Function) -> String { let arguments: String = func .arguments .into_iter() - .map(|var| format!("{} {}", generate_type(Either::Left(var.clone())), var.name)) + .map(|var| { + format!( + "{} {}", + generate_type(Either::Left(var.clone().into())), + var.name + ) + }) .collect::>() .join(", "); let t = generate_type(Either::Right(func.ret_type)); diff --git a/src/generator/js.rs b/src/generator/js.rs index 49aa2d9b..ec7bed31 100644 --- a/src/generator/js.rs +++ b/src/generator/js.rs @@ -48,7 +48,7 @@ impl Generator for JsGenerator { } } -fn generate_arguments(args: Vec) -> String { +fn generate_arguments(args: Vec) -> String { args.into_iter() .map(|var| var.name) .collect::>() diff --git a/src/generator/qbe.rs b/src/generator/qbe.rs index 968105a2..d078597d 100644 --- a/src/generator/qbe.rs +++ b/src/generator/qbe.rs @@ -93,13 +93,7 @@ impl QbeGenerator { let mut offset = 0_u64; for field in &def.fields { - let ty = self.get_type( - field - .ty - .as_ref() - .ok_or_else(|| "Structure field must have a type".to_owned())? - .to_owned(), - )?; + let ty = self.get_type(&field.ty)?; meta.insert(field.name.clone(), (ty.clone(), offset)); typedef.items.push((ty.clone(), 1)); @@ -120,19 +114,14 @@ impl QbeGenerator { let mut arguments: Vec<(qbe::Type, qbe::Value)> = Vec::new(); for arg in &func.arguments { - let ty = self.get_type( - arg.ty - .as_ref() - .ok_or("Function arguments must have a type")? 
- .to_owned(), - )?; + let ty = self.get_type(&arg.ty)?; let tmp = self.new_var(&ty, &arg.name)?; arguments.push((ty.into_abi(), tmp)); } let return_ty = if let Some(ty) = &func.ret_type { - Some(self.get_type(ty.to_owned())?.into_abi()) + Some(self.get_type(ty)?.into_abi()) } else { None }; @@ -188,13 +177,10 @@ impl QbeGenerator { self.scopes.pop(); } Statement::Declare { variable, value } => { - let ty = self.get_type( - variable - .ty - .as_ref() - .ok_or_else(|| format!("Missing type for variable '{}'", &variable.name))? - .to_owned(), - )?; + let ty = + self.get_type(variable.ty.as_ref().ok_or_else(|| { + format!("Missing type for variable '{}'", &variable.name) + })?)?; let tmp = self.new_var(&ty, &variable.name)?; if let Some(expr) = value { @@ -765,7 +751,7 @@ impl QbeGenerator { } /// Returns a QBE type for the given AST type - fn get_type(&self, ty: Type) -> GeneratorResult { + fn get_type(&self, ty: &Type) -> GeneratorResult { match ty { Type::Any => Err("'any' type is not supported".into()), Type::Int => Ok(qbe::Type::Word), @@ -774,7 +760,7 @@ impl QbeGenerator { Type::Struct(name) => { let (ty, ..) = self .struct_map - .get(&name) + .get(name) .ok_or_else(|| format!("Use of undeclared struct '{}'", name))? .to_owned(); Ok(ty) diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 5a537f24..39644a18 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -83,7 +83,7 @@ impl Parser { }) } - fn parse_typed_variable_list(&mut self) -> Result, String> { + fn parse_typed_variable_list(&mut self) -> Result, String> { let mut args = Vec::new(); // If there is an argument @@ -101,12 +101,12 @@ impl Parser { Ok(args) } - fn parse_typed_variable(&mut self) -> Result { + fn parse_typed_variable(&mut self) -> Result { let next = self.next()?; if let TokenKind::Identifier(name) = next.kind { - return Ok(Variable { + return Ok(TypedVariable { name, - ty: Some(self.parse_type()?), + ty: self.parse_type()?, }); } @@ -147,7 +147,7 @@ impl Parser { self.match_token(TokenKind::BraceOpen)?; - let arguments: Vec = match self.peek()? { + let arguments: Vec = match self.peek()? { t if t.kind == TokenKind::BraceClose => Vec::new(), _ => self.parse_typed_variable_list()?, }; From 4ea1701181bc2a403fc5acb3ff6aab6e70022ac7 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 15:35:58 +0300 Subject: [PATCH 02/13] parser: Consume the semicolon in 'return;' The parser only checks the following token for a 'return;' construct but doesn't actually consume the semicolon. When the parser starts processing the next statement, it will fail because semicolon is not a valid token in that context. 
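
For example, the following program (added as a test case below) would fail to
parse, because the leftover ';' was still the next token once the parser moved
on to the rest of the block:

    fn main() {
        return;
    }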
--- src/parser/rules.rs | 5 ++++- src/parser/tests.rs | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 39644a18..9c43fc06 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -317,7 +317,10 @@ impl Parser { self.match_keyword(Keyword::Return)?; let peeked = self.peek()?; match peeked.kind { - TokenKind::SemiColon => Ok(Statement::Return(None)), + TokenKind::SemiColon => { + self.next()?; + Ok(Statement::Return(None)) + } _ => Ok(Statement::Return(Some(self.parse_expression()?))), } } diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 6cae3a5f..f339bcc2 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -37,6 +37,18 @@ fn test_parse_function_with_return() { assert!(tree.is_ok()) } +#[test] +fn test_parse_function_with_void_return() { + let raw = " + fn main() { + return; + } + "; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_ok()) +} + #[test] fn test_parse_redundant_semicolon() { let raw = " From 4f431aeaa8104f6b5f1eb6d1fdae706fa0e0fcc3 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 17:00:42 +0300 Subject: [PATCH 03/13] ast: Remove redundant capacity field of Expression::Array --- src/ast/mod.rs | 5 +---- src/generator/c.rs | 9 +++------ src/generator/js.rs | 10 ++-------- src/generator/qbe.rs | 6 ++---- src/parser/rules.rs | 3 +-- 5 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e324cc42..5b989524 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -145,10 +145,7 @@ pub enum Expression { Bool(bool), /// Represents "self" keyword Selff, - Array { - capacity: usize, - elements: Vec, - }, + Array(Vec), FunctionCall { fn_name: String, args: Vec, diff --git a/src/generator/c.rs b/src/generator/c.rs index dbe8a8c4..4289eab7 100644 --- a/src/generator/c.rs +++ b/src/generator/c.rs @@ -168,7 +168,7 @@ fn generate_expression(expr: Expression) -> String { Expression::Str(val) => super::string_syntax(val), Expression::Bool(b) => b.to_string(), Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), - Expression::Array { capacity, elements } => generate_array(capacity, elements), + Expression::Array(elements) => generate_array(elements), Expression::ArrayAccess { name, index } => generate_array_access(name, *index), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name: _, fields } => { @@ -191,7 +191,7 @@ fn generate_while_loop(expr: Expression, body: Statement) -> String { out_str } -fn generate_array(_size: usize, elements: Vec) -> String { +fn generate_array(elements: Vec) -> String { let mut out_str = String::from("["); out_str += &elements @@ -270,10 +270,7 @@ fn generate_function_call(func: String, args: Vec) -> String { Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), Expression::Str(s) => super::string_syntax(s), Expression::Variable(s) => s, - Expression::Array { - capacity: _, - elements: _, - } => todo!(), + Expression::Array(_) => todo!(), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name: _, fields } => { generate_struct_initialization(fields) diff --git a/src/generator/js.rs b/src/generator/js.rs index ec7bed31..226f4ef9 100644 --- a/src/generator/js.rs +++ b/src/generator/js.rs @@ -160,10 +160,7 @@ fn generate_expression(expr: Expression) -> String { 
Expression::Variable(val) => val, Expression::Bool(b) => b.to_string(), Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), - Expression::Array { - capacity: _, - elements, - } => generate_array(elements), + Expression::Array(elements) => generate_array(elements), Expression::ArrayAccess { name, index } => generate_array_access(name, *index), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name, fields } => { @@ -318,10 +315,7 @@ fn generate_function_call(func: String, args: Vec) -> String { Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), Expression::Str(s) => super::string_syntax(s), Expression::Variable(s) => s, - Expression::Array { - capacity: _, - elements, - } => generate_array(elements), + Expression::Array(elements) => generate_array(elements), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name, fields } => { generate_struct_initialization(name, fields) diff --git a/src/generator/qbe.rs b/src/generator/qbe.rs index d078597d..05f37af8 100644 --- a/src/generator/qbe.rs +++ b/src/generator/qbe.rs @@ -261,9 +261,7 @@ impl QbeGenerator { Ok((qbe::Type::Word, tmp)) } - Expression::Array { capacity, elements } => { - self.generate_array(func, *capacity, elements) - } + Expression::Array(elements) => self.generate_array(func, elements), Expression::FunctionCall { fn_name, args } => { let mut new_args: Vec<(qbe::Type, qbe::Value)> = Vec::new(); for arg in args.iter() { @@ -634,9 +632,9 @@ impl QbeGenerator { fn generate_array( &mut self, func: &mut qbe::Function, - len: usize, items: &[Expression], ) -> GeneratorResult<(qbe::Type, qbe::Value)> { + let len = items.len(); let mut first_type: Option = None; let mut results: Vec = Vec::new(); diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 9c43fc06..1619d1c2 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -485,9 +485,8 @@ impl Parser { } self.match_token(TokenKind::SquareBraceClose)?; - let capacity = elements.len(); - Ok(Expression::Array { capacity, elements }) + Ok(Expression::Array(elements)) } fn parse_array_access(&mut self, arr_name: Option) -> Result { From e483d7ae79a438817e4ddf8264b5ea0aadc2d434 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Mon, 25 Mar 2024 22:58:40 +0300 Subject: [PATCH 04/13] command/run: Simplify --- src/command/run.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/command/run.rs b/src/command/run.rs index 415b2ea6..9dfadb03 100644 --- a/src/command/run.rs +++ b/src/command/run.rs @@ -15,7 +15,6 @@ */ use crate::command::build; use crate::generator::Target; -use std::io::Read; use std::io::Write; use std::path::PathBuf; use std::process::Command; @@ -27,28 +26,19 @@ pub fn run(target: Target, in_file: PathBuf) -> Result<(), String> { match target { Target::JS => { - let process = Command::new("node") + let mut process = Command::new("node") .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) .spawn() .map_err(|e| format!("Could not spawn Node.js process: {}", e))?; process .stdin + .as_ref() .unwrap() .write_all(&buf) .map_err(|e| format!("Could not write to Node.js process: {}", e))?; - let mut s = Vec::new(); - process - .stdout - .unwrap() - .read_to_end(&mut s) - .map_err(|e| format!("Could not read from child process: {}", e))?; - std::io::stdout() - .write_all(&s) - .map_err(|e| format!("Could not write to stdout: {}", e))?; + 
process.wait().unwrap(); } _ => todo!(), } From f6eebee1b8396e9df7a5355d8eaca76f2aaf3e44 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 20:30:32 +0300 Subject: [PATCH 05/13] parser: Require either an expression or type for variable declarations --- src/parser/rules.rs | 12 ++++++++++-- src/parser/tests.rs | 37 ++++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 1619d1c2..b47ea338 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -668,9 +668,17 @@ impl Parser { fn parse_declare(&mut self) -> Result { self.match_keyword(Keyword::Let)?; let name = self.match_identifier()?; - let ty = match self.peek()?.kind { + let token = self.peek()?; + let ty = match &token.kind { TokenKind::Colon => Some(self.parse_type()?), - _ => None, + TokenKind::Assign => None, + _ => { + // FIXME: context for this error message is not ideal + return Err(self.make_error_msg( + token.pos, + format!("Expected ':' or '=', found {:?}", token.kind), + )); + } }; match self.peek()?.kind { diff --git a/src/parser/tests.rs b/src/parser/tests.rs index f339bcc2..3a6dcefe 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -102,6 +102,30 @@ fn test_parse_variable_declaration() { assert!(tree.is_ok()) } +#[test] +fn test_parse_variable_uninitialized() { + let raw = " + fn main() { + let x: int + } + "; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_ok()) +} + +#[test] +fn test_parse_variable_disallow_untyped_uninitialized() { + let raw = " + fn main() { + let x + } + "; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_err()); +} + #[test] fn test_parse_variable_reassignment() { let raw = " @@ -529,19 +553,6 @@ fn test_array_access_in_if() { assert!(tree.is_ok()) } -#[test] -fn test_uninitialized_variables() { - let raw = " - fn main() { - let x - let y - } - "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); - assert!(tree.is_ok()) -} - #[test] fn test_function_call_math() { let raw = " From a486e808e0a744dff4256df9f6e0a9547666b086 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 15:46:20 +0300 Subject: [PATCH 06/13] Get rid of parser-level type inferencer --- src/ast/mod.rs | 13 ------- src/parser/infer.rs | 90 -------------------------------------------- src/parser/mod.rs | 1 - src/parser/parser.rs | 7 +--- 4 files changed, 1 insertion(+), 110 deletions(-) delete mode 100644 src/parser/infer.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5b989524..d097a4b7 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -21,9 +21,6 @@ use std::collections::HashSet; pub mod types; use types::Type; -/// Table that contains all symbol and its types -pub type SymbolTable = HashMap>; - #[derive(Debug, Clone)] pub struct Module { pub path: String, @@ -39,16 +36,6 @@ impl Module { self.structs.append(&mut other.structs); self.globals.append(&mut other.globals) } - - pub fn get_symbol_table(&self) -> SymbolTable { - let mut table = SymbolTable::new(); - - for func in self.func.clone() { - table.insert(func.name, func.ret_type); - } - - table - } } #[derive(Debug, Clone)] diff --git a/src/parser/infer.rs b/src/parser/infer.rs deleted file mode 100644 index 8e6a2eee..00000000 --- a/src/parser/infer.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::ast::types::Type; -use crate::ast::{Expression, 
Module, Statement, SymbolTable}; - -/** - * Copyright 2021 Garrit Franke - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/// Try to infer types of variables -/// -/// TODO: Global symbol table is passed around randomly. -/// This could probably be cleaned up. -pub(super) fn infer(program: &mut Module) { - let table = &program.get_symbol_table(); - // TODO: Fix aweful nesting - for func in &mut program.func { - if let Statement::Block { - statements, - scope: _, - } = &mut func.body - { - for statement in statements { - if let Statement::Declare { variable, value } = statement { - if variable.ty.is_none() { - if let Some(e) = value { - variable.ty = infer_expression(e, table); - #[cfg(debug_assertions)] - if variable.ty.is_none() { - println!( - "Type of {} could not be infered: {:?}", - &variable.name, e - ); - } - } - } - } - } - } - } -} - -/// Function table is needed to infer possible function calls -fn infer_expression(expr: &Expression, table: &SymbolTable) -> Option { - match expr { - Expression::Int(_) => Some(Type::Int), - Expression::Bool(_) => Some(Type::Bool), - Expression::Str(_) => Some(Type::Str), - Expression::StructInitialization { name, fields: _ } => { - Some(Type::Struct(name.to_string())) - } - Expression::FunctionCall { fn_name, args: _ } => infer_function_call(fn_name, table), - Expression::Array { - capacity: _, - elements, - } => infer_array(elements, table), - _ => None, - } -} - -fn infer_array(elements: &[Expression], table: &SymbolTable) -> Option { - let types: Vec> = elements - .iter() - .map(|el| infer_expression(el, table)) - .collect(); - - // TODO: This approach only relies on the first element. - // It will not catch that types are possibly inconsistent. - types - .first() - .and_then(|ty| ty.to_owned()) - .map(|ty| Type::Array(Box::new(ty), Some(types.len()))) -} - -fn infer_function_call(name: &str, table: &SymbolTable) -> Option { - match table.get(name) { - Some(t) => t.to_owned(), - None => None, - } -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1dc380b6..42c23056 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -mod infer; // TODO: Resolve this lint by renaming the module #[allow(clippy::module_inception)] mod parser; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 5b2a4c50..37e053ed 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -17,7 +17,6 @@ use crate::ast::*; use crate::lexer::Keyword; use crate::lexer::Position; use crate::lexer::{Token, TokenKind}; -use crate::parser::infer::infer; use crate::util::string_util::highlight_position_in_file; use std::convert::TryFrom; use std::iter::Peekable; @@ -50,11 +49,7 @@ impl Parser { } pub fn parse(&mut self) -> Result { - let mut program = self.parse_module()?; - // infer types - infer(&mut program); - - Ok(program) + self.parse_module() } pub(super) fn next(&mut self) -> Result { From 843826734f7443f4f664fd05a0de9827d50716d0 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 19:44:18 +0300 Subject: [PATCH 07/13] Refactor assignment and suffix expressions Short assignment forms (+=, -=, *=, /=) are now combined with the Assign statement instead of being expressions. To implement that, convoluted suffix expressions (field/array access and calls) handling in the parser had to be reworked. Right now all suffix expressions are parsed inside parse_expression. The assignment logic is also deduplicated between parse_statement and various expression parsers. --- src/ast/mod.rs | 46 +++++++--- src/generator/c.rs | 64 +++++++------- src/generator/js.rs | 64 +++++++------- src/generator/qbe.rs | 76 ++++++++-------- src/parser/rules.rs | 200 ++++++++++++++----------------------------- src/parser/tests.rs | 29 +++++++ 6 files changed, 225 insertions(+), 254 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d097a4b7..059bdd32 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -99,6 +99,7 @@ pub enum Statement { }, Assign { lhs: Box, + op: AssignOp, rhs: Box, }, Return(Option), @@ -125,6 +126,37 @@ pub enum Statement { Exp(Expression), } +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum AssignOp { + /// '=' + Set, + /// '+=' + Add, + /// '-=' + Subtract, + /// '*=' + Multiply, + /// '/=' + Divide, +} + +impl TryFrom for AssignOp { + type Error = String; + fn try_from(token: TokenKind) -> Result { + match token { + TokenKind::Assign => Ok(AssignOp::Set), + TokenKind::PlusEqual => Ok(AssignOp::Add), + TokenKind::MinusEqual => Ok(AssignOp::Subtract), + TokenKind::StarEqual => Ok(AssignOp::Multiply), + TokenKind::SlashEqual => Ok(AssignOp::Divide), + other => Err(format!( + "Token {:?} cannot be converted into an AssignOp", + other + )), + } + } +} + #[derive(Debug, Eq, PartialEq, Clone)] pub enum Expression { Int(usize), @@ -134,12 +166,12 @@ pub enum Expression { Selff, Array(Vec), FunctionCall { - fn_name: String, + expr: Box, args: Vec, }, Variable(String), ArrayAccess { - name: String, + expr: Box, index: Box, }, BinOp { @@ -153,7 +185,7 @@ pub enum Expression { }, FieldAccess { expr: Box, - field: Box, + field: String, }, } @@ -202,10 +234,6 @@ pub enum BinOp { NotEqual, And, Or, - AddAssign, - SubtractAssign, - MultiplyAssign, - DivideAssign, } impl TryFrom for BinOp { @@ -225,10 +253,6 @@ impl TryFrom for BinOp { TokenKind::NotEqual => Ok(BinOp::NotEqual), TokenKind::And => Ok(BinOp::And), TokenKind::Or => Ok(BinOp::Or), - TokenKind::PlusEqual => Ok(BinOp::AddAssign), - TokenKind::MinusEqual => Ok(BinOp::SubtractAssign), - TokenKind::StarEqual => Ok(BinOp::MultiplyAssign), - TokenKind::SlashEqual => Ok(BinOp::DivideAssign), other => Err(format!( "Token {:?} cannot be converted into a BinOp", 
other diff --git a/src/generator/c.rs b/src/generator/c.rs index 4289eab7..711e25c5 100644 --- a/src/generator/c.rs +++ b/src/generator/c.rs @@ -142,7 +142,7 @@ fn generate_statement(statement: Statement) -> String { body, else_branch, } => generate_conditional(condition, *body, else_branch.map(|x| *x)), - Statement::Assign { lhs, rhs } => generate_assign(*lhs, *rhs), + Statement::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), Statement::Block { statements, scope } => generate_block(statements, scope), Statement::While { condition, body } => generate_while_loop(condition, *body), Statement::For { @@ -167,14 +167,14 @@ fn generate_expression(expr: Expression) -> String { Expression::Variable(val) => val, Expression::Str(val) => super::string_syntax(val), Expression::Bool(b) => b.to_string(), - Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), + Expression::FunctionCall { expr, args } => generate_function_call(*expr, args), Expression::Array(elements) => generate_array(elements), - Expression::ArrayAccess { name, index } => generate_array_access(name, *index), + Expression::ArrayAccess { expr, index } => generate_array_access(*expr, *index), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name: _, fields } => { generate_struct_initialization(fields) } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, *field), + Expression::FieldAccess { expr, field } => generate_field_access(*expr, field), Expression::Selff => todo!(), } } @@ -208,8 +208,12 @@ fn generate_array(elements: Vec) -> String { out_str } -fn generate_array_access(name: String, expr: Expression) -> String { - format!("{n}[{e}]", n = name, e = generate_expression(expr)) +fn generate_array_access(expr: Expression, index: Expression) -> String { + format!( + "{}[{}]", + generate_expression(expr), + generate_expression(index) + ) } fn generate_conditional( @@ -260,27 +264,17 @@ fn generate_declare(var: Variable, val: Option) -> String { } } -fn generate_function_call(func: String, args: Vec) -> String { +fn generate_function_call(func: Expression, args: Vec) -> String { let formatted_args = args .into_iter() - .map(|arg| match arg { - Expression::Int(i) => i.to_string(), - Expression::Bool(v) => v.to_string(), - Expression::ArrayAccess { name, index } => generate_array_access(name, *index), - Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), - Expression::Str(s) => super::string_syntax(s), - Expression::Variable(s) => s, - Expression::Array(_) => todo!(), - Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), - Expression::StructInitialization { name: _, fields } => { - generate_struct_initialization(fields) - } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, *field), - Expression::Selff => todo!(), - }) + .map(generate_expression) .collect::>() .join(","); - format!("{N}({A})", N = func, A = formatted_args) + format!( + "{N}({A})", + N = generate_expression(func), + A = formatted_args + ) } fn generate_return(ret: Option) -> String { @@ -300,10 +294,6 @@ fn generate_bin_op(left: Expression, op: BinOp, right: Expression) -> String { BinOp::GreaterThanOrEqual => ">=", BinOp::LessThan => "<", BinOp::LessThanOrEqual => "<=", - BinOp::AddAssign => "+=", - BinOp::SubtractAssign => "-=", - BinOp::MultiplyAssign => "*=", - BinOp::DivideAssign => "/=", BinOp::Modulus => "%", BinOp::Multiplication => "*", BinOp::NotEqual => "!=", @@ -330,18 
+320,22 @@ fn generate_struct_initialization(fields: HashMap>) -> S buf } -fn generate_field_access(expr: Expression, field: Expression) -> String { - format!( - "{}.{}", - generate_expression(expr), - generate_expression(field) - ) +fn generate_field_access(expr: Expression, field: String) -> String { + format!("{}.{}", generate_expression(expr), field) } -fn generate_assign(name: Expression, expr: Expression) -> String { +fn generate_assign(name: Expression, op: AssignOp, expr: Expression) -> String { + let op_str = match op { + AssignOp::Set => "=", + AssignOp::Add => "+=", + AssignOp::Subtract => "-=", + AssignOp::Multiply => "*=", + AssignOp::Divide => "/=", + }; format!( - "{} = {};", + "{} {} {};", generate_expression(name), + op_str, generate_expression(expr) ) } diff --git a/src/generator/js.rs b/src/generator/js.rs index 226f4ef9..63bcae1f 100644 --- a/src/generator/js.rs +++ b/src/generator/js.rs @@ -137,7 +137,7 @@ fn generate_statement(statement: Statement) -> String { body, else_branch, } => generate_conditional(condition, *body, else_branch.map(|x| *x)), - Statement::Assign { lhs, rhs } => generate_assign(*lhs, *rhs), + Statement::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), Statement::Block { statements: _, scope: _, @@ -159,14 +159,14 @@ fn generate_expression(expr: Expression) -> String { Expression::Str(val) => super::string_syntax(val), Expression::Variable(val) => val, Expression::Bool(b) => b.to_string(), - Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), + Expression::FunctionCall { expr, args } => generate_function_call(*expr, args), Expression::Array(elements) => generate_array(elements), - Expression::ArrayAccess { name, index } => generate_array_access(name, *index), + Expression::ArrayAccess { expr, index } => generate_array_access(*expr, *index), Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), Expression::StructInitialization { name, fields } => { generate_struct_initialization(name, fields) } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, *field), + Expression::FieldAccess { expr, field } => generate_field_access(*expr, field), } } @@ -246,8 +246,12 @@ fn generate_array(elements: Vec) -> String { out_str } -fn generate_array_access(name: String, expr: Expression) -> String { - format!("{n}[{e}]", n = name, e = generate_expression(expr)) +fn generate_array_access(expr: Expression, index: Expression) -> String { + format!( + "{}[{}]", + generate_expression(expr), + generate_expression(index) + ) } fn generate_conditional( @@ -304,27 +308,17 @@ fn generate_declare>(identifier: V, val: Option) } } -fn generate_function_call(func: String, args: Vec) -> String { +fn generate_function_call(func: Expression, args: Vec) -> String { let formatted_args = args .into_iter() - .map(|arg| match arg { - Expression::Int(i) => i.to_string(), - Expression::Bool(v) => v.to_string(), - Expression::Selff => "this".to_string(), - Expression::ArrayAccess { name, index } => generate_array_access(name, *index), - Expression::FunctionCall { fn_name, args } => generate_function_call(fn_name, args), - Expression::Str(s) => super::string_syntax(s), - Expression::Variable(s) => s, - Expression::Array(elements) => generate_array(elements), - Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), - Expression::StructInitialization { name, fields } => { - generate_struct_initialization(name, fields) - } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, 
*field), - }) + .map(generate_expression) .collect::>() .join(","); - format!("{N}({A})", N = func, A = formatted_args) + format!( + "{N}({A})", + N = generate_expression(func), + A = formatted_args + ) } fn generate_return(ret: Option) -> String { @@ -349,10 +343,6 @@ fn generate_bin_op(left: Expression, op: BinOp, right: Expression) -> String { BinOp::NotEqual => "!==", BinOp::Or => "||", BinOp::Subtraction => "-", - BinOp::AddAssign => "+=", - BinOp::SubtractAssign => "-=", - BinOp::MultiplyAssign => "*=", - BinOp::DivideAssign => "/=", }; format!( "{l} {op} {r}", @@ -376,18 +366,22 @@ fn generate_struct_initialization( out_str } -fn generate_field_access(expr: Expression, field: Expression) -> String { - format!( - "{}.{}", - generate_expression(expr), - generate_expression(field) - ) +fn generate_field_access(expr: Expression, field: String) -> String { + format!("{}.{}", generate_expression(expr), field) } -fn generate_assign(name: Expression, expr: Expression) -> String { +fn generate_assign(name: Expression, op: AssignOp, expr: Expression) -> String { + let op_str = match op { + AssignOp::Set => "=", + AssignOp::Add => "+=", + AssignOp::Subtract => "-=", + AssignOp::Multiply => "*=", + AssignOp::Divide => "/=", + }; format!( - "{} = {}", + "{} {} {}", generate_expression(name), + op_str, generate_expression(expr) ) } diff --git a/src/generator/qbe.rs b/src/generator/qbe.rs index 05f37af8..70173c3d 100644 --- a/src/generator/qbe.rs +++ b/src/generator/qbe.rs @@ -16,6 +16,7 @@ use super::{Generator, GeneratorResult}; use crate::ast::types::Type; use crate::ast::*; +use crate::util::Either; use std::collections::HashMap; pub struct QbeGenerator { @@ -188,10 +189,8 @@ impl QbeGenerator { func.assign_instr(tmp, ty, qbe::Instr::Copy(result)); } } - Statement::Assign { lhs, rhs } => { - let (_, rhs) = self.generate_expression(func, rhs)?; - // TODO: type check - self.generate_assignment(func, lhs, rhs)?; + Statement::Assign { lhs, op, rhs } => { + self.generate_assignment(func, lhs, *op, Either::Right(rhs))?; } Statement::Return(val) => match val { Some(expr) => { @@ -262,18 +261,23 @@ impl QbeGenerator { Ok((qbe::Type::Word, tmp)) } Expression::Array(elements) => self.generate_array(func, elements), - Expression::FunctionCall { fn_name, args } => { + Expression::FunctionCall { expr, args } => { let mut new_args: Vec<(qbe::Type, qbe::Value)> = Vec::new(); for arg in args.iter() { new_args.push(self.generate_expression(func, arg)?); } + let fn_name = match expr.as_ref() { + Expression::Variable(name) => name.to_owned(), + _ => todo!("methods"), + }; + let tmp = self.new_temporary(); func.assign_instr( tmp.clone(), // TODO: get that type properly qbe::Type::Word, - qbe::Instr::Call(fn_name.clone(), new_args), + qbe::Instr::Call(fn_name, new_args), ); Ok((qbe::Type::Word, tmp)) @@ -431,10 +435,10 @@ impl QbeGenerator { tmp.clone(), ty.clone(), match op { - BinOp::Addition | BinOp::AddAssign => qbe::Instr::Add(lhs_val, rhs_val), - BinOp::Subtraction | BinOp::SubtractAssign => qbe::Instr::Sub(lhs_val, rhs_val), - BinOp::Multiplication | BinOp::MultiplyAssign => qbe::Instr::Mul(lhs_val, rhs_val), - BinOp::Division | BinOp::DivideAssign => qbe::Instr::Div(lhs_val, rhs_val), + BinOp::Addition => qbe::Instr::Add(lhs_val, rhs_val), + BinOp::Subtraction => qbe::Instr::Sub(lhs_val, rhs_val), + BinOp::Multiplication => qbe::Instr::Mul(lhs_val, rhs_val), + BinOp::Division => qbe::Instr::Div(lhs_val, rhs_val), BinOp::Modulus => qbe::Instr::Rem(lhs_val, rhs_val), BinOp::And => qbe::Instr::And(lhs_val, 
rhs_val), @@ -458,19 +462,6 @@ impl QbeGenerator { }, ); - // *Assign BinOps work just like normal ones except that here the - // result is assigned to the left hand side. This essentially makes - // `a += 1` the same as `a = a + 1`. - match op { - BinOp::AddAssign - | BinOp::SubtractAssign - | BinOp::MultiplyAssign - | BinOp::DivideAssign => { - self.generate_assignment(func, lhs, tmp.clone())?; - } - _ => {} - }; - Ok((ty, tmp)) } @@ -480,8 +471,30 @@ impl QbeGenerator { &mut self, func: &mut qbe::Function, lhs: &Expression, - rhs: qbe::Value, + op: AssignOp, + rhs: Either, ) -> GeneratorResult<()> { + if op != AssignOp::Set { + let binop = match op { + AssignOp::Add => BinOp::Addition, + AssignOp::Subtract => BinOp::Subtraction, + AssignOp::Multiply => BinOp::Multiplication, + AssignOp::Divide => BinOp::Division, + _ => unreachable!(), + }; + let rhs = match rhs { + Either::Left(_) => unreachable!(), + Either::Right(expr) => expr, + }; + // Desugar 'a += b' to 'a = a + b' + let (_, new_value) = self.generate_binop(func, lhs, &binop, rhs)?; + return self.generate_assignment(func, lhs, AssignOp::Set, Either::Left(new_value)); + } + + let rhs = match rhs { + Either::Left(qval) => qval, + Either::Right(expr) => self.generate_expression(func, expr)?.1, + }; match lhs { Expression::Variable(name) => { let (vty, tmp) = self.get_var(name)?; @@ -503,7 +516,7 @@ impl QbeGenerator { func.add_instr(qbe::Instr::Store(ty, field_ptr, rhs)); } - Expression::ArrayAccess { name: _, index: _ } => todo!(), + Expression::ArrayAccess { .. } => todo!(), _ => return Err("Left side of an assignment must be either a variable, field access or array access".to_owned()), } @@ -556,7 +569,7 @@ impl QbeGenerator { &mut self, func: &mut qbe::Function, obj: &Expression, - field: &Expression, + field: &str, ) -> GeneratorResult<(qbe::Type, qbe::Value)> { let (src, ty, offset) = self.resolve_field_access(obj, field)?; @@ -581,7 +594,7 @@ impl QbeGenerator { fn resolve_field_access( &mut self, obj: &Expression, - field: &Expression, + field: &str, ) -> GeneratorResult<(qbe::Value, qbe::Type, u64)> { let (ty, src) = match obj { Expression::Variable(var) => self.get_var(var)?.to_owned(), @@ -594,15 +607,6 @@ impl QbeGenerator { )); } }; - let field = match field { - Expression::Variable(v) => v, - Expression::FunctionCall { - fn_name: _, - args: _, - } => unimplemented!("methods"), - // Parser should ensure this won't happen - _ => unreachable!(), - }; // XXX: this is very hacky and inefficient let (name, meta) = self diff --git a/src/parser/rules.rs b/src/parser/rules.rs index b47ea338..131d3e28 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -209,72 +209,36 @@ impl Parser { fn parse_statement(&mut self) -> Result { let token = self.peek()?; - match &token.kind { - TokenKind::CurlyBracesOpen => self.parse_block(), - TokenKind::BraceOpen | TokenKind::Keyword(Keyword::Selff) => { - Ok(Statement::Exp(self.parse_expression()?)) - } - TokenKind::Keyword(Keyword::Let) => self.parse_declare(), - TokenKind::Keyword(Keyword::Return) => self.parse_return(), - TokenKind::Keyword(Keyword::If) => self.parse_conditional_statement(), - TokenKind::Keyword(Keyword::While) => self.parse_while_loop(), - TokenKind::Keyword(Keyword::Break) => self.parse_break(), - TokenKind::Keyword(Keyword::Continue) => self.parse_continue(), - TokenKind::Keyword(Keyword::For) => self.parse_for_loop(), - TokenKind::Keyword(Keyword::Match) => self.parse_match_statement(), - TokenKind::Identifier(_) => { - let ident = self.match_identifier()?; - 
let expr = if self.peek_token(TokenKind::Dot).is_ok() { - self.parse_field_access(Expression::Variable(ident.clone()))? - } else { - Expression::Variable(ident.clone()) - }; - - // TODO: Use match statement - if self.peek_token(TokenKind::BraceOpen).is_ok() { - let state = self.parse_function_call(Some(ident))?; - Ok(Statement::Exp(state)) - } else if self.peek_token(TokenKind::Assign).is_ok() { - let state = self.parse_assignent(Some(expr))?; - Ok(state) - } else if self.peek_token(TokenKind::SquareBraceOpen).is_ok() { - let expr = self.parse_array_access(Some(ident))?; - - let next = self.peek()?; - match next.kind { - TokenKind::Assign => self.parse_assignent(Some(expr)), - _ => Ok(Statement::Exp(expr)), - } - } else if BinOp::try_from(self.peek()?.kind).is_ok() { - // Parse Binary operation - let expr = Expression::Variable(ident); - let state = Statement::Exp(self.parse_bin_op(Some(expr))?); - Ok(state) - } else if self.peek_token(TokenKind::Dot).is_ok() { - Ok(Statement::Exp( - self.parse_field_access(Expression::Variable(ident))?, - )) - } else { - Ok(Statement::Exp(expr)) - } - } - TokenKind::Literal(_) => Ok(Statement::Exp(self.parse_expression()?)), + let expr = match &token.kind { + TokenKind::CurlyBracesOpen => return self.parse_block(), + TokenKind::Keyword(Keyword::Let) => return self.parse_declare(), + TokenKind::Keyword(Keyword::Return) => return self.parse_return(), + TokenKind::Keyword(Keyword::If) => return self.parse_conditional_statement(), + TokenKind::Keyword(Keyword::While) => return self.parse_while_loop(), + TokenKind::Keyword(Keyword::Break) => return self.parse_break(), + TokenKind::Keyword(Keyword::Continue) => return self.parse_continue(), + TokenKind::Keyword(Keyword::For) => return self.parse_for_loop(), + TokenKind::Keyword(Keyword::Match) => return self.parse_match_statement(), + TokenKind::BraceOpen + | TokenKind::Keyword(Keyword::Selff) + | TokenKind::Identifier(_) + | TokenKind::Literal(_) => self.parse_expression()?, TokenKind::Keyword(Keyword::Struct) => { - Err("Struct definitions inside functions are not allowed".to_string()) + return Err("Struct definitions inside functions are not allowed".to_string()) + } + _ => { + return Err(self.make_error_msg(token.pos, "Failed to parse statement".to_string())) } - _ => Err(self.make_error_msg(token.pos, "Failed to parse statement".to_string())), + }; + let suffix = self.peek()?; + if AssignOp::try_from(suffix.kind).is_ok() { + Ok(self.parse_assignment(expr)?) + } else { + Ok(Statement::Exp(expr)) } } - /// Parses a function call from tokens. - /// The name of the function needs to be passed here, because we have already passed it with our cursor. 
- /// If no function name is provided, the next token will be fetched - fn parse_function_call(&mut self, func_name: Option) -> Result { - let fn_name = match func_name { - Some(name) => name, - None => self.next()?.raw, - }; - + fn parse_function_call(&mut self, expr: Expression) -> Result { self.match_token(TokenKind::BraceOpen)?; let mut args = Vec::new(); @@ -306,11 +270,10 @@ impl Parser { } self.match_token(TokenKind::BraceClose)?; - let expr = Expression::FunctionCall { fn_name, args }; - match self.peek()?.kind { - TokenKind::Dot => self.parse_field_access(expr), - _ => Ok(expr), - } + Ok(Expression::FunctionCall { + expr: Box::new(expr), + args, + }) } fn parse_return(&mut self) -> Result { @@ -328,7 +291,8 @@ impl Parser { fn parse_expression(&mut self) -> Result { let token = self.next()?; - let expr = match token.kind { + // TODO: don't mut + let mut expr = match token.kind { // (1 + 2) TokenKind::BraceOpen => { let expr = self.parse_expression()?; @@ -364,17 +328,8 @@ impl Parser { TokenKind::Literal(Value::Str(string)) => Expression::Str(string), // self TokenKind::Keyword(Keyword::Selff) => Expression::Selff, - TokenKind::Identifier(val) => { - let next = self.peek()?; - match &next.kind { - // foo() - TokenKind::BraceOpen => self.parse_function_call(Some(val))?, - // arr[0] - TokenKind::SquareBraceOpen => self.parse_array_access(Some(val))?, - // some_var - _ => Expression::Variable(val), - } - } + // name + TokenKind::Identifier(val) => Expression::Variable(val), // [1, 2, 3] TokenKind::SquareBraceOpen => self.parse_array()?, // new Foo {} @@ -383,41 +338,35 @@ impl Parser { }; // Check if the parsed expression continues - if self.peek_token(TokenKind::Dot).is_ok() { - // foo.bar - self.parse_field_access(expr) - } else if BinOp::try_from(self.peek()?.kind).is_ok() { - // 1 + 2 - self.parse_bin_op(Some(expr)) - } else { - // Nope, the expression was fully parsed - Ok(expr) + loop { + if self.peek_token(TokenKind::Dot).is_ok() { + // foo.bar + expr = self.parse_field_access(expr)?; + } else if self.peek_token(TokenKind::SquareBraceOpen).is_ok() { + // foo[0] + expr = self.parse_array_access(expr)?; + } else if self.peek_token(TokenKind::BraceOpen).is_ok() { + // foo(a, b) + expr = self.parse_function_call(expr)?; + } else if BinOp::try_from(self.peek()?.kind).is_ok() { + // a + b + expr = self.parse_bin_op(expr)?; + } else { + // The expression was fully parsed + return Ok(expr); + } } } fn parse_field_access(&mut self, lhs: Expression) -> Result { self.match_token(TokenKind::Dot)?; - // Only possible options are identifier or function call, - // So it's safe to assume that the next token should be an identifier - let id = self.match_identifier()?; - let next = self.peek()?; - - let field = match next.kind { - TokenKind::BraceOpen => self.parse_function_call(Some(id))?, - _ => Expression::Variable(id), - }; + let field = self.match_identifier()?; let expr = Expression::FieldAccess { expr: Box::new(lhs), - field: Box::new(field), + field, }; - if self.peek_token(TokenKind::Dot).is_ok() { - self.parse_field_access(expr) - } else if BinOp::try_from(self.peek()?.kind).is_ok() { - self.parse_bin_op(Some(expr)) - } else { - Ok(expr) - } + Ok(expr) } /// TODO: Cleanup @@ -489,19 +438,14 @@ impl Parser { Ok(Expression::Array(elements)) } - fn parse_array_access(&mut self, arr_name: Option) -> Result { - let name = match arr_name { - Some(name) => name, - None => self.next()?.raw, - }; - + fn parse_array_access(&mut self, expr: Expression) -> Result { 
self.match_token(TokenKind::SquareBraceOpen)?; - let expr = self.parse_expression()?; + let index = self.parse_expression()?; self.match_token(TokenKind::SquareBraceClose)?; Ok(Expression::ArrayAccess { - name, - index: Box::new(expr), + expr: Box::new(expr), + index: Box::new(index), }) } @@ -641,25 +585,11 @@ impl Parser { /// foo(1) * 2 /// ``` /// In this case, the function call has already been evaluated, and needs to be passed to this function. - fn parse_bin_op(&mut self, lhs: Option) -> Result { - let left = match lhs { - Some(lhs) => lhs, - None => { - let prev = self.prev().ok_or("Expected token")?; - match &prev.kind { - TokenKind::Identifier(_) | TokenKind::Literal(_) | TokenKind::Keyword(_) => { - Ok(Expression::try_from(prev)?) - } - _ => Err(self - .make_error_msg(prev.pos, "Failed to parse binary operation".to_string())), - }? - } - }; - + fn parse_bin_op(&mut self, lhs: Expression) -> Result { let op = self.match_operator()?; Ok(Expression::BinOp { - lhs: Box::from(left), + lhs: Box::from(lhs), op, rhs: Box::from(self.parse_expression()?), }) @@ -697,18 +627,14 @@ impl Parser { } } - fn parse_assignent(&mut self, name: Option) -> Result { - let name = match name { - Some(name) => name, - None => Expression::Variable(self.match_identifier()?), - }; - - self.match_token(TokenKind::Assign)?; + fn parse_assignment(&mut self, lhs: Expression) -> Result { + let op = AssignOp::try_from(self.next()?.kind).unwrap(); let expr = self.parse_expression()?; Ok(Statement::Assign { - lhs: Box::new(name), + lhs: Box::new(lhs), + op, rhs: Box::new(expr), }) } diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 3a6dcefe..9b7af8c2 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -140,6 +140,35 @@ fn test_parse_variable_reassignment() { assert!(tree.is_ok()) } +#[test] +fn test_parse_short_reassignment() { + let raw = " + fn main() { + x += 1 + x -= 1 + x *= 2 + x /= 2 + x.y += 1 + x[0] += 1 + } + "; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_ok()) +} + +#[test] +fn test_parse_disallow_short_reassignment_in_expressions() { + let raw = " + fn main() { + return (x += 1) + 2 + } + "; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_err()) +} + #[test] fn test_parse_variable_declaration_added() { let raw = " From 7f70d6d5e4bc53b7831903172ed930ac5b79399a Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sun, 24 Mar 2024 14:57:03 +0300 Subject: [PATCH 08/13] Split Function and Method in the AST, add function prototypes --- src/ast/mod.rs | 17 ++++++++++++++--- src/generator/c.rs | 18 +++++++++++------- src/generator/js.rs | 20 +++++++++++++------- src/generator/qbe.rs | 19 +++++++++++-------- src/generator/x86.rs | 12 +++++++----- src/parser/rules.rs | 31 +++++++++++++++++++++++-------- src/parser/tests.rs | 18 +++++++++++++++++- 7 files changed, 96 insertions(+), 39 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 059bdd32..1aeae0b3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -39,18 +39,29 @@ impl Module { } #[derive(Debug, Clone)] -pub struct Function { +pub struct Callable { pub name: String, pub arguments: Vec, - pub body: Statement, pub ret_type: Option, } +#[derive(Debug, Clone)] +pub struct Function { + pub callable: Callable, + pub body: Option, +} + #[derive(Debug, Clone)] pub struct StructDef { pub name: String, pub fields: Vec, - pub methods: Vec, + pub methods: Vec, +} + +#[derive(Debug, Clone)] +pub 
struct Method { + pub callable: Callable, + pub body: Statement, } #[derive(Debug, Eq, PartialEq, Clone)] diff --git a/src/generator/c.rs b/src/generator/c.rs index 711e25c5..7017256a 100644 --- a/src/generator/c.rs +++ b/src/generator/c.rs @@ -96,15 +96,19 @@ pub(super) fn generate_type(t: Either>) -> String { fn generate_function(func: Function) -> String { let mut buf = String::new(); buf += &format!("{} ", &generate_function_signature(func.clone())); - if let Statement::Block { statements, scope } = func.body { - buf += &generate_block(statements, scope); - } - + match func.body { + Some(Statement::Block { statements, scope }) => { + buf += &generate_block(statements, scope); + } + Some(_) => unreachable!(), + None => return String::new(), + }; buf } fn generate_function_signature(func: Function) -> String { - let arguments: String = func + let callable = func.callable; + let arguments: String = callable .arguments .into_iter() .map(|var| { @@ -116,8 +120,8 @@ fn generate_function_signature(func: Function) -> String { }) .collect::>() .join(", "); - let t = generate_type(Either::Right(func.ret_type)); - format!("{T} {N}({A})", T = t, N = func.name, A = arguments) + let t = generate_type(Either::Right(callable.ret_type)); + format!("{T} {N}({A})", T = t, N = callable.name, A = arguments) } fn generate_block(block: Vec, _scope: Vec) -> String { diff --git a/src/generator/js.rs b/src/generator/js.rs index 63bcae1f..851ffe2a 100644 --- a/src/generator/js.rs +++ b/src/generator/js.rs @@ -56,24 +56,30 @@ fn generate_arguments(args: Vec) -> String { } fn generate_function(func: Function) -> String { - let arguments: String = generate_arguments(func.arguments); + let callable = func.callable; + let body = match func.body { + Some(body) => body, + None => return String::new(), + }; + let arguments: String = generate_arguments(callable.arguments); - let mut raw = format!("function {N}({A})", N = func.name, A = arguments); + let mut raw = format!("function {N}({A})", N = callable.name, A = arguments); - raw += &generate_block(func.body, None); + raw += &generate_block(body, None); raw += "\n"; raw } -fn generate_method(subject: String, func: Function) -> String { +fn generate_method(subject: String, method: Method) -> String { + let callable = method.callable; let mut buf = format!( "{}.prototype.{} = function({})", subject, - func.name, - generate_arguments(func.arguments) + callable.name, + generate_arguments(callable.arguments) ); - buf += &generate_block(func.body, None); + buf += &generate_block(method.body, None); buf += "\n"; buf diff --git a/src/generator/qbe.rs b/src/generator/qbe.rs index 70173c3d..b66add54 100644 --- a/src/generator/qbe.rs +++ b/src/generator/qbe.rs @@ -65,8 +65,10 @@ impl Generator for QbeGenerator { } for func in &prog.func { - let func = generator.generate_function(func)?; - buf.push_str(&format!("{}\n", func)); + if func.body.is_some() { + let func = generator.generate_function(func)?; + buf.push_str(&format!("{}\n", func)); + } } for def in &generator.typedefs { @@ -113,15 +115,16 @@ impl QbeGenerator { // Function argument scope self.scopes.push(HashMap::new()); + let callable = &func.callable; let mut arguments: Vec<(qbe::Type, qbe::Value)> = Vec::new(); - for arg in &func.arguments { + for arg in &callable.arguments { let ty = self.get_type(&arg.ty)?; let tmp = self.new_var(&ty, &arg.name)?; arguments.push((ty.into_abi(), tmp)); } - let return_ty = if let Some(ty) = &func.ret_type { + let return_ty = if let Some(ty) = &callable.ret_type { 
Some(self.get_type(ty)?.into_abi()) } else { None @@ -129,7 +132,7 @@ impl QbeGenerator { let mut qfunc = qbe::Function { linkage: qbe::Linkage::public(), - name: func.name.clone(), + name: callable.name.clone(), arguments, return_ty, blocks: Vec::new(), @@ -137,7 +140,7 @@ impl QbeGenerator { qfunc.add_block("start".to_owned()); - self.generate_statement(&mut qfunc, &func.body)?; + self.generate_statement(&mut qfunc, func.body.as_ref().unwrap())?; let returns = qfunc.last_block().statements.last().map_or(false, |i| { matches!(i, qbe::Statement::Volatile(qbe::Instr::Ret(_))) @@ -145,12 +148,12 @@ impl QbeGenerator { // Automatically add return in void functions unless it already returns, // non-void functions raise an error if !returns { - if func.ret_type.is_none() { + if callable.ret_type.is_none() { qfunc.add_instr(qbe::Instr::Ret(None)); } else { return Err(format!( "Function '{}' does not return in all code paths", - &func.name + &callable.name )); } } diff --git a/src/generator/x86.rs b/src/generator/x86.rs index 3a618e7a..febe2c97 100644 --- a/src/generator/x86.rs +++ b/src/generator/x86.rs @@ -79,19 +79,21 @@ impl X86Generator { fn gen_function(&mut self, func: Function) -> Assembly { let mut asm = Assembly::new(); + let callable = func.callable; let has_return: bool = match &func.body { - Statement::Block { + Some(Statement::Block { statements, scope: _, - } => statements + }) => statements .iter() .any(|s| matches!(*s, Statement::Return(_))), - _ => panic!("Function body should be of type Block"), + Some(_) => panic!("Function body should be of type Block"), + None => return asm, }; - asm.add(format!(".globl _{}", func.name)); - asm.add(format!("_{}:", func.name)); + asm.add(format!(".globl _{}", callable.name)); + asm.add(format!("_{}:", callable.name)); asm.add("push rbp"); asm.add("mov rbp, rsp"); diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 131d3e28..1f2a2db4 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -65,7 +65,7 @@ impl Parser { let next = self.peek()?; match next.kind { TokenKind::Keyword(Keyword::Function) => { - methods.push(self.parse_function()?); + methods.push(self.parse_method()?); } TokenKind::Identifier(_) => fields.push(self.parse_typed_variable()?), _ => { @@ -138,10 +138,28 @@ impl Parser { Ok(Statement::Block { statements, scope }) } - /// To reduce code duplication, this method can be either be used to parse a function or a method. - /// If a function is parsed, the `fn` keyword is matched. 
- /// If a method is parsed, `fn` will be omitted fn parse_function(&mut self) -> Result { + let callable = self.parse_callable()?; + + let body = match self.peek()?.kind { + TokenKind::SemiColon => { + self.next()?; + None + } + _ => Some(self.parse_block()?), + }; + + Ok(Function { callable, body }) + } + + fn parse_method(&mut self) -> Result { + let callable = self.parse_callable()?; + let body = self.parse_block()?; + + Ok(Method { callable, body }) + } + + fn parse_callable(&mut self) -> Result { self.match_keyword(Keyword::Function)?; let name = self.match_identifier()?; @@ -159,12 +177,9 @@ impl Parser { _ => None, }; - let body = self.parse_block()?; - - Ok(Function { + Ok(Callable { name, arguments, - body, ret_type: ty, }) } diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 9b7af8c2..8c991a1e 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -17,6 +17,22 @@ use crate::ast::types::Type; use crate::lexer::*; use crate::parser::parse; +#[test] +fn test_parse_function_prototype() { + let raw = "fn external();"; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_ok()) +} + +#[test] +fn test_parse_function_prototype_with_return_type() { + let raw = "fn external(): int;"; + let tokens = tokenize(raw).unwrap(); + let tree = parse(tokens, Some(raw.to_string()), "".into()); + assert!(tree.is_ok()) +} + #[test] fn test_parse_empty_function() { let raw = "fn main() {}"; @@ -654,7 +670,7 @@ fn test_function_with_return_type() { let tokens = tokenize(raw).unwrap(); let tree = parse(tokens, Some(raw.to_string()), "".into()); assert!(tree.is_ok()); - assert_eq!(tree.unwrap().func[0].ret_type, Some(Type::Int)); + assert_eq!(tree.unwrap().func[0].callable.ret_type, Some(Type::Int)); } #[test] From e559a8980f1d76230064b897d01d762d68e43734 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Tue, 26 Mar 2024 00:08:23 +0300 Subject: [PATCH 09/13] lexer: Remove Tab and CarriageReturn tokens Those are already handled under Whitespace --- src/lexer/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 044cf026..0e371422 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -120,10 +120,6 @@ pub enum TokenKind { CurlyBracesOpen, /// "}" CurlyBracesClose, - /// "\t" - Tab, - /// "\n" - CarriageReturn, /// Unknown token, not expected by the lexer, e.g. 
"â„–" Unknown, } @@ -335,8 +331,6 @@ impl Cursor<'_> { TokenKind::Keyword(kind) } } - '\n' => CarriageReturn, - '\t' => Tab, _ => Unknown, }; From e7431857df738cdd35adcf478e450d485d8e775d Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Tue, 26 Mar 2024 13:07:43 +0300 Subject: [PATCH 10/13] Refactor error handling - Introduce a FileTable for easily referencing files instead of cloning paths - Attach a Position to errors returned from lexer and parser - Add End token signifying the end of file to provide position when reaching an unexpected EOF - Move util::string_util::highlight_position_in_file to a method of lexer::Error --- src/ast/mod.rs | 7 +- src/builder/mod.rs | 23 +- src/generator/x86.rs | 8 +- src/lexer/cursor.rs | 11 +- src/lexer/file_table.rs | 43 +++ src/lexer/mod.rs | 95 ++++-- src/lexer/tests.rs | 741 +++++++++++++++++++++++++--------------- src/parser/mod.rs | 10 +- src/parser/parser.rs | 89 +++-- src/parser/rules.rs | 144 ++++---- src/parser/tests.rs | 209 +++++------- src/util/mod.rs | 1 - src/util/string_util.rs | 36 -- 13 files changed, 801 insertions(+), 616 deletions(-) create mode 100644 src/lexer/file_table.rs delete mode 100644 src/util/string_util.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1aeae0b3..e72f8609 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,6 +1,3 @@ -use crate::lexer::*; -use core::convert::TryFrom; -use std::collections::HashMap; /** * Copyright 2021 Garrit Franke * @@ -16,6 +13,9 @@ use std::collections::HashMap; * See the License for the specific language governing permissions and * limitations under the License. */ +use crate::lexer::{Keyword, Token, TokenKind, Value}; +use core::convert::TryFrom; +use std::collections::HashMap; use std::collections::HashSet; pub mod types; @@ -23,7 +23,6 @@ use types::Type; #[derive(Debug, Clone)] pub struct Module { - pub path: String, pub imports: HashSet, pub func: Vec, pub structs: Vec, diff --git a/src/builder/mod.rs b/src/builder/mod.rs index 2ed143e0..aa2629d2 100644 --- a/src/builder/mod.rs +++ b/src/builder/mod.rs @@ -26,6 +26,7 @@ use std::io::Write; pub struct Builder { in_file: PathBuf, + files: lexer::FileTable, modules: Vec, } @@ -33,6 +34,7 @@ impl Builder { pub fn new(entrypoint: PathBuf) -> Self { Self { in_file: entrypoint, + files: lexer::FileTable::new(), modules: Vec::new(), } } @@ -89,12 +91,11 @@ impl Builder { file.read_to_string(&mut contents) .expect("Could not read file"); - let tokens = lexer::tokenize(&contents)?; - let module = parser::parse( - tokens, - Some(contents), - resolved_file_path.display().to_string(), - )?; + + let file = self.files.insert(resolved_file_path.clone(), contents); + + let tokens = lexer::tokenize(file, &self.files).map_err(|err| err.format(&self.files))?; + let module = parser::parse(tokens).map_err(|err| err.format(&self.files))?; for import in &module.imports { // Prevent circular imports if seen.contains(import) { @@ -157,11 +158,13 @@ impl Builder { for file in assets { let stdlib_raw = Lib::get(&file).expect("Standard library not found. 
This should not occur."); - let stblib_str = + let stdlib_str = std::str::from_utf8(&stdlib_raw).expect("Could not interpret standard library."); - let stdlib_tokens = lexer::tokenize(stblib_str)?; - let module = parser::parse(stdlib_tokens, Some(stblib_str.into()), file.to_string()) - .expect("Could not parse stdlib"); + let file = self + .files + .insert(format!("std:{}", file).into(), stdlib_str.to_owned()); + let stdlib_tokens = lexer::tokenize(file, &self.files).expect("Could not parse stdlib"); + let module = parser::parse(stdlib_tokens).expect("Could not parse stdlib"); self.modules.push(module); } diff --git a/src/generator/x86.rs b/src/generator/x86.rs index febe2c97..d48fc10e 100644 --- a/src/generator/x86.rs +++ b/src/generator/x86.rs @@ -55,13 +55,7 @@ impl X86Generator { fn gen_program(&mut self, prog: Module) -> Assembly { let mut asm = Assembly::new(); - let Module { - func, - globals, - structs: _, - path: _, - imports: _, - } = prog; + let Module { func, globals, .. } = prog; asm.add(".intel_syntax noprefix"); asm.add(".text"); diff --git a/src/lexer/cursor.rs b/src/lexer/cursor.rs index 1d91bf39..1a48cd3f 100644 --- a/src/lexer/cursor.rs +++ b/src/lexer/cursor.rs @@ -90,18 +90,11 @@ impl<'a> Cursor<'a> { /// Moves to the next character. pub(crate) fn bump(&mut self) -> Option { let c = self.chars.next()?; - // If first token, the position should be set to 0 - match self.pos.raw { - usize::MAX => self.pos.raw = 0, - _ => { - self.pos.raw += 1; - self.pos.offset += 1; - } - } + self.pos.column += 1; if c == '\n' { self.pos.line += 1; - self.pos.offset = 0; + self.pos.column = 0; } #[cfg(debug_assertions)] diff --git a/src/lexer/file_table.rs b/src/lexer/file_table.rs new file mode 100644 index 00000000..aef3d08b --- /dev/null +++ b/src/lexer/file_table.rs @@ -0,0 +1,43 @@ +use std::path::PathBuf; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct FileId { + index: usize, +} + +impl FileId { + pub fn path<'a>(&self, table: &'a FileTable) -> &'a PathBuf { + &self.source_file(table).path + } + + pub fn contents<'a>(&self, table: &'a FileTable) -> &'a String { + &self.source_file(table).contents + } + + fn source_file<'a>(&self, table: &'a FileTable) -> &'a SourceFile { + &table.files[self.index] + } +} + +#[derive(Debug)] +struct SourceFile { + path: PathBuf, + contents: String, +} + +#[derive(Debug)] +pub struct FileTable { + files: Vec, +} + +impl FileTable { + pub fn new() -> FileTable { + FileTable { files: Vec::new() } + } + + pub fn insert(&mut self, path: PathBuf, contents: String) -> FileId { + let index = self.files.len(); + self.files.push(SourceFile { path, contents }); + FileId { index } + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 0e371422..b3240e52 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -23,6 +23,60 @@ use regex::Regex; #[cfg(test)] mod tests; +mod file_table; +pub use file_table::*; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct Position { + pub file: FileId, + pub line: usize, + pub column: usize, +} + +#[derive(Debug)] +pub struct Error { + pub pos: Position, + pub msg: String, +} + +impl Error { + pub fn new(pos: Position, msg: String) -> Error { + Error { pos, msg } + } + + pub fn format(&self, file_table: &FileTable) -> String { + let mut buf = String::new(); + + buf.push_str(&format!( + "{}:{},{}: {}\n", + self.pos.file.path(file_table).to_string_lossy(), + self.pos.line, + self.pos.column, + self.msg + )); + + let file_contents = self.pos.file.contents(file_table); + let line = 
file_contents.lines().nth(self.pos.line - 1).unwrap(); + // TODO: do something better, code can be more than 9999 lines + buf.push_str(&format!("{:>4} | {}\n", self.pos.line, line)); + buf.push_str(" | "); + + buf.push_str( + &line + .chars() + .take(self.pos.column - 1) + .map(|c| if c == '\t' { '\t' } else { ' ' }) + .collect::(), + ); + buf.push_str("^ "); + buf.push_str(&self.msg); + + buf + } +} + +pub type Result = std::result::Result; + #[derive(Debug, PartialEq, Eq, Clone)] pub struct Token { pub kind: TokenKind, @@ -42,16 +96,11 @@ impl Token { } } -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub struct Position { - pub line: usize, - pub offset: usize, - pub raw: usize, -} - /// Enum representing common lexeme types. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum TokenKind { + /// End of file + End, /// Any whitespace characters sequence. Whitespace, Identifier(String), @@ -152,25 +201,27 @@ pub enum Keyword { } /// Creates an iterator that produces tokens from the input string. -pub fn tokenize(mut input: &str) -> Result, String> { +pub fn tokenize(file: FileId, table: &FileTable) -> Result> { let mut pos = Position { - raw: usize::MAX, + file, line: 1, - offset: 0, + column: 0, }; let mut tokens: Vec = Vec::new(); + let mut input = file.contents(table).as_str(); while !input.is_empty() { let token = first_token(input, &mut pos)?; input = &input[token.len..]; tokens.push(token); } - + pos.column += 1; + tokens.push(Token::new(TokenKind::End, 0, String::new(), pos)); Ok(tokens) } /// Parses the first token from the provided input string. -pub fn first_token(input: &str, pos: &mut Position) -> Result { +pub fn first_token(input: &str, pos: &mut Position) -> Result { debug_assert!(!input.is_empty()); Cursor::new(input, pos).advance_token() } @@ -222,7 +273,7 @@ pub fn is_id_continue(c: char) -> bool { impl Cursor<'_> { /// Parses a token from the input string. - fn advance_token(&mut self) -> Result { + fn advance_token(&mut self) -> Result { // Original chars used to identify the token later on let original_chars = self.chars(); // FIXME: Identical value, since it will be used twice and is not clonable later @@ -385,7 +436,7 @@ impl Cursor<'_> { TokenKind::Literal(Value::Int) } - fn string(&mut self, end: char) -> Result { + fn string(&mut self, end: char) -> Result { Ok(TokenKind::Literal(Value::Str(self.eat_string(end)?))) } @@ -496,7 +547,7 @@ impl Cursor<'_> { has_digits } - fn eat_escape(&mut self) -> Result { + fn eat_escape(&mut self) -> Result { let ch = self.first(); let ch = match ch { 'n' => '\n', // Newline @@ -506,7 +557,7 @@ impl Cursor<'_> { 't' => '\t', // Horizontal tab '"' | '\\' => ch, ch => { - return Err(self.make_error_msg(format!("Unknown escape sequence \\{}", ch))); + return Err(self.make_error(format!("Unknown escape sequence \\{}", ch))); } }; self.bump(); @@ -514,11 +565,11 @@ impl Cursor<'_> { Ok(ch) } - fn eat_string(&mut self, end: char) -> Result { + fn eat_string(&mut self, end: char) -> Result { let mut buf = String::new(); loop { match self.first() { - '\n' => return Err(self.make_error_msg("String does not end on same line".into())), + '\n' => return Err(self.make_error("String does not end on same line".into())), '\\' => { self.bump(); buf.push(self.eat_escape()?) 
@@ -537,8 +588,10 @@ impl Cursor<'_> { Ok(buf) } - fn make_error_msg(&self, msg: String) -> String { - let pos = self.pos(); - format!("{}:{}: {}", pos.line, pos.offset, msg) + fn make_error(&self, msg: String) -> Error { + Error { + pos: self.pos(), + msg, + } } } diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs index 8b9f6ce0..8e472267 100644 --- a/src/lexer/tests.rs +++ b/src/lexer/tests.rs @@ -15,330 +15,503 @@ */ use crate::lexer::*; -#[test] -fn test_basic_tokenizing() { - let raw = tokenize("1 = 2").unwrap(); - let mut tokens = raw.into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Literal(Value::Int), - raw: "1".to_owned(), - pos: Position { - raw: 0, - line: 1, - offset: 0 - } - } - ); +fn test_tokenize(input: String, expected: F) +where + F: Fn(FileId) -> Vec, +{ + let mut table = FileTable::new(); + let file = table.insert("".into(), input); + let tokens = tokenize(file, &table).unwrap(); - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Whitespace, - raw: " ".to_owned(), - pos: Position { - raw: 1, - line: 1, - offset: 1 - } - } - ); + assert_eq!(tokens, expected(file)); +} - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Assign, - raw: "=".to_owned(), - pos: Position { - raw: 2, - line: 1, - offset: 2 - } - } - ); +fn test_tokenize_ignoring_whitespace(input: String, expected: F) +where + F: Fn(FileId) -> Vec, +{ + let mut table = FileTable::new(); + let file = table.insert("".into(), input); + let tokens = tokenize(file, &table) + .unwrap() + .into_iter() + .filter(|token| token.kind != TokenKind::Whitespace) + .collect::>(); - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Whitespace, - raw: " ".to_owned(), - pos: Position { - raw: 3, - line: 1, - offset: 3 - } - } - ); + assert_eq!(tokens, expected(file)); +} - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Literal(Value::Int), - raw: "2".to_owned(), - pos: Position { - raw: 4, - line: 1, - offset: 4 - } - } - ); +#[test] +fn test_basic_tokenizing() { + test_tokenize("1 = 2".to_owned(), |file| { + vec![ + Token { + len: 1, + kind: TokenKind::Literal(Value::Int), + raw: "1".to_owned(), + pos: Position { + file, + line: 1, + column: 1, + }, + }, + Token { + len: 1, + kind: TokenKind::Whitespace, + raw: " ".to_owned(), + pos: Position { + file, + line: 1, + column: 2, + }, + }, + Token { + len: 1, + kind: TokenKind::Assign, + raw: "=".to_owned(), + pos: Position { + file, + line: 1, + column: 3, + }, + }, + Token { + len: 1, + kind: TokenKind::Whitespace, + raw: " ".to_owned(), + pos: Position { + file, + line: 1, + column: 4, + }, + }, + Token { + len: 1, + kind: TokenKind::Literal(Value::Int), + raw: "2".to_owned(), + pos: Position { + file, + line: 1, + column: 5, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 6, + }, + }, + ] + }); } #[test] fn test_tokenizing_without_whitespace() { - let mut tokens = tokenize("1=2").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Literal(Value::Int), - raw: "1".to_owned(), - pos: Position { - raw: 0, - line: 1, - offset: 0 - } - } - ); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: TokenKind::Assign, - raw: "=".to_owned(), - pos: Position { - raw: 1, - line: 1, - offset: 1 - } - } - ); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 1, - kind: 
TokenKind::Literal(Value::Int), - raw: "2".to_owned(), - pos: Position { - raw: 2, - line: 1, - offset: 2 - } - } - ); + test_tokenize("1=2".to_owned(), |file| { + vec![ + Token { + len: 1, + kind: TokenKind::Literal(Value::Int), + raw: "1".to_owned(), + pos: Position { + file, + line: 1, + column: 1, + }, + }, + Token { + len: 1, + kind: TokenKind::Assign, + raw: "=".to_owned(), + pos: Position { + file, + line: 1, + column: 2, + }, + }, + Token { + len: 1, + kind: TokenKind::Literal(Value::Int), + raw: "2".to_owned(), + pos: Position { + file, + line: 1, + column: 3, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 4, + }, + }, + ] + }); } #[test] fn test_string() { - let mut tokens = tokenize("'aaa' \"bbb\"").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 5, - kind: TokenKind::Literal(Value::Str("aaa".into())), - raw: "'aaa'".to_owned(), - pos: Position { - raw: 4, - line: 1, - offset: 4 - } - } - ); - - assert_eq!( - tokens.nth(1).unwrap(), - Token { - len: 5, - kind: TokenKind::Literal(Value::Str("bbb".into())), - raw: "\"bbb\"".to_owned(), - pos: Position { - raw: 10, - line: 1, - offset: 10 - } - } - ); + test_tokenize_ignoring_whitespace("'aaa' \"bbb\"".to_owned(), |file| { + vec![ + Token { + len: 5, + kind: TokenKind::Literal(Value::Str("aaa".into())), + raw: "'aaa'".to_owned(), + pos: Position { + file, + line: 1, + column: 5, + }, + }, + Token { + len: 5, + kind: TokenKind::Literal(Value::Str("bbb".into())), + raw: "\"bbb\"".to_owned(), + pos: Position { + file, + line: 1, + column: 11, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 12, + }, + }, + ] + }); } #[test] fn test_string_markers_within_string() { - let mut tokens = tokenize("'\"aaa' \"'bbb\"").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 6, - kind: TokenKind::Literal(Value::Str("\"aaa".into())), - raw: "'\"aaa'".to_owned(), - pos: Position { - raw: 5, - line: 1, - offset: 5 - } - } - ); - - assert_eq!( - tokens.nth(1).unwrap(), - Token { - len: 6, - kind: TokenKind::Literal(Value::Str("'bbb".into())), - raw: "\"'bbb\"".to_owned(), - pos: Position { - raw: 12, - line: 1, - offset: 12 - } - } - ); + test_tokenize_ignoring_whitespace("'\"aaa' \"'bbb\"".to_owned(), |file| { + vec![ + Token { + len: 6, + kind: TokenKind::Literal(Value::Str("\"aaa".into())), + raw: "'\"aaa'".to_owned(), + pos: Position { + file, + line: 1, + column: 6, + }, + }, + Token { + len: 6, + kind: TokenKind::Literal(Value::Str("'bbb".into())), + raw: "\"'bbb\"".to_owned(), + pos: Position { + file, + line: 1, + column: 13, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 14, + }, + }, + ] + }); } #[test] fn test_numbers() { - let mut tokens = tokenize("42").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 2, - kind: TokenKind::Literal(Value::Int), - raw: "42".to_owned(), - pos: Position { - raw: 1, - line: 1, - offset: 1 - } - } - ); + test_tokenize("42".to_owned(), |file| { + vec![ + Token { + len: 2, + kind: TokenKind::Literal(Value::Int), + raw: "42".to_owned(), + pos: Position { + file, + line: 1, + column: 2, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 3, + }, + }, + ] + }); } #[test] fn test_binary_numbers() { - let mut tokens = 
tokenize("0b101010").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 8, - kind: TokenKind::Literal(Value::Int), - raw: "0b101010".to_owned(), - pos: Position { - raw: 7, - line: 1, - offset: 7 - } - } - ); + test_tokenize("0b101010".to_owned(), |file| { + vec![ + Token { + len: 8, + kind: TokenKind::Literal(Value::Int), + raw: "0b101010".to_owned(), + pos: Position { + file, + line: 1, + column: 8, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 9, + }, + }, + ] + }); } #[test] fn test_octal_numbers() { - let mut tokens = tokenize("0o52").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 4, - kind: TokenKind::Literal(Value::Int), - raw: "0o52".to_owned(), - pos: Position { - raw: 3, - line: 1, - offset: 3 - } - } - ); + test_tokenize("0o52".to_owned(), |file| { + vec![ + Token { + len: 4, + kind: TokenKind::Literal(Value::Int), + raw: "0o52".to_owned(), + pos: Position { + file, + line: 1, + column: 4, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 5, + }, + }, + ] + }); } #[test] fn test_hex_numbers() { - let mut tokens = tokenize("0x2A").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 4, - kind: TokenKind::Literal(Value::Int), - raw: "0x2A".to_owned(), - pos: Position { - raw: 3, - line: 1, - offset: 3 - } - } - ); + test_tokenize("0x2A".to_owned(), |file| { + vec![ + Token { + len: 4, + kind: TokenKind::Literal(Value::Int), + raw: "0x2A".to_owned(), + pos: Position { + file, + line: 1, + column: 4, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 5, + }, + }, + ] + }); } #[test] fn test_functions() { - let mut tokens = tokenize("fn fib() {}").unwrap().into_iter(); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 2, - kind: TokenKind::Keyword(Keyword::Function), - raw: "fn".to_owned(), - pos: Position { - raw: 1, - line: 1, - offset: 1 - } - } - ); + test_tokenize_ignoring_whitespace("fn fib() {}".to_owned(), |file| { + vec![ + Token { + len: 2, + kind: TokenKind::Keyword(Keyword::Function), + raw: "fn".to_owned(), + pos: Position { + file, + line: 1, + column: 2, + }, + }, + Token { + len: 3, + kind: TokenKind::Identifier("fib".into()), + raw: "fib".to_owned(), + pos: Position { + file, + line: 1, + column: 6, + }, + }, + Token { + len: 1, + kind: TokenKind::BraceOpen, + raw: "(".to_owned(), + pos: Position { + file, + line: 1, + column: 7, + }, + }, + Token { + len: 1, + kind: TokenKind::BraceClose, + raw: ")".to_owned(), + pos: Position { + file, + line: 1, + column: 8, + }, + }, + Token { + len: 1, + kind: TokenKind::CurlyBracesOpen, + raw: "{".to_owned(), + pos: Position { + file, + line: 1, + column: 10, + }, + }, + Token { + len: 1, + kind: TokenKind::CurlyBracesClose, + raw: "}".to_owned(), + pos: Position { + file, + line: 1, + column: 11, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 1, + column: 12, + }, + }, + ] + }); } #[test] fn test_comments() { - let mut tokens = tokenize( + test_tokenize_ignoring_whitespace( "// foo -fn fib() {} - ", - ) - .unwrap() - .into_iter() - .filter(|t| { - t.kind != TokenKind::Whitespace - && t.kind != TokenKind::Tab - && t.kind != TokenKind::CarriageReturn - }); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 6, - kind: TokenKind::Comment, - 
raw: "// foo".to_owned(), - pos: Position { - raw: 5, - line: 1, - offset: 5 - } - } - ); - - assert_eq!( - tokens.next().unwrap(), - Token { - len: 2, - kind: TokenKind::Keyword(Keyword::Function), - raw: "fn".to_owned(), - pos: Position { - raw: 8, - line: 2, - offset: 2 - } - } +fn fib() {}" + .to_owned(), + |file| { + vec![ + Token { + len: 6, + kind: TokenKind::Comment, + raw: "// foo".to_owned(), + pos: Position { + file, + line: 1, + column: 6, + }, + }, + Token { + len: 2, + kind: TokenKind::Keyword(Keyword::Function), + raw: "fn".to_owned(), + pos: Position { + file, + line: 2, + column: 2, + }, + }, + Token { + len: 3, + kind: TokenKind::Identifier("fib".into()), + raw: "fib".to_owned(), + pos: Position { + file, + line: 2, + column: 6, + }, + }, + Token { + len: 1, + kind: TokenKind::BraceOpen, + raw: "(".to_owned(), + pos: Position { + file, + line: 2, + column: 7, + }, + }, + Token { + len: 1, + kind: TokenKind::BraceClose, + raw: ")".to_owned(), + pos: Position { + file, + line: 2, + column: 8, + }, + }, + Token { + len: 1, + kind: TokenKind::CurlyBracesOpen, + raw: "{".to_owned(), + pos: Position { + file, + line: 2, + column: 10, + }, + }, + Token { + len: 1, + kind: TokenKind::CurlyBracesClose, + raw: "}".to_owned(), + pos: Position { + file, + line: 2, + column: 11, + }, + }, + Token { + len: 0, + kind: TokenKind::End, + raw: "".to_owned(), + pos: Position { + file, + line: 2, + column: 12, + }, + }, + ] + }, ); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 42c23056..371aa27e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18,11 +18,15 @@ mod parser; mod rules; use crate::ast::Module; -use crate::lexer::Token; +use crate::lexer::{self, Token}; #[cfg(test)] mod tests; -pub fn parse(tokens: Vec, raw: Option, path: String) -> Result { - let mut parser = parser::Parser::new(tokens, raw, path); +pub type Error = lexer::Error; + +pub type Result = lexer::Result; + +pub fn parse(tokens: Vec) -> Result { + let mut parser = parser::Parser::new(tokens); parser.parse() } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 37e053ed..157c99dc 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -13,47 +13,38 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +use super::{Error, Result}; use crate::ast::*; -use crate::lexer::Keyword; -use crate::lexer::Position; -use crate::lexer::{Token, TokenKind}; -use crate::util::string_util::highlight_position_in_file; +use crate::lexer::{Keyword, Token, TokenKind}; use std::convert::TryFrom; use std::iter::Peekable; use std::vec::IntoIter; pub struct Parser { - pub path: String, tokens: Peekable>, peeked: Vec, current: Option, - prev: Option, - raw: Option, } impl Parser { #[allow(clippy::needless_collect)] // TODO - pub fn new(tokens: Vec, raw: Option, file_name: String) -> Parser { + pub fn new(tokens: Vec) -> Parser { let tokens_without_whitespace: Vec = tokens .into_iter() .filter(|token| token.kind != TokenKind::Whitespace && token.kind != TokenKind::Comment) .collect(); Parser { - path: file_name, tokens: tokens_without_whitespace.into_iter().peekable(), peeked: vec![], current: None, - prev: None, - raw, } } - pub fn parse(&mut self) -> Result { + pub fn parse(&mut self) -> Result { self.parse_module() } - pub(super) fn next(&mut self) -> Result { - self.prev = self.current.to_owned(); + pub(super) fn next(&mut self) -> Result { let item = if self.peeked.is_empty() { self.tokens.next() } else { @@ -61,10 +52,21 @@ impl Parser { }; self.current = item.to_owned(); - item.ok_or_else(|| "Expected token".into()) + match item { + Some(Token { + kind: TokenKind::End, + pos, + .. + }) => Err(Error::new( + pos, + "Expected token, found End of file".to_owned(), + )), + Some(token) => Ok(token), + None => unreachable!(), + } } - pub(super) fn peek(&mut self) -> Result { + pub(super) fn peek(&mut self) -> Result { let token = self.next()?; self.push(token.to_owned()); Ok(token) @@ -75,24 +77,34 @@ impl Parser { } pub(super) fn has_more(&mut self) -> bool { - !self.peeked.is_empty() || self.tokens.peek().is_some() + if !self.peeked.is_empty() { + return true; + } + match self.tokens.peek() { + None => false, + Some(Token { + kind: TokenKind::End, + .. + }) => false, + Some(_) => true, + } } - pub(super) fn match_token(&mut self, token_kind: TokenKind) -> Result { + pub(super) fn match_token(&mut self, token_kind: TokenKind) -> Result { match self.next()? { token if token.kind == token_kind => Ok(token), other => Err(self.make_error(token_kind, other)), } } - pub(super) fn peek_token(&mut self, token_kind: TokenKind) -> Result { + pub(super) fn peek_token(&mut self, token_kind: TokenKind) -> Result { match self.peek()? 
{ token if token.kind == token_kind => Ok(token), other => Err(self.make_error(token_kind, other)), } } - pub(super) fn match_keyword(&mut self, keyword: Keyword) -> Result<(), String> { + pub(super) fn match_keyword(&mut self, keyword: Keyword) -> Result<()> { let token = self.next()?; match &token.kind { TokenKind::Keyword(ref k) if k == &keyword => Ok(()), @@ -100,41 +112,26 @@ impl Parser { } } - pub(super) fn match_operator(&mut self) -> Result { - BinOp::try_from(self.next()?.kind) + pub(super) fn match_operator(&mut self) -> Result { + let token = self.next()?; + BinOp::try_from(token.kind.clone()).map_err(|err| Error::new(token.pos, err)) } - pub(super) fn match_identifier(&mut self) -> Result { + + pub(super) fn match_identifier(&mut self) -> Result { let token = self.next()?; match &token.kind { TokenKind::Identifier(n) => Ok(n.to_string()), - other => { - Err(self - .make_error_msg(token.pos, format!("Expected Identifier, found {:?}", other))) - } + other => Err(Error::new( + token.pos, + format!("Expected Identifier, found {:?}", other), + )), } } - pub(super) fn make_error(&mut self, token_kind: TokenKind, other: Token) -> String { - self.make_error_msg( + pub(super) fn make_error(&mut self, token_kind: TokenKind, other: Token) -> Error { + Error::new( other.pos, format!("Token {:?} not found, found {:?}", token_kind, other), ) } - - pub(super) fn make_error_msg(&mut self, pos: Position, msg: String) -> String { - match &self.raw { - Some(raw_file) => format!( - "{}:{}: {}\n{}", - pos.line, - pos.offset, - msg, - highlight_position_in_file(raw_file.to_string(), pos) - ), - None => format!("{}:{}: {}", pos.line, pos.offset, msg), - } - } - - pub(super) fn prev(&mut self) -> Option { - self.prev.clone() - } } diff --git a/src/parser/rules.rs b/src/parser/rules.rs index 1f2a2db4..bce88018 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -1,9 +1,4 @@ use super::parser::Parser; -use crate::ast::types::Type; -use crate::ast::*; -use crate::lexer::Keyword; -use crate::lexer::{TokenKind, Value}; -use std::collections::HashMap; /** * Copyright 2020 Garrit Franke * @@ -19,11 +14,16 @@ use std::collections::HashMap; * See the License for the specific language governing permissions and * limitations under the License. */ +use super::{Error, Result}; +use crate::ast::types::Type; +use crate::ast::*; +use crate::lexer::{Keyword, TokenKind, Value}; +use std::collections::HashMap; use std::collections::HashSet; use std::convert::TryFrom; impl Parser { - pub fn parse_module(&mut self) -> Result { + pub fn parse_module(&mut self) -> Result { let mut functions = Vec::new(); let mut structs = Vec::new(); let mut imports = HashSet::new(); @@ -39,7 +39,7 @@ impl Parser { TokenKind::Keyword(Keyword::Struct) => { structs.push(self.parse_struct_definition()?) 
} - _ => return Err(format!("Unexpected token: {}", next.raw)), + _ => return Err(Error::new(next.pos, "Unexpected token".to_owned())), } } @@ -49,12 +49,11 @@ impl Parser { func: functions, structs, globals, - path: self.path.clone(), imports, }) } - fn parse_struct_definition(&mut self) -> Result { + fn parse_struct_definition(&mut self) -> Result { self.match_keyword(Keyword::Struct)?; let name = self.match_identifier()?; @@ -69,9 +68,10 @@ impl Parser { } TokenKind::Identifier(_) => fields.push(self.parse_typed_variable()?), _ => { - return Err( - self.make_error_msg(next.pos, "Expected struct field or method".into()) - ) + return Err(Error::new( + next.pos, + "Expected struct field or method".into(), + )) } } } @@ -83,7 +83,7 @@ impl Parser { }) } - fn parse_typed_variable_list(&mut self) -> Result, String> { + fn parse_typed_variable_list(&mut self) -> Result> { let mut args = Vec::new(); // If there is an argument @@ -101,7 +101,7 @@ impl Parser { Ok(args) } - fn parse_typed_variable(&mut self) -> Result { + fn parse_typed_variable(&mut self) -> Result { let next = self.next()?; if let TokenKind::Identifier(name) = next.kind { return Ok(TypedVariable { @@ -110,10 +110,13 @@ impl Parser { }); } - Err(format!("Argument could not be parsed: {}", next.raw)) + Err(Error::new( + next.pos, + "Argument could not be parsed".to_owned(), + )) } - fn parse_block(&mut self) -> Result { + fn parse_block(&mut self) -> Result { self.match_token(TokenKind::CurlyBracesOpen)?; let mut statements = vec![]; @@ -138,7 +141,7 @@ impl Parser { Ok(Statement::Block { statements, scope }) } - fn parse_function(&mut self) -> Result { + fn parse_function(&mut self) -> Result { let callable = self.parse_callable()?; let body = match self.peek()?.kind { @@ -152,14 +155,14 @@ impl Parser { Ok(Function { callable, body }) } - fn parse_method(&mut self) -> Result { + fn parse_method(&mut self) -> Result { let callable = self.parse_callable()?; let body = self.parse_block()?; Ok(Method { callable, body }) } - fn parse_callable(&mut self) -> Result { + fn parse_callable(&mut self) -> Result { self.match_keyword(Keyword::Function)?; let name = self.match_identifier()?; @@ -184,27 +187,28 @@ impl Parser { }) } - fn parse_import(&mut self) -> Result { + fn parse_import(&mut self) -> Result { self.match_keyword(Keyword::Import)?; let token = self.next()?; let path = match token.kind { TokenKind::Literal(Value::Str(path)) => path, other => { - return Err( - self.make_error_msg(token.pos, format!("Expected string, got {:?}", other)) - ) + return Err(Error::new( + token.pos, + format!("Expected string, got {:?}", other), + )) } }; Ok(path) } - fn parse_type(&mut self) -> Result { + fn parse_type(&mut self) -> Result { self.match_token(TokenKind::Colon)?; let next = self.peek()?; let typ = match next.kind { - TokenKind::Identifier(_) => Type::try_from(self.next()?.raw), - _ => Err("Expected type".into()), + TokenKind::Identifier(_) => Ok(Type::try_from(self.next()?.raw).unwrap()), + _ => Err(Error::new(next.pos, "Expected type".to_owned())), }?; if self.peek_token(TokenKind::SquareBraceOpen).is_ok() { self.match_token(TokenKind::SquareBraceOpen)?; @@ -222,7 +226,7 @@ impl Parser { } } - fn parse_statement(&mut self) -> Result { + fn parse_statement(&mut self) -> Result { let token = self.peek()?; let expr = match &token.kind { TokenKind::CurlyBracesOpen => return self.parse_block(), @@ -239,10 +243,16 @@ impl Parser { | TokenKind::Identifier(_) | TokenKind::Literal(_) => self.parse_expression()?, 
TokenKind::Keyword(Keyword::Struct) => { - return Err("Struct definitions inside functions are not allowed".to_string()) + return Err(Error::new( + token.pos, + "Struct definitions inside functions are not allowed".to_owned(), + )) } _ => { - return Err(self.make_error_msg(token.pos, "Failed to parse statement".to_string())) + return Err(Error::new( + token.pos, + "Failed to parse statement".to_owned(), + )) } }; let suffix = self.peek()?; @@ -253,7 +263,7 @@ impl Parser { } } - fn parse_function_call(&mut self, expr: Expression) -> Result { + fn parse_function_call(&mut self, expr: Expression) -> Result { self.match_token(TokenKind::BraceOpen)?; let mut args = Vec::new(); @@ -291,7 +301,7 @@ impl Parser { }) } - fn parse_return(&mut self) -> Result { + fn parse_return(&mut self) -> Result { self.match_keyword(Keyword::Return)?; let peeked = self.peek()?; match peeked.kind { @@ -303,7 +313,7 @@ impl Parser { } } - fn parse_expression(&mut self) -> Result { + fn parse_expression(&mut self) -> Result { let token = self.next()?; // TODO: don't mut @@ -315,9 +325,12 @@ impl Parser { expr } // true | false - TokenKind::Keyword(Keyword::Boolean) => { - Expression::Bool(token.raw.parse::().map_err(|e| e.to_string())?) - } + TokenKind::Keyword(Keyword::Boolean) => Expression::Bool( + token + .raw + .parse::() + .map_err(|e| Error::new(token.pos, e.to_string()))?, + ), // 5 TokenKind::Literal(Value::Int) => { // Ignore spacing character (E.g. 1_000_000) @@ -325,18 +338,16 @@ impl Parser { let val = match clean_str { c if c.starts_with("0b") => { usize::from_str_radix(token.raw.trim_start_matches("0b"), 2) - .map_err(|e| e.to_string())? } c if c.starts_with("0o") => { usize::from_str_radix(token.raw.trim_start_matches("0o"), 8) - .map_err(|e| e.to_string())? } c if c.starts_with("0x") => { usize::from_str_radix(token.raw.trim_start_matches("0x"), 16) - .map_err(|e| e.to_string())? 
} - c => c.parse::().map_err(|e| e.to_string())?, - }; + c => c.parse::(), + } + .map_err(|e| Error::new(token.pos, e.to_string()))?; Expression::Int(val) } // "A string" @@ -349,7 +360,12 @@ impl Parser { TokenKind::SquareBraceOpen => self.parse_array()?, // new Foo {} TokenKind::Keyword(Keyword::New) => self.parse_struct_initialization()?, - other => return Err(format!("Expected Expression, found {:?}", other)), + other => { + return Err(Error::new( + token.pos, + format!("Expected Expression, found {:?}", other), + )) + } }; // Check if the parsed expression continues @@ -373,7 +389,7 @@ impl Parser { } } - fn parse_field_access(&mut self, lhs: Expression) -> Result { + fn parse_field_access(&mut self, lhs: Expression) -> Result { self.match_token(TokenKind::Dot)?; let field = self.match_identifier()?; @@ -385,7 +401,7 @@ impl Parser { } /// TODO: Cleanup - fn parse_struct_initialization(&mut self) -> Result { + fn parse_struct_initialization(&mut self) -> Result { let name = self.match_identifier()?; self.match_token(TokenKind::CurlyBracesOpen)?; let fields = self.parse_struct_fields()?; @@ -394,7 +410,7 @@ impl Parser { Ok(Expression::StructInitialization { name, fields }) } - fn parse_struct_fields(&mut self) -> Result>, String> { + fn parse_struct_fields(&mut self) -> Result>> { let mut map = HashMap::new(); // If there is a field @@ -413,17 +429,20 @@ impl Parser { Ok(map) } - fn parse_struct_field(&mut self) -> Result<(String, Box), String> { + fn parse_struct_field(&mut self) -> Result<(String, Box)> { let next = self.next()?; if let TokenKind::Identifier(name) = next.kind { self.match_token(TokenKind::Colon)?; return Ok((name, Box::new(self.parse_expression()?))); } - Err(format!("Struct field could not be parsed: {}", next.raw)) + Err(Error::new( + next.pos, + format!("Struct field could not be parsed: {}", next.raw), + )) } - fn parse_array(&mut self) -> Result { + fn parse_array(&mut self) -> Result { let mut elements = Vec::new(); loop { let next = self.peek()?; @@ -434,7 +453,7 @@ impl Parser { .next()? 
.raw .parse::() - .map_err(|e| e.to_string())?; + .map_err(|e| Error::new(next.pos, e.to_string()))?; elements.push(Expression::Int(value)); } _ => { @@ -453,7 +472,7 @@ impl Parser { Ok(Expression::Array(elements)) } - fn parse_array_access(&mut self, expr: Expression) -> Result { + fn parse_array_access(&mut self, expr: Expression) -> Result { self.match_token(TokenKind::SquareBraceOpen)?; let index = self.parse_expression()?; self.match_token(TokenKind::SquareBraceClose)?; @@ -464,7 +483,7 @@ impl Parser { }) } - fn parse_while_loop(&mut self) -> Result { + fn parse_while_loop(&mut self) -> Result { self.match_keyword(Keyword::While)?; let condition = self.parse_expression()?; let body = self.parse_block()?; @@ -475,17 +494,17 @@ impl Parser { }) } - fn parse_break(&mut self) -> Result { + fn parse_break(&mut self) -> Result { self.match_keyword(Keyword::Break)?; Ok(Statement::Break) } - fn parse_continue(&mut self) -> Result { + fn parse_continue(&mut self) -> Result { self.match_keyword(Keyword::Continue)?; Ok(Statement::Continue) } - fn parse_for_loop(&mut self) -> Result { + fn parse_for_loop(&mut self) -> Result { self.match_keyword(Keyword::For)?; let ident = self.match_identifier()?; @@ -508,7 +527,7 @@ impl Parser { }) } - fn parse_match_statement(&mut self) -> Result { + fn parse_match_statement(&mut self) -> Result { self.match_keyword(Keyword::Match)?; let subject = self.parse_expression()?; self.match_token(TokenKind::CurlyBracesOpen)?; @@ -524,7 +543,7 @@ impl Parser { | TokenKind::Keyword(Keyword::Boolean) => arms.push(self.parse_match_arm()?), TokenKind::Keyword(Keyword::Else) => { if has_else { - return Err(self.make_error_msg( + return Err(Error::new( next.pos, "Multiple else arms are not allowed".to_string(), )); @@ -533,14 +552,14 @@ impl Parser { arms.push(self.parse_match_arm()?); } TokenKind::CurlyBracesClose => break, - _ => return Err(self.make_error_msg(next.pos, "Illegal token".to_string())), + _ => return Err(Error::new(next.pos, "Illegal token".to_string())), } } self.match_token(TokenKind::CurlyBracesClose)?; Ok(Statement::Match { subject, arms }) } - fn parse_match_arm(&mut self) -> Result { + fn parse_match_arm(&mut self) -> Result { let next = self.peek()?; match next.kind { @@ -559,7 +578,7 @@ impl Parser { } } - fn parse_conditional_statement(&mut self) -> Result { + fn parse_conditional_statement(&mut self) -> Result { self.match_keyword(Keyword::If)?; let condition = self.parse_expression()?; @@ -600,7 +619,7 @@ impl Parser { /// foo(1) * 2 /// ``` /// In this case, the function call has already been evaluated, and needs to be passed to this function. 
- fn parse_bin_op(&mut self, lhs: Expression) -> Result { + fn parse_bin_op(&mut self, lhs: Expression) -> Result { let op = self.match_operator()?; Ok(Expression::BinOp { @@ -610,7 +629,7 @@ impl Parser { }) } - fn parse_declare(&mut self) -> Result { + fn parse_declare(&mut self) -> Result { self.match_keyword(Keyword::Let)?; let name = self.match_identifier()?; let token = self.peek()?; @@ -618,8 +637,7 @@ impl Parser { TokenKind::Colon => Some(self.parse_type()?), TokenKind::Assign => None, _ => { - // FIXME: context for this error message is not ideal - return Err(self.make_error_msg( + return Err(Error::new( token.pos, format!("Expected ':' or '=', found {:?}", token.kind), )); @@ -642,7 +660,7 @@ impl Parser { } } - fn parse_assignment(&mut self, lhs: Expression) -> Result { + fn parse_assignment(&mut self, lhs: Expression) -> Result { let op = AssignOp::try_from(self.next()?.kind).unwrap(); let expr = self.parse_expression()?; diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 8c991a1e..3d760dca 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -1,4 +1,3 @@ -use crate::ast::types::Type; /** * Copyright 2020 Garrit Franke * @@ -14,30 +13,32 @@ use crate::ast::types::Type; * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::lexer::*; -use crate::parser::parse; +use crate::ast::{types::Type, Module}; +use crate::lexer::{tokenize, FileTable}; +use crate::parser::{parse, Result}; + +fn test_parse(source: String) -> Result { + let mut table = FileTable::new(); + let file = table.insert("".into(), source); + let tokens = tokenize(file, &table)?; + parse(tokens) +} #[test] fn test_parse_function_prototype() { - let raw = "fn external();"; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse("fn external();".to_owned()); assert!(tree.is_ok()) } #[test] fn test_parse_function_prototype_with_return_type() { - let raw = "fn external(): int;"; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse("fn external(): int;".to_owned()); assert!(tree.is_ok()) } #[test] fn test_parse_empty_function() { - let raw = "fn main() {}"; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse("fn main() {}".to_owned()); assert!(tree.is_ok()) } @@ -48,8 +49,7 @@ fn test_parse_function_with_return() { return 1 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -60,8 +60,7 @@ fn test_parse_function_with_void_return() { return; } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -72,8 +71,7 @@ fn test_parse_redundant_semicolon() { return 1; } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()) } @@ -82,8 +80,7 @@ fn test_parse_no_function_context() { let raw = " let x = 1 "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()) } @@ -100,8 +97,7 @@ fn test_parse_multiple_functions() { return y } "; - let tokens = tokenize(raw).unwrap(); - let tree = 
parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -113,8 +109,7 @@ fn test_parse_variable_declaration() { return x } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -125,8 +120,7 @@ fn test_parse_variable_uninitialized() { let x: int } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -137,8 +131,7 @@ fn test_parse_variable_disallow_untyped_uninitialized() { let x } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()); } @@ -151,8 +144,7 @@ fn test_parse_variable_reassignment() { return x } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -168,8 +160,7 @@ fn test_parse_short_reassignment() { x[0] += 1 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -180,8 +171,7 @@ fn test_parse_disallow_short_reassignment_in_expressions() { return (x += 1) + 2 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()) } @@ -194,8 +184,8 @@ fn test_parse_variable_declaration_added() { return x + y } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -206,8 +196,7 @@ fn test_parse_function_with_args() { return foo } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -222,8 +211,7 @@ fn test_parse_function_call() { foo(2) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -238,8 +226,7 @@ fn test_parse_return_function_call() { return fib(2) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -254,8 +241,7 @@ fn test_parse_function_call_multiple_arguments() { return 2 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -270,8 +256,7 @@ fn test_parse_nexted_function_call() { return 2 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -282,8 +267,7 @@ fn test_parse_basic_ops() { return 2 * 5 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -294,8 +278,7 @@ fn test_parse_compound_ops() { 2 * 5 / 3 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -306,8 +289,7 @@ fn 
test_parse_compound_ops_with_function_call() { return 2 * fib(1) / 3 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -318,8 +300,7 @@ fn test_parse_compound_ops_with_strings() { return 2 * \"Hello\" } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -330,8 +311,7 @@ fn test_parse_compound_ops_with_identifier() { return 2 * n } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -342,8 +322,7 @@ fn test_parse_compound_ops_with_identifier_first() { return n * 2 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -354,8 +333,7 @@ fn test_parse_compound_ops_return() { return 2 * n } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -368,8 +346,7 @@ fn test_parse_basic_conditional() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -383,8 +360,7 @@ fn test_parse_basic_conditional_with_multiple_statements() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -400,8 +376,7 @@ fn test_parse_conditional_else_if_branch() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -419,8 +394,7 @@ fn test_parse_conditional_multiple_else_if_branch_branches() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -436,8 +410,7 @@ fn test_parse_conditional_else_branch() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -457,8 +430,7 @@ fn test_parse_conditional_elseif_else_branch() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -470,8 +442,7 @@ fn test_int_array() { return arr } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -482,8 +453,7 @@ fn test_string_array() { return [\"Foo\", \"Bar\", \"Baz\"] } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -497,8 +467,7 @@ fn test_basic_while_loop() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -512,8 +481,7 @@ fn test_while_loop_boolean_expression() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), 
"".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -529,8 +497,7 @@ fn test_boolean_arithmetic() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -548,8 +515,7 @@ fn test_array_access_in_loop() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -562,8 +528,7 @@ fn test_array_access_standalone() { x[0] } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -577,8 +542,7 @@ fn test_array_access_assignment() { return x } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -593,8 +557,7 @@ fn test_array_access_in_if() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -605,8 +568,7 @@ fn test_function_call_math() { main(m - 1) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -617,8 +579,7 @@ fn test_function_multiple_args() { main(m, n) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -629,8 +590,7 @@ fn test_array_position_assignment() { new_arr[i] = 1 } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -643,8 +603,7 @@ fn test_typed_declare() { let z: bool = false } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()) } @@ -655,8 +614,7 @@ fn test_no_function_args_without_type() { return n } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()) } @@ -667,8 +625,7 @@ fn test_function_with_return_type() { return n } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); assert_eq!(tree.unwrap().func[0].callable.ret_type, Some(Type::Int)); } @@ -684,8 +641,7 @@ fn test_booleans_in_function_call() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -704,8 +660,7 @@ fn test_late_initializing_variable() { _printf(y) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -720,8 +675,7 @@ fn test_simple_for_loop() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -738,8 +692,7 @@ fn test_nested_for_loop() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, 
Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -756,8 +709,7 @@ fn test_nested_array() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -769,8 +721,7 @@ fn test_simple_nested_expression() { println(x) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -789,8 +740,7 @@ fn test_continue() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -809,8 +759,7 @@ fn test_break() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -827,8 +776,7 @@ fn test_complex_nested_expressions() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -839,8 +787,7 @@ fn test_array_as_argument() { println([1, 2, 3]) } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -861,8 +808,7 @@ fn test_struct_initialization() { } } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } @@ -870,28 +816,28 @@ fn test_struct_initialization() { fn test_arithmetic() { // These should pass let raw = "fn main() {1*1}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); let raw = "fn main() {2+3*4}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); let raw = "fn main() {(2+2)*3}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); // These should fail let raw = "fn main() {(22+)+1}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()); let raw = "fn main() {1++1}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()); let raw = "fn main() {3)+1}"; - let tree = parse(tokenize(raw).unwrap(), Some(raw.to_string()), raw.into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_err()); } @@ -905,7 +851,6 @@ fn test_array_capacity() { let arr3: int[5] = [1, 2, 3, 4, 5] } "; - let tokens = tokenize(raw).unwrap(); - let tree = parse(tokens, Some(raw.to_string()), "".into()); + let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); } diff --git a/src/util/mod.rs b/src/util/mod.rs index 42c9d2b6..c4b52679 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -pub mod string_util; /// Datatype that holds one of two types #[derive(Debug)] diff --git a/src/util/string_util.rs b/src/util/string_util.rs deleted file mode 100644 index 109e7747..00000000 --- a/src/util/string_util.rs +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2020 Garrit Franke - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -use crate::lexer::Position; - -pub fn highlight_position_in_file(input: String, position: Position) -> String { - let mut buf = String::new(); - - let line = input.lines().nth(position.line - 1).unwrap(); - // TODO: do something better, code can be more than 9999 lines - buf.push_str(&format!("{:>4} | {}\n", position.line, line)); - buf.push_str(" | "); - - buf.push_str( - &line - .chars() - .take(position.offset - 1) - .map(|c| if c == '\t' { '\t' } else { ' ' }) - .collect::(), - ); - buf.push('^'); - - buf -} From d4395fe03664af2a477f8f1a80ff194d64c1498f Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Tue, 26 Mar 2024 14:54:57 +0300 Subject: [PATCH 11/13] ast: Add location information to Statement and Expression --- src/ast/mod.rs | 71 +++++--- src/ast/types.rs | 11 +- src/generator/c.rs | 89 +++++----- src/generator/js.rs | 63 ++++---- src/generator/qbe.rs | 76 ++++----- src/generator/tests/c_tests.rs | 4 +- src/generator/x86.rs | 24 ++- src/parser/parser.rs | 16 +- src/parser/rules.rs | 287 +++++++++++++++++++++------------ src/parser/tests.rs | 13 +- 10 files changed, 402 insertions(+), 252 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e72f8609..0b49ea24 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -use crate::lexer::{Keyword, Token, TokenKind, Value}; +use crate::lexer::{self, Keyword, Position, Token, TokenKind, Value}; use core::convert::TryFrom; use std::collections::HashMap; use std::collections::HashSet; @@ -39,6 +39,7 @@ impl Module { #[derive(Debug, Clone)] pub struct Callable { + pub pos: Position, pub name: String, pub arguments: Vec, pub ret_type: Option, @@ -52,6 +53,7 @@ pub struct Function { #[derive(Debug, Clone)] pub struct StructDef { + pub pos: Position, pub name: String, pub fields: Vec, pub methods: Vec, @@ -65,6 +67,7 @@ pub struct Method { #[derive(Debug, Eq, PartialEq, Clone)] pub struct Variable { + pub pos: Position, pub name: String, pub ty: Option, } @@ -78,6 +81,7 @@ impl AsRef for Variable { impl From for Variable { fn from(typed: TypedVariable) -> Self { Self { + pos: typed.pos, name: typed.name, ty: Some(typed.ty), } @@ -86,6 +90,7 @@ impl From for Variable { #[derive(Debug, Eq, PartialEq, Clone)] pub struct TypedVariable { + pub pos: Position, pub name: String, pub ty: Type, } @@ -97,7 +102,13 @@ impl AsRef for TypedVariable { } #[derive(Debug, Eq, PartialEq, Clone)] -pub enum Statement { +pub struct Statement { + pub pos: Position, + pub kind: StatementKind, +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum StatementKind { /// (Statements, Scoped variables) Block { statements: Vec, @@ -168,7 +179,13 @@ impl TryFrom for AssignOp { } #[derive(Debug, Eq, PartialEq, Clone)] -pub enum Expression { +pub struct Expression { + pub pos: Position, + pub kind: ExpressionKind, +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum ExpressionKind { Int(usize), Str(String), Bool(bool), @@ -200,25 +217,43 @@ pub enum Expression { } impl TryFrom for Expression { - type Error = String; + type Error = lexer::Error; - fn try_from(token: Token) -> std::result::Result { + fn try_from(token: Token) -> lexer::Result { let kind = token.kind; + let pos = token.pos; match kind { - TokenKind::Identifier(val) => Ok(Expression::Variable(val)), - TokenKind::Literal(Value::Int) => Ok(Expression::Int( - token - .raw - .parse() - .map_err(|_| "Int value could not be parsed")?, + TokenKind::Identifier(val) => Ok(Expression { + pos, + kind: ExpressionKind::Variable(val), + }), + TokenKind::Literal(Value::Int) => Ok(Expression { + pos, + kind: ExpressionKind::Int(token.raw.parse().map_err(|_| { + lexer::Error::new(pos, "Int value could not be parsed".to_owned()) + })?), + }), + TokenKind::Keyword(Keyword::Boolean) => Ok(Expression { + pos, + kind: ExpressionKind::Bool(match token.raw.as_ref() { + "true" => true, + "false" => false, + _ => { + return Err(lexer::Error::new( + pos, + "Boolean value could not be parsed".to_owned(), + )) + } + }), + }), + TokenKind::Literal(Value::Str(string)) => Ok(Expression { + pos, + kind: ExpressionKind::Str(string), + }), + _ => Err(lexer::Error::new( + pos, + "Value could not be parsed".to_owned(), )), - TokenKind::Keyword(Keyword::Boolean) => match token.raw.as_ref() { - "true" => Ok(Expression::Bool(true)), - "false" => Ok(Expression::Bool(false)), - _ => Err("Boolean value could not be parsed".into()), - }, - TokenKind::Literal(Value::Str(string)) => Ok(Expression::Str(string)), - _ => Err("Value could not be parsed".into()), } } } diff --git a/src/ast/types.rs b/src/ast/types.rs index 4bf7ad4c..5bc10c5f 100644 --- a/src/ast/types.rs +++ b/src/ast/types.rs @@ -1,3 +1,4 @@ +use crate::lexer::Position; /** * Copyright 2021 Garrit Franke * @@ -16,7 +17,13 @@ use std::convert::TryFrom; #[derive(Debug, Eq, PartialEq, Clone)] -pub enum Type { 
+pub struct Type { + pub pos: Position, + pub kind: TypeKind, +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum TypeKind { Any, Int, Str, @@ -25,7 +32,7 @@ pub enum Type { Struct(String), } -impl TryFrom for Type { +impl TryFrom for TypeKind { type Error = String; fn try_from(s: String) -> Result { match s.as_ref() { diff --git a/src/generator/c.rs b/src/generator/c.rs index 7017256a..d6c9cbab 100644 --- a/src/generator/c.rs +++ b/src/generator/c.rs @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::ast::types::Type; +use crate::ast::types::TypeKind; use crate::ast::*; use crate::generator::{Generator, GeneratorResult}; use crate::util::Either; @@ -65,28 +65,28 @@ pub fn generate_struct(def: StructDef) -> String { buf } -pub(super) fn generate_type(t: Either>) -> String { +pub(super) fn generate_type(t: Either>) -> String { let (ty, name) = match t { - Either::Left(var) => (var.ty, Some(var.name)), + Either::Left(var) => (var.ty.map(|ty| ty.kind), Some(var.name)), Either::Right(ty) => (ty, None), }; match ty { Some(t) => match t { - Type::Int => "int".into(), - Type::Str => "char *".into(), - Type::Any => "void *".into(), - Type::Bool => "bool".into(), - Type::Struct(name) => format!("struct {}", name), - Type::Array(t, capacity) => match name { + TypeKind::Int => "int".into(), + TypeKind::Str => "char *".into(), + TypeKind::Any => "void *".into(), + TypeKind::Bool => "bool".into(), + TypeKind::Struct(name) => format!("struct {}", name), + TypeKind::Array(t, capacity) => match name { Some(n) => format!( "{T} {N}[{C}]", - T = generate_type(Either::Right(Some(*t))), + T = generate_type(Either::Right(Some(t.kind))), N = n, C = capacity .map(|val| val.to_string()) .unwrap_or_else(|| "".to_string()), ), - None => format!("{}[]", generate_type(Either::Right(Some(*t)))), + None => format!("{}[]", generate_type(Either::Right(Some(t.kind)))), }, }, None => "void".into(), @@ -97,7 +97,10 @@ fn generate_function(func: Function) -> String { let mut buf = String::new(); buf += &format!("{} ", &generate_function_signature(func.clone())); match func.body { - Some(Statement::Block { statements, scope }) => { + Some(Statement { + kind: StatementKind::Block { statements, scope }, + .. 
+ }) => { buf += &generate_block(statements, scope); } Some(_) => unreachable!(), @@ -120,7 +123,7 @@ fn generate_function_signature(func: Function) -> String { }) .collect::>() .join(", "); - let t = generate_type(Either::Right(callable.ret_type)); + let t = generate_type(Either::Right(callable.ret_type.map(|ty| ty.kind))); format!("{T} {N}({A})", T = t, N = callable.name, A = arguments) } @@ -137,26 +140,26 @@ fn generate_block(block: Vec, _scope: Vec) -> String { } fn generate_statement(statement: Statement) -> String { - let state = match statement { - Statement::Return(ret) => generate_return(ret), - Statement::Declare { variable, value } => generate_declare(variable, value), - Statement::Exp(val) => generate_expression(val) + ";\n", - Statement::If { + let state = match statement.kind { + StatementKind::Return(ret) => generate_return(ret), + StatementKind::Declare { variable, value } => generate_declare(variable, value), + StatementKind::Exp(val) => generate_expression(val) + ";\n", + StatementKind::If { condition, body, else_branch, } => generate_conditional(condition, *body, else_branch.map(|x| *x)), - Statement::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), - Statement::Block { statements, scope } => generate_block(statements, scope), - Statement::While { condition, body } => generate_while_loop(condition, *body), - Statement::For { + StatementKind::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), + StatementKind::Block { statements, scope } => generate_block(statements, scope), + StatementKind::While { condition, body } => generate_while_loop(condition, *body), + StatementKind::For { ident: _, expr: _, body: _, } => todo!(), - Statement::Continue => todo!(), - Statement::Break => todo!(), - Statement::Match { + StatementKind::Continue => todo!(), + StatementKind::Break => todo!(), + StatementKind::Match { subject: _, arms: _, } => todo!(), @@ -166,20 +169,20 @@ fn generate_statement(statement: Statement) -> String { } fn generate_expression(expr: Expression) -> String { - match expr { - Expression::Int(val) => val.to_string(), - Expression::Variable(val) => val, - Expression::Str(val) => super::string_syntax(val), - Expression::Bool(b) => b.to_string(), - Expression::FunctionCall { expr, args } => generate_function_call(*expr, args), - Expression::Array(elements) => generate_array(elements), - Expression::ArrayAccess { expr, index } => generate_array_access(*expr, *index), - Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), - Expression::StructInitialization { name: _, fields } => { + match expr.kind { + ExpressionKind::Int(val) => val.to_string(), + ExpressionKind::Variable(val) => val, + ExpressionKind::Str(val) => super::string_syntax(val), + ExpressionKind::Bool(b) => b.to_string(), + ExpressionKind::FunctionCall { expr, args } => generate_function_call(*expr, args), + ExpressionKind::Array(elements) => generate_array(elements), + ExpressionKind::ArrayAccess { expr, index } => generate_array_access(*expr, *index), + ExpressionKind::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), + ExpressionKind::StructInitialization { name: _, fields } => { generate_struct_initialization(fields) } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, field), - Expression::Selff => todo!(), + ExpressionKind::FieldAccess { expr, field } => generate_field_access(*expr, field), + ExpressionKind::Selff => todo!(), } } @@ -189,7 +192,7 @@ fn generate_while_loop(expr: Expression, body: Statement) -> String { out_str += 
&generate_expression(expr); out_str += ") "; - if let Statement::Block { statements, scope } = body { + if let StatementKind::Block { statements, scope } = body.kind { out_str += &generate_block(statements, scope); } out_str @@ -200,9 +203,9 @@ fn generate_array(elements: Vec) -> String { out_str += &elements .iter() - .map(|el| match el { - Expression::Int(i) => i.to_string(), - Expression::Str(s) => super::string_syntax(s.to_owned()), + .map(|el| match &el.kind { + ExpressionKind::Int(i) => i.to_string(), + ExpressionKind::Str(s) => super::string_syntax(s.to_owned()), _ => todo!("Not yet implemented"), }) .collect::>() @@ -227,8 +230,8 @@ fn generate_conditional( ) -> String { let expr_str = generate_expression(expr); - let body = match if_state { - Statement::Block { + let body = match if_state.kind { + StatementKind::Block { statements, scope: _, } => statements, diff --git a/src/generator/js.rs b/src/generator/js.rs index 851ffe2a..ae16bddb 100644 --- a/src/generator/js.rs +++ b/src/generator/js.rs @@ -16,7 +16,7 @@ use crate::ast::*; use crate::generator::{Generator, GeneratorResult}; use std::collections::HashMap; -use types::Type; +use types::{Type, TypeKind}; pub struct JsGenerator; @@ -116,8 +116,8 @@ fn generate_block(block: Statement, prepend: Option) -> String { } // TODO: Prepend statements - let statements = match block { - Statement::Block { + let statements = match block.kind { + StatementKind::Block { statements, scope: _, } => statements, @@ -134,45 +134,45 @@ fn generate_block(block: Statement, prepend: Option) -> String { } fn generate_statement(statement: Statement) -> String { - let state = match statement { - Statement::Return(ret) => generate_return(ret), - Statement::Declare { variable, value } => generate_declare(variable, value), - Statement::Exp(val) => generate_expression(val), - Statement::If { + let state = match statement.kind { + StatementKind::Return(ret) => generate_return(ret), + StatementKind::Declare { variable, value } => generate_declare(variable, value), + StatementKind::Exp(val) => generate_expression(val), + StatementKind::If { condition, body, else_branch, } => generate_conditional(condition, *body, else_branch.map(|x| *x)), - Statement::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), - Statement::Block { + StatementKind::Assign { lhs, op, rhs } => generate_assign(*lhs, op, *rhs), + StatementKind::Block { statements: _, scope: _, } => generate_block(statement, None), - Statement::While { condition, body } => generate_while_loop(condition, *body), - Statement::For { ident, expr, body } => generate_for_loop(ident, expr, *body), - Statement::Continue => generate_continue(), - Statement::Break => generate_break(), - Statement::Match { subject, arms } => generate_match(subject, arms), + StatementKind::While { condition, body } => generate_while_loop(condition, *body), + StatementKind::For { ident, expr, body } => generate_for_loop(ident, expr, *body), + StatementKind::Continue => generate_continue(), + StatementKind::Break => generate_break(), + StatementKind::Match { subject, arms } => generate_match(subject, arms), }; format!("{};\n", state) } fn generate_expression(expr: Expression) -> String { - match expr { - Expression::Int(val) => val.to_string(), - Expression::Selff => "this".to_string(), - Expression::Str(val) => super::string_syntax(val), - Expression::Variable(val) => val, - Expression::Bool(b) => b.to_string(), - Expression::FunctionCall { expr, args } => generate_function_call(*expr, args), - Expression::Array(elements) => 
generate_array(elements), - Expression::ArrayAccess { expr, index } => generate_array_access(*expr, *index), - Expression::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), - Expression::StructInitialization { name, fields } => { + match expr.kind { + ExpressionKind::Int(val) => val.to_string(), + ExpressionKind::Selff => "this".to_string(), + ExpressionKind::Str(val) => super::string_syntax(val), + ExpressionKind::Variable(val) => val, + ExpressionKind::Bool(b) => b.to_string(), + ExpressionKind::FunctionCall { expr, args } => generate_function_call(*expr, args), + ExpressionKind::Array(elements) => generate_array(elements), + ExpressionKind::ArrayAccess { expr, index } => generate_array_access(*expr, *index), + ExpressionKind::BinOp { lhs, op, rhs } => generate_bin_op(*lhs, op, *rhs), + ExpressionKind::StructInitialization { name, fields } => { generate_struct_initialization(name, fields) } - Expression::FieldAccess { expr, field } => generate_field_access(*expr, field), + ExpressionKind::FieldAccess { expr, field } => generate_field_access(*expr, field), } } @@ -267,8 +267,8 @@ fn generate_conditional( ) -> String { let expr_str = generate_expression(expr); - let body = match if_state { - Statement::Block { + let body = match if_state.kind { + StatementKind::Block { statements, scope: _, } => statements, @@ -308,7 +308,10 @@ fn generate_declare>(identifier: V, val: Option) // But this works: // var x = []; // x[0] = 1; - Some(Type::Array(_, _)) => format!("var {} = []", ident.name), + Some(Type { + kind: TypeKind::Array(_, _), + .. + }) => format!("var {} = []", ident.name), _ => format!("var {}", ident.name), }, } diff --git a/src/generator/qbe.rs b/src/generator/qbe.rs index b66add54..88aeedc6 100644 --- a/src/generator/qbe.rs +++ b/src/generator/qbe.rs @@ -14,7 +14,7 @@ * limitations under the License. 
*/ use super::{Generator, GeneratorResult}; -use crate::ast::types::Type; +use crate::ast::types::{Type, TypeKind}; use crate::ast::*; use crate::util::Either; use std::collections::HashMap; @@ -169,8 +169,8 @@ impl QbeGenerator { func: &mut qbe::Function, stmt: &Statement, ) -> GeneratorResult<()> { - match stmt { - Statement::Block { + match &stmt.kind { + StatementKind::Block { statements, scope: _, } => { @@ -180,7 +180,7 @@ impl QbeGenerator { } self.scopes.pop(); } - Statement::Declare { variable, value } => { + StatementKind::Declare { variable, value } => { let ty = self.get_type(variable.ty.as_ref().ok_or_else(|| { format!("Missing type for variable '{}'", &variable.name) @@ -192,10 +192,10 @@ impl QbeGenerator { func.assign_instr(tmp, ty, qbe::Instr::Copy(result)); } } - Statement::Assign { lhs, op, rhs } => { + StatementKind::Assign { lhs, op, rhs } => { self.generate_assignment(func, lhs, *op, Either::Right(rhs))?; } - Statement::Return(val) => match val { + StatementKind::Return(val) => match val { Some(expr) => { let (_, result) = self.generate_expression(func, expr)?; // TODO: Cast to function return type @@ -203,31 +203,31 @@ impl QbeGenerator { } None => func.add_instr(qbe::Instr::Ret(None)), }, - Statement::If { + StatementKind::If { condition, body, else_branch, } => { self.generate_if(func, condition, body, else_branch)?; } - Statement::While { condition, body } => { + StatementKind::While { condition, body } => { self.generate_while(func, condition, body)?; } - Statement::Break => { + StatementKind::Break => { if let Some(label) = &self.loop_labels.last() { func.add_instr(qbe::Instr::Jmp(format!("{}.end", label))); } else { return Err("break used outside of a loop".to_owned()); } } - Statement::Continue => { + StatementKind::Continue => { if let Some(label) = &self.loop_labels.last() { func.add_instr(qbe::Instr::Jmp(format!("{}.cond", label))); } else { return Err("continue used outside of a loop".to_owned()); } } - Statement::Exp(expr) => { + StatementKind::Exp(expr) => { self.generate_expression(func, expr)?; } _ => todo!("statement: {:?}", stmt), @@ -241,8 +241,8 @@ impl QbeGenerator { func: &mut qbe::Function, expr: &Expression, ) -> GeneratorResult<(qbe::Type, qbe::Value)> { - match expr { - Expression::Int(literal) => { + match &expr.kind { + ExpressionKind::Int(literal) => { let tmp = self.new_temporary(); func.assign_instr( tmp.clone(), @@ -252,8 +252,8 @@ impl QbeGenerator { Ok((qbe::Type::Word, tmp)) } - Expression::Str(string) => self.generate_string(string), - Expression::Bool(literal) => { + ExpressionKind::Str(string) => self.generate_string(string), + ExpressionKind::Bool(literal) => { let tmp = self.new_temporary(); func.assign_instr( tmp.clone(), @@ -263,15 +263,15 @@ impl QbeGenerator { Ok((qbe::Type::Word, tmp)) } - Expression::Array(elements) => self.generate_array(func, elements), - Expression::FunctionCall { expr, args } => { + ExpressionKind::Array(elements) => self.generate_array(func, elements), + ExpressionKind::FunctionCall { expr, args } => { let mut new_args: Vec<(qbe::Type, qbe::Value)> = Vec::new(); for arg in args.iter() { new_args.push(self.generate_expression(func, arg)?); } - let fn_name = match expr.as_ref() { - Expression::Variable(name) => name.to_owned(), + let fn_name = match &expr.as_ref().kind { + ExpressionKind::Variable(name) => name.to_owned(), _ => todo!("methods"), }; @@ -285,12 +285,12 @@ impl QbeGenerator { Ok((qbe::Type::Word, tmp)) } - Expression::Variable(name) => self.get_var(name).map(|v| v.to_owned()), - 
Expression::BinOp { lhs, op, rhs } => self.generate_binop(func, lhs, op, rhs), - Expression::StructInitialization { name, fields } => { + ExpressionKind::Variable(name) => self.get_var(name).map(|v| v.to_owned()), + ExpressionKind::BinOp { lhs, op, rhs } => self.generate_binop(func, lhs, op, rhs), + ExpressionKind::StructInitialization { name, fields } => { self.generate_struct_init(func, name, fields) } - Expression::FieldAccess { expr, field } => { + ExpressionKind::FieldAccess { expr, field } => { self.generate_field_access(func, expr, field) } _ => todo!("expression: {:?}", expr), @@ -498,8 +498,8 @@ impl QbeGenerator { Either::Left(qval) => qval, Either::Right(expr) => self.generate_expression(func, expr)?.1, }; - match lhs { - Expression::Variable(name) => { + match &lhs.kind { + ExpressionKind::Variable(name) => { let (vty, tmp) = self.get_var(name)?; func.assign_instr( tmp.to_owned(), @@ -507,7 +507,7 @@ impl QbeGenerator { qbe::Instr::Copy(rhs), ); } - Expression::FieldAccess { expr, field } => { + ExpressionKind::FieldAccess { expr, field } => { let (src, ty, offset) = self.resolve_field_access(expr, field)?; let field_ptr = self.new_temporary(); @@ -519,7 +519,7 @@ impl QbeGenerator { func.add_instr(qbe::Instr::Store(ty, field_ptr, rhs)); } - Expression::ArrayAccess { .. } => todo!(), + ExpressionKind::ArrayAccess { .. } => todo!(), _ => return Err("Left side of an assignment must be either a variable, field access or array access".to_owned()), } @@ -599,10 +599,10 @@ impl QbeGenerator { obj: &Expression, field: &str, ) -> GeneratorResult<(qbe::Value, qbe::Type, u64)> { - let (ty, src) = match obj { - Expression::Variable(var) => self.get_var(var)?.to_owned(), - Expression::FieldAccess { .. } => todo!("nested field access"), - Expression::Selff => unimplemented!("methods"), + let (ty, src) = match &obj.kind { + ExpressionKind::Variable(var) => self.get_var(var)?.to_owned(), + ExpressionKind::FieldAccess { .. } => todo!("nested field access"), + ExpressionKind::Selff => unimplemented!("methods"), other => { return Err(format!( "Invalid field access type: expected variable, field access or 'self', got {:?}", @@ -757,12 +757,12 @@ impl QbeGenerator { /// Returns a QBE type for the given AST type fn get_type(&self, ty: &Type) -> GeneratorResult { - match ty { - Type::Any => Err("'any' type is not supported".into()), - Type::Int => Ok(qbe::Type::Word), - Type::Bool => Ok(qbe::Type::Byte), - Type::Str => Ok(qbe::Type::Long), - Type::Struct(name) => { + match &ty.kind { + TypeKind::Any => Err("'any' type is not supported".into()), + TypeKind::Int => Ok(qbe::Type::Word), + TypeKind::Bool => Ok(qbe::Type::Byte), + TypeKind::Str => Ok(qbe::Type::Long), + TypeKind::Struct(name) => { let (ty, ..) = self .struct_map .get(name) @@ -770,7 +770,7 @@ impl QbeGenerator { .to_owned(); Ok(ty) } - Type::Array(..) => Ok(qbe::Type::Long), + TypeKind::Array(..) 
=> Ok(qbe::Type::Long), } } } diff --git a/src/generator/tests/c_tests.rs b/src/generator/tests/c_tests.rs index 003097dd..891085b6 100644 --- a/src/generator/tests/c_tests.rs +++ b/src/generator/tests/c_tests.rs @@ -1,9 +1,9 @@ -use crate::ast::types::Type; +use crate::ast::types::TypeKind; use crate::generator::c::generate_type; use crate::util::Either; #[test] fn test_generate_type_regular_type() { - let t = generate_type(Either::Right(Some(Type::Int))); + let t = generate_type(Either::Right(Some(TypeKind::Int))); assert_eq!(t, "int") } diff --git a/src/generator/x86.rs b/src/generator/x86.rs index d48fc10e..c91a2069 100644 --- a/src/generator/x86.rs +++ b/src/generator/x86.rs @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::ast::{Function, Module, Statement}; +use crate::ast::{Function, Module, Statement, StatementKind}; use crate::generator::{Generator, GeneratorResult}; struct Assembly { @@ -76,12 +76,22 @@ impl X86Generator { let callable = func.callable; let has_return: bool = match &func.body { - Some(Statement::Block { - statements, - scope: _, - }) => statements - .iter() - .any(|s| matches!(*s, Statement::Return(_))), + Some(Statement { + kind: + StatementKind::Block { + statements, + scope: _, + }, + .. + }) => statements.iter().any(|s| { + matches!( + *s, + Statement { + kind: StatementKind::Return(_), + .. + } + ) + }), Some(_) => panic!("Function body should be of type Block"), None => return asm, }; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 157c99dc..000108a6 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -15,7 +15,7 @@ */ use super::{Error, Result}; use crate::ast::*; -use crate::lexer::{Keyword, Token, TokenKind}; +use crate::lexer::{Keyword, Position, Token, TokenKind}; use std::convert::TryFrom; use std::iter::Peekable; use std::vec::IntoIter; @@ -104,23 +104,25 @@ impl Parser { } } - pub(super) fn match_keyword(&mut self, keyword: Keyword) -> Result<()> { + pub(super) fn match_keyword(&mut self, keyword: Keyword) -> Result { let token = self.next()?; match &token.kind { - TokenKind::Keyword(ref k) if k == &keyword => Ok(()), + TokenKind::Keyword(ref k) if k == &keyword => Ok(token), _ => Err(self.make_error(TokenKind::SemiColon, token)), } } - pub(super) fn match_operator(&mut self) -> Result { + pub(super) fn match_operator(&mut self) -> Result<(BinOp, Position)> { let token = self.next()?; - BinOp::try_from(token.kind.clone()).map_err(|err| Error::new(token.pos, err)) + BinOp::try_from(token.kind.clone()) + .map_err(|err| Error::new(token.pos, err)) + .map(|op| (op, token.pos)) } - pub(super) fn match_identifier(&mut self) -> Result { + pub(super) fn match_identifier(&mut self) -> Result<(String, Position)> { let token = self.next()?; match &token.kind { - TokenKind::Identifier(n) => Ok(n.to_string()), + TokenKind::Identifier(n) => Ok((n.to_string(), token.pos)), other => Err(Error::new( token.pos, format!("Expected Identifier, found {:?}", other), diff --git a/src/parser/rules.rs b/src/parser/rules.rs index bce88018..c1575210 100644 --- a/src/parser/rules.rs +++ b/src/parser/rules.rs @@ -15,9 +15,9 @@ use super::parser::Parser; * limitations under the License. 
*/ use super::{Error, Result}; -use crate::ast::types::Type; +use crate::ast::types::{Type, TypeKind}; use crate::ast::*; -use crate::lexer::{Keyword, TokenKind, Value}; +use crate::lexer::{Keyword, Position, TokenKind, Value}; use std::collections::HashMap; use std::collections::HashSet; use std::convert::TryFrom; @@ -54,8 +54,8 @@ impl Parser { } fn parse_struct_definition(&mut self) -> Result { - self.match_keyword(Keyword::Struct)?; - let name = self.match_identifier()?; + let pos = self.match_keyword(Keyword::Struct)?.pos; + let (name, _) = self.match_identifier()?; self.match_token(TokenKind::CurlyBracesOpen)?; let mut fields = Vec::new(); @@ -77,6 +77,7 @@ impl Parser { } self.match_token(TokenKind::CurlyBracesClose)?; Ok(StructDef { + pos, name, fields, methods, @@ -105,6 +106,7 @@ impl Parser { let next = self.next()?; if let TokenKind::Identifier(name) = next.kind { return Ok(TypedVariable { + pos: next.pos, name, ty: self.parse_type()?, }); @@ -117,7 +119,7 @@ impl Parser { } fn parse_block(&mut self) -> Result { - self.match_token(TokenKind::CurlyBracesOpen)?; + let pos = self.match_token(TokenKind::CurlyBracesOpen)?.pos; let mut statements = vec![]; let mut scope = vec![]; @@ -128,7 +130,7 @@ impl Parser { // If the current statement is a variable declaration, // let the scope know - if let Statement::Declare { variable, value: _ } = &statement { + if let StatementKind::Declare { variable, .. } = &statement.kind { // TODO: Not sure if we should clone here scope.push(variable.to_owned()); } @@ -138,7 +140,10 @@ impl Parser { self.match_token(TokenKind::CurlyBracesClose)?; - Ok(Statement::Block { statements, scope }) + Ok(Statement { + pos, + kind: StatementKind::Block { statements, scope }, + }) } fn parse_function(&mut self) -> Result { @@ -163,8 +168,8 @@ impl Parser { } fn parse_callable(&mut self) -> Result { - self.match_keyword(Keyword::Function)?; - let name = self.match_identifier()?; + let pos = self.match_keyword(Keyword::Function)?.pos; + let (name, _) = self.match_identifier()?; self.match_token(TokenKind::BraceOpen)?; @@ -181,6 +186,7 @@ impl Parser { }; Ok(Callable { + pos, name, arguments, ret_type: ty, @@ -207,7 +213,10 @@ impl Parser { self.match_token(TokenKind::Colon)?; let next = self.peek()?; let typ = match next.kind { - TokenKind::Identifier(_) => Ok(Type::try_from(self.next()?.raw).unwrap()), + TokenKind::Identifier(_) => Ok(Type { + pos: next.pos, + kind: TypeKind::try_from(self.next()?.raw).unwrap(), + }), _ => Err(Error::new(next.pos, "Expected type".to_owned())), }?; if self.peek_token(TokenKind::SquareBraceOpen).is_ok() { @@ -220,7 +229,10 @@ impl Parser { Err(_) => None, }; self.match_token(TokenKind::SquareBraceClose)?; - Ok(Type::Array(Box::new(typ), capacity)) + Ok(Type { + pos: next.pos, + kind: TypeKind::Array(Box::new(typ), capacity), + }) } else { Ok(typ) } @@ -259,12 +271,15 @@ impl Parser { if AssignOp::try_from(suffix.kind).is_ok() { Ok(self.parse_assignment(expr)?) } else { - Ok(Statement::Exp(expr)) + Ok(Statement { + pos: token.pos, + kind: StatementKind::Exp(expr), + }) } } fn parse_function_call(&mut self, expr: Expression) -> Result { - self.match_token(TokenKind::BraceOpen)?; + let pos = self.match_token(TokenKind::BraceOpen)?.pos; let mut args = Vec::new(); @@ -285,8 +300,8 @@ impl Parser { TokenKind::SquareBraceOpen => { // TODO: Expression parsing currently uses `next` instead of `peek`. 
// We have to eat that token here until that is resolved - self.match_token(TokenKind::SquareBraceOpen)?; - args.push(self.parse_array()?); + let pos = self.match_token(TokenKind::SquareBraceOpen)?.pos; + args.push(self.parse_array(pos)?); } _ => { return Err(self.make_error(TokenKind::BraceClose, next)); @@ -295,22 +310,28 @@ impl Parser { } self.match_token(TokenKind::BraceClose)?; - Ok(Expression::FunctionCall { - expr: Box::new(expr), - args, + Ok(Expression { + pos, + kind: ExpressionKind::FunctionCall { + expr: Box::new(expr), + args, + }, }) } fn parse_return(&mut self) -> Result { - self.match_keyword(Keyword::Return)?; + let pos = self.match_keyword(Keyword::Return)?.pos; let peeked = self.peek()?; - match peeked.kind { - TokenKind::SemiColon => { - self.next()?; - Ok(Statement::Return(None)) - } - _ => Ok(Statement::Return(Some(self.parse_expression()?))), - } + Ok(Statement { + pos, + kind: StatementKind::Return(match peeked.kind { + TokenKind::SemiColon => { + self.next()?; + None + } + _ => Some(self.parse_expression()?), + }), + }) } fn parse_expression(&mut self) -> Result { @@ -325,12 +346,15 @@ impl Parser { expr } // true | false - TokenKind::Keyword(Keyword::Boolean) => Expression::Bool( - token - .raw - .parse::() - .map_err(|e| Error::new(token.pos, e.to_string()))?, - ), + TokenKind::Keyword(Keyword::Boolean) => Expression { + pos: token.pos, + kind: ExpressionKind::Bool( + token + .raw + .parse::() + .map_err(|e| Error::new(token.pos, e.to_string()))?, + ), + }, // 5 TokenKind::Literal(Value::Int) => { // Ignore spacing character (E.g. 1_000_000) @@ -348,16 +372,28 @@ impl Parser { c => c.parse::(), } .map_err(|e| Error::new(token.pos, e.to_string()))?; - Expression::Int(val) + Expression { + pos: token.pos, + kind: ExpressionKind::Int(val), + } } // "A string" - TokenKind::Literal(Value::Str(string)) => Expression::Str(string), + TokenKind::Literal(Value::Str(string)) => Expression { + pos: token.pos, + kind: ExpressionKind::Str(string), + }, // self - TokenKind::Keyword(Keyword::Selff) => Expression::Selff, + TokenKind::Keyword(Keyword::Selff) => Expression { + pos: token.pos, + kind: ExpressionKind::Selff, + }, // name - TokenKind::Identifier(val) => Expression::Variable(val), + TokenKind::Identifier(val) => Expression { + pos: token.pos, + kind: ExpressionKind::Variable(val), + }, // [1, 2, 3] - TokenKind::SquareBraceOpen => self.parse_array()?, + TokenKind::SquareBraceOpen => self.parse_array(token.pos)?, // new Foo {} TokenKind::Keyword(Keyword::New) => self.parse_struct_initialization()?, other => { @@ -390,24 +426,27 @@ impl Parser { } fn parse_field_access(&mut self, lhs: Expression) -> Result { - self.match_token(TokenKind::Dot)?; + let pos = self.match_token(TokenKind::Dot)?.pos; - let field = self.match_identifier()?; - let expr = Expression::FieldAccess { + let (field, _) = self.match_identifier()?; + let expr = ExpressionKind::FieldAccess { expr: Box::new(lhs), field, }; - Ok(expr) + Ok(Expression { pos, kind: expr }) } /// TODO: Cleanup fn parse_struct_initialization(&mut self) -> Result { - let name = self.match_identifier()?; + let (name, pos) = self.match_identifier()?; self.match_token(TokenKind::CurlyBracesOpen)?; let fields = self.parse_struct_fields()?; self.match_token(TokenKind::CurlyBracesClose)?; - Ok(Expression::StructInitialization { name, fields }) + Ok(Expression { + pos, + kind: ExpressionKind::StructInitialization { name, fields }, + }) } fn parse_struct_fields(&mut self) -> Result>> { @@ -442,20 +481,12 @@ impl Parser { )) } - fn 
parse_array(&mut self) -> Result { + fn parse_array(&mut self, pos: Position) -> Result { let mut elements = Vec::new(); loop { let next = self.peek()?; match next.kind { TokenKind::SquareBraceClose => {} - TokenKind::Literal(Value::Int) => { - let value = self - .next()? - .raw - .parse::() - .map_err(|e| Error::new(next.pos, e.to_string()))?; - elements.push(Expression::Int(value)); - } _ => { let expr = self.parse_expression()?; elements.push(expr); @@ -469,45 +500,60 @@ impl Parser { self.match_token(TokenKind::SquareBraceClose)?; - Ok(Expression::Array(elements)) + Ok(Expression { + pos, + kind: ExpressionKind::Array(elements), + }) } fn parse_array_access(&mut self, expr: Expression) -> Result { - self.match_token(TokenKind::SquareBraceOpen)?; + let pos = self.match_token(TokenKind::SquareBraceOpen)?.pos; let index = self.parse_expression()?; self.match_token(TokenKind::SquareBraceClose)?; - Ok(Expression::ArrayAccess { - expr: Box::new(expr), - index: Box::new(index), + Ok(Expression { + pos, + kind: ExpressionKind::ArrayAccess { + expr: Box::new(expr), + index: Box::new(index), + }, }) } fn parse_while_loop(&mut self) -> Result { - self.match_keyword(Keyword::While)?; + let pos = self.match_keyword(Keyword::While)?.pos; let condition = self.parse_expression()?; let body = self.parse_block()?; - Ok(Statement::While { - condition, - body: Box::new(body), + Ok(Statement { + pos, + kind: StatementKind::While { + condition, + body: Box::new(body), + }, }) } fn parse_break(&mut self) -> Result { - self.match_keyword(Keyword::Break)?; - Ok(Statement::Break) + let pos = self.match_keyword(Keyword::Break)?.pos; + Ok(Statement { + pos, + kind: StatementKind::Break, + }) } fn parse_continue(&mut self) -> Result { - self.match_keyword(Keyword::Continue)?; - Ok(Statement::Continue) + let pos = self.match_keyword(Keyword::Continue)?.pos; + Ok(Statement { + pos, + kind: StatementKind::Continue, + }) } fn parse_for_loop(&mut self) -> Result { - self.match_keyword(Keyword::For)?; + let pos = self.match_keyword(Keyword::For)?.pos; - let ident = self.match_identifier()?; + let (ident, ident_pos) = self.match_identifier()?; let ident_ty = match self.peek()?.kind { TokenKind::Colon => Some(self.parse_type()?), _ => None, @@ -517,18 +563,22 @@ impl Parser { let body = self.parse_block()?; - Ok(Statement::For { - ident: Variable { - name: ident, - ty: ident_ty, + Ok(Statement { + pos, + kind: StatementKind::For { + ident: Variable { + pos: ident_pos, + name: ident, + ty: ident_ty, + }, + expr, + body: Box::new(body), }, - expr, - body: Box::new(body), }) } fn parse_match_statement(&mut self) -> Result { - self.match_keyword(Keyword::Match)?; + let pos = self.match_keyword(Keyword::Match)?.pos; let subject = self.parse_expression()?; self.match_token(TokenKind::CurlyBracesOpen)?; let mut arms: Vec = Vec::new(); @@ -556,7 +606,10 @@ impl Parser { } } self.match_token(TokenKind::CurlyBracesClose)?; - Ok(Statement::Match { subject, arms }) + Ok(Statement { + pos, + kind: StatementKind::Match { subject, arms }, + }) } fn parse_match_arm(&mut self) -> Result { @@ -579,7 +632,7 @@ impl Parser { } fn parse_conditional_statement(&mut self) -> Result { - self.match_keyword(Keyword::If)?; + let pos = self.match_keyword(Keyword::If)?.pos; let condition = self.parse_expression()?; let body = self.parse_block()?; @@ -599,16 +652,22 @@ impl Parser { Some(branch) => branch, None => self.parse_conditional_statement()?, }; - Ok(Statement::If { - condition, - body: Box::new(body), - else_branch: 
Some(Box::new(else_branch)), + Ok(Statement { + pos, + kind: StatementKind::If { + condition, + body: Box::new(body), + else_branch: Some(Box::new(else_branch)), + }, }) } - _ => Ok(Statement::If { - condition, - body: Box::new(body), - else_branch: None, + _ => Ok(Statement { + pos, + kind: StatementKind::If { + condition, + body: Box::new(body), + else_branch: None, + }, }), } } @@ -620,18 +679,21 @@ impl Parser { /// ``` /// In this case, the function call has already been evaluated, and needs to be passed to this function. fn parse_bin_op(&mut self, lhs: Expression) -> Result { - let op = self.match_operator()?; - - Ok(Expression::BinOp { - lhs: Box::from(lhs), - op, - rhs: Box::from(self.parse_expression()?), + let (op, pos) = self.match_operator()?; + + Ok(Expression { + pos, + kind: ExpressionKind::BinOp { + lhs: Box::from(lhs), + op, + rhs: Box::from(self.parse_expression()?), + }, }) } fn parse_declare(&mut self) -> Result { - self.match_keyword(Keyword::Let)?; - let name = self.match_identifier()?; + let pos = self.match_keyword(Keyword::Let)?.pos; + let (name, name_pos) = self.match_identifier()?; let token = self.peek()?; let ty = match &token.kind { TokenKind::Colon => Some(self.parse_type()?), @@ -648,27 +710,46 @@ impl Parser { TokenKind::Assign => { self.match_token(TokenKind::Assign)?; let expr = self.parse_expression()?; - Ok(Statement::Declare { - variable: Variable { name, ty }, - value: Some(expr), + Ok(Statement { + pos, + kind: StatementKind::Declare { + variable: Variable { + pos: name_pos, + name, + ty, + }, + value: Some(expr), + }, }) } - _ => Ok(Statement::Declare { - variable: Variable { name, ty }, - value: None, + _ => Ok(Statement { + pos, + kind: StatementKind::Declare { + variable: Variable { + pos: name_pos, + name, + ty, + }, + value: None, + }, }), } } fn parse_assignment(&mut self, lhs: Expression) -> Result { - let op = AssignOp::try_from(self.next()?.kind).unwrap(); + let token = self.next()?; + let pos = token.pos; + let op = AssignOp::try_from(token.kind).unwrap(); let expr = self.parse_expression()?; - Ok(Statement::Assign { - lhs: Box::new(lhs), - op, - rhs: Box::new(expr), + Ok(Statement { + pos, + kind: StatementKind::Assign { + lhs: Box::new(lhs), + op, + rhs: Box::new(expr), + }, }) } } diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 3d760dca..a1b8efc3 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -13,7 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -use crate::ast::{types::Type, Module}; +use crate::ast::{ + types::{Type, TypeKind}, + Module, +}; use crate::lexer::{tokenize, FileTable}; use crate::parser::{parse, Result}; @@ -627,7 +630,13 @@ fn test_function_with_return_type() { "; let tree = test_parse(raw.to_owned()); assert!(tree.is_ok()); - assert_eq!(tree.unwrap().func[0].callable.ret_type, Some(Type::Int)); + assert!(matches!( + tree.unwrap().func[0].callable.ret_type, + Some(Type { + kind: TypeKind::Int, + .. 
+ }
     )); } #[test] From 5597474054ae264f0f1d904ed69fff62f404e521 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Tue, 26 Mar 2024 15:43:49 +0300 Subject: [PATCH 12/13] Run tests using crate::command::run instead of spawning cargo This significantly speeds up tests --- src/builder/mod.rs | 26 ----------------- src/tests/test_examples.rs | 57 ++++++-------------------------------- 2 files changed, 9 insertions(+), 74 deletions(-) diff --git a/src/builder/mod.rs b/src/builder/mod.rs index aa2629d2..4b98b3a7 100644 --- a/src/builder/mod.rs +++ b/src/builder/mod.rs @@ -19,7 +19,6 @@ use crate::lexer; use crate::parser; use crate::Lib; use crate::PathBuf; -use std::env; use std::fs::File; use std::io::Read; use std::io::Write; @@ -39,36 +38,11 @@ impl Builder { } } - fn get_base_path(&self) -> Result { - Ok(self - .in_file - .parent() - .ok_or("File does not have a parent")? - .to_path_buf()) - } - pub fn build(&mut self, target: &Target) -> Result<(), String> { - let in_file = self.in_file.clone(); - // Resolve path deltas between working directory and entrypoint - let base_directory = self.get_base_path()?; - - // During building, we change the environment directory. - // After we're done, we have to set it back to the initial directory. - let initial_directory = env::current_dir().expect("Current directory does not exist"); - if let Ok(resolved_delta) = in_file.strip_prefix(&base_directory) { - // TODO: This error could probably be handled better - let _ = env::set_current_dir(base_directory); - self.in_file = resolved_delta.to_path_buf(); - } self.build_module(self.in_file.clone(), &mut Vec::new())?; - - // Append standard library if matches!(target, Target::JS) { self.build_stdlib()?; } - - // Change back to the initial directory - env::set_current_dir(initial_directory).expect("Could not set current directory"); Ok(()) } diff --git a/src/tests/test_examples.rs b/src/tests/test_examples.rs index 8ac34e3e..1906a57d 100644 --- a/src/tests/test_examples.rs +++ b/src/tests/test_examples.rs @@ -1,4 +1,3 @@ -use std::fs; /** * Copyright 2020 Garrit Franke * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ -use std::io::Error; -use std::process::Command; +use crate::command::run; +use crate::generator::Target; -fn test_directory(dir_in: &str) -> Result<(), Error> { - let dir_out = format!("{}_out", dir_in); +fn test_directory(dir_in: &str) -> Result<(), String> { let dir = std::env::current_dir().unwrap(); - let examples = std::fs::read_dir(dir.join(dir_in))?; - - let _ = fs::create_dir(&dir_out); - - let out_file_suffix = ".js"; + let examples = std::fs::read_dir(dir.join(dir_in)).map_err(|err| err.to_string())?; for ex in examples { - let example = ex?; + let example = ex.map_err(|err| err.to_string())?; let in_file = dir.join(dir_in).join(example.file_name()); // We don't want to build submodules, since they don't run without a main function if in_file.is_dir() { continue; } - let out_file = dir.join(&dir_out).join( - example - .file_name() - .into_string() - .unwrap() - .replace(".sb", out_file_suffix), - ); - let success = Command::new("cargo") - .arg("run") - .arg("build") - .arg(&in_file) - .arg("-o") - .arg(&out_file) - .spawn()? - .wait()? - .success(); - assert!(success, "{:?}", &in_file); - let node_installed = Command::new("node").arg("-v").spawn()?.wait()?.success(); - if node_installed { - let execution = Command::new("node") - .arg(out_file) - .spawn()? - .wait()? 
- .success(); - assert!(execution, "{:?}", &in_file) - } + run::run(Target::JS, in_file)?; } Ok(()) } #[test] -fn test_examples() -> Result<(), Error> { +fn test_examples() -> Result<(), String> { test_directory("examples")?; Ok(()) } #[test] -fn test_testcases() -> Result<(), Error> { +fn test_testcases() -> Result<(), String> { let dir = std::env::current_dir().unwrap(); let in_file = dir.join("tests/main.sb"); - let success = Command::new("cargo") - .arg("run") - .arg("run") - .arg(&in_file) - .spawn()? - .wait()? - .success(); - assert!(success, "{:?}", &in_file); - Ok(()) + run::run(Target::JS, in_file) } From d20784e2bea5de8ffef53f7afca469ed130420f9 Mon Sep 17 00:00:00 2001 From: Alexey Yerin Date: Sat, 23 Mar 2024 15:44:34 +0300 Subject: [PATCH 13/13] WIP: Type checker --- src/check/mod.rs | 1049 ++++++++++++++++++++++++++++++++++++++ src/check/scope.rs | 166 ++++++ src/check/types.rs | 298 +++++++++++ src/command/mod.rs | 1 + src/command/typecheck.rs | 23 + src/main.rs | 4 + 6 files changed, 1541 insertions(+) create mode 100644 src/check/mod.rs create mode 100644 src/check/scope.rs create mode 100644 src/check/types.rs create mode 100644 src/command/typecheck.rs diff --git a/src/check/mod.rs b/src/check/mod.rs new file mode 100644 index 00000000..788fc545 --- /dev/null +++ b/src/check/mod.rs @@ -0,0 +1,1049 @@ +use crate::ast::{self, AssignOp, BinOp}; +use crate::lexer::{self, Position}; +use std::collections::HashMap; + +pub mod scope; +pub mod types; + +pub type Error = lexer::Error; + +pub type Result = lexer::Result; + +#[derive(Debug)] +pub struct Module { + pub type_table: types::Table, + pub scope_table: scope::Table, + pub functions: Vec, + pub structs: Vec, +} + +struct Context { + type_table: types::Table, + scope_table: scope::Table, +} + +impl Module { + pub fn from_ast(module: ast::Module) -> Result { + let type_table = types::Table::new(); + let scope_table = scope::Table::new(); + + let mut ctx = Context { + type_table, + scope_table, + }; + let scope = ctx.scope_table.add_root(); + + // TODO: dependency resolution (!) 
+ let structs = module + .structs + .into_iter() + .map(|def| Struct::from_ast(&mut ctx, scope, def)) + .collect::>>()?; + + for func in &module.func { + scope.insert( + func.callable.name.clone(), + ctx.type_table.insert_ast_callable(&func.callable)?, + &mut ctx.scope_table, + ); + } + + let functions: Vec = module + .func + .into_iter() + .map(|func| Function::from_ast(&mut ctx, scope, func)) + .collect::>>()?; + + // TODO: globals + + Ok(Self { + type_table: ctx.type_table, + scope_table: ctx.scope_table, + functions, + structs, + }) + } +} + +#[derive(Debug)] +pub struct Struct { + pub ty: types::Id, + pub methods: Vec, +} + +impl Struct { + fn from_ast(ctx: &mut Context, scope: scope::Id, def: ast::StructDef) -> Result { + let ty = ctx.type_table.insert_ast_struct(&def)?; + let methods = def + .methods + .into_iter() + .map(|method| Method::from_ast(ctx, scope, ty, method)) + .collect::>>()?; + + Ok(Struct { ty, methods }) + } +} + +#[derive(Debug)] +pub struct Callable { + pub pos: Position, + pub scope: scope::Id, + pub name: String, + pub parameters: Vec, + pub return_type: types::Id, +} + +#[derive(Debug)] +pub struct Function { + pub callable: Callable, + pub body: Option, +} + +impl Function { + fn from_ast(ctx: &mut Context, scope: scope::Id, func: ast::Function) -> Result { + let callable = func.callable; + let return_type = match callable.ret_type { + Some(ty) => ctx.type_table.insert_ast_type(&ty)?, + None => ctx.type_table.void, + }; + let scope = scope.push_function(return_type, &mut ctx.scope_table); + let parameters = callable + .arguments + .into_iter() + .map(|param| { + let ty = ctx.type_table.insert_ast_type(¶m.ty)?; + Ok(scope.insert(param.name, ty, &mut ctx.scope_table)) + }) + .collect::>>()?; + let body = func + .body + .map(|body| Statement::from_ast(ctx, scope, body)) + .transpose()?; + + Ok(Self { + callable: Callable { + pos: callable.pos, + scope, + name: callable.name, + parameters, + return_type, + }, + body, + }) + } +} + +#[derive(Debug)] +pub struct Method { + pub callable: Callable, + pub self_parameter: scope::VariableId, + pub body: Statement, +} + +impl Method { + fn from_ast( + ctx: &mut Context, + scope: scope::Id, + struct_type: types::Id, + method: ast::Method, + ) -> Result { + let callable = method.callable; + let return_type = match callable.ret_type { + Some(ty) => ctx.type_table.insert_ast_type(&ty)?, + None => ctx.type_table.void, + }; + let scope = scope.push_function(return_type, &mut ctx.scope_table); + let self_parameter = scope.insert("self".into(), struct_type, &mut ctx.scope_table); + let parameters = callable + .arguments + .into_iter() + .map(|param| { + let ty = ctx.type_table.insert_ast_type(¶m.ty)?; + Ok(scope.insert(param.name, ty, &mut ctx.scope_table)) + }) + .collect::>>()?; + + Ok(Self { + callable: Callable { + pos: callable.pos, + scope, + name: callable.name, + parameters, + return_type, + }, + self_parameter, + body: Statement::from_ast(ctx, scope, method.body)?, + }) + } +} + +#[derive(Debug)] +pub struct Statement { + pub pos: Position, + pub kind: StatementKind, +} + +#[derive(Debug)] +pub enum StatementKind { + Block { + statements: Vec, + scope: scope::Id, + }, + Declare { + variable: scope::VariableId, + value: Option, + }, + Assign { + lhs: Expression, + rhs: Expression, + }, + Return(Option), + If { + condition: Expression, + body: Box, + else_branch: Option>, + }, + Match { + subject: Expression, + arms: Vec, + else_branch: Option>, + }, + While { + scope: scope::Id, + condition: Expression, + body: Box, + }, 
+ For { + scope: scope::Id, + variable: scope::VariableId, + expr: Expression, + body: Box, + }, + Break(scope::Id), + Continue(scope::Id), + Exp(Expression), +} + +#[derive(Debug)] +pub struct MatchArm { + pub condition: Expression, + pub body: Statement, +} + +impl Statement { + fn from_ast(ctx: &mut Context, scope: scope::Id, stmt: ast::Statement) -> Result { + let kind = match stmt.kind { + ast::StatementKind::Block { statements, .. } => { + Self::block_from_ast(ctx, scope, statements) + } + ast::StatementKind::Declare { variable, value } => { + Self::declare_from_ast(ctx, scope, variable, value) + } + ast::StatementKind::Assign { lhs, op, rhs } => { + Self::assign_from_ast(ctx, scope, stmt.pos, *lhs, op, *rhs) + } + ast::StatementKind::Return(value) => Self::return_from_ast(ctx, scope, stmt.pos, value), + ast::StatementKind::If { + condition, + body, + else_branch, + } => Self::if_from_ast(ctx, scope, condition, *body, else_branch), + ast::StatementKind::Match { subject, arms } => { + Self::match_from_ast(ctx, scope, stmt.pos, subject, arms) + } + ast::StatementKind::While { condition, body } => { + Self::while_from_ast(ctx, scope, stmt.pos, condition, *body) + } + ast::StatementKind::For { ident, expr, body } => { + Self::for_from_ast(ctx, scope, ident, expr, *body) + } + ast::StatementKind::Break => Self::break_from_ast(ctx, scope, stmt.pos), + ast::StatementKind::Continue => Self::continue_from_ast(ctx, scope, stmt.pos), + ast::StatementKind::Exp(expr) => { + let expr = Expression::from_ast(ctx, scope, expr)?; + Ok(StatementKind::Exp(expr)) + } + }?; + Ok(Statement { + pos: stmt.pos, + kind, + }) + } + + fn block_from_ast( + ctx: &mut Context, + scope: scope::Id, + statements: Vec, + ) -> Result { + let scope = scope.push(&mut ctx.scope_table); + let statements = statements + .into_iter() + .map(|stmt| Statement::from_ast(ctx, scope, stmt)) + .collect::>>()?; + + Ok(StatementKind::Block { scope, statements }) + } + + fn declare_from_ast( + ctx: &mut Context, + scope: scope::Id, + variable: ast::Variable, + value: Option, + ) -> Result { + let value = match value { + Some(value) => Some(Expression::from_ast(ctx, scope, value)?), + None => None, + }; + let ty = match variable.ty { + Some(ty) => ctx.type_table.insert_ast_type(&ty)?, + // The parser should ensure that an expression is available for + // declarations without an explicit type + None => value.as_ref().unwrap().result, + }; + + if let Some(ref value) = value { + if !ctx.type_table.assignable(ty, value.result) { + return Err(Error::new( + value.pos, + "Initializer is not assignable to variable type".to_owned(), + )); + } + } + + let variable = scope.insert(variable.name, ty, &mut ctx.scope_table); + + Ok(StatementKind::Declare { variable, value }) + } + + fn assign_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + lhs: ast::Expression, + op: AssignOp, + rhs: ast::Expression, + ) -> Result { + let lhs = Expression::from_ast(ctx, scope, lhs)?; + let rhs = Expression::from_ast(ctx, scope, rhs)?; + match lhs.kind { + ExpressionKind::Variable(..) + | ExpressionKind::FieldAccess { .. } + | ExpressionKind::ArrayAccess { .. 
} => {} + _ => { + return Err(Error::new( + lhs.pos, + "Left side of assignment must be a variable, field access or array access" + .to_owned(), + )); + } + } + if !ctx.type_table.assignable(lhs.result, rhs.result) { + return Err(Error::new( + rhs.pos, + "Value is not assignable to variable or field type".to_owned(), + )); + } + match op { + AssignOp::Set => {} + AssignOp::Add => { + if lhs.result == ctx.type_table.int { + // Addition variant + if rhs.result != ctx.type_table.int { + return Err(Error::new( + pos, + "Could not add and int".to_owned(), + )); + } + } else if lhs.result == ctx.type_table.string { + // Concatenation variant + if !(rhs.result == ctx.type_table.int + || rhs.result == ctx.type_table.boolean + || rhs.result == ctx.type_table.string) + { + return Err(Error::new( + pos, + "Could not concatenate 'string' and ".to_owned(), + )); + } + } else { + return Err(Error::new( + pos, + format!("Could not apply {op:?} to those types"), + )); + } + } + AssignOp::Subtract | AssignOp::Multiply | AssignOp::Divide => { + if rhs.result != ctx.type_table.int { + return Err(Error::new( + pos, + format!("Right side of {op:?} must be an int"), + )); + } + } + } + + Ok(StatementKind::Assign { lhs, rhs }) + } + + fn return_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + value: Option, + ) -> Result { + let value = match value { + Some(value) => Some(Expression::from_ast(ctx, scope, value)?), + None => None, + }; + + let return_type = scope + .return_type(&ctx.scope_table) + .ok_or_else(|| Error::new(pos, "'return' used outside of a function?".to_owned()))?; + + match value { + Some(ref value) => { + if !ctx.type_table.assignable(return_type, value.result) { + return Err(Error::new( + value.pos, + "Value is not assignable to function return type".to_owned(), + )); + } + } + None => { + if return_type != ctx.type_table.void { + return Err(Error::new(pos, "Must return a value".to_owned())); + } + } + } + + Ok(StatementKind::Return(value)) + } + + fn if_from_ast( + ctx: &mut Context, + scope: scope::Id, + condition: ast::Expression, + body: ast::Statement, + else_branch: Option>, + ) -> Result { + let condition = Expression::from_ast(ctx, scope, condition)?; + if condition.result != ctx.type_table.boolean { + return Err(Error::new( + condition.pos, + "Condition must be a 'bool'".to_owned(), + )); + } + let body = Box::new(Statement::from_ast(ctx, scope, body)?); + let else_branch = match else_branch { + Some(stmt) => Some(Box::new(Statement::from_ast(ctx, scope, *stmt)?)), + None => None, + }; + + Ok(StatementKind::If { + condition, + body, + else_branch, + }) + } + + fn match_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + subject: ast::Expression, + ast_arms: Vec, + ) -> Result { + let subject = Expression::from_ast(ctx, scope, subject)?; + let mut else_branch: Option> = None; + let mut arms: Vec = Vec::new(); + for arm in ast_arms { + match arm { + ast::MatchArm::Case(condition, body) => { + let condition = Expression::from_ast(ctx, scope, condition)?; + if subject.result != condition.result { + // TODO: location for match arms + return Err(Error::new( + pos, + "Condition does not match subject type".to_owned(), + )); + } + let body = Statement::from_ast(ctx, scope, body)?; + arms.push(MatchArm { condition, body }); + } + ast::MatchArm::Else(body) => { + assert!(else_branch.is_none()); // Enforced by the parser + let body = Box::new(Statement::from_ast(ctx, scope, body)?); + else_branch = Some(body); + } + } + } + + Ok(StatementKind::Match { + 
subject, + arms, + else_branch, + }) + } + + fn while_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + condition: ast::Expression, + body: ast::Statement, + ) -> Result { + let condition = Expression::from_ast(ctx, scope, condition)?; + if condition.result != ctx.type_table.boolean { + return Err(Error::new(pos, "Condition must be a 'bool'".to_owned())); + } + let scope = scope.push_loop(&mut ctx.scope_table); + let body = Box::new(Statement::from_ast(ctx, scope, body)?); + + Ok(StatementKind::While { + scope, + condition, + body, + }) + } + + fn for_from_ast( + ctx: &mut Context, + scope: scope::Id, + variable: ast::Variable, + expr: ast::Expression, + body: ast::Statement, + ) -> Result { + let expr = Expression::from_ast(ctx, scope, expr)?; + let scope = scope.push_loop(&mut ctx.scope_table); + let variable = if let types::Repr::Array { member_type, .. } = + expr.result.dealiased_repr(&ctx.type_table) + { + scope.insert(variable.name, *member_type, &mut ctx.scope_table) + } else { + return Err(Error::new( + expr.pos, + "'for' must iterate over an array".to_owned(), + )); + }; + let body = Box::new(Statement::from_ast(ctx, scope, body)?); + + Ok(StatementKind::For { + scope, + variable, + expr, + body, + }) + } + + fn break_from_ast(ctx: &mut Context, scope: scope::Id, pos: Position) -> Result { + match scope.nearest_loop(&ctx.scope_table) { + Some(loop_id) => Ok(StatementKind::Break(loop_id)), + None => Err(Error::new( + pos, + "'break' must be used within a loop".to_owned(), + )), + } + } + + fn continue_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + ) -> Result { + match scope.nearest_loop(&ctx.scope_table) { + Some(loop_id) => Ok(StatementKind::Continue(loop_id)), + None => Err(Error::new( + pos, + "'continue' must be used within a loop".to_owned(), + )), + } + } +} + +#[derive(Debug)] +pub struct Expression { + pub pos: Position, + pub kind: ExpressionKind, + pub result: types::Id, +} + +#[derive(Debug)] +pub enum ExpressionKind { + Int(usize), + Str(String), + Bool(bool), + Array(Vec), + Variable(scope::VariableId), + FunctionCall { + fn_name: String, + args: Vec, + }, + MethodCall { + method: String, // TODO: some kind of method identifier? + instance: Box, + args: Vec, + }, + BinOp { + lhs: Box, + op: BinOp, + rhs: Box, + }, + ArrayAccess { + target: Box, + index: Box, + }, + FieldAccess { + target: Box, + field: String, // TODO: some kind of field identifier? 
+ }, + StructInitialization { + ty: types::Id, + fields: HashMap, + }, +} + +impl Expression { + fn from_ast(ctx: &mut Context, scope: scope::Id, aexpr: ast::Expression) -> Result { + match aexpr.kind { + ast::ExpressionKind::Int(value) => Ok(Expression { + pos: aexpr.pos, + kind: ExpressionKind::Int(value), + result: ctx.type_table.int, + }), + ast::ExpressionKind::Str(value) => Ok(Expression { + pos: aexpr.pos, + kind: ExpressionKind::Str(value), + result: ctx.type_table.string, + }), + ast::ExpressionKind::Bool(value) => Ok(Expression { + pos: aexpr.pos, + kind: ExpressionKind::Bool(value), + result: ctx.type_table.boolean, + }), + ast::ExpressionKind::Array(elements) => { + Self::array_from_ast(ctx, scope, aexpr.pos, elements) + } + ast::ExpressionKind::Variable(name) => { + Self::variable_from_ast(ctx, scope, aexpr.pos, name) + } + ast::ExpressionKind::Selff => Self::self_from_ast(ctx, scope, aexpr.pos), + ast::ExpressionKind::FunctionCall { expr, args } => { + Self::call_from_ast(ctx, scope, aexpr.pos, *expr, args) + } + ast::ExpressionKind::BinOp { lhs, op, rhs } => { + Self::binop_from_ast(ctx, scope, aexpr.pos, *lhs, op, *rhs) + } + ast::ExpressionKind::ArrayAccess { expr, index } => { + Self::array_access_from_ast(ctx, scope, aexpr.pos, *expr, *index) + } + ast::ExpressionKind::FieldAccess { expr, field } => { + Self::field_access_from_ast(ctx, scope, aexpr.pos, *expr, field) + } + ast::ExpressionKind::StructInitialization { name, fields } => { + Self::struct_init_from_ast(ctx, scope, aexpr.pos, name, fields) + } + } + } + + fn variable_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + name: String, + ) -> Result { + let (id, ty) = scope + .lookup(&name, &ctx.scope_table) + .ok_or_else(|| Error::new(pos, format!("Undefined variable '{name}'")))?; + + Ok(Expression { + pos, + kind: ExpressionKind::Variable(id), + result: ty, + }) + } + + fn self_from_ast(ctx: &mut Context, scope: scope::Id, pos: Position) -> Result { + let (id, ty) = scope.lookup("self", &ctx.scope_table).ok_or_else(|| { + Error::new(pos, "'self' is not allowed outside of a method".to_owned()) + })?; + + Ok(Expression { + pos, + kind: ExpressionKind::Variable(id), + result: ty, + }) + } + + fn call_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + target: ast::Expression, + args: Vec, + ) -> Result { + match target.kind { + ast::ExpressionKind::Variable(name) => { + Self::function_call_from_ast(ctx, scope, pos, name, args) + } + ast::ExpressionKind::FieldAccess { expr, field } => { + let instance = Expression::from_ast(ctx, scope, *expr)?; + Self::method_call_from_ast(ctx, scope, pos, instance, field, args) + } + // TODO: better error message + _ => Err(Error::new(pos, "Invalid call".to_owned())), + } + } + + fn function_call_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + fn_name: String, + args: Vec, + ) -> Result { + let (_, fn_type) = scope + .lookup(&fn_name, &ctx.scope_table) + .ok_or_else(|| Error::new(pos, format!("Undefined function '{fn_name}'")))?; + let (args, return_type) = Self::check_call(ctx, scope, pos, fn_type, args)?; + + Ok(Expression { + pos, + kind: ExpressionKind::FunctionCall { fn_name, args }, + result: return_type, + }) + } + + fn method_call_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + instance: Expression, + method: String, + args: Vec, + ) -> Result { + let ty = instance.result.dealiased_repr(&ctx.type_table); + + let methods = if let types::Repr::Struct { methods, .. 
} = ty { + methods + } else { + return Err(Error::new( + pos, + "Cannot call a method of a non-struct type".to_owned(), + )); + }; + + let fn_type = methods + .get(&method) + .ok_or_else(|| Error::new(pos, format!("No such method '{method}'")))?; + + let (args, return_type) = Self::check_call(ctx, scope, pos, *fn_type, args)?; + + Ok(Expression { + pos, + kind: ExpressionKind::MethodCall { + instance: Box::new(instance), + method, + args, + }, + result: return_type, + }) + } + + fn check_call( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + fn_type: types::Id, + args: Vec, + ) -> Result<(Vec, types::Id)> { + let ty = fn_type.repr(&ctx.type_table); + + let (parameters, return_type) = if let types::Repr::Function { + parameters, + return_type, + } = ty + { + // FIXME: this clone should not be necessary + (parameters.clone(), *return_type) + } else { + return Err(Error::new(pos, "Cannot call non-function type".to_owned())); + }; + + if args.len() < parameters.len() { + return Err(Error::new( + pos, + format!( + "Not enough arguments: expected {}, got {}", + parameters.len(), + args.len() + ), + )); + } + if args.len() > parameters.len() { + return Err(Error::new( + pos, + format!( + "Too many arguments: expected {}, got {}", + parameters.len(), + args.len() + ), + )); + } + + let args = std::iter::zip(parameters, args) + .map(|(param_type, arg)| { + let arg = Expression::from_ast(ctx, scope, arg)?; + if !ctx.type_table.assignable(param_type, arg.result) { + return Err(Error::new( + arg.pos, + "Argument is not assignable to parameter type".to_owned(), + )); + } + Ok(arg) + }) + .collect::>>()?; + + Ok((args, return_type)) + } + + fn array_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + elements: Vec, + ) -> Result { + let mut member_type: Option = None; + let elements = elements + .into_iter() + .map(|elem| { + let elem = Expression::from_ast(ctx, scope, elem)?; + if let Some(previous_type) = member_type { + if elem.result != previous_type { + return Err(Error::new( + elem.pos, + "Array elements must have a uniform type".to_owned(), + )); + } + } else { + member_type = Some(elem.result); + } + Ok(elem) + }) + .collect::>>()?; + let member_type = + member_type.ok_or_else(|| Error::new(pos, "TODO: empty arrays".to_owned()))?; + let array_type = ctx + .type_table + .insert_array(member_type, elements.len()) + .unwrap(); + + Ok(Expression { + pos, + kind: ExpressionKind::Array(elements), + result: array_type, + }) + } + + fn binop_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + lhs: ast::Expression, + op: BinOp, + rhs: ast::Expression, + ) -> Result { + let lhs = Expression::from_ast(ctx, scope, lhs)?; + let rhs = Expression::from_ast(ctx, scope, rhs)?; + let result = match op { + BinOp::Equal | BinOp::NotEqual => { + if lhs.result != rhs.result { + return Err(Error::new( + pos, + "Cannot compare values of different types".to_owned(), + )); + } + ctx.type_table.boolean + } + BinOp::And | BinOp::Or => { + if lhs.result != ctx.type_table.boolean { + return Err(Error::new( + pos, + format!("Left side of {op:?} must be a bool"), + )); + } + if rhs.result != ctx.type_table.boolean { + return Err(Error::new( + pos, + format!("Right side of {op:?} must be a bool"), + )); + } + ctx.type_table.boolean + } + BinOp::Addition => { + if lhs.result == ctx.type_table.int { + // Addition variant + if rhs.result != ctx.type_table.int { + return Err(Error::new( + pos, + "Could not add and int".to_owned(), + )); + } + ctx.type_table.int + } else if lhs.result 
== ctx.type_table.string { + // Concatenation variant + if !(rhs.result == ctx.type_table.int + || rhs.result == ctx.type_table.boolean + || rhs.result == ctx.type_table.string) + { + return Err(Error::new( + pos, + "Could not concatenate 'string' and ".to_owned(), + )); + } + ctx.type_table.string + } else { + return Err(Error::new( + pos, + format!("Could not apply {op:?} to those types"), + )); + } + } + // Comparisons + _ => { + if lhs.result != ctx.type_table.int { + return Err(Error::new( + pos, + format!("Left side of {op:?} must be an int"), + )); + } + if rhs.result != ctx.type_table.int { + return Err(Error::new( + pos, + format!("Right side of {op:?} must be an int"), + )); + } + ctx.type_table.int + } + }; + Ok(Expression { + pos, + kind: ExpressionKind::BinOp { + lhs: Box::new(lhs), + op, + rhs: Box::new(rhs), + }, + result, + }) + } + + fn array_access_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + target: ast::Expression, + index: ast::Expression, + ) -> Result { + let target = Box::new(Expression::from_ast(ctx, scope, target)?); + let index = Box::new(Expression::from_ast(ctx, scope, index)?); + let member_type = if let types::Repr::Array { member_type, .. } = + target.result.dealiased_repr(&ctx.type_table) + { + *member_type + } else { + return Err(Error::new( + pos, + "Cannot index a value of non-array type".to_owned(), + )); + }; + if index.result != ctx.type_table.int { + return Err(Error::new(pos, "Index must be an 'int'".to_owned())); + } + + Ok(Expression { + pos, + kind: ExpressionKind::ArrayAccess { target, index }, + result: member_type, + }) + } + + fn field_access_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + target: ast::Expression, + field: String, + ) -> Result { + let target = Box::new(Expression::from_ast(ctx, scope, target)?); + let struct_repr = target.result.dealiased_repr(&ctx.type_table); + let fields = if let types::Repr::Struct { fields, .. } = &struct_repr { + fields + } else { + return Err(Error::new( + pos, + "Cannot get a field of non-struct type".to_owned(), + )); + }; + + let field_type = fields + .get(&field) + .ok_or_else(|| Error::new(pos, format!("No such field '{field}'")))? + .ty; + + Ok(Expression { + pos, + kind: ExpressionKind::FieldAccess { target, field }, + result: field_type, + }) + } + + fn struct_init_from_ast( + ctx: &mut Context, + scope: scope::Id, + pos: Position, + struct_name: String, + fields: HashMap>, + ) -> Result { + let ty = ctx + .type_table + .by_name(&struct_name) + .ok_or_else(|| Error::new(pos, format!("No such struct '{struct_name}'")))?; + let repr = ty.dealiased_repr(&ctx.type_table); + let available_fields = if let types::Repr::Struct { fields, .. } = &repr { + fields + } else { + panic!("Unexpected non-struct named type") + }; + + // Borrow checker shenanigan + let types = fields + .keys() + .map(|name| { + let types::StructField { ty: field_type, .. 
} = available_fields + .get(name) + .ok_or_else(|| Error::new(pos, format!("Unknown field '{name}'")))?; + Ok((name.clone(), *field_type)) + }) + .collect::>>()?; + + let fields = fields + .into_iter() + .map(|(name, value)| { + let field_type = types.get(&name).unwrap(); + let value = Expression::from_ast(ctx, scope, *value)?; + if !ctx.type_table.assignable(*field_type, value.result) { + return Err(Error::new( + value.pos, + "Initializer is not assignable to field type".to_owned(), + )); + } + Ok((name, value)) + }) + .collect::>>()?; + + Ok(Expression { + pos, + kind: ExpressionKind::StructInitialization { ty, fields }, + result: ty, + }) + } +} diff --git a/src/check/scope.rs b/src/check/scope.rs new file mode 100644 index 00000000..e8406daf --- /dev/null +++ b/src/check/scope.rs @@ -0,0 +1,166 @@ +use crate::check::types; +use std::collections::HashMap; + +#[derive(Debug, Eq, PartialEq)] +enum ScopeKind { + Root, + Block, + Loop, + Function { return_type: types::Id }, +} + +#[derive(Debug)] +struct Scope { + parent: Option, + kind: ScopeKind, + name_map: HashMap, + types: Vec, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +struct InnerId { + index: usize, +} + +impl Scope { + fn lookup(&self, name: &str) -> Option<(InnerId, types::Id)> { + self.name_map + .get(name) + .map(|inner| (*inner, self.types[inner.index])) + } + + fn insert(&mut self, name: String, ty: types::Id) -> InnerId { + let inner = InnerId { + index: self.types.len(), + }; + self.types.push(ty); + self.name_map.insert(name, inner); + inner + } +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub struct Id { + index: usize, +} + +impl Id { + pub fn lookup(&self, name: &str, table: &Table) -> Option<(VariableId, types::Id)> { + let scope = self.get(table); + match scope.lookup(name) { + Some((inner, ty)) => Some(( + VariableId { + scope: *self, + inner, + }, + ty, + )), + None => match scope.parent { + Some(parent) => parent.lookup(name, table), + None => None, + }, + } + } + + pub fn insert(&self, name: String, ty: types::Id, table: &mut Table) -> VariableId { + let inner = table.scopes[self.index].insert(name, ty); + VariableId { + scope: *self, + inner, + } + } + + pub fn nearest_loop(&self, table: &Table) -> Option { + let scope = self.get(table); + if scope.kind == ScopeKind::Loop { + Some(*self) + } else { + match scope.parent { + Some(parent) => parent.nearest_loop(table), + None => None, + } + } + } + + pub fn return_type(&self, table: &Table) -> Option { + let scope = self.get(table); + match scope.kind { + ScopeKind::Function { return_type } => Some(return_type), + _ => match scope.parent { + Some(parent) => parent.return_type(table), + None => None, + }, + } + } + + pub fn push(&self, table: &mut Table) -> Id { + table.add_node(Scope { + parent: Some(*self), + kind: ScopeKind::Block, + name_map: HashMap::new(), + types: Vec::new(), + }) + } + + pub fn push_function(&self, return_type: types::Id, table: &mut Table) -> Id { + table.add_node(Scope { + parent: Some(*self), + kind: ScopeKind::Function { return_type }, + name_map: HashMap::new(), + types: Vec::new(), + }) + } + + pub fn push_loop(&self, table: &mut Table) -> Id { + table.add_node(Scope { + parent: Some(*self), + kind: ScopeKind::Loop, + name_map: HashMap::new(), + types: Vec::new(), + }) + } + + fn get<'a>(&self, table: &'a Table) -> &'a Scope { + &table.scopes[self.index] + } +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub struct VariableId { + scope: Id, + inner: InnerId, +} + +impl VariableId { + #[allow(dead_code)] + pub fn 
get_type(&self, table: &Table) -> types::Id { + let scope = self.scope.get(table); + scope.types[self.inner.index] + } +} + +#[derive(Debug)] +pub struct Table { + scopes: Vec, +} + +impl Table { + pub fn new() -> Self { + Self { scopes: Vec::new() } + } + + pub fn add_root(&mut self) -> Id { + self.add_node(Scope { + parent: None, + kind: ScopeKind::Root, + name_map: HashMap::new(), + types: Vec::new(), + }) + } + + fn add_node(&mut self, scope: Scope) -> Id { + let id = self.scopes.len(); + self.scopes.push(scope); + Id { index: id } + } +} diff --git a/src/check/types.rs b/src/check/types.rs new file mode 100644 index 00000000..fe41d961 --- /dev/null +++ b/src/check/types.rs @@ -0,0 +1,298 @@ +use super::{Error, Result as CheckResult}; +use crate::ast; +use crate::ast::types::Type as AstType; +use crate::ast::types::TypeKind as AstTypeKind; +use std::collections::HashMap; + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub struct Id { + index: usize, +} + +impl Id { + pub fn dealias(&self, table: &Table) -> Id { + match self.repr(table) { + Repr::Named { inner, .. } => inner.dealias(table), + _ => *self, + } + } + + pub fn repr<'a>(&self, table: &'a Table) -> &'a Repr { + &table.types[self.index].repr + } + + pub fn dealiased_repr<'a>(&self, table: &'a Table) -> &'a Repr { + self.dealias(table).repr(table) + } + + pub fn size(&self, table: &Table) -> usize { + table.types[self.index].size + } + + pub fn alignment(&self, table: &Table) -> usize { + table.types[self.index].alignment + } +} + +// TODO: put size/alignment in a struct of their own, like Dimensions + +#[derive(Debug, Eq, PartialEq)] +struct Type { + pub repr: Repr, + pub size: usize, + pub alignment: usize, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Repr { + Any, + Void, + Int, + Str, + Bool, + Array { + member_type: Id, + length: usize, + }, + DynamicArray(Id), + Struct { + fields: HashMap, + methods: HashMap, + }, + Named { + name: String, + inner: Id, + }, + Function { + parameters: Vec, + return_type: Id, + }, +} + +#[derive(Debug, Eq, PartialEq)] +pub struct StructField { + pub ty: Id, + pub offset: usize, +} + +#[derive(Debug)] +pub struct Table { + types: Vec, + // Builtin types + pub any: Id, + pub void: Id, + pub int: Id, + pub string: Id, + pub boolean: Id, +} + +impl Table { + pub fn new() -> Self { + let mut table = Self { + types: vec![], + any: Id { index: 0 }, + void: Id { index: 0 }, + int: Id { index: 0 }, + string: Id { index: 0 }, + boolean: Id { index: 0 }, + }; + let any = table.insert(Type { + repr: Repr::Any, + // Undefined size + size: usize::MAX, + alignment: usize::MAX, + }); + let void = table.insert(Type { + repr: Repr::Void, + size: 0, + alignment: 0, + }); + let int = table.insert(Type { + repr: Repr::Int, + size: 4, + alignment: 4, + }); + let string = table.insert(Type { + repr: Repr::Str, + // TODO: 32-bit targets + size: 8, + alignment: 8, + }); + let boolean = table.insert(Type { + repr: Repr::Bool, + size: 1, + alignment: 1, + }); + Self { + any, + void, + int, + string, + boolean, + ..table + } + } + + pub fn by_name(&self, name: &str) -> Option { + // FIXME: this is slow + self.types + .iter() + .enumerate() + .find(|(_, existing)| + matches!(&existing.repr, Repr::Named { name: existing_name, .. 
} if existing_name == name))
+            .map(|(index, _)| Id { index })
+    }
+
+    fn insert(&mut self, ty: Type) -> Id {
+        // FIXME: this is slow
+        self.types
+            .iter()
+            .enumerate()
+            .find(|(_, existing)| existing == &&ty)
+            .map(|(index, _)| Id { index })
+            .unwrap_or_else(|| {
+                let index = self.types.len();
+                self.types.push(ty);
+                Id { index }
+            })
+    }
+
+    pub fn insert_named(&mut self, name: String, inner: Id) -> Result<Id, String> {
+        if self.by_name(&name).is_some() {
+            return Err(format!("Redefinition of type '{name}'"));
+        }
+        let (size, alignment) = (inner.size(self), inner.alignment(self));
+        Ok(self.insert(Type {
+            repr: Repr::Named { name, inner },
+            size,
+            alignment,
+        }))
+    }
+
+    pub fn insert_array(&mut self, member_type: Id, length: usize) -> Result<Id, String> {
+        if length == 0 {
+            return Err("Array length must not be 0".into());
+        }
+
+        let (member_size, alignment) = (member_type.size(self), member_type.alignment(self));
+        Ok(self.insert(Type {
+            repr: Repr::Array {
+                member_type,
+                length,
+            },
+            size: length * member_size,
+            alignment,
+        }))
+    }
+
+    pub fn insert_dynamic_array(&mut self, member_type: Id) -> Id {
+        self.insert(Type {
+            repr: Repr::DynamicArray(member_type),
+            size: 8 + 8, // struct { data: *T, len: u64 }
+            alignment: 8,
+        })
+    }
+
+    pub fn insert_ast_callable(&mut self, callable: &ast::Callable) -> CheckResult<Id> {
+        let parameters = callable
+            .arguments
+            .iter()
+            .map(|ast::TypedVariable { ty, .. }| self.insert_ast_type(ty))
+            .collect::<Result<Vec<_>, _>>()?;
+        let return_type = callable
+            .ret_type
+            .as_ref()
+            .map(|ty| self.insert_ast_type(ty))
+            .transpose()?
+            .unwrap_or(self.void);
+
+        Ok(self.insert(Type {
+            repr: Repr::Function {
+                parameters,
+                return_type,
+            },
+            // Undefined size
+            size: usize::MAX,
+            alignment: usize::MAX,
+        }))
+    }
+
+    pub fn insert_ast_struct(&mut self, def: &ast::StructDef) -> CheckResult<Id> {
+        let mut current_offset = 0usize;
+        let fields = def
+            .fields
+            .iter()
+            .map(|ast::TypedVariable { name, ty, .. }| {
+                let ty = self.insert_ast_type(ty)?;
+                let (size, alignment) = (ty.size(self), ty.alignment(self));
+                assert!(size > 0); // TODO?
+
+                // Pad up to the field's alignment before recording its offset
+                current_offset += (alignment - current_offset % alignment) % alignment;
+                let offset = current_offset;
+                current_offset += size;
+                Ok((name.clone(), StructField { ty, offset }))
+            })
+            .collect::<CheckResult<HashMap<_, _>>>()?;
+
+        let alignment = fields
+            .values()
+            .map(|field| field.ty.alignment(self))
+            .max()
+            .unwrap(); // TODO: zero-size structs?
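+        // The struct's alignment is the largest field alignment; its size is the
+        // running offset after the last field (no trailing padding is added).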
+ let size = current_offset; + + let methods = def + .methods + .iter() + .map(|function| { + let ty = self.insert_ast_callable(&function.callable)?; + Ok((function.callable.name.clone(), ty)) + }) + .collect::>>()?; + + let struct_type = self.insert(Type { + repr: Repr::Struct { fields, methods }, + size, + alignment, + }); + self.insert_named(def.name.clone(), struct_type) + .map_err(|msg| Error::new(def.pos, msg)) + } + + pub fn insert_ast_type(&mut self, ty: &AstType) -> CheckResult { + match &ty.kind { + AstTypeKind::Any => Ok(self.any), + AstTypeKind::Int => Ok(self.int), + AstTypeKind::Str => Ok(self.string), + AstTypeKind::Bool => Ok(self.boolean), + AstTypeKind::Array(member_type, Some(size)) => { + let member_type = self.insert_ast_type(member_type)?; + self.insert_array(member_type, *size) + .map_err(|msg| Error::new(ty.pos, msg)) + } + AstTypeKind::Array(member_type, None) => { + let member_type = self.insert_ast_type(member_type)?; + Ok(self.insert_dynamic_array(member_type)) + } + AstTypeKind::Struct(name) => self + .by_name(name) + .ok_or_else(|| Error::new(ty.pos, format!("Could not resolve type '{name}'"))), + } + } + + pub fn assignable(&self, left: Id, right: Id) -> bool { + if left == right { + return true; + } + + match (left.repr(self), right.repr(self)) { + (Repr::Any, _) => true, + (Repr::DynamicArray(dyn_member), Repr::Array { member_type, .. }) + if dyn_member == member_type => + { + true + } + _ => false, + } + } +} diff --git a/src/command/mod.rs b/src/command/mod.rs index 9288df9a..5d9d7cc6 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -15,3 +15,4 @@ */ pub mod build; pub mod run; +pub mod typecheck; diff --git a/src/command/typecheck.rs b/src/command/typecheck.rs new file mode 100644 index 00000000..9b175511 --- /dev/null +++ b/src/command/typecheck.rs @@ -0,0 +1,23 @@ +use crate::check::Module as CheckedModule; +use crate::lexer; +use crate::parser; +use std::fs::File; +use std::io::Read; +use std::path::PathBuf; + +pub fn check(in_file: PathBuf) -> Result<(), String> { + let mut file = File::open(&in_file).unwrap(); + let mut contents = String::new(); + file.read_to_string(&mut contents) + .map_err(|err| err.to_string())?; + + let mut table = lexer::FileTable::new(); + let file = table.insert(in_file, contents); + + let tokens = lexer::tokenize(file, &table).map_err(|err| err.format(&table))?; + let module = parser::parse(tokens).map_err(|err| err.format(&table))?; + println!("Parsed: {:#?}", module); + let checked_module = CheckedModule::from_ast(module).map_err(|err| err.format(&table))?; + println!("Checked: {:#?}", checked_module); + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index e0a52438..322ba900 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,7 @@ use structopt::StructOpt; mod ast; mod builder; +mod check; mod command; mod generator; mod lexer; @@ -54,6 +55,8 @@ enum Command { }, #[structopt()] Run { in_file: PathBuf }, + #[structopt()] + Typecheck { in_file: PathBuf }, } #[derive(StructOpt, Debug)] @@ -91,6 +94,7 @@ fn run() -> Result<(), String> { command::build::build(&target, &in_file, &out_file)? } Command::Run { in_file } => command::run::run(opts.target.unwrap_or(Target::JS), in_file)?, + Command::Typecheck { in_file } => command::typecheck::check(in_file)?, }; Ok(())