diff --git a/.env.example b/.env.example index f1da507..55db752 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,61 @@ -PRINT_COMMAND="Your Print Command!" -VARIABLE_DECLARE="Your Variable declaration Command!" +PRINT_COMMAND=print +VARIABLE_DECLARE=var +# Customizable keyword mappings +KEYWORD_PRINT=show +KEYWORD_IF=when +KEYWORD_ELSE=otherwise +KEYWORD_WHILE=repeat +KEYWORD_FOR=loop +KEYWORD_FUNCTION=def +KEYWORD_RETURN=give +KEYWORD_BREAK=stop +KEYWORD_CONTINUE=skip +KEYWORD_AND=also +KEYWORD_OR=either +KEYWORD_NOT=negate +KEYWORD_TRUE=yes +KEYWORD_FALSE=no +KEYWORD_NONE=nothing +KEYWORD_VAR=let + +# Custom Error Messages +# You can override any error message by uncommenting and modifying these lines + +# Syntax Errors +#ERROR_SYNTAX_GENERIC="🦆 Oops! Your code has a syntax error at line {line}" +#ERROR_MISSING_BRACKET="🦆 Missing a {bracket_type} at line {line}. Every duck needs its pair!" +#ERROR_INVALID_TOKEN="🦆 Found an unexpected {token} at line {line}. That's not duck-friendly!" +#ERROR_UNEXPECTED_TOKEN="🦆 Found a {token} where it shouldn't be at line {line}. Ducks like order!" +#ERROR_INVALID_INDENTATION="🦆 Your code's indentation at line {line} is making ducks dizzy!" + +# Runtime Errors +#ERROR_DIVISION_BY_ZERO="🦆 Even ducks know you can't divide by zero!" +#ERROR_UNDEFINED_VAR="🦆 Can't find the variable '{var_name}'. Did it fly away?" +#ERROR_TYPE_MISMATCH="🦆 Expected a {expected_type} but got a {actual_type}. Ducks of a feather..." +#ERROR_INDEX_OUT_OF_RANGE="🦆 Index {index} is out of the pond! Maximum is {max_index}" +#ERROR_STACK_OVERFLOW="🦆 Too many nested ducks! They're starting to pile up!" + +# Function Errors +#ERROR_UNDEFINED_FUNCTION="🦆 The function '{func_name}' is MIA (Missing In Action)!" +#ERROR_INVALID_ARGUMENTS="🦆 Expected {expected} ducks, but got {actual}. Count them again!" +#ERROR_RECURSION_LIMIT="🦆 Your ducks are going in circles! 
Time to break the loop" + +# Type Errors +#ERROR_TYPE_CONVERSION="🦆 Can't transform a {from_type} into a {to_type}. Magic has limits!" +#ERROR_INVALID_OPERATION="🦆 Can't {operation} with {type1} and {type2}. They don't mix!" +#ERROR_NULL_REFERENCE="🦆 Found an empty nest where a value should be!" + +# IO Errors +#ERROR_FILE_NOT_FOUND="🦆 The file '{file}' is playing hide and seek... and winning!" +#ERROR_PERMISSION_DENIED="🦆 No access to '{file}'. This pond is private!" +#ERROR_IO_ERROR="🦆 Duck communication error: {details}" + +# Configuration +#ERROR_INVALID_KEYWORD="🦆 The keyword '{keyword}' isn't in the duck dictionary!" +#ERROR_DUPLICATE_KEYWORD="🦆 '{keyword}' is already used for {existing_use}. Ducks need unique names!" +#ERROR_INVALID_CONFIG="🦆 Your duck configuration is invalid: {details}" + +# Import/Module +#ERROR_MODULE_NOT_FOUND="🦆 Can't find the module '{module}'. Is it hiding?" +#ERROR_CIRCULAR_IMPORT="🦆 Your imports are going in circles! Ducks are getting dizzy!" diff --git a/custom_program.duck b/custom_program.duck new file mode 100644 index 0000000..59db634 --- /dev/null +++ b/custom_program.duck @@ -0,0 +1,47 @@ +# Function definition using 'def' instead of 'function' +def factorial(n) { + # Using 'when' instead of 'if' + when n < 0 { + give 0 + } + + when n <= 1 { + give 1 + } + + let result = 1 + let counter = n + + # Using 'repeat' instead of 'while' + repeat counter > 1 { + result = result * counter + counter = counter - 1 + } + + give result +} + +# Using 'let' instead of 'var' +let numbers = [1, 2, 3, 4, 5] + +# Using 'show' instead of 'print' +show "Testing custom keywords!" + +# Using 'when/otherwise' instead of 'if/else' +when 5 > 3 { + show "5 is greater than 3" +} otherwise { + show "This won't be shown" +} + +# Test logical operators +let x = yes # Using 'yes' instead of 'true' +let y = no # Using 'no' instead of 'false' + +when x also negate y { # Using 'also' instead of 'and', 'negate' instead of 'not' + show "Logic works!" 
+} + +show "Factorial of 5 is:" +let result = factorial(5) +show result \ No newline at end of file diff --git a/docs/Rules.md b/docs/Rules.md index 2413669..786ae73 100644 --- a/docs/Rules.md +++ b/docs/Rules.md @@ -1,4 +1,425 @@ -## KeyWords +# DuckLang Programming Language Documentation -- For Print -> ```quack``` (eg. quack Hello World ) +## Table of Contents +- [Introduction](#introduction) +- [Getting Started](#getting-started) +- [Language Customization](#language-customization) +- [Basic Syntax](#basic-syntax) +- [Data Types](#data-types) +- [Control Flow](#control-flow) +- [Functions](#functions) +- [Examples](#examples) +- [Configuration Guide](#configuration-guide) +- [Error Handling](#error-handling) + +## Introduction + +DuckLang is a flexible, customizable programming language that allows users to define their own keywords and syntax elements through configuration. This makes it ideal for educational purposes, localization, and creating domain-specific language variants. + +## Getting Started + +### Running DuckLang Programs +To run a DuckLang program: + +1. Save your code with the `.duck` extension (e.g., `program.duck`) +2. Open a terminal in the project directory +3. Run the following command: +```bash +python main.py your_program.duck +``` + +For example: +```bash +# Running a simple program +python main.py hello.duck + +# Running with custom keywords (make sure .env is configured) +python main.py custom_program.duck +``` + +## Language Customization + +### Customizable Keywords +All keywords in Duck can be customized through a `.env` file. 
Here are the default keywords and their customizable environment variables: + +| Default Keyword | Environment Variable | Purpose | +|----------------|---------------------|---------| +| `print` | `KEYWORD_PRINT` | Output command | +| `if` | `KEYWORD_IF` | Conditional statement | +| `else` | `KEYWORD_ELSE` | Alternative condition | +| `while` | `KEYWORD_WHILE` | Loop construct | +| `for` | `KEYWORD_FOR` | Iteration construct | +| `function` | `KEYWORD_FUNCTION` | Function definition | +| `return` | `KEYWORD_RETURN` | Function return | +| `break` | `KEYWORD_BREAK` | Loop termination | +| `continue` | `KEYWORD_CONTINUE` | Skip loop iteration | +| `and` | `KEYWORD_AND` | Logical AND | +| `or` | `KEYWORD_OR` | Logical OR | +| `not` | `KEYWORD_NOT` | Logical NOT | +| `true` | `KEYWORD_TRUE` | Boolean true | +| `false` | `KEYWORD_FALSE` | Boolean false | +| `none` | `KEYWORD_NONE` | Null value | +| `var` | `KEYWORD_VAR` | Variable declaration | + +### Example Configuration +```env +KEYWORD_PRINT=show +KEYWORD_IF=when +KEYWORD_ELSE=otherwise +KEYWORD_WHILE=repeat +KEYWORD_FUNCTION=def +KEYWORD_RETURN=give +``` + +## Basic Syntax + +### Statement Separation +Statements in DuckLang are separated by newlines. Semicolons are not required as statement separators; a trailing semicolon is optional. 
+ +```python +# Correct way +var x = 5 +print(x) + +# Not needed (but will work) +var x = 5; +print(x); +``` + +### Variables +```python +# Variable declaration +var x = 5 # Default syntax +let x = 5 # With KEYWORD_VAR=let + +# Variable update (no 'var' keyword needed) +x = 10 # Updating existing variable +``` + +### Output +```python +print("Hello") # Default syntax +show("Hello") # With KEYWORD_PRINT=show +``` + +### Comments +```python +# Single line comment +``` + +## Data Types + +Duck supports the following data types: + +### Primitive Types +- Numbers (integers and floating-point) +- Strings (text enclosed in quotes) +- Booleans (`true`/`false` or customized values) +- None (`none` or customized value) + +### Complex Types +- Arrays: `[1, 2, 3]` +- Functions + +### Type Examples +```python +# Numbers +var num = 42 +var float_num = 3.14 + +# Strings +var text = "Hello, World!" + +# Booleans +var flag = true # Default +var flag = yes # With KEYWORD_TRUE=yes + +# Arrays +var numbers = [1, 2, 3, 4, 5] +``` + +## Control Flow + +### Conditional Statements +```python +# Default syntax +if x > 0 { + print("Positive") +} else { + print("Non-positive") +} + +# Customized syntax (with configured keywords) +when x > 0 { + show("Positive") +} otherwise { + show("Non-positive") +} +``` + +### Loops +```python +# While loop (default) +while x > 0 { + print(x) + x = x - 1 +} + +# While loop (customized) +repeat x > 0 { + show(x) + x = x - 1 +} +``` + +## Functions + +### Function Definition +```python +# Default syntax +function factorial(n) { + if n <= 1 { + return 1 + } + return n * factorial(n - 1) +} + +# Customized syntax +def factorial(n) { + when n <= 1 { + give 1 + } + give n * factorial(n - 1) +} +``` + +### Function Calls +```python +var result = factorial(5) +``` + +## Examples + +### Complete Program Example (Default Syntax) +```python +function factorial(n) { + if n < 0 { + return 0 + } + if n <= 1 { + return 1 + } + return n * factorial(n - 1) +} + +var 
numbers = [1, 2, 3, 4, 5] +print("Computing factorials:") + +for num in numbers { + print(factorial(num)) +} +``` + +### Same Program with Custom Keywords +```python +def factorial(n) { + when n < 0 { + give 0 + } + when n <= 1 { + give 1 + } + give n * factorial(n - 1) +} + +let numbers = [1, 2, 3, 4, 5] +show("Computing factorials:") + +loop num in numbers { + show(factorial(num)) +} +``` + +## Configuration Guide + +### Setting Up Custom Keywords + +1. Create a `.env` file in your project root +2. Define your custom keywords: +```env +KEYWORD_PRINT=show +KEYWORD_IF=when +KEYWORD_ELSE=otherwise +KEYWORD_WHILE=repeat +KEYWORD_FOR=loop +KEYWORD_FUNCTION=def +KEYWORD_RETURN=give +KEYWORD_BREAK=stop +KEYWORD_CONTINUE=skip +KEYWORD_AND=also +KEYWORD_OR=either +KEYWORD_NOT=negate +KEYWORD_TRUE=yes +KEYWORD_FALSE=no +KEYWORD_NONE=nothing +KEYWORD_VAR=let +``` + +### Configuration Rules +- Keywords must be unique +- Keywords cannot contain spaces +- Keywords are case-insensitive +- Keywords cannot be special characters used by the language (like operators) + +## Error Handling + +DuckLang provides a robust and customizable error handling system. Errors can be customized through the `.env` file, allowing you to set your own error messages while maintaining helpful debugging information. + +### Error Types + +1. **Syntax Errors** + - Invalid token sequences + - Missing brackets or parentheses + - Incorrect indentation + - Unknown keywords + +2. **Runtime Errors** + - Division by zero + - Undefined variables + - Type mismatches + - Stack overflow + - Invalid function calls + +3. **Configuration Errors** + - Invalid keyword definitions + - Duplicate keywords + - Missing required configurations + +### Customizing Error Messages + +Error messages can be customized in your `.env` file: + +```env +# Syntax Errors +ERROR_SYNTAX_GENERIC="Oops! Something's not quite right at line {line}" +ERROR_MISSING_BRACKET="Hey! 
You forgot a {bracket_type} at line {line}" +ERROR_INVALID_TOKEN="Unexpected {token} at line {line}" + +# Runtime Errors +ERROR_DIVISION_BY_ZERO="Division by zero? That's infinitely bad!" +ERROR_UNDEFINED_VAR="Can't find '{var_name}'. Did it fly away?" +ERROR_TYPE_MISMATCH="Expected {expected_type} but got {actual_type}" + +# Configuration Errors +ERROR_INVALID_KEYWORD="'{keyword}' isn't a valid keyword name" +ERROR_DUPLICATE_KEYWORD="'{keyword}' is already used for {existing_use}" +``` + +### Error Message Format + +Each error message can include placeholders for dynamic information: +- `{line}` - Line number +- `{column}` - Column number +- `{token}` - The problematic token +- `{expected}` - Expected value/type +- `{actual}` - Actual value/type +- `{file}` - File name + +### Default Fun Error Messages + +If no custom error messages are defined, DuckLang uses these quirky fallbacks: + +```python +# Syntax Errors +"Quack! Your code seems a bit scrambled at line {line}" +"Waddle waddle... can't parse that!" +"This syntax makes me want to duck for cover!" + +# Runtime Errors +"Your code took a wrong turn at the duck pond" +"That operation is like dividing by zero ducks... impossible!" +"This variable seems to have flown south for the winter" + +# Type Errors +"Expected a duck, but got a goose!" +"These types don't flock together" +``` + +### Error Output Format + +Errors are displayed in a clear, structured format: + +``` +🦆 DuckLang Error: Type Mismatch +📍 Line 42, Column 10 in 'example.duck' +❌ Expected number but got string + +Code: + 41 | var x = 5 +-> 42 | x = "hello" + 43 | print(x) + +Custom Message: These types don't flock together! +Suggestion: Try converting "hello" to a number first +``` + +### Error Handling Best Practices + +1. **Clear Messages** + - Use descriptive but concise messages + - Include relevant variable/function names + - Provide suggestions when possible + +2. 
**Consistent Format** + - Always include line numbers + - Show the problematic code snippet + - Provide a suggestion if applicable + +3. **Custom Messages** + - Keep messages user-friendly + - Use appropriate technical terms + - Maintain helpful context + +4. **Configuration** + - Back up default error messages + - Test custom messages thoroughly + - Keep messages language-appropriate + +### Example Error Configurations + +Here are three alternative styles of custom error messages (use only one value per key in a real `.env`): + +```env +# Professional Style +ERROR_SYNTAX_GENERIC="Syntax Error: Invalid syntax at line {line}" +ERROR_INVALID_OPERATION="Type Error: Cannot perform {operation} with {type1} and {type2}" +ERROR_UNDEFINED_VAR="Name Error: '{var_name}' is not defined in current scope" + +# Fun Style +ERROR_SYNTAX_GENERIC="Quack Attack! Code went wonky at line {line}" +ERROR_INVALID_OPERATION="These types are like oil and water, they don't mix!" +ERROR_UNDEFINED_VAR="Looks like '{var_name}' took a swim and never came back" + +# Educational Style +ERROR_SYNTAX_GENERIC="Let's check the syntax at line {line}. Look for unclosed brackets or quotes" +ERROR_INVALID_OPERATION="Hint: {type1} operations can't work with {type2} values" +ERROR_UNDEFINED_VAR="'{var_name}' hasn't been created yet. Did you forget to declare it?" +``` + +## Development Status + +The DuckLang programming language is under active development. Current features: +- ✓ Customizable keywords +- ✓ Basic arithmetic operations +- ✓ Control flow statements +- ✓ Functions +- ✓ Arrays +- ✓ Variable declarations + +Future enhancements may include: +- Custom operator definitions +- Additional data types +- Module system +- Standard library +- More customization options diff --git a/docs/error_handling.md b/docs/error_handling.md new file mode 100644 index 0000000..1dec728 --- /dev/null +++ b/docs/error_handling.md @@ -0,0 +1,164 @@ +# DuckLang Error Handling System + +The DuckLang error handling system provides detailed, user-friendly error messages with context and suggestions for fixing issues. 
The system is designed to be both informative and entertaining, using duck-themed messages to make debugging more enjoyable. + +## Features + +- 🎯 **Precise Error Location**: Line and column numbers for exact error positioning +- 📝 **Code Context**: Shows surrounding code with the error line highlighted +- 💡 **Helpful Suggestions**: Provides specific suggestions for fixing each error +- 🎨 **Color-Coded Output**: Uses colors to highlight different parts of error messages +- 🦆 **Duck-Themed Messages**: Fun, memorable error messages with a duck theme +- ⚙️ **Customizable Messages**: Ability to override default messages via `.env` file + +## Error Categories + +DuckLang provides several categories of errors to help identify and fix issues: + +### Syntax Errors +- `SYNTAX_GENERIC`: General syntax errors +- `MISSING_BRACKET`: Missing closing brackets/parentheses +- `INVALID_TOKEN`: Invalid characters or tokens +- `UNEXPECTED_TOKEN`: Valid tokens in wrong places +- `INVALID_INDENTATION`: Incorrect code indentation + +### Runtime Errors +- `DIVISION_BY_ZERO`: Attempt to divide by zero +- `UNDEFINED_VAR`: Using undefined variables +- `TYPE_MISMATCH`: Type compatibility issues +- `INDEX_OUT_OF_RANGE`: Array index out of bounds +- `STACK_OVERFLOW`: Too many nested function calls +- `MEMORY_ERROR`: Out of memory errors + +### Function Errors +- `UNDEFINED_FUNCTION`: Calling undefined functions +- `INVALID_ARGUMENTS`: Wrong number of function arguments +- `RECURSION_LIMIT`: Exceeded maximum recursion depth + +### Type Errors +- `TYPE_CONVERSION`: Invalid type conversion attempts +- `INVALID_OPERATION`: Invalid operations between types +- `NULL_REFERENCE`: Using uninitialized variables + +### IO Errors +- `FILE_NOT_FOUND`: File not found during import/read +- `PERMISSION_DENIED`: No permission to access file +- `IO_ERROR`: General IO operation errors + +### Configuration Errors +- `INVALID_KEYWORD`: Invalid keyword configuration +- `DUPLICATE_KEYWORD`: Duplicate keyword 
definitions +- `INVALID_CONFIG`: General configuration errors + +### Import/Module Errors +- `MODULE_NOT_FOUND`: Missing module during import +- `CIRCULAR_IMPORT`: Circular dependencies detected + +## Customizing Error Messages + +You can customize error messages by adding entries to your `.env` file. Each error type can have its own custom message: + +```env +# Example custom error messages +ERROR_SYNTAX_GENERIC="🦆 Oops! Your code has a syntax error at line {line}" +ERROR_UNDEFINED_VAR="🦆 Can't find the variable '{var_name}'. Did it fly away?" +ERROR_TYPE_MISMATCH="🦆 Expected a {expected_type} but got a {actual_type}. Ducks of a feather..." +``` + +### Available Placeholders + +Different error types support different placeholders in their messages: + +- `{line}`: Line number (all errors) +- `{column}`: Column number (all errors) +- `{var_name}`: Variable name (UNDEFINED_VAR) +- `{expected_type}`, `{actual_type}`: Type information (TYPE_MISMATCH) +- `{bracket_type}`: Type of bracket (MISSING_BRACKET) +- `{token}`: Invalid token (INVALID_TOKEN) +- `{func_name}`: Function name (UNDEFINED_FUNCTION) +- `{expected}`, `{actual}`: Argument counts (INVALID_ARGUMENTS) +- `{file}`: File path (FILE_NOT_FOUND, PERMISSION_DENIED) +- `{module}`: Module name (MODULE_NOT_FOUND) +- `{details}`: Additional error details (various errors) + +## Example Error Messages + +Here are some example error messages you might encounter: + +### Syntax Error +``` +🦆 DuckLang Error: Syntax Error +📍 Line 5, Column 10 in 'example.duck' +❌ [MISSING_BRACKET] Waddle waddle... missing a } at line 5 + +Code: + 3 | def greet(name) { + 4 | show("Hello, " + name) +> 5 | return + 6 | + +💡 Suggestion: Add a closing } to match the opening one. +``` + +### Runtime Error +``` +🦆 DuckLang Error: Runtime Error +📍 Line 12, Column 15 in 'example.duck' +❌ [TYPE_MISMATCH] Expected a duck, but got a goose! 
Expected number but got string + +Code: + 11 | let name = "John" +> 12 | let result = 42 + name + 13 | show(result) + ^ + +💡 Suggestion: Convert your string to number before this operation. +``` + +## Best Practices + +1. **Read the Entire Message**: Error messages include the exact location, context, and a suggestion for fixing the issue. + +2. **Check the Context**: Look at the code shown around the error line to understand the context. + +3. **Follow Suggestions**: Each error comes with a specific suggestion for fixing the issue. + +4. **Customize Messages**: Use the `.env` file to customize error messages for your team or preferences. + +5. **Use Error Types**: When catching errors in your code, use the specific error types to handle different cases appropriately. + +## Error Handling in Code + +Here's how to use the error handling system in your code: + +```python +from src.error import ErrorHandler, DuckLangError + +# Initialize the error handler +error_handler = ErrorHandler() + +# Set the current file being processed +error_handler.set_current_file(file_content) + +try: + # Your code here + if some_error_condition: + error_handler.raise_syntax_error( + 'MISSING_BRACKET', + line=5, + column=10, + file_name='example.duck', + bracket_type='}' + ) +except DuckLangError as e: + print(e) # Will print the formatted error message +``` + +## Contributing + +To add new error types or enhance the error handling system: + +1. Add the error type and message to `ErrorConfig.default_messages` +2. Add a corresponding suggestion to `ErrorConfig.suggestions` +3. Update the documentation with the new error type and its placeholders +4. Add test cases to verify the new error handling \ No newline at end of file diff --git a/ducklangweb b/ducklangweb deleted file mode 100644 index ecb20fb..0000000 --- a/ducklangweb +++ /dev/null @@ -1,237 +0,0 @@ -/*index.html*/ -Basic web page of ducklang - index.html - - - - - - DuckLang - The Fun Programming Language - - - - - -
-

DuckLang ✨

- - - -

Welcome to DuckLang, the quirky and fun programming language that makes coding a laugh-out-loud experience!

-
- - -
-

What is DuckLang?

-

DuckLang is a playful and engaging programming language designed to engage young developers with its humorous syntax. A unique blend of Bengali, Hindi, and English makes coding fun and easy to understand!

-

Get ready to code like never before, where coding meets comedy!

-
- - -
-

How to Install DuckLang

-

Follow these steps to get DuckLang up and running on your machine:

- -

For more details, check the guidebook!

-
- - -
-

Getting Started with DuckLang

-

Here’s your chance to write your first DuckLang program! Start by writing some funny code and experience the humor of DuckLang syntax.

-

Check out our documentation to begin coding with DuckLang and discover how its language and syntax make programming enjoyable!

- Explore the Docs -
- - -
-

Contribute to DuckLang

-

We welcome contributions to DuckLang! Please refer to the README.md file for guidelines on how to contribute.

-

Remember to always push your changes to the `dev` branch!

-
- - -
-

Contact

-

If you have any questions or feedback, feel free to open an issue or contact the project maintainer.

-

Show some love by starring the repository!

-

Maintainer: Puskar Roy 🖋️

-
- - - - - - -//style.css of web page of ducklang - /* General Reset */ -* { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -/* Body and Font Settings */ -body { - font-family: Arial, sans-serif; - background-color: #f8f9fa; - color: #333; - line-height: 1.6; -} - -/* Header Section */ -header { - text-align: center; - padding: 40px; - background-color: #ffcc00; - color: white; -} - -header h1 { - font-size: 2.5em; - margin-bottom: 10px; -} - -/* Overview Section */ -#overview { - padding: 30px; - background-color: #fff; - margin: 20px auto; - max-width: 900px; - border-radius: 8px; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -#overview h2 { - font-size: 2em; - color: #ffcc00; -} - -#overview p { - font-size: 1.2em; - margin-bottom: 20px; -} - -/* Installation Section */ -#installation { - padding: 30px; - background-color: #f9f9f9; - margin: 20px auto; - max-width: 900px; - border-radius: 8px; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -#installation h2 { - font-size: 2em; - color: #ffcc00; -} - -#installation ul { - list-style-type: none; - margin-top: 10px; -} - -#installation li { - font-size: 1.1em; - margin: 10px 0; -} - -pre code { - font-size: 1.1em; - background-color: #f1f1f1; - padding: 10px; - border-radius: 6px; - display: block; - white-space: pre-wrap; -} - -/* Getting Started Section */ -#getting-started { - padding: 30px; - background-color: #fff; - margin: 20px auto; - max-width: 900px; - border-radius: 8px; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -#getting-started h2 { - font-size: 2em; - color: #ffcc00; -} - -#getting-started p { - font-size: 1.2em; - margin-bottom: 20px; -} - -#getting-started a { - display: inline-block; - padding: 10px 20px; - background-color: #ffcc00; - color: white; - text-decoration: none; - border-radius: 6px; - font-size: 1.2em; - margin-top: 20px; -} - -/* Contribution Section */ -#contribution { - padding: 30px; - background-color: #f9f9f9; - margin: 20px auto; - max-width: 900px; - 
border-radius: 8px; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -#contribution h2 { - font-size: 2em; - color: #ffcc00; -} - -#contribution p { - font-size: 1.2em; -} - -/* Contact Section */ -#contact { - padding: 30px; - background-color: #fff; - margin: 20px auto; - max-width: 900px; - border-radius: 8px; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -#contact h2 { - font-size: 2em; - color: #ffcc00; -} - -#contact p { - font-size: 1.2em; -} - -/* Footer */ -footer { - text-align: center; - padding: 20px; - background-color: #ffcc00; - color: white; - margin-top: 40px; -} -/*for opening the web page of ducklang*/ -file:///C:/Users/D/DuckLang/DuckLang/index.html diff --git a/main.py b/main.py index 81c03e7..7c64939 100644 --- a/main.py +++ b/main.py @@ -1,40 +1,110 @@ +import sys +from src.error import ErrorHandler, DuckLangError from src.lexer import Lexer +from src.parser.main_parser import MainParser +from src.interpreter import Interpreter +from src.utils.debug import debug, DebugLevel -# Sample source code input -source_code = """ - def match(self, text): - if self.source[self.position.index:self.position.index + len(text)] -> == text: - for . 
in range(len(text)): - self.advance() - return True - return False +def execute_program(source_code, file_path, error_handler): + """Execute a DuckLang program with proper error handling""" + try: + # Create instances + lexer = Lexer() + tokens = lexer.tokenize(source_code) + + parser = MainParser(tokens) + ast = parser.parse() + + if not ast: + error_handler.raise_syntax_error( + 'SYNTAX_GENERIC', + line=1, + column=0, + file_name=file_path, + message="Could not generate AST" + ) + return + + # Execute the program + interpreter = Interpreter(debug_mode=True) + interpreter.interpret(ast) + + except Exception as e: + # Get line and column info if available + line = getattr(e, 'line', 1) + column = getattr(e, 'column', 0) + + # Determine error type from exception + if 'division by zero' in str(e): + error_handler.raise_runtime_error( + 'DIVISION_BY_ZERO', + line=line, + column=column, + file_name=file_path + ) + elif 'undefined variable' in str(e): + error_handler.raise_runtime_error( + 'UNDEFINED_VAR', + line=line, + column=column, + file_name=file_path, + var_name=str(e).split("'")[1] if "'" in str(e) else "unknown" + ) + elif 'type mismatch' in str(e): + error_handler.raise_runtime_error( + 'TYPE_MISMATCH', + line=line, + column=column, + file_name=file_path, + expected_type=getattr(e, 'expected_type', 'unknown'), + actual_type=getattr(e, 'actual_type', 'unknown') + ) + else: + # Default to syntax error for unrecognized errors + error_handler.raise_syntax_error( + 'SYNTAX_GENERIC', + line=line, + column=column, + file_name=file_path + ) - """ +def main(): + # Check if a file was provided + if len(sys.argv) < 2: + print("🦆 Quack! 
Please provide a .duck file to run") + print("Usage: python main.py your_program.duck") + sys.exit(1) -# Initialize lexer -lexer = Lexer() -tokens = lexer.tokenize(source_code) + # Get the file path from command line arguments + file_path = sys.argv[1] + + try: + # Initialize error handler + error_handler = ErrorHandler() + + # Read the file + with open(file_path, 'r') as file: + file_content = file.read() + + # Set up error handling for this file + error_handler.set_current_file(file_content) + + # Execute the program + execute_program(file_content, file_path, error_handler) + + except FileNotFoundError: + print(f"🦆 Quack! Cannot find file '{file_path}'") + print("Make sure the file exists and the path is correct!") + sys.exit(1) + except DuckLangError as e: + # This is our custom error, just print its message + print(e) + sys.exit(1) + except Exception as e: + # For unexpected errors during startup + print("🦆 Quack! Something went wrong while starting up!") + print(f"Error: {str(e)}") + sys.exit(1) -# Print detailed token information -print("\n📝 Token Details:") -for token in tokens: - print(f"🔹 {token}") - -# Print a nicely formatted table -print("\n📌 Token Table:") - -# Define column widths -col_widths = [30, 12, 14, 12, 6, 8] -line_sep = "+" + "+".join(["-" * w for w in col_widths]) + "+" - -# Print table header -print(line_sep) -print(f"| {'Type':<20} | {'Value':<12} | {'Start Index':<14} | {'End Index':<12} | {'Line':<6} | {'Column':<8} |") -print(line_sep) - -# Print table rows -for token in tokens: - print(f"| {str(token.token_type):<20} | {str(token.value):<12} | {str(token.start_pos.index):<14} | {str(token.end_pos.index):<12} | {str(token.start_pos.line):<6} | {str(token.start_pos.column):<8} |") - -# Print table footer -print(line_sep) +if __name__ == "__main__": + main() diff --git a/program.duck b/program.duck new file mode 100644 index 0000000..2235d0d --- /dev/null +++ b/program.duck @@ -0,0 +1,43 @@ +// This is a sample program in our Duck 
language + +// Function definition +function factorial(n) { + if (n < 0) { + return 0 + } + if (n <= 1) { + return 1 + } + result = 1 + counter = n + while (counter > 1) { + result = result * counter + counter = counter - 1 + } + return result +} + +// Array operations +numbers = [1, 2, 3, 4, 5] +numbers[2] = 10 + +// Variables and arithmetic +x = 5 +y = 3 +sum = x + y + +// Control flow +if (x > y) { + print("x is greater than y") +} else { + print("x is not greater than y") +} + +// Function call +result = factorial(5) +print("Factorial of 5 is:") +print(result) + +// Array output +print("Modified array:") +print(numbers) \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..4e44716 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,3 @@ +""" +DuckLang source package. +""" \ No newline at end of file diff --git a/src/error/__init__.py b/src/error/__init__.py new file mode 100644 index 0000000..6d5134e --- /dev/null +++ b/src/error/__init__.py @@ -0,0 +1,5 @@ +from .error_handler import ErrorHandler, DuckLangError +from .error_config import ErrorConfig +from .error_formatter import ErrorFormatter + +__all__ = ['ErrorHandler', 'DuckLangError', 'ErrorConfig', 'ErrorFormatter'] \ No newline at end of file diff --git a/src/error/error_config.py b/src/error/error_config.py new file mode 100644 index 0000000..d6de2b7 --- /dev/null +++ b/src/error/error_config.py @@ -0,0 +1,101 @@ +from typing import Dict, Optional +from dotenv import load_dotenv +import os + +class ErrorConfig: + def __init__(self): + load_dotenv() + self.default_messages = { + # Syntax Errors + 'SYNTAX_GENERIC': "Quack! Your code seems a bit scrambled at line {line}", + 'MISSING_BRACKET': "Waddle waddle... missing a {bracket_type} at line {line}", + 'INVALID_TOKEN': "This syntax makes me want to duck for cover! 
Found {token} at line {line}", + 'UNEXPECTED_TOKEN': "Found a strange duck in the pond: {token} at line {line}", + 'INVALID_INDENTATION': "Your ducks aren't lined up properly at line {line}", + 'MISSING_SEMICOLON': "Missing a tail feather at line {line}", + + # Runtime Errors + 'DIVISION_BY_ZERO': "That operation is like dividing by zero ducks... impossible!", + 'UNDEFINED_VAR': "This variable seems to have flown south for the winter: {var_name}", + 'TYPE_MISMATCH': "Expected a duck, but got a goose! Expected {expected_type} but got {actual_type}", + 'INDEX_OUT_OF_RANGE': "Trying to catch a duck that's not in the pond! Index {index} is out of range", + 'STACK_OVERFLOW': "Too many nested ducks! Stack overflow at line {line}", + 'MEMORY_ERROR': "The duck pond is full! Out of memory at line {line}", + + # Function Errors + 'UNDEFINED_FUNCTION': "Can't find this duck call: {func_name}", + 'INVALID_ARGUMENTS': "Wrong number of ducks! Expected {expected} arguments but got {actual}", + 'RECURSION_LIMIT': "Too much duck recursion! Maximum depth exceeded", + + # Configuration Errors + 'INVALID_KEYWORD': "That keyword doesn't fit in our duck pond: {keyword}", + 'DUPLICATE_KEYWORD': "Two ducks can't share the same name: {keyword} is already used for {existing_use}", + 'INVALID_CONFIG': "Your duck configuration seems wrong: {details}", + + # Import/Module Errors + 'MODULE_NOT_FOUND': "Couldn't find this duck module: {module}", + 'CIRCULAR_IMPORT': "Your ducks are chasing their own tails! Circular import detected", + + # Type Errors + 'TYPE_CONVERSION': "Can't turn this {from_type} duck into a {to_type} duck!", + 'INVALID_OPERATION': "These ducks don't play well together: can't {operation} with {type1} and {type2}", + 'NULL_REFERENCE': "Found an empty nest where a duck should be!", + + # IO Errors + 'FILE_NOT_FOUND': "This duck nest doesn't exist: {file}", + 'PERMISSION_DENIED': "This duck pond is private! 
No permission to access {file}", + 'IO_ERROR': "Duck communication error: {details}" + } + + self.suggestions = { + 'SYNTAX_GENERIC': "Check your syntax and make sure all brackets and parentheses are properly closed.", + 'MISSING_BRACKET': "Add a closing {bracket_type} to match the opening one.", + 'INVALID_TOKEN': "Make sure you're using valid DuckLang syntax and keywords.", + 'UNEXPECTED_TOKEN': "Remove or replace the unexpected {token}.", + 'INVALID_INDENTATION': "Fix the indentation to match the code block structure.", + 'DIVISION_BY_ZERO': "Check your division operation and make sure the denominator is not zero.", + 'UNDEFINED_VAR': "Declare the variable '{var_name}' before using it.", + 'TYPE_MISMATCH': "Convert your {actual_type} to {expected_type} before this operation.", + 'INDEX_OUT_OF_RANGE': "Make sure your index is within the valid range [0 to {max_index}].", + 'STACK_OVERFLOW': "Reduce the number of nested function calls or recursion depth.", + 'UNDEFINED_FUNCTION': "Define the function '{func_name}' before calling it.", + 'INVALID_ARGUMENTS': "Provide exactly {expected} arguments to this function.", + 'TYPE_CONVERSION': "Use appropriate type conversion functions or check your data types.", + 'INVALID_OPERATION': "Make sure both operands are of compatible types for {operation}.", + 'NULL_REFERENCE': "Initialize your variable before using it.", + 'FILE_NOT_FOUND': "Check if the file path '{file}' is correct and the file exists.", + 'PERMISSION_DENIED': "Run the program with appropriate permissions or check file access rights.", + 'CIRCULAR_IMPORT': "Restructure your imports to avoid circular dependencies." 
from typing import Optional, List, Tuple
import os


class ErrorFormatter:
    """Formats error messages with color and source-code context.

    The source text is kept split into lines so an error can be rendered
    together with the lines around it and a caret under the offending column.
    """

    # Visible characters printed before the source text on every context line,
    # not counting the variable-width line-number field:
    # marker (1) + space (1) + " |" (2) + space (1).
    _GUTTER_FIXED_WIDTH = 5

    def __init__(self, file_content: str):
        """Store the source text and choose the color palette.

        Args:
            file_content: Full text of the file that errors will refer to.
        """
        self.file_content = file_content
        # Split on '\n' (rather than splitlines) so an error reported on the
        # empty line after a trailing newline still has a line to show; drop
        # the '\r' of CRLF files so it is not echoed to the terminal.
        self.lines = [text.rstrip('\r') for text in file_content.split('\n')]

        # ANSI color codes
        self.RED = '\033[91m'
        self.YELLOW = '\033[93m'
        self.BLUE = '\033[94m'
        self.GREEN = '\033[92m'
        self.GRAY = '\033[90m'
        self.RESET = '\033[0m'
        self.BOLD = '\033[1m'
        self.UNDERLINE = '\033[4m'

        # Disable colors if not supported or not wanted
        if self._colors_disabled():
            self.RED = self.YELLOW = self.BLUE = self.GREEN = self.GRAY = ''
            self.RESET = self.BOLD = self.UNDERLINE = ''

    @staticmethod
    def _colors_disabled() -> bool:
        """Return True when ANSI escapes should not be emitted.

        Colors are turned off when the NO_COLOR environment variable is set to
        a non-empty value, or on Windows ('nt') unless FORCE_COLOR is set.
        """
        if os.environ.get('NO_COLOR'):
            return True
        return os.name == 'nt' and not os.environ.get('FORCE_COLOR')

    def format_error(self,
                     category: str,
                     message: str,
                     line: int,
                     column: int,
                     file_name: str,
                     suggestion: Optional[str] = None,
                     context_lines: int = 2) -> str:
        """Format an error message with context and color

        Args:
            category: The type of error (e.g., 'Syntax Error', 'Runtime Error')
            message: The error message to display
            line: The line number where the error occurred (1-based)
            column: The column number where the error occurred (0-based)
            file_name: The name of the file containing the error
            suggestion: Optional suggestion for fixing the error
            context_lines: Number of lines of context to show before and after the error

        Returns:
            A formatted error message string with color and context
        """
        error_lines = []

        # Header with error type and location
        error_lines.extend([
            f"{self.BOLD}🦆 DuckLang Error: {category}{self.RESET}",
            f"{self.BLUE}📍 {self._format_location(line, column, file_name)}{self.RESET}",
            f"{self.RED}❌ {message}{self.RESET}",
            ""
        ])

        # Add code context only when the line actually exists in the source
        if self.lines and 0 <= line - 1 < len(self.lines):
            error_lines.extend(self._format_code_context(line, column, context_lines))

        # Add suggestion if available
        if suggestion:
            error_lines.extend([
                "",
                f"{self.GREEN}💡 Suggestion:{self.RESET}",
                f"   {suggestion}"
            ])

        return "\n".join(error_lines)

    def _format_location(self, line: int, column: int, file_name: str) -> str:
        """Format the error location information.

        Parts that carry no information (line <= 0, negative column, empty
        file name) are left out; the rest are joined with ", ".
        """
        location_parts = []

        # Add line and column info
        if line > 0:
            location_parts.append(f"Line {line}")
        if column >= 0:
            location_parts.append(f"Column {column}")

        # Add file name
        if file_name:
            location_parts.append(f"in '{file_name}'")

        return ", ".join(location_parts)

    def _format_code_context(self, error_line: int, error_column: int, context_lines: int) -> List[str]:
        """Format the code context around the error

        Args:
            error_line: The line number where the error occurred (1-based)
            error_column: The column where the error occurred (0-based);
                a negative value suppresses the caret line
            context_lines: Number of lines to show before and after the error

        Returns:
            List of formatted context lines, starting with a "Code:" header
        """
        output = ["Code:"]

        # Clamp the window to the lines that exist
        start_line = max(1, error_line - context_lines)
        end_line = min(len(self.lines), error_line + context_lines)

        # Width of the widest line number, used to right-align the gutter
        line_num_width = len(str(end_line))
        gutter_width = line_num_width + self._GUTTER_FIXED_WIDTH

        for line_num in range(start_line, end_line + 1):
            line_content = self.lines[line_num - 1]
            is_error_line = line_num == error_line

            # The error line gets a red ">" marker, the others a blank one
            if is_error_line:
                prefix = f"{self.RED}>{self.RESET}"
                line_num_color = self.RED
            else:
                prefix = " "
                line_num_color = self.GRAY

            output.append(
                f"{prefix} {line_num_color}{line_num:>{line_num_width}} |{self.RESET} "
                f"{line_content}"
            )

            if is_error_line and error_column >= 0:
                # The caret must sit under source column `error_column`, i.e.
                # after the whole gutter. Tabs in the source are mirrored so
                # both lines expand to the same terminal column.
                lead = line_content[:error_column]
                mirror = "".join("\t" if ch == "\t" else " " for ch in lead)
                mirror += " " * (error_column - len(lead))
                pointer = " " * gutter_width + mirror + "^"
                output.append(f"{self.RED}{pointer}{self.RESET}")

        return output

    def get_line_context(self, line: int, context_lines: int = 2) -> str:
        """Get the surrounding lines of code for context

        Args:
            line: The line number to get context for (1-based)
            context_lines: Number of lines to show before and after

        Returns:
            A string containing the formatted context lines (no caret line)
        """
        return "\n".join(self._format_code_context(line, -1, context_lines))
class DuckLangError(Exception):
    """Base class for DuckLang errors.

    Carries the error type key, the (possibly pre-formatted) message, the
    source location and an optional suggestion as attributes.
    """

    def __init__(self,
                 error_type: str,
                 message: str,
                 line: int,
                 column: int,
                 file_name: str,
                 suggestion: Optional[str] = None):
        self.error_type = error_type
        self.message = message
        self.line = line
        self.column = column
        self.file_name = file_name
        self.suggestion = suggestion
        super().__init__(message)


class ErrorHandler:
    """Builds error messages from the configured templates and raises them.

    Every public ``raise_*`` method looks up the message and suggestion for
    ``error_type`` in the configuration, then raises a ``DuckLangError``.
    """

    def __init__(self):
        self.config = ErrorConfig()
        # Set by set_current_file(); while it is None a plain-text fallback
        # message is used instead of the formatted one.
        self.formatter = None

    def set_current_file(self, file_content: str):
        """Set the current file being processed for context in error messages"""
        self.formatter = ErrorFormatter(file_content)

    def _raise_categorized(self, category, error_type, line, column, file_name, details):
        """Shared implementation of the public ``raise_*`` methods.

        ``line`` is always added to the template arguments so that a message
        or suggestion template may use ``{line}`` regardless of the error
        category; extra arguments are ignored by ``str.format``.
        """
        template_args = dict(details)
        template_args['line'] = line
        message = f"[{error_type}] {self.config.get_message(error_type, **template_args)}"
        suggestion = self.config.get_suggestion(error_type, **template_args)
        self._raise_error(category, error_type, message, line, column, file_name, suggestion)

    def raise_syntax_error(self,
                           error_type: str,
                           line: int,
                           column: int,
                           file_name: str,
                           **kwargs):
        """Raise a syntax error with formatted message"""
        self._raise_categorized('Syntax Error', error_type, line, column, file_name, kwargs)

    def raise_runtime_error(self,
                            error_type: str,
                            line: int,
                            column: int,
                            file_name: str,
                            **kwargs):
        """Raise a runtime error with formatted message"""
        self._raise_categorized('Runtime Error', error_type, line, column, file_name, kwargs)

    def raise_config_error(self,
                           error_type: str,
                           line: int,
                           column: int,
                           file_name: str,
                           **kwargs):
        """Raise a configuration error with formatted message"""
        self._raise_categorized('Configuration Error', error_type, line, column, file_name, kwargs)

    def raise_function_error(self,
                             error_type: str,
                             line: int,
                             column: int,
                             file_name: str,
                             **kwargs):
        """Raise a function-related error with formatted message"""
        self._raise_categorized('Function Error', error_type, line, column, file_name, kwargs)

    def raise_type_error(self,
                         error_type: str,
                         line: int,
                         column: int,
                         file_name: str,
                         **kwargs):
        """Raise a type-related error with formatted message"""
        self._raise_categorized('Type Error', error_type, line, column, file_name, kwargs)

    def raise_io_error(self,
                       error_type: str,
                       line: int,
                       column: int,
                       file_name: str,
                       **kwargs):
        """Raise an IO-related error with formatted message"""
        self._raise_categorized('IO Error', error_type, line, column, file_name, kwargs)

    def _raise_error(self,
                     category: str,
                     error_type: str,
                     message: str,
                     line: int,
                     column: int,
                     file_name: str,
                     suggestion: Optional[str] = None):
        """Format and raise the error.

        Raises:
            DuckLangError: always. Its message is the formatter's output when
                a current file has been set, otherwise a one-line fallback
                with the suggestion appended on a second line.
        """
        if self.formatter:
            final_message = self.formatter.format_error(
                category,
                message,
                line,
                column,
                file_name,
                suggestion
            )
        else:
            # Fallback if no formatter is available
            final_message = f"{category} at line {line}, column {column} in {file_name}: {message}"
            if suggestion:
                final_message += f"\nSuggestion: {suggestion}"
        raise DuckLangError(error_type, final_message, line, column, file_name, suggestion)
from ..lexer.token_types import TokenType
from ..utils.debug import debug, DebugLevel
from ..parser.ast import (
    AssignmentNode,
    BinaryOpNode,
    UnaryOpNode,
    LiteralNode,
    VariableNode,
    ArrayNode,
    ArrayAccessNode,
    PrintNode,
    IfNode,
    WhileNode,
    BlockNode,
    FunctionDefNode,
    FunctionCallNode,
    ReturnNode,
    BreakNode,
    ContinueNode,
    ForNode
)
import sys
sys.setrecursionlimit(10000)  # Increase recursion limit


class BreakException(Exception):
    """Exception raised when a break statement is encountered."""
    pass


class ContinueException(Exception):
    """Exception raised when a continue statement is encountered."""
    pass


class ReturnException(Exception):
    """Internal signal that unwinds a function body when a return executes.

    It is raised only while a function call is active and is caught by that
    call, so it never reaches callers of ``Interpreter.interpret``.
    """

    def __init__(self, value=None):
        super().__init__()
        self.value = value


class Interpreter:
    """Tree-walking evaluator for the AST node classes imported above."""

    # Operators whose string operands are first converted with int() if possible
    _ARITHMETIC = (TokenType.PLUS, TokenType.MINUS, TokenType.MULTIPLY, TokenType.DIVIDE)

    def __init__(self, debug_mode=False):
        """Create an interpreter with an empty global symbol table.

        Args:
            debug_mode: Accepted for interface compatibility; the value is
                ignored and debugging is always switched off.
        """
        self.symbol_table = {}
        self.component_name = "Interpreter"
        self.debug_mode = False  # Always set to False
        self.call_stack = []  # Names of the functions currently executing
        debug.level = DebugLevel.OFF  # Always set to OFF

    def _checked_index(self, array, index):
        """Validate an array access and return the integer position.

        Raises:
            TypeError: if ``array`` is not a list or the index is not an integer
                (a string index is accepted when int() can convert it).
            IndexError: if the position is outside ``0 <= idx < len(array)``.
        """
        if not isinstance(array, list):
            raise TypeError(f"Cannot index into non-array: {array}")

        try:
            idx = int(index) if isinstance(index, str) else index
        except (ValueError, TypeError):
            raise TypeError(f"Array index must be an integer, got {type(index)}")

        if not isinstance(idx, int):
            raise TypeError(f"Array index must be an integer, got {type(index)}")

        if not (0 <= idx < len(array)):
            raise IndexError(f"Array index {idx} out of bounds")

        return idx

    def _eval_binary(self, node):
        """Evaluate a BinaryOpNode."""
        operator = node.operator
        left = self.interpret(node.left)

        # Logical operators short-circuit: the right operand is evaluated only
        # when it decides the result. The value returned is the same one that
        # Python's `and` / `or` would give.
        if operator == TokenType.AND:
            return self.interpret(node.right) if left else left
        if operator == TokenType.OR:
            return left if left else self.interpret(node.right)

        right = self.interpret(node.right)

        # Convert string operands to numbers if possible for arithmetic operations
        if operator in self._ARITHMETIC:
            try:
                if isinstance(left, str):
                    left = int(left)
                if isinstance(right, str):
                    right = int(right)
            except ValueError:
                pass  # Keep as strings if conversion fails

        if operator == TokenType.PLUS:
            return left + right
        if operator == TokenType.MINUS:
            return left - right
        if operator == TokenType.MULTIPLY:
            return left * right
        if operator == TokenType.DIVIDE:
            return left / right
        if operator == TokenType.EQUALS:
            return left == right
        if operator == TokenType.NOT_EQUALS:
            return left != right
        if operator == TokenType.LESS_THAN:
            return left < right
        if operator == TokenType.GREATER_THAN:
            return left > right
        if operator == TokenType.LESS_EQUAL:
            return left <= right
        if operator == TokenType.GREATER_EQUAL:
            return left >= right
        raise ValueError(f"Unknown operator: {operator}")

    def _call_function(self, node):
        """Evaluate a FunctionCallNode.

        The body runs against a copy of the current symbol table with the
        parameters bound, so assignments made inside the function do not
        leak into the caller's table.
        """
        func = self.symbol_table.get(node.name)
        if not isinstance(func, FunctionDefNode):
            raise NameError(f"Function '{node.name}' is not defined")

        if len(node.arguments) != len(func.parameters):
            raise TypeError(f"Function '{node.name}' takes {len(func.parameters)} arguments but {len(node.arguments)} were given")

        # Evaluate arguments in the caller's scope before binding them
        arg_values = [self.interpret(arg) for arg in node.arguments]

        new_symbol_table = self.symbol_table.copy()
        for param, value in zip(func.parameters, arg_values):
            new_symbol_table[param] = value

        old_symbol_table = self.symbol_table
        self.symbol_table = new_symbol_table
        self.call_stack.append(node.name)

        try:
            # Without an explicit return the value of the last statement is used
            result = self.interpret(func.body)
        except ReturnException as signal:
            # A return statement unwound the body; its value is the result
            result = signal.value
        finally:
            self.call_stack.pop()
            self.symbol_table = old_symbol_table

        return result

    def interpret(self, node):
        """Interpret an AST node and return the result.

        Raises:
            NameError: undefined variable or function.
            TypeError: bad assignment target, bad index, wrong argument count,
                or an unknown node type.
            IndexError: array index out of bounds.
            ValueError: unknown operator.
            BreakException, ContinueException: when break/continue executes
                outside of a loop.
        """
        if node is None:
            return None

        if isinstance(node, BlockNode):
            result = None
            for statement in node.statements:
                result = self.interpret(statement)
            return result

        elif isinstance(node, AssignmentNode):
            if isinstance(node.target, str):
                # Simple variable assignment
                value = self.interpret(node.value)
                self.symbol_table[node.target] = value
                return value
            elif isinstance(node.target, ArrayAccessNode):
                # Array element assignment: evaluate everything, then validate
                array = self.interpret(node.target.array)
                index = self.interpret(node.target.index)
                value = self.interpret(node.value)
                array[self._checked_index(array, index)] = value
                return value
            else:
                raise TypeError(f"Invalid assignment target: {node.target}")

        elif isinstance(node, BinaryOpNode):
            return self._eval_binary(node)

        elif isinstance(node, UnaryOpNode):
            operand = self.interpret(node.operand)

            if node.operator == TokenType.MINUS:
                try:
                    if isinstance(operand, str):
                        operand = int(operand)
                except ValueError:
                    pass
                result = -operand
            elif node.operator == TokenType.NOT:
                result = not operand
            else:
                raise ValueError(f"Unknown operator: {node.operator}")

            return result

        elif isinstance(node, PrintNode):
            value = self.interpret(node.expression)
            print(value)
            return value

        elif isinstance(node, VariableNode):
            if node.name not in self.symbol_table:
                raise NameError(f"Variable '{node.name}' is not defined")
            return self.symbol_table[node.name]

        elif isinstance(node, LiteralNode):
            return node.value

        elif isinstance(node, ArrayNode):
            return [self.interpret(element) for element in node.elements]

        elif isinstance(node, ArrayAccessNode):
            array = self.interpret(node.array)
            index = self.interpret(node.index)
            return array[self._checked_index(array, index)]

        elif isinstance(node, IfNode):
            condition = self.interpret(node.condition)
            if condition:
                return self.interpret(node.if_block)
            elif node.else_block:
                return self.interpret(node.else_block)
            return None

        elif isinstance(node, WhileNode):
            result = None
            while self.interpret(node.condition):
                try:
                    result = self.interpret(node.body)
                except BreakException:
                    break
                except ContinueException:
                    pass  # Skip to next iteration
            return result

        elif isinstance(node, ForNode):
            # Initialize
            self.interpret(node.initializer)

            # Loop
            result = None
            while self.interpret(node.condition):
                try:
                    result = self.interpret(node.body)
                except BreakException:
                    break
                except ContinueException:
                    pass  # Continue to increment step
                # Increment (also runs after a continue)
                self.interpret(node.increment)
            return result

        elif isinstance(node, BreakNode):
            raise BreakException()

        elif isinstance(node, ContinueNode):
            raise ContinueException()

        elif isinstance(node, FunctionDefNode):
            # Functions live in the same table as variables
            self.symbol_table[node.name] = node
            return node

        elif isinstance(node, FunctionCallNode):
            return self._call_function(node)

        elif isinstance(node, ReturnNode):
            value = self.interpret(node.value) if node.value is not None else None
            if self.call_stack:
                # Inside a function: stop executing the body immediately
                raise ReturnException(value)
            # Outside any function the statement simply yields its value
            return value

        else:
            raise TypeError(f"Unknown AST node type: {type(node)}")
class CommentHandler(TokenHandler):
    """Skips comments: `#` and `//` up to end of line, `/* ... */` across lines."""

    def can_handle(self, state):
        """Return True when the lexer is positioned on a comment opener."""
        head = state.current_char()
        if head == '#':
            return True
        if head == '/':
            # '/' alone is not a comment; it needs '/' or '*' right after it
            return state.peek() in ('/', '*')
        return False

    def handle(self, state: LexerState, start_pos: Position):
        """Advance the lexer past one comment without producing a token.

        Raises:
            ValueError: if the current position is not a comment opener.
        """
        start_pos = state.position.copy()
        head = state.current_char()
        follower = state.peek() if head == '/' else None

        if head == '/' and follower == '*':
            state.advance(2)  # opening /*
            while True:
                if state.is_at_end():
                    break
                if state.current_char() == '*' and state.peek() == '/':
                    break
                state.advance()
            # An unterminated block comment simply runs to end of input
            if not state.is_at_end():
                state.advance(2)  # closing */
            return

        if head == '#' or (head == '/' and follower == '/'):
            if head == '#':
                state.advance()  # the '#'
            else:
                state.advance(2)  # the '//'
            # The newline itself is left for the next handler
            while not state.is_at_end() and state.current_char() != '\n':
                state.advance()
            return

        # If we reach here, something went wrong
        raise ValueError(f"Unrecognized comment at position {start_pos}")
import LanguageConfig class IdentifierHandler(TokenHandler): - """Handles identifiers and keywords like IF, FOR, RETURN, AND, OR.""" - - keywords = { - "for" : TokenType.FOR, - "if" : TokenType.IF, - "else" : TokenType.ELSE, - "while" : TokenType.WHILE, - "break" : TokenType.BREAK, - "continue" : TokenType.CONTINUE, - "return" : TokenType.RETURN, - "and" : TokenType.AND, - "or" : TokenType.OR, - "not" : TokenType.NOT, - "in" : TokenType.IN, - "not_in" : TokenType.NOT_IN, - "is" : TokenType.IS, - "is_not" : TokenType.IS_NOT - - } + """Handles identifiers and keywords.""" def __init__(self): - pass + super().__init__() + self.config = LanguageConfig.get_instance() def can_handle(self, state: LexerState): """Check if the current character starts an identifier (letter or underscore).""" - return state.current_char().isalpha() or state.current_char() == "_" + char = state.current_char() + return char is not None and (char.isalpha() or char == "_") - def handle(self, state:LexerState, start_pos: Position): + def handle(self, state: LexerState, start_pos: Position): """Processes identifiers and keywords.""" identifier = "" # Read while the character is alphanumeric or underscore - while state.current_char().isalnum() or state.current_char() == "_": - identifier += state.current_char() + while True: + char = state.current_char() + if char is None or not (char.isalnum() or char == "_"): + break + identifier += char state.advance() - # Determine token type keyword or identifier - token_type = self.keywords.get(identifier, TokenType.IDENTIFIER) + # Convert to lowercase for keyword comparison + lower_identifier = identifier.lower() + + # Get token type from configuration + token_type = self.config.get_token_type(lower_identifier) + if token_type is None: + token_type = TokenType.IDENTIFIER + + # Handle special cases + if token_type == TokenType.BOOLEAN: + value = lower_identifier == self.config.get_keyword(TokenType.BOOLEAN) + elif token_type == TokenType.NONE: + value = None 
+ elif token_type == TokenType.IDENTIFIER: + value = identifier # Keep original case for identifiers + else: + value = lower_identifier # Use lowercase for keywords # Store the token - state.add_token(token_type, identifier, start_pos, identifier) \ No newline at end of file + state.add_token(token_type, value, start_pos, identifier) \ No newline at end of file diff --git a/src/lexer/handler/miscellanious_handler.py b/src/lexer/handler/miscellanious_handler.py deleted file mode 100644 index ca92242..0000000 --- a/src/lexer/handler/miscellanious_handler.py +++ /dev/null @@ -1,35 +0,0 @@ -from .base import TokenHandler -from ..token_types import TokenType -from ..state import LexerState,Position - -class MiscellaneousHandler(TokenHandler): - """Handles punctuation and special symbols.""" - - TOKEN_MAP = { - ",": TokenType.COMMA, - ".": TokenType.DOT, - ":": TokenType.COLON, - ";": TokenType.SEMICOLON, - "->": TokenType.ARROW, - "(": TokenType.LEFT_PAREN, - ")": TokenType.RIGHT_PAREN, - "{": TokenType.LEFT_BRACE, - "}": TokenType.RIGHT_BRACE, - "[": TokenType.LEFT_BRACKET, - "]": TokenType.RIGHT_BRACKET - } - - def can_handle(self, state): - """Check if the current character is a known miscellaneous symbol.""" - return state.current_char() in self.TOKEN_MAP - - def handle(self, state:LexerState, start_pos: Position): - """Extract and store the token.""" - start_pos = state.position.copy() - - - raw_value = state.current_char() - state.advance() - - token_type = self.TOKEN_MAP[raw_value] - state.add_token(token_type, raw_value, start_pos, raw_value) diff --git a/src/lexer/handler/operator_handler.py b/src/lexer/handler/operator_handler.py index ac312b5..5841342 100644 --- a/src/lexer/handler/operator_handler.py +++ b/src/lexer/handler/operator_handler.py @@ -23,7 +23,17 @@ class OperatorHandler(TokenHandler): "&&": TokenType.AND, "||": TokenType.OR, "!": TokenType.NOT, - "->": TokenType.ARROW + "->": TokenType.ARROW, + ",": TokenType.COMMA, + ".": TokenType.DOT, + ":": 
TokenType.COLON, + ";": TokenType.SEMICOLON, + "(": TokenType.LEFT_PAREN, + ")": TokenType.RIGHT_PAREN, + "{": TokenType.LEFT_BRACE, + "}": TokenType.RIGHT_BRACE, + "[": TokenType.LEFT_BRACKET, + "]": TokenType.RIGHT_BRACKET } def can_handle(self, state): diff --git a/src/lexer/handler/whitespace_hadler.py b/src/lexer/handler/whitespace_hadler.py index 34be16e..a9ecf93 100644 --- a/src/lexer/handler/whitespace_hadler.py +++ b/src/lexer/handler/whitespace_hadler.py @@ -3,7 +3,7 @@ from .base import TokenHandler class WhiteSpaceHandler(TokenHandler): - def __init__(self, store_whitespace=True): + def __init__(self, store_whitespace=False): self.store_whitespace = store_whitespace def can_handle(self, state: LexerState) -> bool: diff --git a/src/lexer/handler/whitespace_handler.py b/src/lexer/handler/whitespace_handler.py new file mode 100644 index 0000000..a9ecf93 --- /dev/null +++ b/src/lexer/handler/whitespace_handler.py @@ -0,0 +1,39 @@ +from ..token_types import TokenType +from ..state import LexerState +from .base import TokenHandler + +class WhiteSpaceHandler(TokenHandler): + def __init__(self, store_whitespace=False): + self.store_whitespace = store_whitespace + + def can_handle(self, state: LexerState) -> bool: + return state.current_char() in {' ', '\t', '\n', '\r'} + + def handle(self, state: LexerState, start_pos): + if self.store_whitespace: + raw_value = '' + + while state.has_more_chars() and state.current_char() in {' ', '\t', '\n', '\r'}: + # Handle Windows-style line endings (\r\n) + if state.current_char() == '\r' and state.peek(1) == '\n': + if self.store_whitespace: + raw_value += '\r\n' + state.position.line += 1 + state.position.column = 0 + state.advance(2) # Skip both \r and \n + # Handle Unix-style line endings (\n) + elif state.current_char() == '\n': + if self.store_whitespace: + raw_value += state.current_char() + state.position.line += 1 + state.position.column = 0 + state.advance() + # Handle other whitespace + else: + if 
self.store_whitespace: + raw_value += state.current_char() + state.position.column += 1 + state.advance() + + if self.store_whitespace: + state.add_token(TokenType.WHITESPACE, raw_value, start_pos, raw_value) \ No newline at end of file diff --git a/src/lexer/state.py b/src/lexer/state.py index b7bf9fc..f47052f 100644 --- a/src/lexer/state.py +++ b/src/lexer/state.py @@ -20,36 +20,42 @@ def advance(self, char): else: self.column += 1 + def __str__(self): + return f"line {self.line}, column {self.column}" + class LexerState: """Manages the current state of the lexer, including character position and tokens.""" def __init__(self, source: str): self.source = source self.position = Position() self.tokens = [] - # Don't store initial character, get it from source when needed def current_char(self): """Returns the current character being processed or None if at end.""" - if self.position.index >= len(self.source): + if self.is_at_end(): return None return self.source[self.position.index] - def has_more_chars(self) -> bool: + def is_at_end(self): + """Checks if we've reached the end of the source.""" + return self.position.index >= len(self.source) + + def has_more_chars(self): """Checks if there are more characters left to process.""" - return self.position.index < len(self.source) + return not self.is_at_end() def next_char(self): """Returns the next character in the source code or None if at end.""" - next_idx = self.position.index + 1 - if next_idx >= len(self.source): + if self.is_at_end() or self.position.index + 1 >= len(self.source): return None - return self.source[next_idx] + return self.source[self.position.index + 1] def advance(self, steps=1): """Moves forward in the source code by a given number of characters.""" for _ in range(steps): if self.has_more_chars(): - self.position.advance(self.current_char()) + char = self.current_char() + self.position.advance(char) def peek(self, offset=1): """Looks ahead in the source without advancing the position.""" @@ 
-60,14 +66,20 @@ def peek(self, offset=1): def match(self, text): """Checks if the next characters match the given text and advances if true.""" + if self.position.index + len(text) > len(self.source): + return False + if self.source[self.position.index:self.position.index + len(text)] == text: - for _ in range(len(text)): + for _ in range(len(text)): self.advance() return True return False - def add_token(self, token_type, value, start_pos, raw): + def add_token(self, token_type, value, start_pos, raw=None): """Creates and stores a new token.""" + if raw is None: + raw = str(value) + token = Token( token_type=token_type, value=value, diff --git a/src/lexer/token_types/__init__.py b/src/lexer/token_types/__init__.py index 071906e..323fac9 100644 --- a/src/lexer/token_types/__init__.py +++ b/src/lexer/token_types/__init__.py @@ -4,6 +4,7 @@ class TokenType(Enum): # Commands PRINT_COMMAND = "PRINT_COMMAND" VARIABLE_DECLARE = "VARIABLE_DECLARE" + FUNCTION = "FUNCTION" # Basic Data Types IDENTIFIER = "IDENTIFIER" @@ -65,14 +66,16 @@ class TokenType(Enum): ASSIGN = "ASSIGN" ARROW = "ARROW" WHITESPACE = "WHITESPACE" + NEWLINE = "NEWLINE" class Position: - def __init__(self, line, column): + def __init__(self, line, column, index=0): self.line = line self.column = column + self.index = index def copy(self): - return Position(self.line, self.column) + return Position(self.line, self.column, self.index) def __str__(self): return f"line {self.line}, column {self.column}" @@ -83,7 +86,10 @@ def __init__(self, token_type: TokenType, value, start_pos: Position, end_pos: P self.value = value self.start_pos = start_pos self.end_pos = end_pos - self.raw = raw if raw is not None else value + self.raw = raw if raw is not None else str(value) def __repr__(self): - return f"Token({self.token_type}, {self.value}, {self.start_pos}, {self.end_pos})" \ No newline at end of file + return f"Token({self.token_type}, {repr(self.value)}, {self.start_pos}, {self.end_pos})" + + def 
__str__(self): + return f"{self.token_type}({repr(self.value)})" \ No newline at end of file diff --git a/src/parser/ast/AssignmentNode.py b/src/parser/ast/AssignmentNode.py new file mode 100644 index 0000000..ae12071 --- /dev/null +++ b/src/parser/ast/AssignmentNode.py @@ -0,0 +1,12 @@ +from .base_node import BaseNode + +class AssignmentNode(BaseNode): + """AST node for variable assignments.""" + + def __init__(self, target, value): + super().__init__("Assignment") + self.target = target # Can be a string (variable name) or ArrayAccessNode + self.value = value + + def __repr__(self): + return f"Assignment({self.target} = {self.value})" diff --git a/src/parser/ast/BinaryOpNode.py b/src/parser/ast/BinaryOpNode.py new file mode 100644 index 0000000..43bc652 --- /dev/null +++ b/src/parser/ast/BinaryOpNode.py @@ -0,0 +1,121 @@ +from .base_node import BaseNode +from .variable_node import VariableNode +from .literal_node import LiteralNode +from .array_node import ArrayNode, ArrayAccessNode +from ...lexer.token_types import TokenType + +class BinaryOpNode(BaseNode): + """AST node for binary operations.""" + + def __init__(self, left, operator, right, symbol_table=None): + super().__init__("BinaryOp") + self.left = self.resolve(left, symbol_table) + self.operator = operator + self.right = self.resolve(right, symbol_table) + self.interpreter = None # Will be set by the interpreter + + def resolve(self, node, symbol_table): + if isinstance(node, VariableNode) and symbol_table: + return symbol_table.get(node.name, node) # Lookup value if exists + return node + + def get_numeric_value(self, node): + """Get the numeric value from a node, handling both direct values and array operations.""" + if self.interpreter: + # If we have an interpreter, use it to evaluate the node + value = self.interpreter.execute(node) + else: + # Fallback to direct value extraction + value = node.value if isinstance(node, LiteralNode) else None + + if isinstance(value, (int, float, bool)): + return 
value + elif isinstance(value, list): + return value + elif isinstance(value, str): + try: + return float(value) if '.' in value else int(value) + except ValueError: + return value + return None + + def evaluate(self): + """Evaluate the binary operation.""" + left_value = self.get_numeric_value(self.left) + right_value = self.get_numeric_value(self.right) + + if left_value is None or right_value is None: + return None + + # Handle array operations + if isinstance(left_value, list): + if self.operator == TokenType.PLUS: + # Array concatenation + if isinstance(right_value, list): + result = left_value + right_value + else: + result = left_value + [right_value] + return LiteralNode(result) + elif self.operator == TokenType.MULTIPLY and isinstance(right_value, (int, float)): + # Array repetition + result = left_value * int(right_value) + return LiteralNode(result) + elif self.operator == TokenType.IN: + # Array membership test + return LiteralNode(right_value in left_value) + else: + raise TypeError(f"Unsupported array operation: {self.operator}") + + # Convert string operands to numbers if possible + if isinstance(left_value, str): + try: + left_value = float(left_value) if '.' in left_value else int(left_value) + except ValueError: + pass + + if isinstance(right_value, str): + try: + right_value = float(right_value) if '.' 
in right_value else int(right_value) + except ValueError: + pass + + # Handle numeric operations + if self.operator == TokenType.PLUS: + result = left_value + right_value + elif self.operator == TokenType.MINUS: + result = left_value - right_value + elif self.operator == TokenType.MULTIPLY: + result = left_value * right_value + elif self.operator == TokenType.DIVIDE: + if right_value == 0: + raise ZeroDivisionError("Division by zero") + result = left_value / right_value + elif self.operator == TokenType.MODULO: + if right_value == 0: + raise ZeroDivisionError("Modulo by zero") + result = left_value % right_value + # Comparison operators + elif self.operator == TokenType.EQUALS: + result = left_value == right_value + elif self.operator == TokenType.NOT_EQUALS: + result = left_value != right_value + elif self.operator == TokenType.LESS_THAN: + result = left_value < right_value + elif self.operator == TokenType.LESS_EQUAL: + result = left_value <= right_value + elif self.operator == TokenType.GREATER_THAN: + result = left_value > right_value + elif self.operator == TokenType.GREATER_EQUAL: + result = left_value >= right_value + # Logical operators + elif self.operator == TokenType.AND: + result = bool(left_value) and bool(right_value) + elif self.operator == TokenType.OR: + result = bool(left_value) or bool(right_value) + else: + raise ValueError(f"Unsupported operator: {self.operator}") + + return LiteralNode(result) + + def __repr__(self): + return f"BinaryOp({self.left} {self.operator} {self.right})" diff --git a/src/parser/ast/FunctionCallNode.py b/src/parser/ast/FunctionCallNode.py new file mode 100644 index 0000000..36f1f0c --- /dev/null +++ b/src/parser/ast/FunctionCallNode.py @@ -0,0 +1,12 @@ +from .base_node import BaseNode + +class FunctionCallNode(BaseNode): + """AST node for function calls.""" + + def __init__(self, name, arguments): + super().__init__("FunctionCall") + self.name = name + self.arguments = arguments + + def __repr__(self): + return 
f"FunctionCall({self.name}, {self.arguments})" diff --git a/src/parser/ast/FunctionDefNode.py b/src/parser/ast/FunctionDefNode.py new file mode 100644 index 0000000..6744e0d --- /dev/null +++ b/src/parser/ast/FunctionDefNode.py @@ -0,0 +1,13 @@ +from .base_node import BaseNode + +class FunctionDefNode(BaseNode): + """AST node for function definitions.""" + + def __init__(self, name, parameters, body): + super().__init__("FunctionDef") + self.name = name + self.parameters = parameters + self.body = body + + def __repr__(self): + return f"FunctionDef({self.name}({self.parameters}) -> {self.body})" diff --git a/src/parser/ast/ReturnNode.py b/src/parser/ast/ReturnNode.py new file mode 100644 index 0000000..fdfc53f --- /dev/null +++ b/src/parser/ast/ReturnNode.py @@ -0,0 +1,11 @@ +from .base_node import BaseNode + +class ReturnNode(BaseNode): + """AST node for return statements.""" + + def __init__(self, value): + super().__init__("Return") + self.value = value + + def __repr__(self): + return f"Return({self.value})" diff --git a/src/parser/ast/UnaryOpNode.py b/src/parser/ast/UnaryOpNode.py new file mode 100644 index 0000000..3fdf54f --- /dev/null +++ b/src/parser/ast/UnaryOpNode.py @@ -0,0 +1,39 @@ +from .base_node import BaseNode +from .literal_node import LiteralNode +from ...lexer.token_types import TokenType + +class UnaryOpNode(BaseNode): + """AST node for unary operations.""" + + def __init__(self, operator, operand): + super().__init__("UnaryOp") + self.operator = operator + self.operand = operand + self.interpreter = None # Will be set by the interpreter + + def evaluate(self): + """Evaluate the unary operation.""" + if self.interpreter: + operand = self.interpreter.execute(self.operand) + else: + operand = self.operand.value if hasattr(self.operand, 'value') else self.operand + + if isinstance(operand, str): + try: + operand = float(operand) if '.' 
in operand else int(operand) + except ValueError: + pass + + if self.operator == TokenType.NOT: + result = not bool(operand) + elif self.operator == TokenType.MINUS: + if not isinstance(operand, (int, float)): + raise TypeError(f"Cannot apply unary minus to {type(operand)}") + result = -operand + else: + raise RuntimeError(f"Unknown unary operator: {self.operator}") + + return LiteralNode(result) + + def __repr__(self): + return f"UnaryOp({self.operator} {self.operand})" diff --git a/src/parser/ast/__init__.py b/src/parser/ast/__init__.py new file mode 100644 index 0000000..8963d09 --- /dev/null +++ b/src/parser/ast/__init__.py @@ -0,0 +1,40 @@ +from .base_node import BaseNode +from .array_node import ArrayNode, ArrayAccessNode +from .block_node import BlockNode +from .control_flow_nodes import ( + BreakNode, + ContinueNode, + ForNode, + WhileNode, + IfNode +) +from .BinaryOpNode import BinaryOpNode +from .UnaryOpNode import UnaryOpNode +from .literal_node import LiteralNode +from .variable_node import VariableNode +from .print_node import PrintNode +from .ReturnNode import ReturnNode +from .AssignmentNode import AssignmentNode +from .FunctionCallNode import FunctionCallNode +from .FunctionDefNode import FunctionDefNode + +__all__ = [ + 'BaseNode', + 'ArrayNode', + 'ArrayAccessNode', + 'BlockNode', + 'BreakNode', + 'ContinueNode', + 'ForNode', + 'WhileNode', + 'IfNode', + 'BinaryOpNode', + 'UnaryOpNode', + 'LiteralNode', + 'VariableNode', + 'PrintNode', + 'ReturnNode', + 'AssignmentNode', + 'FunctionCallNode', + 'FunctionDefNode' +] \ No newline at end of file diff --git a/src/parser/ast/array_node.py b/src/parser/ast/array_node.py new file mode 100644 index 0000000..736dfa2 --- /dev/null +++ b/src/parser/ast/array_node.py @@ -0,0 +1,18 @@ +from .base_node import BaseNode + +class ArrayNode(BaseNode): + def __init__(self, elements): + super().__init__("Array") + self.elements = elements + + def __str__(self): + return f"Array[{', '.join(str(e) for e in 
self.elements)}]" + +class ArrayAccessNode(BaseNode): + def __init__(self, array, index): + super().__init__("ArrayAccess") + self.array = array + self.index = index + + def __str__(self): + return f"{self.array}[{self.index}]" \ No newline at end of file diff --git a/src/parser/ast/base_node.py b/src/parser/ast/base_node.py new file mode 100644 index 0000000..0f2ec17 --- /dev/null +++ b/src/parser/ast/base_node.py @@ -0,0 +1,8 @@ +class BaseNode: + """Base class for all AST nodes.""" + + def __init__(self, node_type): + self.node_type = node_type + + def __repr__(self): + return f"{self.node_type}()" diff --git a/src/parser/ast/block_node.py b/src/parser/ast/block_node.py new file mode 100644 index 0000000..67234de --- /dev/null +++ b/src/parser/ast/block_node.py @@ -0,0 +1,10 @@ +from .base_node import BaseNode + +class BlockNode(BaseNode): + def __init__(self, statements): + super().__init__("Block") + self.statements = statements + + def __str__(self): + statements_str = '\n '.join(str(stmt) for stmt in self.statements) + return f"Block[\n {statements_str}\n]" \ No newline at end of file diff --git a/src/parser/ast/control_flow_nodes.py b/src/parser/ast/control_flow_nodes.py new file mode 100644 index 0000000..4dff6d9 --- /dev/null +++ b/src/parser/ast/control_flow_nodes.py @@ -0,0 +1,48 @@ +from .base_node import BaseNode + +class BreakNode(BaseNode): + def __init__(self): + super().__init__("Break") + + def __str__(self): + return "Break" + +class ContinueNode(BaseNode): + def __init__(self): + super().__init__("Continue") + + def __str__(self): + return "Continue" + +class ForNode(BaseNode): + def __init__(self, init, condition, update, body): + super().__init__("For") + self.init = init + self.condition = condition + self.update = update + self.body = body + + def __str__(self): + return f"For({self.init}; {self.condition}; {self.update}) {self.body}" + +class WhileNode(BaseNode): + def __init__(self, condition, body): + super().__init__("While") + 
self.condition = condition + self.body = body + + def __str__(self): + return f"While({self.condition}) {self.body}" + +class IfNode(BaseNode): + def __init__(self, condition, if_block, else_block=None): + super().__init__("If") + self.condition = condition + self.if_block = if_block + self.else_block = else_block + + def __str__(self): + result = f"If({self.condition}) {self.if_block}" + if self.else_block: + result += f" Else {self.else_block}" + return result \ No newline at end of file diff --git a/src/parser/ast/literal_node.py b/src/parser/ast/literal_node.py new file mode 100644 index 0000000..2785ba5 --- /dev/null +++ b/src/parser/ast/literal_node.py @@ -0,0 +1,11 @@ +from .base_node import BaseNode + +class LiteralNode(BaseNode): + """AST node representing literal values.""" + + def __init__(self, value): + super().__init__("Literal") + self.value = value + + def __repr__(self): + return f"Literal({self.value})" diff --git a/src/parser/ast/print_node.py b/src/parser/ast/print_node.py new file mode 100644 index 0000000..860bbae --- /dev/null +++ b/src/parser/ast/print_node.py @@ -0,0 +1,11 @@ +from .base_node import BaseNode + +class PrintNode(BaseNode): + """AST node for print statements.""" + + def __init__(self, expression): + super().__init__("Print") + self.expression = expression + + def __repr__(self): + return f"Print({self.expression})" diff --git a/src/parser/ast/variable_node.py b/src/parser/ast/variable_node.py new file mode 100644 index 0000000..96c1a35 --- /dev/null +++ b/src/parser/ast/variable_node.py @@ -0,0 +1,14 @@ +from .base_node import BaseNode + +class VariableNode(BaseNode): + """AST node representing variable declarations.""" + + def __init__(self, name, value=None, position=None): + super().__init__("VariableDeclaration") + self.name = name + self.value = value + self.position = position + + def __repr__(self): + return f"Variable({self.name} = {self.value})" + diff --git a/src/parser/main_parser.py b/src/parser/main_parser.py new 
file mode 100644 index 0000000..e347707 --- /dev/null +++ b/src/parser/main_parser.py @@ -0,0 +1,327 @@ +from .parsers.expression_parser import ExpressionParser +from .parsers.symbol_table import SymbolTable +from .ast import ( + AssignmentNode, + BinaryOpNode, + FunctionCallNode, + IfNode, + LiteralNode, + PrintNode, + ReturnNode, + UnaryOpNode, + VariableNode, + WhileNode, + BlockNode, + ArrayNode, + ArrayAccessNode, + FunctionDefNode, + BreakNode, + ContinueNode, + ForNode +) +from src.lexer.token_types import TokenType +from ..utils.debug import debug, DebugLevel + +# Disable debugging +debug.level = DebugLevel.ERROR + +class MainParser: + DEBUG_MODE = False + + def __init__(self, tokens): + self.tokens = tokens + self.current = 0 + self.symbol_table = SymbolTable() + self.component_name = "MainParser" + + def parse(self): + """Parse the token stream and return an AST.""" + statements = [] + + while not self.is_at_end(): + statement = self.parse_statement() + if statement: + statements.append(statement) + else: + self.synchronize() + if not self.is_at_end(): + self.advance() + + return BlockNode(statements) + + def parse_statement(self): + """Parse a statement.""" + current = self.current_token() + + statement = None + if current.token_type == TokenType.PRINT_COMMAND: + statement = self.parse_print_statement() + elif current.token_type == TokenType.LEFT_BRACE: + statement = self.parse_block() + elif current.token_type == TokenType.FUNCTION: + statement = self.parse_function_definition() + elif current.token_type == TokenType.RETURN: + statement = self.parse_return() + elif current.token_type == TokenType.IF: + statement = self.parse_if() + elif current.token_type == TokenType.WHILE: + statement = self.parse_while() + elif current.token_type == TokenType.FOR: + statement = self.parse_for() + elif current.token_type == TokenType.BREAK: + statement = self.parse_break() + elif current.token_type == TokenType.CONTINUE: + statement = self.parse_continue() + elif 
current.token_type == TokenType.VARIABLE_DECLARE: + statement = self.parse_variable_declaration() + else: + statement = self.parse_expression_statement() + + # Consume any trailing semicolon + if self.check(TokenType.SEMICOLON): + self.consume(TokenType.SEMICOLON) + + # Skip any whitespace after the statement + while self.check(TokenType.WHITESPACE, TokenType.NEWLINE): + self.advance() + + return statement + + def parse_function_definition(self): + """Parse a function definition.""" + self.consume(TokenType.FUNCTION) + + name_token = self.consume(TokenType.IDENTIFIER) + name = name_token.value + + self.consume(TokenType.LEFT_PAREN) + parameters = [] + + if not self.check(TokenType.RIGHT_PAREN): + while True: + param_token = self.consume(TokenType.IDENTIFIER) + parameters.append(param_token.value) + if not self.match(TokenType.COMMA): + break + + self.consume(TokenType.RIGHT_PAREN) + body = self.parse_block() + + return FunctionDefNode(name, parameters, body) + + def parse_block(self): + """Parse a block of statements.""" + self.consume(TokenType.LEFT_BRACE) + statements = [] + + while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end(): + statement = self.parse_statement() + if statement: + statements.append(statement) + + self.consume(TokenType.RIGHT_BRACE) + return BlockNode(statements) + + def parse_if(self): + """Parse an if statement.""" + self.consume(TokenType.IF) + + condition = self.parse_expression() + if_block = self.parse_block() + else_block = None + + if self.match(TokenType.ELSE): + else_block = self.parse_block() + + return IfNode(condition, if_block, else_block) + + def parse_while(self): + """Parse a while statement.""" + self.consume(TokenType.WHILE) + + condition = self.parse_expression() + body = self.parse_block() + + return WhileNode(condition, body) + + def parse_expression(self): + """Parse an expression.""" + expr_parser = ExpressionParser(self.tokens[self.current:]) + expr, consumed = expr_parser.parse() + self.current += 
consumed + return expr + + def parse_expression_statement(self): + """Parse an expression statement.""" + expr = self.parse_expression() + if self.check(TokenType.SEMICOLON): + self.consume(TokenType.SEMICOLON) + return expr + + def parse_print_statement(self): + """Parse a print statement.""" + self.consume(TokenType.PRINT_COMMAND) + expression = self.parse_expression() + return PrintNode(expression) + + def parse_return(self): + """Parse a return statement.""" + self.consume(TokenType.RETURN) + value = None + if not self.check(TokenType.RIGHT_BRACE): + value = self.parse_expression() + return ReturnNode(value) + + def parse_break(self): + """Parse a break statement.""" + self.consume(TokenType.BREAK) + return BreakNode() + + def parse_continue(self): + """Parse a continue statement.""" + self.consume(TokenType.CONTINUE) + return ContinueNode() + + def parse_variable_declaration(self): + """Parse a variable declaration.""" + self.consume(TokenType.VARIABLE_DECLARE) + name_token = self.consume(TokenType.IDENTIFIER) + name = name_token.value + + if not self.match(TokenType.ASSIGN): + return None + + value = self.parse_expression() + return AssignmentNode(name, value) + + def is_at_end(self): + """Check if we've reached the end of the token stream.""" + return self.current >= len(self.tokens) + + def current_token(self): + """Get the current token.""" + if self.is_at_end(): + return self.tokens[-1] + return self.tokens[self.current] + + def previous(self): + """Get the previous token.""" + return self.tokens[self.current - 1] + + def advance(self): + """Advance to the next token.""" + if not self.is_at_end(): + self.current += 1 + return self.previous() + + def match(self, *types): + """Match and consume a token if it matches any of the given types.""" + for t in types: + if self.check(t): + self.advance() + return True + return False + + def check(self, *types): + """Check if the current token is of any of the given types.""" + if self.is_at_end(): + return False + 
return self.current_token().token_type in types + + def consume(self, expected_type, error_message=None): + """Consume a token of the expected type.""" + if error_message is None: + error_message = f"Expected {expected_type}" + + if self.check(expected_type): + return self.advance() + + raise SyntaxError(f"{error_message}, got {self.current_token()}") + + def synchronize(self): + """Skip tokens until we find a statement boundary.""" + self.advance() + + while not self.is_at_end(): + if self.previous().token_type == TokenType.SEMICOLON: + return + + if self.current_token().token_type in { + TokenType.FUNCTION, + TokenType.VARIABLE_DECLARE, + TokenType.FOR, + TokenType.IF, + TokenType.WHILE, + TokenType.PRINT_COMMAND, + TokenType.RETURN, + }: + return + + self.advance() + + def resolve_expression(self, expr): + """ Recursively resolve VariableNode and BinaryOpNode expressions """ + if isinstance(expr, VariableNode): + value = self.symbol_table.get(expr.name) + if value is not None: + return value + return expr # If the variable is undefined, return the node itself + + elif isinstance(expr, BinaryOpNode): + expr.left = self.resolve_expression(expr.left) + expr.right = self.resolve_expression(expr.right) + + if isinstance(expr.left, LiteralNode) and isinstance(expr.right, LiteralNode): + result = expr.evaluate() + return result + + elif isinstance(expr, ArrayAccessNode): + array = self.resolve_expression(expr.array) + index = self.resolve_expression(expr.index) + + # Convert string index to integer if needed + if isinstance(index, LiteralNode): + try: + idx = int(str(index.value)) # Convert any numeric string to int + except (ValueError, TypeError): + raise TypeError(f"Array index must be an integer, got {type(index.value)}") + else: + raise TypeError(f"Array index must be a literal value") + + if isinstance(array, (ArrayNode, LiteralNode)): + if isinstance(array, ArrayNode): + if 0 <= idx < len(array.elements): + return array.elements[idx] + else: + raise 
IndexError(f"Array index {idx} out of bounds") + elif isinstance(array.value, list): + if 0 <= idx < len(array.value): + return LiteralNode(array.value[idx]) + else: + raise IndexError(f"Array index {idx} out of bounds") + else: + raise TypeError(f"Cannot index into non-array: {array}") + + elif isinstance(expr, ArrayNode): + # Resolve each element in the array + resolved_elements = [] + for element in expr.elements: + resolved = self.resolve_expression(element) + if isinstance(resolved, LiteralNode): + resolved_elements.append(resolved) + else: + resolved_elements.append(LiteralNode(resolved)) + expr.elements = resolved_elements + return expr + + return expr + + def evaluate_binary_operation(self, left, operator, right): + """Evaluate a binary operation using the BinaryOpNode's evaluate method.""" + binary_op = BinaryOpNode(left, operator, right) + return binary_op.evaluate() + + def consume_optional_separators(self): + """Consume any optional statement separators (semicolons, whitespace, newlines).""" + while self.check(TokenType.SEMICOLON, TokenType.WHITESPACE, TokenType.NEWLINE): + self.advance() diff --git a/src/parser/parsers/__init__.py b/src/parser/parsers/__init__.py new file mode 100644 index 0000000..05737c5 --- /dev/null +++ b/src/parser/parsers/__init__.py @@ -0,0 +1,7 @@ +from .expression_parser import ExpressionParser +from .base_expression_parser import BaseExpressionParser + +__all__ = [ + 'ExpressionParser', + 'BaseExpressionParser' +] diff --git a/src/parser/parsers/array_parser.py b/src/parser/parsers/array_parser.py new file mode 100644 index 0000000..21a12e2 --- /dev/null +++ b/src/parser/parsers/array_parser.py @@ -0,0 +1,75 @@ +from ...lexer.token_types import TokenType +from ..ast import ArrayNode, ArrayAccessNode +from .base_expression_parser import BaseExpressionParser +from ...utils.debug import DebugLevel + +class ArrayParser(BaseExpressionParser): + def parse_array_literal(self): + """Parse an array literal: [expr1, expr2, ...]""" + 
self.debug("Parsing array literal", DebugLevel.DEBUG) + + if not self.match(TokenType.LEFT_BRACKET): + return None, 0 + + elements = [] + start_pos = self.current + + while not self.check(TokenType.RIGHT_BRACKET): + if len(elements) > 0: + if not self.match(TokenType.COMMA): + error_msg = "Expected ',' between array elements" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + self.skip_whitespace() + + expr_parser = self.get_expression_parser() + expression, consumed = expr_parser.parse() + + if not expression: + if len(elements) == 0 and self.check(TokenType.RIGHT_BRACKET): + break # Empty array + error_msg = "Expected expression in array" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + elements.append(expression) + self.current += consumed + self.skip_whitespace() + + if not self.match(TokenType.RIGHT_BRACKET): + error_msg = "Expected ']' after array elements" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + return ArrayNode(elements), self.current - start_pos + + def parse_array_access(self, array_expr): + """Parse array access: array[index]""" + self.debug("Parsing array access", DebugLevel.DEBUG) + + if not self.match(TokenType.LEFT_BRACKET): + return None, 0 + + self.skip_whitespace() + expr_parser = self.get_expression_parser() + index_expr, consumed = expr_parser.parse() + + if not index_expr: + error_msg = "Expected index expression in array access" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + self.skip_whitespace() + + if not self.match(TokenType.RIGHT_BRACKET): + error_msg = "Expected ']' after array index" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + return ArrayAccessNode(array_expr, index_expr), self.current + + def get_expression_parser(self): + """Get a new expression parser starting from current position""" + from .expression_parser import ExpressionParser + return 
ExpressionParser(self.tokens[self.current:]) \ No newline at end of file diff --git a/src/parser/parsers/assignment_parser.py b/src/parser/parsers/assignment_parser.py new file mode 100644 index 0000000..b50e967 --- /dev/null +++ b/src/parser/parsers/assignment_parser.py @@ -0,0 +1,59 @@ +from .base_parser import BaseParser +from ..ast import AssignmentNode +from ...lexer.token_types import TokenType +from .expression_parser import ExpressionParser + +class AssignmentParser(BaseParser): + def __init__(self, tokens): + super().__init__(tokens) + self.debug_mode = True + + def debug(self, message): + if self.debug_mode: + print(f"[AssignmentParser DEBUG] {message}") + + def parse(self): + start_pos = self.current + + # Get the identifier token + identifier_token = self.tokens[self.current] + self.debug(f"Parsing assignment for: {identifier_token.value}") + self.advance() # Move past identifier + + # Skip any whitespace + while self.current < len(self.tokens) and self.tokens[self.current].token_type == TokenType.WHITESPACE: + self.advance() + + # Consume the assignment operator + if self.current >= len(self.tokens) or self.tokens[self.current].token_type != TokenType.ASSIGN: + raise SyntaxError(f"Expected '=' after identifier, got {self.tokens[self.current].token_type if self.current < len(self.tokens) else 'END OF TOKENS'}.") + self.advance() # Move past '=' + + # Skip any whitespace + while self.current < len(self.tokens) and self.tokens[self.current].token_type == TokenType.WHITESPACE: + self.advance() + + # Debug print all tokens from current position + self.debug("Tokens for parsing:") + for i, token in enumerate(self.tokens[self.current:]): + self.debug(f"{i}: {token}") + + # Create an expression parser using the current token index + expression_parser = ExpressionParser(self.tokens[self.current:]) + expression, expr_consumed = expression_parser.parse() + + # Verify expression was parsed + if expression is None: + raise SyntaxError("Could not parse expression in 
assignment") + + # Update current position based on tokens consumed by expression parser + self.current += expr_consumed + + # Calculate total tokens consumed + consumed = self.current - start_pos + + self.debug(f"Successfully parsed assignment: {identifier_token.value} = {expression}") + self.debug(f"Tokens consumed: {consumed}") + + # Return both the node and tokens consumed + return AssignmentNode(identifier_token.value, expression), consumed \ No newline at end of file diff --git a/src/parser/parsers/base_expression_parser.py b/src/parser/parsers/base_expression_parser.py new file mode 100644 index 0000000..eb9b03f --- /dev/null +++ b/src/parser/parsers/base_expression_parser.py @@ -0,0 +1,61 @@ +from .base_parser import BaseParser +from ...lexer.token_types import TokenType +from ...utils.debug import debug, DebugLevel + +class BaseExpressionParser(BaseParser): + def __init__(self, tokens): + super().__init__(tokens) + self.component_name = self.__class__.__name__ + + def debug(self, message: str, level: DebugLevel = DebugLevel.DEBUG): + debug.log(level, self.component_name, message) + + def skip_whitespace(self): + while self.check(TokenType.WHITESPACE): + self.advance() + debug.trace(self.component_name, "Skipped whitespace") + + def match(self, *types): + self.skip_whitespace() + for token_type in types: + if self.check(token_type): + self.advance() + debug.trace(self.component_name, f"Matched token type: {token_type}") + return True + return False + + def check(self, *types): + if self.is_at_end(): + return False + current_token = self.peek() + return any(current_token.token_type == t for t in types) + + def advance(self): + if not self.is_at_end(): + self.current += 1 + debug.trace(self.component_name, f"Advanced to token: {self.peek()}") + return self.previous() + + def current_token(self): + if self.is_at_end(): + raise SyntaxError("Unexpected end of tokens") + return self.tokens[self.current] + + def previous(self): + return self.tokens[self.current - 
1] + + def is_at_end(self): + return self.current >= len(self.tokens) + + def peek(self): + if self.is_at_end(): + return None + return self.tokens[self.current] + + def consume(self, token_type, error_message=None): + if self.check(token_type): + debug.debug(self.component_name, f"Consumed token: {self.peek()}") + return self.advance() + if error_message is None: + error_message = f"Expected {token_type}" + raise SyntaxError(error_message) \ No newline at end of file diff --git a/src/parser/parsers/base_parser.py b/src/parser/parsers/base_parser.py new file mode 100644 index 0000000..0cf6ff6 --- /dev/null +++ b/src/parser/parsers/base_parser.py @@ -0,0 +1,46 @@ +from ...lexer.token_types import TokenType + +class BaseParser: + """Base class for all parsers.""" + + def __init__(self, tokens): + self.tokens = tokens + self.current = 0 + + def peek(self): + """Peek at the current token.""" + if self.current < len(self.tokens): + return self.tokens[self.current] + return None + + def consume(self, expected_type, error_message): + """Consume a token of the expected type or raise an error.""" + self.skip_whitespace() + + if self.current < len(self.tokens) and self.tokens[self.current].token_type == expected_type: + token = self.tokens[self.current] + self.advance() + return token + + raise SyntaxError(error_message) + + def match(self, token_type): + """Check if the current token matches the expected type and advance if it does.""" + if self.peek() is not None and self.peek().token_type == token_type: + self.advance() + return True + return False + + def advance(self): + """Move to the next token.""" + if self.current < len(self.tokens): + self.current += 1 + + def skip_whitespace(self): + """Skip whitespace tokens.""" + while self.current < len(self.tokens) and self.tokens[self.current].token_type == TokenType.WHITESPACE: + self.advance() + + def parse(self): + """Parse method to be implemented by subclasses.""" + raise NotImplementedError("Parse method should be 
implemented by subclasses.") \ No newline at end of file diff --git a/src/parser/parsers/block_parser.py b/src/parser/parsers/block_parser.py new file mode 100644 index 0000000..2aa3c18 --- /dev/null +++ b/src/parser/parsers/block_parser.py @@ -0,0 +1,47 @@ +from ...lexer.token_types import TokenType +from ..ast import BlockNode +from .base_expression_parser import BaseExpressionParser +from ...utils.debug import DebugLevel + +class BlockParser(BaseExpressionParser): + def parse_block(self): + """Parse a code block: { statement1; statement2; ... }""" + self.debug("Parsing code block", DebugLevel.DEBUG) + + if not self.match(TokenType.LEFT_BRACE): + return None, 0 + + statements = [] + start_pos = self.current + + self.skip_whitespace() + + while not self.check(TokenType.RIGHT_BRACE): + if self.is_at_end(): + error_msg = "Unterminated code block" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + statement = self.parse_statement() + if statement: + statements.append(statement) + + # Skip any statement separators (semicolon or newline) + while self.match(TokenType.SEMICOLON, TokenType.NEWLINE): + self.skip_whitespace() + + if not self.match(TokenType.RIGHT_BRACE): + error_msg = "Expected '}' after block statements" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + return BlockNode(statements), self.current - start_pos + + def parse_statement(self): + """Parse a single statement""" + from .statement_parser import StatementParser + parser = StatementParser(self.tokens[self.current:]) + statement, consumed = parser.parse() + if statement: + self.current += consumed + return statement \ No newline at end of file diff --git a/src/parser/parsers/command_parser.py b/src/parser/parsers/command_parser.py new file mode 100644 index 0000000..14a51b9 --- /dev/null +++ b/src/parser/parsers/command_parser.py @@ -0,0 +1,61 @@ +from ...lexer.token_types import TokenType +from ..ast import PrintNode, VariableDeclarationNode +from 
.base_expression_parser import BaseExpressionParser +from ...utils.debug import DebugLevel + +class CommandParser(BaseExpressionParser): + def parse_print(self): + """Parse a print statement: print """ + self.debug("Parsing print command", DebugLevel.DEBUG) + + if not self.match(TokenType.PRINT_COMMAND): + return None, 0 + + self.skip_whitespace() + expr_parser = self.get_expression_parser() + expression, consumed = expr_parser.parse() + + if not expression: + error_msg = "Expected expression after print command" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + return PrintNode(expression), self.current + consumed + + def parse_variable_declaration(self): + """Parse a variable declaration: var = """ + self.debug("Parsing variable declaration", DebugLevel.DEBUG) + + if not self.match(TokenType.VARIABLE_DECLARE): + return None, 0 + + self.skip_whitespace() + + if not self.match(TokenType.IDENTIFIER): + error_msg = "Expected identifier after variable declaration" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + identifier = self.previous().value + self.skip_whitespace() + + if not self.match(TokenType.ASSIGN): + error_msg = "Expected '=' after variable identifier" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.skip_whitespace() + expr_parser = self.get_expression_parser() + expression, consumed = expr_parser.parse() + + if not expression: + error_msg = "Expected expression after '='" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + return VariableDeclarationNode(identifier, expression), self.current + consumed + + def get_expression_parser(self): + """Get a new expression parser starting from current position""" + from .expression_parser import ExpressionParser + return ExpressionParser(self.tokens[self.current:]) \ No newline at end of file diff --git a/src/parser/parsers/control_flow_parser.py b/src/parser/parsers/control_flow_parser.py new file 
mode 100644 index 0000000..7d006be --- /dev/null +++ b/src/parser/parsers/control_flow_parser.py @@ -0,0 +1,196 @@ +from ...lexer.token_types import TokenType +from ..ast import IfNode, WhileNode, ForNode, ReturnNode, BreakNode, ContinueNode +from .base_expression_parser import BaseExpressionParser +from ...utils.debug import DebugLevel + +class ControlFlowParser(BaseExpressionParser): + def parse_if(self): + """Parse if statement: if (condition) { block } else { block }""" + self.debug("Parsing if statement", DebugLevel.DEBUG) + + if not self.match(TokenType.IF): + return None, 0 + + self.skip_whitespace() + + # Parse condition + if not self.match(TokenType.LEFT_PAREN): + error_msg = "Expected '(' after 'if'" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + condition, consumed = self.get_expression_parser().parse() + if not condition: + error_msg = "Expected condition in if statement" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + self.skip_whitespace() + + if not self.match(TokenType.RIGHT_PAREN): + error_msg = "Expected ')' after if condition" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # Parse if block + if_block, consumed = self.get_block_parser().parse_block() + if not if_block: + error_msg = "Expected block after if condition" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + self.skip_whitespace() + + # Parse optional else block + else_block = None + if self.match(TokenType.ELSE): + self.skip_whitespace() + else_block, consumed = self.get_block_parser().parse_block() + if not else_block: + error_msg = "Expected block after 'else'" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + self.current += consumed + + return IfNode(condition, if_block, else_block), self.current + + def parse_while(self): + """Parse while statement: while (condition) { block }""" + 
self.debug("Parsing while statement", DebugLevel.DEBUG) + + if not self.match(TokenType.WHILE): + return None, 0 + + self.skip_whitespace() + + # Parse condition + if not self.match(TokenType.LEFT_PAREN): + error_msg = "Expected '(' after 'while'" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + condition, consumed = self.get_expression_parser().parse() + if not condition: + error_msg = "Expected condition in while statement" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + self.skip_whitespace() + + if not self.match(TokenType.RIGHT_PAREN): + error_msg = "Expected ')' after while condition" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # Parse block + block, consumed = self.get_block_parser().parse_block() + if not block: + error_msg = "Expected block after while condition" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + return WhileNode(condition, block), self.current + + def parse_for(self): + """Parse for statement: for (init; condition; update) { block }""" + self.debug("Parsing for statement", DebugLevel.DEBUG) + + if not self.match(TokenType.FOR): + return None, 0 + + self.skip_whitespace() + + if not self.match(TokenType.LEFT_PAREN): + error_msg = "Expected '(' after 'for'" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # Parse initialization + init, consumed = self.get_statement_parser().parse() + self.current += consumed + + if not self.match(TokenType.SEMICOLON): + error_msg = "Expected ';' after for init" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # Parse condition + condition, consumed = self.get_expression_parser().parse() + self.current += consumed + + if not self.match(TokenType.SEMICOLON): + error_msg = "Expected ';' after for condition" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # 
Parse update + update, consumed = self.get_statement_parser().parse() + self.current += consumed + + if not self.match(TokenType.RIGHT_PAREN): + error_msg = "Expected ')' after for clauses" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + # Parse block + block, consumed = self.get_block_parser().parse_block() + if not block: + error_msg = "Expected block in for statement" + self.debug(error_msg, DebugLevel.ERROR) + raise SyntaxError(error_msg) + + self.current += consumed + return ForNode(init, condition, update, block), self.current + + def parse_return(self): + """Parse return statement: return [expression];""" + self.debug("Parsing return statement", DebugLevel.DEBUG) + + if not self.match(TokenType.RETURN): + return None, 0 + + self.skip_whitespace() + + # Parse optional return value + value = None + if not self.check(TokenType.SEMICOLON, TokenType.NEWLINE): + expr, consumed = self.get_expression_parser().parse() + if expr: + value = expr + self.current += consumed + + return ReturnNode(value), self.current + + def parse_break(self): + """Parse break statement: break;""" + self.debug("Parsing break statement", DebugLevel.DEBUG) + + if not self.match(TokenType.BREAK): + return None, 0 + + return BreakNode(), self.current + + def parse_continue(self): + """Parse continue statement: continue;""" + self.debug("Parsing continue statement", DebugLevel.DEBUG) + + if not self.match(TokenType.CONTINUE): + return None, 0 + + return ContinueNode(), self.current + + def get_expression_parser(self): + from .expression_parser import ExpressionParser + return ExpressionParser(self.tokens[self.current:]) + + def get_block_parser(self): + from .block_parser import BlockParser + return BlockParser(self.tokens[self.current:]) + + def get_statement_parser(self): + from .statement_parser import StatementParser + return StatementParser(self.tokens[self.current:]) \ No newline at end of file diff --git a/src/parser/parsers/expression_parser.py 
b/src/parser/parsers/expression_parser.py new file mode 100644 index 0000000..11bef39 --- /dev/null +++ b/src/parser/parsers/expression_parser.py @@ -0,0 +1,282 @@ +from ...lexer.token_types import TokenType +from ..ast import ( + BinaryOpNode, + UnaryOpNode, + LiteralNode, + VariableNode, + ArrayNode, + ArrayAccessNode, + AssignmentNode, + FunctionCallNode +) +from .base_expression_parser import BaseExpressionParser + +class ExpressionParser(BaseExpressionParser): + def __init__(self, tokens): + super().__init__(tokens) + self.component_name = "ExpressionParser" + + def parse(self): + """Parse an expression and return the AST node and number of tokens consumed.""" + start_pos = self.current + + try: + self.skip_whitespace() + expr = self.parse_assignment() + consumed = self.current - start_pos + return (expr, consumed) if expr else (None, 0) + except Exception: + return None, 0 + + def parse_assignment(self): + """Parse an assignment expression.""" + self.skip_whitespace() + left = self.parse_logical() + + if not left: + return None + + self.skip_whitespace() + if self.match(TokenType.ASSIGN): + if isinstance(left, VariableNode): + value = self.parse_assignment() + if not value: + return None + return AssignmentNode(left.name, value) + elif isinstance(left, ArrayAccessNode): + value = self.parse_assignment() + if not value: + return None + return AssignmentNode(left, value) + else: + return None + + return left + + def parse_logical(self): + """Parse logical operators (and, or).""" + self.skip_whitespace() + expr = self.parse_equality() + + if not expr: + return None + + while True: + self.skip_whitespace() + if not self.match(TokenType.AND, TokenType.OR): + break + operator = self.previous().token_type + right = self.parse_equality() + if not right: + return None + expr = BinaryOpNode(expr, operator, right) + + return expr + + def parse_equality(self): + """Parse equality expressions (==, !=).""" + self.skip_whitespace() + expr = self.parse_comparison() + + if 
not expr: + return None + + while True: + self.skip_whitespace() + if not self.match(TokenType.EQUALS, TokenType.NOT_EQUALS): + break + operator = self.previous().token_type + right = self.parse_comparison() + if not right: + return None + expr = BinaryOpNode(expr, operator, right) + + return expr + + def parse_comparison(self): + """Parse comparison expressions (<, >, <=, >=).""" + self.skip_whitespace() + expr = self.parse_term() + + if not expr: + return None + + while True: + self.skip_whitespace() + if not self.match( + TokenType.LESS_THAN, TokenType.GREATER_THAN, + TokenType.LESS_EQUAL, TokenType.GREATER_EQUAL + ): + break + operator = self.previous().token_type + right = self.parse_term() + if not right: + return None + expr = BinaryOpNode(expr, operator, right) + + return expr + + def parse_term(self): + """Parse terms (addition and subtraction).""" + self.skip_whitespace() + expr = self.parse_factor() + + if not expr: + return None + + while True: + self.skip_whitespace() + if not self.match(TokenType.PLUS, TokenType.MINUS): + break + operator = self.previous().token_type + right = self.parse_factor() + if not right: + return None + expr = BinaryOpNode(expr, operator, right) + + return expr + + def parse_factor(self): + """Parse factors (multiplication and division).""" + self.skip_whitespace() + expr = self.parse_unary() + + if not expr: + return None + + while True: + self.skip_whitespace() + if not self.match(TokenType.MULTIPLY, TokenType.DIVIDE): + break + operator = self.previous().token_type + right = self.parse_unary() + if not right: + return None + expr = BinaryOpNode(expr, operator, right) + + return expr + + def parse_unary(self): + """Parse unary expressions (-, not).""" + self.skip_whitespace() + if self.match(TokenType.MINUS, TokenType.NOT): + operator = self.previous().token_type + right = self.parse_unary() + if not right: + return None + expr = UnaryOpNode(operator, right) + return expr + + return self.parse_call() + + def 
parse_call(self): + """Parse function calls and array access.""" + self.skip_whitespace() + expr = self.parse_primary() + + if not expr: + return None + + while True: + self.skip_whitespace() + if self.match(TokenType.LEFT_PAREN): + # Function call + if not isinstance(expr, VariableNode): + return None + + arguments = [] + if not self.check(TokenType.RIGHT_PAREN): + while True: + arg = self.parse_assignment() + if not arg: + return None + arguments.append(arg) + if not self.match(TokenType.COMMA): + break + + if not self.match(TokenType.RIGHT_PAREN): + return None + expr = FunctionCallNode(expr.name, arguments) + elif self.match(TokenType.LEFT_BRACKET): + # Array access + index = self.parse_assignment() + if not index: + return None + if not self.match(TokenType.RIGHT_BRACKET): + return None + expr = ArrayAccessNode(expr, index) + else: + break + + return expr + + def parse_primary(self): + """Parse primary expressions (literals, variables, parentheses, arrays).""" + self.skip_whitespace() + + if self.match(TokenType.INTEGER): + value = int(self.previous().value) + return LiteralNode(value) + + if self.match(TokenType.STRING): + value = str(self.previous().value) + return LiteralNode(value) + + if self.match(TokenType.BOOLEAN): + value = self.previous().value + return LiteralNode(value) + + if self.match(TokenType.IDENTIFIER): + name = self.previous().value + return VariableNode(name) + + if self.match(TokenType.LEFT_PAREN): + expr = self.parse_assignment() + if not expr: + return None + if not self.match(TokenType.RIGHT_PAREN): + return None + return expr + + if self.match(TokenType.LEFT_BRACKET): + elements = [] + if not self.check(TokenType.RIGHT_BRACKET): + while True: + element = self.parse_assignment() + if not element: + return None + elements.append(element) + if not self.match(TokenType.COMMA): + break + + if not self.match(TokenType.RIGHT_BRACKET): + return None + return ArrayNode(elements) + + return None + + def skip_whitespace(self): + """Skip 
whitespace tokens.""" + while self.check(TokenType.WHITESPACE, TokenType.NEWLINE): + self.advance() + + def consume(self, expected_type, error_message): + """Consume a token of the expected type or raise an error.""" + self.skip_whitespace() + if self.check(expected_type): + return self.advance() + raise SyntaxError(f"{error_message} at token {self.current_token()}") + + def match(self, *token_types): + """Match current token against given types.""" + self.skip_whitespace() + for token_type in token_types: + if self.check(token_type): + return self.advance() is not None + return False + + def check(self, *token_types): + """Check if current token is of given type.""" + if self.is_at_end(): + return False + return self.current_token().token_type in token_types \ No newline at end of file diff --git a/src/parser/parsers/functionCall_parser.py b/src/parser/parsers/functionCall_parser.py new file mode 100644 index 0000000..b449b13 --- /dev/null +++ b/src/parser/parsers/functionCall_parser.py @@ -0,0 +1,74 @@ +from ..ast import FunctionCallNode +from ...lexer.token_types import TokenType +from .literal_parser import LiteralParser + +class FunctionCallParser: + def __init__(self, tokens): + self.tokens = tokens + self.current = 0 + + def parse(self): + start_pos = self.current + + # Check if this looks like a function call + if not (self.check(TokenType.IDENTIFIER) and + self.current + 1 < len(self.tokens) and + self.tokens[self.current + 1].token_type == TokenType.LEFT_PAREN): + return None, 0 + + # Get function name + name_token = self.consume(TokenType.IDENTIFIER, "Expected function name.") + self.consume(TokenType.LEFT_PAREN, "Expected '(' after function name.") + + # Parse arguments + args = [] + while not self.check(TokenType.RIGHT_PAREN): + # Skip whitespace before argument + while self.check(TokenType.WHITESPACE): + self.advance() + + # Parse argument (which could be an expression) + arg_parser = LiteralParser(self.tokens[self.current:]) + arg, arg_consumed = 
arg_parser.parse() + if not arg: + raise SyntaxError("Expected function argument.") + + args.append(arg) + self.current += arg_consumed + + # Skip whitespace after argument + while self.check(TokenType.WHITESPACE): + self.advance() + + # Check for comma + if self.check(TokenType.COMMA): + self.advance() + + self.consume(TokenType.RIGHT_PAREN, "Expected ')' after arguments.") + + # Calculate total tokens consumed + total_consumed = self.current - start_pos + + return FunctionCallNode(name_token.value, args), total_consumed + + def consume(self, token_type, error_message): + if self.check(token_type): + token = self.current_token() + self.advance() + return token + raise SyntaxError(error_message) + + def check(self, *token_types): + if self.is_at_end(): + return False + return self.current_token().token_type in token_types + + def advance(self): + if not self.is_at_end(): + self.current += 1 + + def current_token(self): + return self.tokens[self.current] + + def is_at_end(self): + return self.current >= len(self.tokens) \ No newline at end of file diff --git a/src/parser/parsers/literal_parser.py b/src/parser/parsers/literal_parser.py new file mode 100644 index 0000000..1d46b59 --- /dev/null +++ b/src/parser/parsers/literal_parser.py @@ -0,0 +1,51 @@ +from ..ast import LiteralNode +from ...lexer.token_types import TokenType + +class LiteralParser: + def __init__(self, tokens): + self.tokens = tokens + self.current = 0 + + def parse(self): + start_pos = self.current + + # Skip any whitespace + while self.current < len(self.tokens) and self.tokens[self.current].token_type == TokenType.WHITESPACE: + self.current += 1 + + # Check if there are tokens left + if self.is_at_end(): + return None, 0 + + # Check for literal types + if self.check(TokenType.INTEGER, TokenType.FLOAT, TokenType.STRING): + token = self.current_token() + self.advance() + # Calculate tokens consumed (including any whitespace we skipped) + consumed = self.current - start_pos + return 
LiteralNode(token.value), consumed + + # No literal found + return None, 0 + + def consume(self, token_type, error_message): + if self.check(token_type): + token = self.current_token() + self.advance() + return token + raise SyntaxError(error_message) + + def check(self, *token_types): + if self.is_at_end(): + return False + return self.current_token().token_type in token_types + + def advance(self): + if not self.is_at_end(): + self.current += 1 + + def current_token(self): + return self.tokens[self.current] + + def is_at_end(self): + return self.current >= len(self.tokens) \ No newline at end of file diff --git a/src/parser/parsers/statement_parser.py b/src/parser/parsers/statement_parser.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/src/parser/parsers/statement_parser.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/parser/parsers/symbol_table.py b/src/parser/parsers/symbol_table.py new file mode 100644 index 0000000..bf53de0 --- /dev/null +++ b/src/parser/parsers/symbol_table.py @@ -0,0 +1,63 @@ +class SymbolTable: + def __init__(self, parent=None): + self.symbols = {} + self.parent = parent + self.children = [] + + def create_child_scope(self): + """Create a new child scope.""" + child = SymbolTable(parent=self) + self.children.append(child) + return child + + def set(self, name, value): + """Set a variable in the current scope.""" + self.symbols[name] = value + + def get(self, name): + """Get a variable from the current scope or parent scopes.""" + value = self.symbols.get(name) + if value is not None: + return value + if self.parent: + return self.parent.get(name) + return None + + def update(self, name, value): + """Update a variable in its original scope.""" + if name in self.symbols: + self.symbols[name] = value + return True + if self.parent: + return self.parent.update(name, value) + return False + + def exists(self, name): + """Check if a variable exists in any accessible scope.""" + return name in self.symbols or 
(self.parent and self.parent.exists(name)) + + def get_scope(self, name): + """Get the scope where a variable is defined.""" + if name in self.symbols: + return self + if self.parent: + return self.parent.get_scope(name) + return None + + def dump(self, indent=0): + """Print the symbol table contents with scope information.""" + print("\n📄 [Symbol Table Dump]") + self._dump_recursive(indent) + + def _dump_recursive(self, indent=0): + """Helper method for recursive symbol table dumping.""" + indent_str = " " * indent + print(f"{indent_str}🔷 Scope Level {indent}:") + for var, value in self.symbols.items(): + print(f"{indent_str} 🔑 {var} = {value}") + for child in self.children: + child._dump_recursive(indent + 1) + + def __repr__(self): + scope_info = "global" if not self.parent else "local" + return f"{scope_info} SymbolTable{self.symbols}" diff --git a/src/parser/parsers/variable_parser.py b/src/parser/parsers/variable_parser.py new file mode 100644 index 0000000..0ada2fa --- /dev/null +++ b/src/parser/parsers/variable_parser.py @@ -0,0 +1,88 @@ +from ..ast import VariableNode +from ...lexer.token_types import TokenType + +class VariableParser: + def __init__(self, tokens): + self.tokens = tokens + self.current = 0 + + def parse(self): + + from .expression_parser import ExpressionParser + + start_pos = self.current + + # Check for variable declaration keyword + if self.check(TokenType.VARIABLE_DECLARE): + # Consume 'var' keyword + self.consume(TokenType.VARIABLE_DECLARE, "Expected 'var' keyword.") + + # Skip whitespace after 'var' + while self.check(TokenType.WHITESPACE): + self.advance() + + # Get variable name + if not self.check(TokenType.IDENTIFIER): + raise SyntaxError("Expected identifier after 'var'.") + + var_name = self.consume(TokenType.IDENTIFIER, "Expected identifier after 'var'.").value + + # Skip whitespace after identifier + while self.check(TokenType.WHITESPACE): + self.advance() + + # Check for initialization + initial_value = None + if 
self.check(TokenType.ASSIGN): + self.advance() # Consume '=' + + # Skip whitespace after '=' + while self.check(TokenType.WHITESPACE): + self.advance() + + # Parse initial value + expr_parser = ExpressionParser(self.tokens[self.current:]) + initial_value, expr_consumed = expr_parser.parse() + + if not initial_value: + raise SyntaxError("Expected expression after '='.") + + self.current += expr_consumed + + # Calculate total tokens consumed + total_consumed = self.current - start_pos + + return VariableNode(var_name, initial_value), total_consumed + + # If not a variable declaration but an identifier + elif self.check(TokenType.IDENTIFIER): + var_name = self.consume(TokenType.IDENTIFIER, "Expected identifier.").value + + # Calculate tokens consumed + total_consumed = self.current - start_pos + + return VariableNode(var_name, None), total_consumed + + return None, 0 + + def consume(self, token_type, error_message): + if self.check(token_type): + token = self.current_token() + self.advance() + return token + raise SyntaxError(error_message) + + def check(self, *token_types): + if self.is_at_end(): + return False + return self.current_token().token_type in token_types + + def advance(self): + if not self.is_at_end(): + self.current += 1 + + def current_token(self): + return self.tokens[self.current] + + def is_at_end(self): + return self.current >= len(self.tokens) \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..2805ee4 --- /dev/null +++ b/src/utils/__init__.py @@ -0,0 +1,3 @@ +from .debug import debug, DebugLevel + +__all__ = ['debug', 'DebugLevel'] \ No newline at end of file diff --git a/src/utils/config.py b/src/utils/config.py new file mode 100644 index 0000000..11d31b3 --- /dev/null +++ b/src/utils/config.py @@ -0,0 +1,81 @@ +import os +from dotenv import load_dotenv +from src.lexer.token_types import TokenType + +class LanguageConfig: + _instance = None + + @classmethod + def 
class LanguageConfig:
    """Keyword-to-token-type table, with defaults overridable from the environment.

    ``keyword_mappings`` maps a lower-case keyword string to its token type.
    Use ``get_instance()`` to share one configuration object.
    """

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        self.load_config()

    def load_config(self):
        """Load configuration from .env file and build ``keyword_mappings``.

        Each ``KEYWORD_*`` environment variable that is set to a non-blank
        value replaces the default keyword of that specific entry.
        """
        load_dotenv()

        # (environment variable, default keyword, token type).
        # 'true' and 'false' share TokenType.BOOLEAN, so every override is
        # tied to its own default keyword rather than looked up by token type.
        keyword_specs = (
            ('KEYWORD_PRINT', 'print', TokenType.PRINT_COMMAND),
            ('KEYWORD_IF', 'if', TokenType.IF),
            ('KEYWORD_ELSE', 'else', TokenType.ELSE),
            ('KEYWORD_WHILE', 'while', TokenType.WHILE),
            ('KEYWORD_FOR', 'for', TokenType.FOR),
            ('KEYWORD_FUNCTION', 'function', TokenType.FUNCTION),
            ('KEYWORD_RETURN', 'return', TokenType.RETURN),
            ('KEYWORD_BREAK', 'break', TokenType.BREAK),
            ('KEYWORD_CONTINUE', 'continue', TokenType.CONTINUE),
            ('KEYWORD_AND', 'and', TokenType.AND),
            ('KEYWORD_OR', 'or', TokenType.OR),
            ('KEYWORD_NOT', 'not', TokenType.NOT),
            ('KEYWORD_TRUE', 'true', TokenType.BOOLEAN),
            ('KEYWORD_FALSE', 'false', TokenType.BOOLEAN),
            ('KEYWORD_NONE', 'none', TokenType.NONE),
            ('KEYWORD_VAR', 'var', TokenType.VARIABLE_DECLARE),
        )

        # Default keywords if not specified in .env
        self.keyword_mappings = {
            default_keyword: token_type
            for _, default_keyword, token_type in keyword_specs
        }

        # Override with custom keywords from .env
        for env_key, default_keyword, token_type in keyword_specs:
            custom_keyword = (os.getenv(env_key) or '').strip().lower()
            if not custom_keyword:
                continue
            # Drop this entry's own default, but only while it still belongs to
            # this entry: an earlier override may have re-purposed the same
            # word for a different token type.
            if self.keyword_mappings.get(default_keyword) == token_type:
                del self.keyword_mappings[default_keyword]
            self.keyword_mappings[custom_keyword] = token_type

    def get_keyword(self, token_type):
        """Get the keyword string for a given token type.

        Returns the first matching keyword in mapping order, or None. Token
        types shared by several keywords (the two boolean literals) therefore
        yield only one of them.
        """
        for keyword, t_type in self.keyword_mappings.items():
            if t_type == token_type:
                return keyword
        return None

    def get_token_type(self, keyword):
        """Get the token type for a given keyword string (case-insensitive), or None."""
        return self.keyword_mappings.get(keyword.lower())
class DebugLevel(IntEnum):
    """Verbosity levels; a higher number lets more messages through."""
    OFF = 0
    ERROR = 1
    WARN = 2
    INFO = 3
    DEBUG = 4
    TRACE = 5


class DebugManager:
    """Single shared gate for debug output.

    Every ``DebugManager()`` call yields the same object; it starts at
    ``DebugLevel.OFF`` and prints a message only when the message's level
    does not exceed the configured one.
    """

    _instance: Optional['DebugManager'] = None

    def __new__(cls):
        existing = cls._instance
        if existing is not None:
            return existing
        # First construction: create the instance and start fully silenced.
        created = super().__new__(cls)
        created._level = DebugLevel.OFF
        cls._instance = created
        return created

    @property
    def level(self) -> DebugLevel:
        """The currently configured verbosity threshold."""
        return self._level

    @level.setter
    def level(self, value: DebugLevel):
        self._level = value

    def should_log(self, level: DebugLevel) -> bool:
        """Return True when a message at ``level`` passes the threshold."""
        return self._level >= level

    def log(self, level: DebugLevel, component: str, message: str):
        """Print ``[LEVEL] [component] message`` if ``level`` is enabled."""
        if not self.should_log(level):
            return
        # Level name is left-aligned and padded to five characters.
        print(f"[{level.name:<5}] [{component}] {message}")

    def error(self, component: str, message: str):
        """Log ``message`` at ERROR level."""
        self.log(DebugLevel.ERROR, component, message)

    def warn(self, component: str, message: str):
        """Log ``message`` at WARN level."""
        self.log(DebugLevel.WARN, component, message)

    def info(self, component: str, message: str):
        """Log ``message`` at INFO level."""
        self.log(DebugLevel.INFO, component, message)

    def debug(self, component: str, message: str):
        """Log ``message`` at DEBUG level."""
        self.log(DebugLevel.DEBUG, component, message)

    def trace(self, component: str, message: str):
        """Log ``message`` at TRACE level."""
        self.log(DebugLevel.TRACE, component, message)


# Global debug manager instance
debug = DebugManager()
{ + give 1; + } + give n * factorial(n - 1); +} + +// Testing loop with custom keywords +var x = 5; +loop (x > 0) { + duck(x); + suppose (x == 3) { + skip; + } + suppose (x == 1) { + stop; + } + x = x - 1; +} + +// Testing repeat (for) loop +repeat (var i = 0; i < 3; i = i + 1) { + duck("Counter: " + i); +} \ No newline at end of file diff --git a/test_error_handling.py b/test_error_handling.py new file mode 100644 index 0000000..732e0eb --- /dev/null +++ b/test_error_handling.py @@ -0,0 +1,181 @@ +import sys +from src.error import ErrorHandler, DuckLangError + +def test_error_handling(): + test_cases = [ + # Syntax Errors + { + 'name': 'Basic Syntax Error', + 'code': 'def test() { x = 5 + }', + 'expected_type': 'Syntax Error', + 'error_key': 'SYNTAX_GENERIC', + 'raise_func': 'raise_syntax_error' + }, + { + 'name': 'Missing Bracket', + 'code': 'when x > 0 { show("test")', + 'expected_type': 'Syntax Error', + 'error_key': 'MISSING_BRACKET', + 'raise_func': 'raise_syntax_error', + 'kwargs': {'bracket_type': '}'} + }, + { + 'name': 'Invalid Token', + 'code': 'let @invalid = 5', + 'expected_type': 'Syntax Error', + 'error_key': 'INVALID_TOKEN', + 'raise_func': 'raise_syntax_error', + 'kwargs': {'token': '@'} + }, + + # Runtime Errors + { + 'name': 'Undefined Variable', + 'code': 'show(undefinedVariable)', + 'expected_type': 'Runtime Error', + 'error_key': 'UNDEFINED_VAR', + 'raise_func': 'raise_runtime_error', + 'kwargs': {'var_name': 'undefinedVariable'} + }, + { + 'name': 'Type Mismatch', + 'code': 'let x = "hello" + 5', + 'expected_type': 'Runtime Error', + 'error_key': 'TYPE_MISMATCH', + 'raise_func': 'raise_runtime_error', + 'kwargs': {'expected_type': 'number', 'actual_type': 'string'} + }, + { + 'name': 'Division by Zero', + 'code': 'let x = 10 / 0', + 'expected_type': 'Runtime Error', + 'error_key': 'DIVISION_BY_ZERO', + 'raise_func': 'raise_runtime_error' + }, + { + 'name': 'Index Out of Range', + 'code': 'let arr = [1, 2, 3]; show(arr[5])', + 'expected_type': 
'Runtime Error', + 'error_key': 'INDEX_OUT_OF_RANGE', + 'raise_func': 'raise_runtime_error', + 'kwargs': {'index': 5, 'max_index': 2} + }, + + # Function Errors + { + 'name': 'Undefined Function', + 'code': 'nonexistent_func()', + 'expected_type': 'Function Error', + 'error_key': 'UNDEFINED_FUNCTION', + 'raise_func': 'raise_function_error', + 'kwargs': {'func_name': 'nonexistent_func'} + }, + { + 'name': 'Invalid Arguments', + 'code': 'def add(a, b) {} add(1)', + 'expected_type': 'Function Error', + 'error_key': 'INVALID_ARGUMENTS', + 'raise_func': 'raise_function_error', + 'kwargs': {'expected': 2, 'actual': 1} + }, + + # Type Errors + { + 'name': 'Type Conversion Error', + 'code': 'let x = "hello" as number', + 'expected_type': 'Type Error', + 'error_key': 'TYPE_CONVERSION', + 'raise_func': 'raise_type_error', + 'kwargs': {'from_type': 'string', 'to_type': 'number'} + }, + { + 'name': 'Invalid Operation', + 'code': 'let x = true * 5', + 'expected_type': 'Type Error', + 'error_key': 'INVALID_OPERATION', + 'raise_func': 'raise_type_error', + 'kwargs': {'operation': 'multiply', 'type1': 'boolean', 'type2': 'number'} + }, + + # IO Errors + { + 'name': 'File Not Found', + 'code': 'import "nonexistent.duck"', + 'expected_type': 'IO Error', + 'error_key': 'FILE_NOT_FOUND', + 'raise_func': 'raise_io_error', + 'kwargs': {'file': 'nonexistent.duck'} + }, + { + 'name': 'Permission Denied', + 'code': 'import "/root/secret.duck"', + 'expected_type': 'IO Error', + 'error_key': 'PERMISSION_DENIED', + 'raise_func': 'raise_io_error', + 'kwargs': {'file': '/root/secret.duck'} + } + ] + + error_handler = ErrorHandler() + + print("🦆 Running DuckLang Error Handler Tests") + print("======================================") + + passed = 0 + total = len(test_cases) + + for test in test_cases: + print(f"\nTesting: {test['name']}") + print("-" * (9 + len(test['name']))) + + try: + error_handler.set_current_file(test['code']) + kwargs = test.get('kwargs', {}) + kwargs.update({ + 'line': 1, 
+ 'column': 0, + 'file_name': 'test.duck' + }) + + # Call the appropriate error raising function + if hasattr(error_handler, test['raise_func']): + getattr(error_handler, test['raise_func'])( + test['error_key'], + **kwargs + ) + + except DuckLangError as e: + print(e) + print("\nTest Result: ", end="") + + # Check if both error type and message are correct + error_type_correct = test['expected_type'] in str(e) + error_key_correct = test['error_key'].lower() in str(e).lower() + suggestion_present = e.suggestion is not None + + if error_type_correct and error_key_correct and suggestion_present: + print("✅ Passed") + passed += 1 + else: + print("❌ Failed") + if not error_type_correct: + print(f"Expected error type '{test['expected_type']}' not found in message") + if not error_key_correct: + print(f"Expected error key '{test['error_key']}' not found in message") + if not suggestion_present: + print("No suggestion provided for the error") + + except Exception as e: + print(f"Unexpected error: {str(e)}") + print("\nTest Result: ❌ Failed") + + print("\n======================================") + print(f"Final Results: {passed}/{total} tests passed") + if passed == total: + print("🎉 All tests passed!") + else: + print(f"😢 {total - passed} tests failed") + print("======================================") + +if __name__ == "__main__": + test_error_handling() \ No newline at end of file diff --git a/test_errors.duck b/test_errors.duck new file mode 100644 index 0000000..ad94e75 --- /dev/null +++ b/test_errors.duck @@ -0,0 +1,29 @@ +# Syntax Error Test +def test1() { + x = 5 + # Missing operand +} + +# Undefined Variable Error +def test2() { + show(undefinedVariable) +} + +# Type Mismatch Error +def test3() { + let x = "hello" + let y = x + 5 +} + +# Division by Zero Error +def test4() { + let x = 10 + let y = x / 0 +} + +# Missing Bracket Error +def test5() { + let x = 5 + when x > 0 { + show("Missing closing bracket") + } +} \ No newline at end of file