From f82f3f88f21e83322604027430a232f79e361ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=95=E5=B1=B1?= Date: Mon, 24 Jun 2024 16:16:01 +0800 Subject: [PATCH 01/10] sync documents of godel-script project --- godel-script/README.md | 91 +++++ .../docs/language-reference/databases.md | 50 +++ godel-script/docs/language-reference/enums.md | 25 ++ .../docs/language-reference/functions.md | 360 ++++++++++++++++++ godel-script/docs/language-reference/impl.md | 18 + .../docs/language-reference/import.md | 102 +++++ .../docs/language-reference/program.md | 111 ++++++ .../docs/language-reference/queries.md | 93 +++++ .../docs/language-reference/schemas.md | 208 ++++++++++ godel-script/docs/language-reference/type.md | 112 ++++++ godel-script/docs/syntax.md | 248 ++++++++++++ 11 files changed, 1418 insertions(+) create mode 100644 godel-script/README.md create mode 100644 godel-script/docs/language-reference/databases.md create mode 100644 godel-script/docs/language-reference/enums.md create mode 100644 godel-script/docs/language-reference/functions.md create mode 100644 godel-script/docs/language-reference/impl.md create mode 100644 godel-script/docs/language-reference/import.md create mode 100644 godel-script/docs/language-reference/program.md create mode 100644 godel-script/docs/language-reference/queries.md create mode 100644 godel-script/docs/language-reference/schemas.md create mode 100644 godel-script/docs/language-reference/type.md create mode 100644 godel-script/docs/syntax.md diff --git a/godel-script/README.md b/godel-script/README.md new file mode 100644 index 00000000..16935f07 --- /dev/null +++ b/godel-script/README.md @@ -0,0 +1,91 @@ +# GödelScript + +## Content + +* 简介 | [Introduction](#introduction) +* 文档 | [Documents](#documents) +* 编译 | [Compilation](#compilation) +* 用法 | [Usage](#usage) + +## Introduction + +GödelScript is designed for creating code analysis libraries and programs, +and compiling them to soufflé more easily. 
With its Object-Oriented features, +it has great maintainability and readability. + +```rust +@output +pub fn hello() -> string { + return "Hello World!" +} +``` + +## Documents + +* GödelScript Language Reference + * GödelScript [Program](./docs/language-reference/program.md) + * GödelScript [Type](./docs/language-reference/type.md) + * GödelScript [Schema](./docs/language-reference/schemas.md) + * GödelScript [Database](./docs/language-reference/databases.md) + * GödelScript [Enum](./docs/language-reference/enums.md) + * GödelScript [Impl](./docs/language-reference/impl.md) + * GödelScript [Function](./docs/language-reference/functions.md) + * GödelScript [Import](./docs/language-reference/import.md) + * GödelScript [Query](./docs/language-reference/queries.md) + * GödelScript [Statement](./docs/language-reference/functions.md#statement) + * GödelScript [Expression](./docs/language-reference/functions.md#expression) +* GödelScript [Query Example](./example) +* GödelScript [Syntax Definition](./docs/syntax.md) + +## Compilation + +Structure of this project: + +``` +. +|-- dockerFile +|-- docs godel-script documents +|-- godel-backend godel-script backend +| |-- extension godel-script souffle extension +| |-- souffle souffle source code +| +-- tools souffle build tools ++-- godel-frontend godel-script frontend + +-- src godel-frontend source code +``` + +Requires at least the C++17 standard (`-std=c++17`). + +### Build Godel Script + +Use the commands below: + +```bash +mkdir build +cd build +cmake .. +make -j +``` + +After building, you'll find `build/godel` in the `build` folder. + +## Usage + +Use this command for help: + +> ./build/godel -h + +### Compile Target Soufflé + +> ./build/godel -p {godel library directory} {input file} -s {soufflé output file} -Of + +`-Of` is an optimization for join order; we suggest switching it on. 
+ +### Directly Run Soufflé + +> ./build/godel -p {godel library directory} {input file} -r -Of -f {database directory} + +`-Of` is an optimization for join order; we suggest switching it on. + +`-r` runs soufflé directly. + +`-v` can be used to get verbose info. diff --git a/godel-script/docs/language-reference/databases.md b/godel-script/docs/language-reference/databases.md new file mode 100644 index 00000000..609384e6 --- /dev/null +++ b/godel-script/docs/language-reference/databases.md @@ -0,0 +1,50 @@ +# GödelScript Database + +Back to [README.md](../../README.md#documents) + +## Declaration + +```rust +database School { + student: *Student, + classes: *Class as "class" + ... +} +``` + +Tables in a database must have a set type, written with `*` before the type name. +The element type of a table must be a `schema`. +For a table named `student`, when running soufflé directly, the sqlite table with +the same name is read. + +If the table name conflicts with a keyword, try using `as "real_table"`, and +GödelScript will find the data from sqlite table `real_table`. + +## Initializing + +Database has a native method `fn load(dbname: string) -> Self`. +The argument string must be a string literal. + +```rust +fn default_db() -> School { + return School::load("example_db_school.db") // must use string literal +} +``` + +Then GödelScript will give you the input database. + +## Get Schema From Database + +It's quite easy to fetch schema data from a database. +For example, in `Student::__all__(db: School) -> *Student`, +we could fetch the data by directly using `db.student`. 
+ +```rust +impl Student { + pub fn __all__(db: School) -> *Student { + return db.student + } +} +``` + +Back to [README.md](../../README.md#documents) \ No newline at end of file diff --git a/godel-script/docs/language-reference/enums.md b/godel-script/docs/language-reference/enums.md new file mode 100644 index 00000000..4011f5d3 --- /dev/null +++ b/godel-script/docs/language-reference/enums.md @@ -0,0 +1,25 @@ +# GödelScript Enum + +Back to [README.md](../../README.md#documents) + +## Declaration + +```rust +enum Status { + exited, // 0 + running, // 1 + suspend // 2 +} +``` + +Usage: + +```rust +fn example() -> Status { + Status::exited + Status::running + Status::suspend +} +``` + +Back to [README.md](../../README.md#documents) diff --git a/godel-script/docs/language-reference/functions.md b/godel-script/docs/language-reference/functions.md new file mode 100644 index 00000000..39b14ecc --- /dev/null +++ b/godel-script/docs/language-reference/functions.md @@ -0,0 +1,360 @@ +# GödelScript Function + +Back to [README.md](../../README.md#documents) + +## Content + +* Function [Declaration](#function-declaration) +* Function [Implementation](#function-implementation) +* [Statement](#statement) +* [Expression](#expression) + +## Function Declaration + +A GödelScript function declaration should include parameters, type of parameters +and the return type. +Only the `main` function does not need a return type. + +```rust +fn func_name(param0: int, param1: string) -> ReturnValueType {...} + +// deprecated +fn main() {...} +``` + +All the functions must be [implemented](#function-implementation). + +## Function Implementation + +Implement functions: + +```rust +fn func_name(param0: int, param1: string) -> int { + return param0 + param1.to_int() +} +``` + +Multiple statements in the same code block are combined with an `or` condition. +They do not share variables and conditions with each other. And the execution +is not ordered (scheduled by soufflé). 
+ +## Statement + +GödelScript supports the following statements: + +* [For Statement](#for-statement) +* [Let Statement](#let-statement) +* [Condition Statement](#condition-statement) +* [Match Statement](#match-statement) +* [Fact Statement(Experimental Feature)](#fact-statement) +* [Return Statement](#return-statement) + +Here's an example for nested statements +```rust +for(...) { + let (...) { + if (...) { + return true + } + } + let (...) { + if (...) { + return false + } + } +} +``` + +### For Statement + +For statement is used to define a variable from a set/collection. +Initialization expression is used after keyword `in`, and the type must be a set. +Initialization order is from left to right. + +```rust +for (a in Annotation(db), c in Class(db), m in Method(db)) { + ... +} +``` + +### Let Statement + +Let statement is used to define a variable initialized by single value. +initial value after `=` must not be a set type. +Initialization order is from left to right. + +```rust +let (file = c.getLocation.getFile(), anno = a.getName(), line = 1004) { + ... +} +``` + +### Condition Statement + +Condition statement does not support `else`, for the reason that this branch +often causes `ungrounded error` in soufflé. + +```rust +if (xxxxx) { + ... +} +``` + +### Match Statement + +Match statement requires the matched variable/value is type of `int` or `string`. +And must use literals for matching. + +```rust +match(type) { + 0 => if (anno.contains("a")) { + return true + }, + 1 => return false, + 2 => for(b: BinaryOperator in BinaryOperator(db)) { + if (b.getOperatorType() = "+") { + return true + } + } +} +``` + +### Fact Statement + +Fact statement is used to generate a collection of temporary data. +Once it is used in the function, other statements are not allowed. +All the data inside the fact statement must be `int` or `string` literals. 
+And each record of data must satisfy the parameter list: + +```rust +fn multi_input_test(a: int, b: string, c: int) -> bool { + [{1, "1", 1}, + {2, "2", 2}, + {3, "3", 3}, + {4, "4" ,4}] +} + +@output +fn out(a: int) -> bool { + for(num in int::range(0, 100)) { + if (multi_input_test(num, num.to_string(), num) && a = num) { + return true + } + } +} +``` + +### Return Statement + +Return statement uses two keywords, and values are required after them. + +```rust +return 0 // for return of single return type +yield 0 // for return of set return type +``` + +`return` is often used for return single value: + +```rust +fn get() -> int { + return 0 +} +``` + +`yield` is only allowed to return a set of value: + +```rust +fn int_set() -> *int { + yield 0 + yield 1 + yield 2 +} + +fn getm() -> *int { + // fn int_set() -> *int; + yield int_set() + yield 3 + yield 4 +} +``` + +## Expression + +GödelScript supports the following expressions: + +* [Call Expression](#call-expression) +* [Binary Operator](#binary-operator) + * [Mathematic Operator](#mathematic-operator) + * [Compare Operator](#compare-operator) + * [Logic Operator](#logic-operator) +* [Unary Operator](#unary-operator) + +### Call Expression + +Main components of call expressions are as follows: + +* First Expression + * [Function Call](#function-call) + * [Literal | Bracketed Expression](#literal-or-bracketed-expression) + * [Initializer List(Struct Expression)](#initializer-list) +* [Field Call](#field-call) +* [Path Call](#path-call) + +#### Function Call + +GödelScript function call is the same as other programming languages: + +```rust +global_function(arg0, arg1, arg2) +``` + +#### Literal or Bracketed Expression + +GödelScript Literal includes `int` `string` `float` `bool` literals. 
+These literals could be used as the first expression in call chains: + +```rust +fn example() -> *int { + yield "hello".len() + yield 1.add(2).sub(4) + yield "123".to_int().add(0) +} +``` + +Also bracketed expressions are allowed to be the first expression: + +```rust +fn example() -> *int { + yield ("hello" + "world").len() + yield (1 + 0).add(2).sub(4) +} +``` + +#### Initializer List + +GödelScript allows initializer for creating schema instance, +but the precondition is that this instance should be in the universal set of +the schema. +Initialization order of fields is not required. + +```rust +schema Student { + @primary id: int, + name: string +} + +impl Student { + pub fn __all__(db: DB) -> *Student { + return db.students + } +} + +fn example() -> Student { + return Student {id: 0, name: "xxx"} +} +``` + +#### Field Call + +Field call using `.`: + +1. get field from schema instance +```rust +fn example(stu: Student) -> string { + return stu.name + // ^^^^^ +} +``` +2. get table from database instance +```rust +impl Student { + @data_constraint + fn __all__(db: DB) -> *Student { + return db.students + // ^^^^^^^^^ + } +} +``` +3. call method from basic type instance or schema instance +```rust +fn example() -> *int { + yield 1.add(2) + // ^^^^^^^ + yield Student {id: 0, name: "xxx"}.getId() + // ^^^^^^^^ +} +``` + +#### Path Call + +Path call using `::`: + +1. call static method from schema +```rust +impl Student { + fn type() -> string { + return "Student" + } +} + +fn example() -> string { + return Student::type() + // ^^^^^^^^ +} +``` +2. call load method from database: `load(str: string) -> database` +```rust +fn example() -> *int { + let (db = DB::load("example_src.db")) { + // ^^^^^^^^^^^^^^^^^^^^^^^^ + for(stu in Student(db)) { + yield stu.id + } + } +} +``` +3. 
get member from enum +```rust +enum Status { + running, + suspend +} + +fn example() -> int { + Status::running + // ^^^^^^^^^ + Status::suspend + // ^^^^^^^^^ +} +``` + +### Binary Operator + +#### Mathematic Operator + +|`+`|`-`|`*`|`/`| +|:--|:--|:--|:--| +|add|sub|mul|div| + +#### Compare Operator + +Result must be `bool`. `=` will do binding operation if the left-value is not grounded when doing this +comparison, and this expression returns `true`. + +|`=`|`<`|`>`|`<=`|`>=`|`!=`| +|:--|:--|:--|:--|:--|:--| +|eq|lt|gt|le|ge|ne| + +#### Logic Operator + +|`&&`|`\|\|`| +|:--|:--| +|and|or| + +### Unary Operator + +|`!`|`-`| +|:--|:--| +|not|neg| + +Back to [README.md](../../README.md#documents) diff --git a/godel-script/docs/language-reference/impl.md b/godel-script/docs/language-reference/impl.md new file mode 100644 index 00000000..8f0de63e --- /dev/null +++ b/godel-script/docs/language-reference/impl.md @@ -0,0 +1,18 @@ +# GödelScript Impl + +Back to [README.md](../../README.md#documents) + +All functions in impl block should be implemented. + +```rust +impl SchemaA { + pub fn __all__(db: DB) -> *SchemaA { + ... + } + pub fn getName(self) -> string { + ... 
+ } +} +``` + +Back to [README.md](../../README.md#documents) diff --git a/godel-script/docs/language-reference/import.md b/godel-script/docs/language-reference/import.md new file mode 100644 index 00000000..25b65a46 --- /dev/null +++ b/godel-script/docs/language-reference/import.md @@ -0,0 +1,102 @@ +# GödelScript Import/Use + +Back to [README.md](../../README.md#documents) + +## Content + +* [Import All Symbol](#import-all-symbol) +* [Partial Import](#partial-import) +* [Package Management](#package-management) + +## Import All Symbol + +```rust +use coref::java::* +``` + +## Partial Import + +```rust +use coref::java::{Annotation, Class, Method, JavaDB} +use coref::xml::XmlElement +``` + +## Package Management + +GödelScript package manager is enabled when the command line arguments include +`-p {package dir path}`. + +### Path Mapping + +Package manager will scan the structure of the given directory, find all the files +with the `.gdl` or `.gs` extension, then map each relative path to a package path. + +If illegal characters exist in a relative path, for example `-`, or only numbers for +the file name, this path will not be accepted by package manager. But package +manager may not report an error; instead, it will ignore such paths. + +If you want to see the ignored relative paths, try using `-v` for verbose info. +Package manager will report these paths as warnings. + +But if a package path conflict occurs after the scan, package manager will report +an error and terminate the compilation process. 
+ +### Example + +``` +Library +|-- coref.java.gdl +|-- coref.xml.gdl +|-- coref.python.gdl ++-- coref + |-- go.gdl + +-- a + +-- b.gdl + +=> + +coref::java +coref::xml +coref::python +coref::go +coref::a::b +``` + +A path conflict occurs in the example below: + +``` +Library +|-- coref +| |-- java.gdl +| +-- python.gdl ++-- coref.python.gdl + +=> + +coref::java +coref::python -- \ + > conflict occurs +coref::python -- / +``` + +Illegal characters are detected in this example: + +``` +Library +|-- 0123.gdl +|-- my-godel-lib +| +-- js.gdl ++-- lib-file.123.gdl + +=> +0123 +^^^^ number + +my-godel-lib::js + ^ ^ character `-` included + +lib-file::123 + ^ ^^^ path segment including number and `-` +``` + +Back to [README.md](../../README.md#documents) diff --git a/godel-script/docs/language-reference/program.md b/godel-script/docs/language-reference/program.md new file mode 100644 index 00000000..0bd5c77d --- /dev/null +++ b/godel-script/docs/language-reference/program.md @@ -0,0 +1,111 @@ +# GödelScript Program + +Back to [README.md](../../README.md#documents) + +## Content +* 程序组成 | [Program Component](#program-component) +* 程序注释 | [Notes](#notes) +* 程序入口 | [Main](#main) + +## Program Component + +GödelScript programs may include: + +1. [package/symbol import](./import.md) +2. [enum declaration](./enums.md) +3. [schema declaration](./schemas.md) +4. [database declaration](./databases.md) +5. [schema method implementation](./impl.md) +6. [function declaration and implementation](./functions.md) +7. 
[query declaration](./queries.md) + +Here is an example including all the components mentioned above: + +```rust +// package import +use coref::java::{Annotation, JavaDB} + +// function declaration +fn default_db() -> JavaDB { + return JavaDB::load("example.db") +} + +// enum declaration +enum Status { + killed, + running, + suspend +} + +// schema declaration +schema File { + @primary id: int +} + +// database declaration +database NewDB { + file: *File +} + +impl File { + pub fn __all__() -> *File { + yield File {id: 1} + yield File {id: 2} + } + + pub fn getId(self) -> int { + return self.id + } +} + +// query declaration +query get_all_anno from + anno in Annotation(default_db()) +select + anno.id as id +``` + +## Notes + +GödelScript uses C-like notes/comments. + +```c +// single line comment + +/* + * multi line comment +*/ +``` + +## Main + +__[Warning] Deprecated: Better use `@output`__ + +Query results of GödelScript are output by the main function. +The main function is the only one that does not need a return value in GödelScript. + +Query output uses the native function `output`. +Its argument is a call to the function whose result you want to output. +And the argument function __does not need arguments__. +`output` can only be called in `main`. + +```rust +fn query_0(a: int, b: int) -> bool { + ... +} + +fn query_1(a: int, b: string) -> bool { + ... +} + +fn main() { + // output table structure: a (int) b (int) + output(query_0()) + + // output table structure: a (int) b (string) + output(query_1()) + ... 
+} +``` + +Back to [README.md](../../README.md#documents) \ No newline at end of file diff --git a/godel-script/docs/language-reference/queries.md b/godel-script/docs/language-reference/queries.md new file mode 100644 index 00000000..aa88e650 --- /dev/null +++ b/godel-script/docs/language-reference/queries.md @@ -0,0 +1,93 @@ +# GödelScript Query + +Back to [README.md](../../README.md#documents) + +## Query Name + +The keyword `query` must be followed by a query name: + +```rust +query this_is_example_query +``` + +## From + +GödelScript query uses keyword `from` for variable definition. +Declared variables must be initialized; the way of initializing variables is the same +as in the `for` statement. +But we do not need to consider whether the variable is a collection or not. + +Initialization is executed in order, so variables defined earlier may be used when +initializing later variables. + +```rust +from + anno in Annotation(db()), + class in Class(db()), + loc in class.getLocation() +``` + +## Where + +GödelScript query uses `where` with a conditional expression for filtering data. + +```rust +where + anno = class.getAnnotation() && + loc.getFile().getRelativePath().contains(".java") +``` + +## Select + +GödelScript query uses `select` to generate the final result. +Each selected item has an expression and its corresponding column name after keyword `as`. If the column name is not given, GödelScript will generate a random column name automatically. 
+ +```rust +select + anno.getName() as annotationName, + loc.getFile() as fileName, + class.getName() // random column name: column_543771021 +``` + +The complete query declaration is as follows: + +```rust +query this_is_example_query from + anno in Annotation(db()), + class in Class(db()), + loc in class.getLocation() +where + anno = class.getAnnotation() && + loc.getFile().getRelativePath().contains(".java") +select + anno.getName() as annotationName, + loc.getFile() as fileName, + class.getName() +``` + +And it is equivalent to: + +```rust +@output +fn this_is_example_query( + annotationName: string, + fileName: string, + column_543771021: string +) -> bool { + for(anno in Annotation(db()), class in Class(db())) { + let (loc = class.getLocation()) { + if (anno = class.getAnnotation() && + loc.getFile().getRelativePath().contains(".java")) { + if (annotationName = anno.getName() && + fileName = loc.getFile() && + column_543771021 = class.getName()) { + return true + } + } + } + } +} + +``` + +Back to [README.md](../../README.md#documents) diff --git a/godel-script/docs/language-reference/schemas.md b/godel-script/docs/language-reference/schemas.md new file mode 100644 index 00000000..f45fe6e0 --- /dev/null +++ b/godel-script/docs/language-reference/schemas.md @@ -0,0 +1,208 @@ +# GödelScript Schema + +Back to [README.md](../../README.md#documents) + +## Declaration + +Here's a schema declaration example, +the field declaration format is `field : type`, +primary key is annotated by `@primary`. + +```rust +schema Student { + @primary id: int, + name: string, + phone: string +} +``` + +## Initializing + +GödelScript requires a universal set for each schema. +We can simply think of it as the constructor of the schema. +The constructor is declared in an `impl` block, and should be public. +GödelScript only accepts `__all__` as the constructor. 
+ +### From Database + +Schema's universal set could be fetched from a database: + +```rust +database DB { + students: *Student +} + +impl Student { + pub fn __all__(db: DB) -> *Student { + return db.students + } +} + +fn getStudents() -> *Student { + let (db = DB::load("example.db")) { + for (student in Student(db)) { + if (student.name.contains("von")) { + yield student + } + } + } +} +``` + +Inherited schema could use [this](#initializing-inherited-schema) to initialize. + +### From Initializer + +Schema's universal set could be initialized from initializers: + +```rust +impl Student { + pub fn __all__() -> *Student { + yield Student {id: 1, name: "zxj", phone: "11451419"} + yield Student {id: 2, name: "fxj", phone: "11451419"} + yield Student {id: 3, name: "lyn", phone: "11451419"} + } +} +``` + +This example also shows the feature of `initializer`, +by this way we could create a schema instance. +Although it's an instance created temporarily, this instance also should be +included in the universal set. Otherwise the creation is failed. + +In this example, the `initializer` is used in constructor, so all the created +instance must be included in universal set. + +## Inheritance + +### Inherit Fields + +Schema could be inherited, after inheritance, parent schema's fields will be +add at the front of the child schema structure. +All the inheritable methods will also be inherited from parent schema, except +`__all__`. + +```rust +schema Lee extends Student { + // parent fields added here. + + // @primary id: int, + // name: string, + // phone: string, + for_example: int +} +``` + +### Inherit Methods + +If parent schema has these two methods, child schema will inherit them. + +```rust +impl Student { + // method, first parameter is self, do not need type declaration. 
+ fn getName(self) -> string { + return self.name + } + // static method, without self parameter + fn getType() -> string { + return "Student" + } +} +``` + +### Method Override + +GödelScript allows child schema implements methods that +having the same names of parent methods. +Methods of parent schema will be overridden. +Overridden methods share the same name, +but there's no need to share the same parameters and return type. + +```rust +impl Lee { + fn getType() -> string { + return "Lee Student" + } +} +``` + +### Initializing Inherited Schema + +We often initialize child schema by universal set of parent schema: + +```rust +schema Lee extends Student { + // @primary id: int, + // name: string, + // phone: string, + for_example: int +} + +impl Lee { + pub fn __all__(db: DB) -> *Lee { + // schema(db) will call schema::__all__(db), this is a syntactic sugar + // also it is correct to directly use schema::__all__(db) + for (parent in Student(db)) { + // ..parent here is a spread syntax + yield Lee { ..parent, for_example: 114 } + } + } +} +``` + +And from the example above, we use another initializer feature `spread`, +this syntax will expand the input schema instance. GödelScript only do duck-type +check for the schema instance, so if the structure is correct, the program is +correct. It is not a must to use parent schema to initialize... 
+ +```rust +yield Lee { ..parent, for_example: 114 } +// this is equivalent to +// yield Lee { id: parent.id, name: parent.name, phone: parent.phone, for_example: 114 } +``` + +Here is an example that gets `Class` in a specified file from the universal set of `Class`, +by using inheritance: + +```rust +schema ClassInIDKFile extends Class {} + +impl ClassInIDKFile { + fn __all__(db: JavaDB) -> *ClassInIDKFile { + for(c in Class(db)) { + if (c.getLocation().getFile().getRelativePath() = "./com/xxx/xxx.java") { + yield ClassInIDKFile { ..c } + } + } + } +} +``` + +### Comparison and Type Casting + +#### `fn key_eq(self, T) -> bool` | `fn key_neq(self, T) -> bool` + +Primary key comparison; requires both schemas to have an `int` type primary key. + +```rust +method.key_eq(function) +method.key_neq(function) +``` + +#### `fn to<T>(self) -> T` + +Convert schema instance to another schema instance, duck type check. + +```rust +stmt.to<ElementParent>() +``` + +#### `fn is<T>(self) -> bool` + +Check whether this schema instance is in the universal set of another schema, duck type check. + +```rust +stmt.is<ElementParent>() +``` + +Back to [README.md](../../README.md#documents) \ No newline at end of file diff --git a/godel-script/docs/language-reference/type.md b/godel-script/docs/language-reference/type.md new file mode 100644 index 00000000..5f097686 --- /dev/null +++ b/godel-script/docs/language-reference/type.md @@ -0,0 +1,112 @@ +# GödelScript Types + +Back to [README.md](../../README.md#documents) + +GödelScript reserved some symbols for basic types, +other types defined by users could also be used: + +* [Enum](./enums.md) +* [Schema](./schemas.md) +* [Database](./databases.md) + +Some type definitions like the ones below may also exist: + +```rust +*int +*string +*Annotation +``` + +Type with `*` means it is a __set__ type: + +```rust +*int // int set +*string // string set +*Annotation // schema Annotation set +``` + +## GödelScript Basic Types + +### Bool + +`bool` literal should be `true` or `false`. 
+ +### String + +`string` includes these native methods: + +```rust +fn to_int(self: string) -> int; +fn substr(self: string, begin: int, length: int) -> string; +fn len(self: string) -> int; +fn get_regex_match_result(self: string, str: string, num: int) -> string; +fn matches(self: string, str: string) -> bool; +fn contains(self: string, str: string) -> bool; +fn ne(self: string, str: string) -> string; +fn eq(self: string, str: string) -> string; +fn add(self: string, str: string) -> string; +fn to_set(self) -> *string; +fn to_upper(self) -> string; +fn to_lower(self) -> string; +fn replace_all(self, pattern: string, replacement: string) -> string; +fn replace_once(self, pattern: string, replacement: string, index: int) -> string; +``` + +### Float + +`float` includes these native methods: + +```rust +fn rem(self: float, num: float) -> float; +fn pow(self: float, num: float) -> float; +fn le(self: float, num: float) -> float; +fn gt(self: float, num: float) -> float; +fn ne(self: float, num: float) -> float; +fn ge(self: float, num: float) -> float; +fn eq(self: float, num: float) -> float; +fn div(self: float, num: float) -> float; +fn sub(self: float, num: float) -> float; +fn mul(self: float, num: float) -> float; +fn lt(self: float, num: float) -> float; +fn add(self: float, num: float) -> float; +fn neg(self: float) -> float; +``` + +### Int + +`int` includes these native methods: + +```rust +fn bitxor(self: int, num: int) -> int; +fn bitor(self: int, num: int) -> int; +fn bitand(self: int, num: int) -> int; +fn rem(self: int, num: int) -> int; +fn pow(self: int, num: int) -> int; +fn le(self: int, num: int) -> int; +fn lt(self: int, num: int) -> int; +fn gt(self: int, num: int) -> int; +fn ne(self: int, num: int) -> int; +fn ge(self: int, num: int) -> int; +fn eq(self: int, num: int) -> int; +fn div(self: int, num: int) -> int; +fn mul(self: int, num: int) -> int; +fn sub(self: int, num: int) -> int; +fn add(self: int, num: int) -> int; +fn bitnot(self: 
int) -> int; +fn neg(self: int) -> int; +fn to_string(self: int) -> string; +fn range(begin: int, end: int) -> *int; +fn to_set(self) -> *int; +``` + +### Aggregator for Set Types + +```rust +fn len(self: *T) -> int; +fn sum(self: *int) -> int; +fn min(self: *int) -> int; +fn max(self: *int) -> int; +fn find(self: *T0, instance: T1) -> T0; +``` + +Back to [README.md](../../README.md#documents) \ No newline at end of file diff --git a/godel-script/docs/syntax.md b/godel-script/docs/syntax.md new file mode 100644 index 00000000..349399a5 --- /dev/null +++ b/godel-script/docs/syntax.md @@ -0,0 +1,248 @@ +# GödelScript Syntax + +Back to [README.md](../README.md#documents) + +GödelScript Syntax Spec, +Usage Document see [GödelScript Program](./language-reference/program.md)。 + +## Identifier / Literal / Annotation +```ebnf +identifier = (* basic token *); +number_literal = (* basic token *); +string_literal = (* basic token *); +literal = number_literal | string_literal; +``` + +## Annotation +```ebnf +prop_pair = identifier "=" string_literal; +annotation = + "@" identifier ["(" string_literal ")"] | + "@" identifier ["(" [prop_pair [{"," prop_pair}]] ")"]; +``` + +Example: + +```rust +this_is_an_identifier +12345 +"string literal" +``` + +Annotation example: + +```rust +@inline +fn get(a: Method) -> bool { + ... 
+} +``` + +## Program + +About main program, [Click](./language-reference/program.md)。 + +```ebnf +program = {use_stmt} { + {function_decl} | + {enum_decl} | + {schema_decl} | + {use_decl} | + {impl_decl} | + {database_decl} | + {query_decl} +}; +``` + +## Declaration + +### Function + +Function Usage [Documents](./language-reference/functions.md)。 + +```ebnf +function_decl = + [{annotation}] + "fn" id "(" [params] ")" ["->" type_def] + [ ";" | ("{" block_stmt "}")]; +params = param {"," param}; +param = identifier [":" param_type_def]; +param_type_def = ["*"] identifier; +block_stmt = {statement}; +``` + +### Enum + +Enum Usage [Documents](./language-reference/enums.md)。 + +```ebnf +enum_decl = "enum" identifier "{" [identifier {"," identifier}] "}"; +``` + +### Schema + +Schema Usage [Documents](./language-reference/schemas.md)。 + +```ebnf +schema_decl = + "schema" identifier ["extends" type_def] "{" [schema_members] "}"; +schema_members = schema_member {"," schema_member}; +schema_member = [anno] id ":" type_def; +``` + +### Database + +Database Usage [Documents](./language-reference/databases.md)。 + +```ebnf +database_decl = "database" identifier "{" [database_tables] "}"; +database_tables = database_table {"," database_table}; +database_table = identifier ":" type_def ["as" string_literal]; +``` + +### Use / Import + +Package Manager / Symbol Import +[Documents](./language-reference/import.md)。 + +```ebnf +use_stmt = "use" identifier {"::" identifier} ["::" ("*"|multi_use)]; +multi_use = "{" identifier {"," identifier} "}"; +``` + +### Implement + +Impl Usage [Documents](./language-reference/impl.md)。 + +```ebnf +impl_decl = "impl" identifier ["for" identifier] "{" {function_decl} "}"; +``` + +### GödelScript Query + +Query Usage [Documents](./language-reference/queries.md)。 + +```ebnf +query_decl = + "query" identifier + "from" from_list + ["where" or_expr] + "select" select_list; +from_list = from_def {"," from_def}; +from_def = identifier "in" or_expr; 
+select_list = select_column {"," select_column}; +select_column = or_expr ["as" identifier]; +``` + +## Statement + +GödelScript statement +[Documents](./language-reference/functions.md#statement). + +```ebnf +statement = + let_stmt | + cond_stmt | + for_stmt | + match_stmt | + fact_stmt | + ret_stmt | + in_block_expr; + +def = identifier [":" type_def]; +type_def = identifier ["::" identifier]; +``` + +### Let Statement +```ebnf +let_stmt = "let" "(" let_def ")" "{" [statement] "}"; +let_def = def "=" expression {"," def "=" expression}; +``` + +### Condition Statement +```ebnf +cond_stmt = if_stmt {elsif_stmt} [else_stmt]; +if_stmt = "if" "(" expression ")" "{" [statement] "}"; +elsif_stmt = "else" "if" "(" expression ")" "{" [statement] "}"; +else_stmt = "else" "{" [statement] "}"; +``` + +### For Statement +```ebnf +for_stmt = "for" "(" for_def ")" "{" [statement] "}"; +for_def = def "in" expression {"," def "in" expression}; +``` + +### Match Statement +```ebnf +match_stmt = "match" "(" expression ")" "{" [match_pairs] "}"; +match_pairs = match_pair {"," match_pair}; +match_pair = literal "=>" statement; +``` + +### Fact Statement +```ebnf +fact_stmt = "[" fact_data {"," fact_data} "]"; +fact_data = "{" + (number_literal | string_literal) + {"," (number_literal | string_literal)} +"}"; +``` + +### Return Statement +```ebnf +ret_stmt = ("return" | "yield") or_expr; +``` + +### In Block Expression (as Statement) +```ebnf +in_block_expr = expression; +``` + +## Expression + +GödelScript Expression [Documents](./language-reference/functions.md#expression). + +```ebnf +expression = or_expr; +``` + +### Calculation +```ebnf +or_expr = and_expr {"||" and_expr}; +and_expr = (not_expr | cmp_expr) {"&&" (not_expr | cmp_expr)}; +curved_expr = "(" or_expr ")"; +unary_expr = "-" (symcall | curved_expr | unary_expr); +not_expr = "!" 
cmp_expr; +cmp_expr = + additive_expr + [("=" | "!=" | "<" | "<=" | ">" | ">=" | "in") additive_expr]; +additive_expr = multiple_expr {("+" | "-") multiple_expr}; +multiple_expr = + (symcall | curved_expr | unary_expr) + {("*" | "/") (symcall | curved_expr | unary_expr)}; +``` + +### Call Expression +```ebnf +symcall = symhead {sympath | symget}; +symhead = + identifier [initializer|funcall] + | literal + | curved_expr + ; +sympath = "::" identifier [funcall]; +symget = "." identifier [funcall]; + +funcall = "(" [arglist] ")"; +arglist = or_expr {"," or_expr}; +``` + +### Initializer + +Usage: [Initializer List](./language-reference/functions.md#initializer-list) + +```ebnf +initializer = "{" [initializer_pair ","] "}"; +initializer_pair = identifier ":" or_expr; +``` \ No newline at end of file From 90175415d0e5d80424054c9aa1e9e0746fe60e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=95=E5=B1=B1?= Date: Mon, 24 Jun 2024 16:33:23 +0800 Subject: [PATCH 02/10] update godel-script source code of frontend --- godel-script/CMakeLists.txt | 147 + .../godel-frontend/src/ast/ast_dumper.cpp | 831 +++++ .../godel-frontend/src/ast/ast_dumper.h | 133 + .../godel-frontend/src/ast/ast_node.h | 169 + .../godel-frontend/src/ast/ast_root.cpp | 24 + .../godel-frontend/src/ast/ast_root.h | 41 + .../godel-frontend/src/ast/ast_visitor.cpp | 340 ++ .../godel-frontend/src/ast/ast_visitor.h | 70 + godel-script/godel-frontend/src/ast/decl.cpp | 133 + godel-script/godel-frontend/src/ast/decl.h | 416 +++ godel-script/godel-frontend/src/ast/expr.cpp | 120 + godel-script/godel-frontend/src/ast/expr.h | 493 +++ godel-script/godel-frontend/src/ast/stmt.cpp | 140 + godel-script/godel-frontend/src/ast/stmt.h | 350 ++ .../src/ast/template_extractor.cpp | 620 ++++ .../src/ast/template_extractor.h | 93 + godel-script/godel-frontend/src/cli.cpp | 220 ++ godel-script/godel-frontend/src/cli.h | 129 + godel-script/godel-frontend/src/engine.cpp | 546 +++ godel-script/godel-frontend/src/engine.h | 131 + 
.../godel-frontend/src/error/error.cpp | 268 ++ godel-script/godel-frontend/src/error/error.h | 144 + .../src/ir/aggregator_inline_remark.cpp | 72 + .../src/ir/aggregator_inline_remark.h | 30 + .../godel-frontend/src/ir/flatten_block.cpp | 190 + .../godel-frontend/src/ir/flatten_block.h | 63 + .../godel-frontend/src/ir/inst_combine.cpp | 340 ++ .../godel-frontend/src/ir/inst_combine.h | 115 + .../godel-frontend/src/ir/ir_context.cpp | 510 +++ .../godel-frontend/src/ir/ir_context.h | 248 ++ godel-script/godel-frontend/src/ir/ir_gen.cpp | 2489 +++++++++++++ godel-script/godel-frontend/src/ir/ir_gen.h | 267 ++ godel-script/godel-frontend/src/ir/lir.cpp | 422 +++ godel-script/godel-frontend/src/ir/lir.h | 638 ++++ .../godel-frontend/src/ir/name_mangling.cpp | 41 + .../godel-frontend/src/ir/name_mangling.h | 11 + godel-script/godel-frontend/src/ir/pass.cpp | 3 + godel-script/godel-frontend/src/ir/pass.h | 42 + .../godel-frontend/src/ir/pass_manager.cpp | 56 + .../godel-frontend/src/ir/pass_manager.h | 21 + .../godel-frontend/src/ir/remove_unused.cpp | 296 ++ .../godel-frontend/src/ir/remove_unused.h | 66 + godel-script/godel-frontend/src/lexer.cpp | 503 +++ godel-script/godel-frontend/src/lexer.h | 219 ++ godel-script/godel-frontend/src/main.cpp | 17 + .../src/package/module_tree.cpp | 85 + .../godel-frontend/src/package/module_tree.h | 53 + .../godel-frontend/src/package/package.cpp | 367 ++ .../godel-frontend/src/package/package.h | 101 + godel-script/godel-frontend/src/parse.cpp | 1213 +++++++ godel-script/godel-frontend/src/parse.h | 173 + .../src/sema/annotation_checker.cpp | 151 + .../src/sema/annotation_checker.h | 82 + .../godel-frontend/src/sema/context.cpp | 100 + .../godel-frontend/src/sema/context.h | 68 + .../src/sema/data_structure_construct.cpp | 368 ++ .../src/sema/data_structure_construct.h | 39 + .../src/sema/fact_statement_checker.cpp | 65 + .../src/sema/fact_statement_checker.h | 25 + .../src/sema/function_declaration.cpp | 507 +++ 
.../src/sema/function_declaration.h | 80 + .../src/sema/global_symbol_loader.cpp | 170 + .../src/sema/global_symbol_loader.h | 35 + .../src/sema/inherit_schema.cpp | 157 + .../godel-frontend/src/sema/inherit_schema.h | 38 + .../src/sema/self_reference_check.cpp | 125 + .../src/sema/self_reference_check.h | 83 + .../godel-frontend/src/sema/symbol_import.cpp | 243 ++ .../godel-frontend/src/sema/symbol_import.h | 40 + .../src/sema/ungrounded_checker.cpp | 563 +++ .../src/sema/ungrounded_checker.h | 133 + godel-script/godel-frontend/src/semantic.cpp | 3083 +++++++++++++++++ godel-script/godel-frontend/src/semantic.h | 169 + godel-script/godel-frontend/src/symbol.cpp | 705 ++++ godel-script/godel-frontend/src/symbol.h | 745 ++++ godel-script/godel-frontend/src/util/util.cpp | 314 ++ godel-script/godel-frontend/src/util/util.h | 85 + 77 files changed, 22382 insertions(+) create mode 100644 godel-script/CMakeLists.txt create mode 100644 godel-script/godel-frontend/src/ast/ast_dumper.cpp create mode 100644 godel-script/godel-frontend/src/ast/ast_dumper.h create mode 100644 godel-script/godel-frontend/src/ast/ast_node.h create mode 100644 godel-script/godel-frontend/src/ast/ast_root.cpp create mode 100644 godel-script/godel-frontend/src/ast/ast_root.h create mode 100644 godel-script/godel-frontend/src/ast/ast_visitor.cpp create mode 100644 godel-script/godel-frontend/src/ast/ast_visitor.h create mode 100644 godel-script/godel-frontend/src/ast/decl.cpp create mode 100644 godel-script/godel-frontend/src/ast/decl.h create mode 100644 godel-script/godel-frontend/src/ast/expr.cpp create mode 100644 godel-script/godel-frontend/src/ast/expr.h create mode 100644 godel-script/godel-frontend/src/ast/stmt.cpp create mode 100644 godel-script/godel-frontend/src/ast/stmt.h create mode 100644 godel-script/godel-frontend/src/ast/template_extractor.cpp create mode 100644 godel-script/godel-frontend/src/ast/template_extractor.h create mode 100644 godel-script/godel-frontend/src/cli.cpp 
create mode 100644 godel-script/godel-frontend/src/cli.h create mode 100644 godel-script/godel-frontend/src/engine.cpp create mode 100644 godel-script/godel-frontend/src/engine.h create mode 100644 godel-script/godel-frontend/src/error/error.cpp create mode 100644 godel-script/godel-frontend/src/error/error.h create mode 100644 godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp create mode 100644 godel-script/godel-frontend/src/ir/aggregator_inline_remark.h create mode 100644 godel-script/godel-frontend/src/ir/flatten_block.cpp create mode 100644 godel-script/godel-frontend/src/ir/flatten_block.h create mode 100644 godel-script/godel-frontend/src/ir/inst_combine.cpp create mode 100644 godel-script/godel-frontend/src/ir/inst_combine.h create mode 100644 godel-script/godel-frontend/src/ir/ir_context.cpp create mode 100644 godel-script/godel-frontend/src/ir/ir_context.h create mode 100644 godel-script/godel-frontend/src/ir/ir_gen.cpp create mode 100644 godel-script/godel-frontend/src/ir/ir_gen.h create mode 100644 godel-script/godel-frontend/src/ir/lir.cpp create mode 100644 godel-script/godel-frontend/src/ir/lir.h create mode 100644 godel-script/godel-frontend/src/ir/name_mangling.cpp create mode 100644 godel-script/godel-frontend/src/ir/name_mangling.h create mode 100644 godel-script/godel-frontend/src/ir/pass.cpp create mode 100644 godel-script/godel-frontend/src/ir/pass.h create mode 100644 godel-script/godel-frontend/src/ir/pass_manager.cpp create mode 100644 godel-script/godel-frontend/src/ir/pass_manager.h create mode 100644 godel-script/godel-frontend/src/ir/remove_unused.cpp create mode 100644 godel-script/godel-frontend/src/ir/remove_unused.h create mode 100644 godel-script/godel-frontend/src/lexer.cpp create mode 100644 godel-script/godel-frontend/src/lexer.h create mode 100644 godel-script/godel-frontend/src/main.cpp create mode 100644 godel-script/godel-frontend/src/package/module_tree.cpp create mode 100644 
godel-script/godel-frontend/src/package/module_tree.h create mode 100644 godel-script/godel-frontend/src/package/package.cpp create mode 100644 godel-script/godel-frontend/src/package/package.h create mode 100644 godel-script/godel-frontend/src/parse.cpp create mode 100644 godel-script/godel-frontend/src/parse.h create mode 100644 godel-script/godel-frontend/src/sema/annotation_checker.cpp create mode 100644 godel-script/godel-frontend/src/sema/annotation_checker.h create mode 100644 godel-script/godel-frontend/src/sema/context.cpp create mode 100644 godel-script/godel-frontend/src/sema/context.h create mode 100644 godel-script/godel-frontend/src/sema/data_structure_construct.cpp create mode 100644 godel-script/godel-frontend/src/sema/data_structure_construct.h create mode 100644 godel-script/godel-frontend/src/sema/fact_statement_checker.cpp create mode 100644 godel-script/godel-frontend/src/sema/fact_statement_checker.h create mode 100644 godel-script/godel-frontend/src/sema/function_declaration.cpp create mode 100644 godel-script/godel-frontend/src/sema/function_declaration.h create mode 100644 godel-script/godel-frontend/src/sema/global_symbol_loader.cpp create mode 100644 godel-script/godel-frontend/src/sema/global_symbol_loader.h create mode 100644 godel-script/godel-frontend/src/sema/inherit_schema.cpp create mode 100644 godel-script/godel-frontend/src/sema/inherit_schema.h create mode 100644 godel-script/godel-frontend/src/sema/self_reference_check.cpp create mode 100644 godel-script/godel-frontend/src/sema/self_reference_check.h create mode 100644 godel-script/godel-frontend/src/sema/symbol_import.cpp create mode 100644 godel-script/godel-frontend/src/sema/symbol_import.h create mode 100644 godel-script/godel-frontend/src/sema/ungrounded_checker.cpp create mode 100644 godel-script/godel-frontend/src/sema/ungrounded_checker.h create mode 100644 godel-script/godel-frontend/src/semantic.cpp create mode 100644 godel-script/godel-frontend/src/semantic.h create 
mode 100644 godel-script/godel-frontend/src/symbol.cpp create mode 100644 godel-script/godel-frontend/src/symbol.h create mode 100644 godel-script/godel-frontend/src/util/util.cpp create mode 100644 godel-script/godel-frontend/src/util/util.h diff --git a/godel-script/CMakeLists.txt b/godel-script/CMakeLists.txt new file mode 100644 index 00000000..3f43e3b4 --- /dev/null +++ b/godel-script/CMakeLists.txt @@ -0,0 +1,147 @@ +cmake_minimum_required(VERSION 3.1) + +project("GodelScript" VERSION 0.1 DESCRIPTION "GodelScript compiler") + +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) + +if(UNIX AND NOT APPLE) + set(LINUX TRUE) +endif() + +set(GODEL_FRONTEND_HDR_FILES + godel-frontend/src/cli.h + godel-frontend/src/engine.h + godel-frontend/src/lexer.h + godel-frontend/src/parse.h + godel-frontend/src/semantic.h + godel-frontend/src/symbol.h + godel-frontend/src/ir/aggregator_inline_remark.h + godel-frontend/src/ir/flatten_block.h + godel-frontend/src/ir/ir_gen.h + godel-frontend/src/ir/ir_context.h + godel-frontend/src/ir/lir.h + godel-frontend/src/ir/inst_combine.h + godel-frontend/src/ir/name_mangling.h + godel-frontend/src/ir/pass.h + godel-frontend/src/ir/pass_manager.h + godel-frontend/src/ir/remove_unused.h + godel-frontend/src/error/error.h + godel-frontend/src/ast/ast_node.h + godel-frontend/src/ast/ast_root.h + godel-frontend/src/ast/ast_visitor.h + godel-frontend/src/ast/decl.h + godel-frontend/src/ast/expr.h + godel-frontend/src/ast/stmt.h + godel-frontend/src/ast/ast_dumper.h + godel-frontend/src/ast/template_extractor.h + godel-frontend/src/sema/ungrounded_checker.h + godel-frontend/src/sema/fact_statement_checker.h + godel-frontend/src/sema/self_reference_check.h + godel-frontend/src/sema/context.h + godel-frontend/src/sema/global_symbol_loader.h + godel-frontend/src/sema/symbol_import.h + 
godel-frontend/src/sema/data_structure_construct.h + godel-frontend/src/sema/inherit_schema.h + godel-frontend/src/sema/function_declaration.h + godel-frontend/src/sema/annotation_checker.h + godel-frontend/src/util/util.h + godel-frontend/src/package/package.h + godel-frontend/src/package/module_tree.h) + +set(GODEL_FRONTEND_SRC_FILES + godel-frontend/src/cli.cpp + godel-frontend/src/engine.cpp + godel-frontend/src/lexer.cpp + godel-frontend/src/parse.cpp + godel-frontend/src/semantic.cpp + godel-frontend/src/symbol.cpp + godel-frontend/src/ir/aggregator_inline_remark.cpp + godel-frontend/src/ir/flatten_block.cpp + godel-frontend/src/ir/ir_gen.cpp + godel-frontend/src/ir/ir_context.cpp + godel-frontend/src/ir/lir.cpp + godel-frontend/src/ir/inst_combine.cpp + godel-frontend/src/ir/name_mangling.cpp + godel-frontend/src/ir/pass.cpp + godel-frontend/src/ir/pass_manager.cpp + godel-frontend/src/ir/remove_unused.cpp + godel-frontend/src/error/error.cpp + godel-frontend/src/ast/ast_visitor.cpp + godel-frontend/src/ast/ast_root.cpp + godel-frontend/src/ast/decl.cpp + godel-frontend/src/ast/expr.cpp + godel-frontend/src/ast/stmt.cpp + godel-frontend/src/ast/ast_dumper.cpp + godel-frontend/src/ast/template_extractor.cpp + godel-frontend/src/sema/ungrounded_checker.cpp + godel-frontend/src/sema/fact_statement_checker.cpp + godel-frontend/src/sema/self_reference_check.cpp + godel-frontend/src/sema/context.cpp + godel-frontend/src/sema/global_symbol_loader.cpp + godel-frontend/src/sema/symbol_import.cpp + godel-frontend/src/sema/data_structure_construct.cpp + godel-frontend/src/sema/inherit_schema.cpp + godel-frontend/src/sema/function_declaration.cpp + godel-frontend/src/sema/annotation_checker.cpp + godel-frontend/src/util/util.cpp + godel-frontend/src/package/package.cpp + godel-frontend/src/package/module_tree.cpp) + +execute_process(COMMAND mkdir -p install) +set(ENV{CC} cc) +set(ENV{CXX} c++) +# build bison +set(BISON_PKG bison-3.8.2) +execute_process(COMMAND tar -xf 
${CMAKE_CURRENT_SOURCE_DIR}/godel-backend/tools/${BISON_PKG}.tar) +execute_process(COMMAND ./configure --prefix=${CMAKE_BINARY_DIR}/install WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${BISON_PKG}) +execute_process(COMMAND make -j install WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${BISON_PKG}) + +# build flex +set(FLEX_PKG flex-2.6.4) +execute_process(COMMAND tar -xf ${CMAKE_CURRENT_SOURCE_DIR}/godel-backend/tools/${FLEX_PKG}.tar) +execute_process(COMMAND ./configure --prefix=${CMAKE_BINARY_DIR}/install WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${FLEX_PKG}) +execute_process(COMMAND make -j install WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${FLEX_PKG}) + +# set variables for souffle target +set(FLEX_EXECUTABLE ${CMAKE_BINARY_DIR}/install/bin/flex) +set(BISON_EXECUTABLE ${CMAKE_BINARY_DIR}/install/bin/bison) + +set(SOUFFLE_DOMAIN_64BIT ON) +set(SOUFFLE_USE_CURSES OFF) +set(SOUFFLE_ENABLE_TESTING OFF) + +add_subdirectory(godel-backend/souffle) +add_subdirectory(godel-backend/extension) + +add_library(godel-frontend STATIC + ${GODEL_FRONTEND_SRC_FILES}) + +target_link_libraries(godel-frontend PUBLIC + libsouffle souffle_ext) + +target_include_directories(godel-frontend PUBLIC + ${PROJECT_SOURCE_DIR}) + +# add binary target godel +add_executable(godel godel-frontend/src/main.cpp) +# avoid easylogging to generate myeasylog.log automatically +add_definitions(-DELPP_NO_DEFAULT_LOG_FILE) +# link static library +target_link_libraries(godel + PRIVATE godel-frontend) +# link dynamic library +target_link_libraries(godel PUBLIC + libsouffle-shared souffle_ext) + +# add testing rule +enable_testing() +add_test(NAME godel-test + COMMAND godel -p ${PROJECT_SOURCE_DIR}/godel-frontend/test/pkgtest ${PROJECT_SOURCE_DIR}/godel-frontend/test/pkgtest/a.gdl) +add_test(NAME gs_new-test + COMMAND godel ${PROJECT_SOURCE_DIR}/godel-frontend/test/semantic/gs_new.gdl) +add_test(NAME fact-stmt-test + COMMAND godel ${PROJECT_SOURCE_DIR}/godel-frontend/test/semantic/fact.gdl) diff --git 
a/godel-script/godel-frontend/src/ast/ast_dumper.cpp b/godel-script/godel-frontend/src/ast/ast_dumper.cpp new file mode 100644 index 00000000..666a28d6 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_dumper.cpp @@ -0,0 +1,831 @@ +#include "godel-frontend/src/ast/ast_dumper.h" + +#include + +namespace godel { + +std::string ast_dumper::format_pointer(ast_node* node) { + std::stringstream ss; + ss << light_yellow << node << reset; + return ss.str(); +} + +std::string ast_dumper::format_resolve(ast_node* node) { + if (node->get_resolve().type.is_err()) { + return ""; + } + std::stringstream ss; + ss << light_green << " <\""; + ss << "resolve: " << node->get_resolve().type.full_path_name(); + ss << "\">" << reset; + return ss.str(); +} + +std::string ast_dumper::format_location(const span& location) { + std::stringstream ss; + ss << light_grey << " <" << location.file << ":" + << location.start_line << ":" + << location.start_column + 1 << ">" + << reset << "\n"; + return ss.str(); +} + +std::string ast_dumper::format_string(const std::string& str) { + std::stringstream ss; + ss << light_green << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_identifier(const std::string& str) { + std::stringstream ss; + ss << light_cyan << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_annotation(const std::string& str) { + std::stringstream ss; + ss << light_red << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_number(const int64_t num) { + std::stringstream ss; + ss << light_red << num << reset; + return ss.str(); +} + +std::string ast_dumper::format_number(const float num) { + std::stringstream ss; + ss << light_red << num << reset; + return ss.str(); +} + +std::string ast_dumper::format_decl(const std::string& str) { + std::stringstream ss; + ss << cyan << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_expr(const std::string& str) { + std::stringstream ss; + ss << green << str 
<< reset; + return ss.str(); +} + +std::string ast_dumper::format_stmt(const std::string& str) { + std::stringstream ss; + ss << purple << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_operator(const std::string& str) { + std::stringstream ss; + ss << red << str << reset; + return ss.str(); +} + +std::string ast_dumper::format_lambda(const std::string& str) { + std::stringstream ss; + ss << purple << str << reset; + return ss.str(); +} + +bool ast_dumper::visit_ast_root(ast_root* node) { + indent.dump(); + os << format_stmt("AbstractSyntaxTreeRoot ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_use_statements()) { + if (i == node->get_use_statements().back() && + !node->get_declarations().size()) { + indent.set_last(); + } + i->accept(this); + } + for(auto i : node->get_declarations()) { + if (i == node->get_declarations().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_ast_null(ast_null* node) { + indent.dump(); + os << "Null " << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_number_literal(number_literal* node) { + indent.dump(); + os << format_expr("NumberLiteral ") << format_pointer(node) << " "; + if (node->is_integer()) { + os << format_number(node->get_integer()); + } else { + os << format_number(node->get_float()); + } + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_string_literal(string_literal* node) { + indent.dump(); + os << format_expr("StringLiteral ") << format_pointer(node); + os << " " << format_string(node->get_literal()); + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_boolean_literal(boolean_literal* node) { + indent.dump(); + os << 
format_expr("BooleanLiteral ") << format_pointer(node); + os << " " << format_identifier(node->get_flag()? "true":"false"); + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_identifier(identifier* node) { + indent.dump(); + os << format_expr("Identifier ") << format_pointer(node); + os << " " << format_identifier(node->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_unary_operator(unary_operator* node) { + indent.dump(); + os << format_operator("UnaryOperator "); + switch(node->get_operator_type()) { + case unary_operator::type::arithmetic_negation: + os << format_operator("-"); break; + case unary_operator::type::logical_negation: + os << format_operator("!"); break; + } + os << " " << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_child()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_binary_operator(binary_operator* node) { + indent.dump(); + os << format_operator("BinaryOperator "); + switch(node->get_operator_type()) { + case binary_operator::type::compare_equal: + os << format_operator("="); break; + case binary_operator::type::compare_great: + os << format_operator(">"); break; + case binary_operator::type::compare_great_equal: + os << format_operator(">="); break; + case binary_operator::type::compare_less: + os << format_operator("<"); break; + case binary_operator::type::compare_less_equal: + os << format_operator("<="); break; + case binary_operator::type::compare_not_equal: + os << format_operator("!="); break; + case binary_operator::type::logical_and: + os << format_operator("&&"); break; + case binary_operator::type::logical_or: + os << format_operator("||"); break; + case binary_operator::type::add: + os << format_operator("+"); break; + case binary_operator::type::sub: + os << 
format_operator("-"); break; + case binary_operator::type::mult: + os << format_operator("*"); break; + case binary_operator::type::div: + os << format_operator("/"); break; + case binary_operator::type::in: + os << format_operator("in"); break; + } + os << " " << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + node->get_left()->accept(this); + indent.set_last(); + node->get_right()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_func_call(func_call* node) { + indent.dump(); + os << format_expr("FunctionCall ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_arguments()) { + if (i == node->get_arguments().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_initializer_pair(initializer_pair* node) { + indent.dump(); + os << format_expr("InitializerPair ") << format_pointer(node) << " "; + os << format_identifier(node->get_field_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_field_value()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_spread_expr(spread_expr* node) { + indent.dump(); + os << format_expr("SpreadExpr ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_child()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_initializer(initializer* node) { + indent.dump(); + os << format_expr("Initializer ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_field_pairs()) { + if (i == node->get_field_pairs().back() && + node->get_spread_exprs().empty()) { + indent.set_last(); + } + i->accept(this); 
+ } + for(auto i : node->get_spread_exprs()) { + if (i == node->get_spread_exprs().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_call_head(call_head* node) { + indent.dump(); + os << format_expr("CallHead ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->has_func_call() && !node->is_initializer()) { + indent.set_last(); + } + node->get_first_expression()->accept(this); + if (!node->is_initializer()) { + indent.set_last(); + } + if (node->has_func_call()) { + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + indent.set_last(); + node->get_initializer()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_call_expr(call_expr* node) { + indent.dump(); + switch(node->get_call_type()) { + case call_expr::type::get_field: os << format_expr("GetField "); break; + case call_expr::type::get_path: os << format_expr("GetPath "); break; + } + os << format_pointer(node); + os << " " << format_identifier(node->get_field_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (node->is_generic()) { + if (!node->has_func_call() && !node->is_initializer()) { + indent.set_last(); + } + node->get_generic_type()->accept(this); + } + if (node->has_func_call()) { + indent.set_last(); + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + indent.set_last(); + node->get_initializer()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_call_root(call_root* node) { + indent.dump(); + os << format_expr("CallRoot ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->get_call_chain().size()) { + indent.set_last(); + } + node->get_call_head()->accept(this); + for(auto i : node->get_call_chain()) { + if (i == 
node->get_call_chain().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_block_stmt(block_stmt* node) { + indent.dump(); + os << format_stmt("CodeBlock ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_statement()) { + if (i == node->get_statement().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_multi_use_stmt(multi_use_stmt* node) { + indent.dump(); + os << format_stmt("MultipleUseStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_import_symbol()) { + if (i == node->get_import_symbol().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_use_stmt(use_stmt* node) { + indent.dump(); + os << format_stmt("UseStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_path()) { + i->accept(this); + } + if (node->is_use_all()) { + indent.set_last(); + indent.dump(); + os << format_stmt("ImportAllSymbols ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + } else { + indent.set_last(); + node->get_multi_use()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_let_stmt(let_stmt* node) { + indent.dump(); + os << format_stmt("LetStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_symbols()) { + if (i == node->get_symbols().back() && !node->has_statement()) { + indent.set_last(); + } + i->accept(this); + } + if (node->has_statement()) { + indent.set_last(); + node->get_code_block()->accept(this); + } + indent.pop(); + return true; +} + +bool 
ast_dumper::visit_if_stmt(if_stmt* node) { + indent.dump(); + switch(node->get_cond_type()) { + case if_stmt::type::cond_if: + os << format_stmt("IfStmt"); break; + case if_stmt::type::cond_elsif: + os << format_stmt("ElseIfStmt"); break; + case if_stmt::type::cond_else: + os << format_stmt("ElseStmt"); break; + } + os << " " << format_pointer(node) << format_location(node->get_location()); + indent.push(); + if (!node->has_statement()) { + indent.set_last(); + } + if (node->has_condition()) { + node->get_condition()->accept(this); + } + if (node->has_statement()) { + indent.set_last(); + node->get_code_block()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_cond_stmt(cond_stmt* node) { + indent.dump(); + os << format_stmt("ConditionStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->get_elsif_stmt().size() && !node->has_else_stmt()) { + indent.set_last(); + } + node->get_if_stmt()->accept(this); + for(auto i : node->get_elsif_stmt()) { + if (i == node->get_elsif_stmt().back() && !node->has_else_stmt()) { + indent.set_last(); + } + i->accept(this); + } + if (node->has_else_stmt()) { + indent.set_last(); + node->get_else_stmt()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_for_stmt(for_stmt* node) { + indent.dump(); + os << format_stmt("ForStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_symbols()) { + if (i == node->get_symbols().back() && !node->has_statement()) { + indent.set_last(); + } + i->accept(this); + } + if (node->has_statement()) { + indent.set_last(); + node->get_code_block()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_match_pair(match_pair* node) { + indent.dump(); + os << format_stmt("MatchPair ") << format_pointer(node); + os << format_resolve(node) << 
format_location(node->get_location()); + indent.push(); + node->get_literal()->accept(this); + indent.set_last(); + node->get_statement()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_match_stmt(match_stmt* node) { + indent.dump(); + os << format_stmt("MatchStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->get_match_pair_list().size()) { + indent.set_last(); + } + node->get_match_condition()->accept(this); + for(auto i : node->get_match_pair_list()) { + if (i == node->get_match_pair_list().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_ret_stmt(ret_stmt* node) { + indent.dump(); + os << format_stmt("ReturnStmt ") << format_pointer(node) << " "; + os << format_identifier(node->is_yield()? "yield":"return"); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_return_value()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_fact_data(fact_data* node) { + indent.dump(); + os << format_stmt("FactData ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_literals()) { + if (i == node->get_literals().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_fact_stmt(fact_stmt* node) { + indent.dump(); + os << format_stmt("FactStmt ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_facts()) { + if (i == node->get_facts().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_in_block_expr(in_block_expr* node) { + indent.dump(); + os << format_stmt("InBlockExpr ") << format_pointer(node); + os << 
format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_expr()->accept(this); + indent.pop(); + return true; +} + +bool ast_dumper::visit_annotation(annotation* node) { + indent.dump(); + os << format_decl("Annotation ") << format_pointer(node) << " "; + if (node->get_annotation().length()) { + os << format_annotation(node->get_annotation()); + } else { + os << format_annotation(""); + } + if (node->get_ordered_properties().size()) { + os << format_annotation("(" ); + for(const auto& i : node->get_ordered_properties()) { + os << format_identifier(i.first); + os << format_annotation("="); + os << format_string(i.second); + if (i != node->get_ordered_properties().back()) { + os << format_annotation(", "); + } + } + os << format_annotation(")"); + } + if (node->get_property_string().length()) { + os << format_annotation("(" ); + os << format_string(node->get_property_string()); + os << format_annotation(")"); + } + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_type_def(type_def* node) { + indent.dump(); + os << format_decl("TypeDef ") << format_pointer(node) << " "; + os << format_string((node->is_set()? "\"*":"\"") + node->get_full_name() + "\""); + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_database_table(database_table* node) { + indent.dump(); + os << format_decl("DatabaseTable ") << format_pointer(node) << " "; + os << format_identifier(node->get_name()->get_name()); + os << " " << format_string( + (node->get_type()->is_set()? 
"\"*":"\"") + + node->get_type()->get_full_name() + "\""); + if (node->has_real_name()) { + os << " " << format_annotation(node->get_real_name()->get_literal()); + } + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_database_decl(database_decl* node) { + indent.dump(); + os << format_decl("DatabaseDecl ") << format_pointer(node) << " "; + os << format_identifier(node->get_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_tables()) { + if (i == node->get_tables().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_schema_field(schema_field* node) { + indent.dump(); + os << format_decl("Field ") << format_pointer(node) << " "; + os << format_identifier(node->get_identifier()->get_name()); + os << " " << format_string( + (node->get_field_type()->is_set()? "\"*":"\"") + + node->get_field_type()->get_full_name() + "\""); + if (node->get_annotation()->get_annotation().length()) { + os << format_annotation( + " \"" + node->get_annotation()->get_annotation() + "\""); + } else { + os << format_annotation(" \"\""); + } + os << format_resolve(node) << format_location(node->get_location()); + return true; +} + +bool ast_dumper::visit_schema_decl(schema_decl* node) { + indent.dump(); + os << format_decl("SchemaDecl ") << format_pointer(node) << " "; + os << format_identifier(node->get_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->get_fields().size()) { + indent.set_last(); + } + if (node->has_parent()) { + node->get_parent_name()->accept(this); + } + for(auto i : node->get_fields()) { + if (i == node->get_fields().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_function_decl(function_decl* node) { + indent.dump(); + os << 
format_decl("FunctionDecl ") << format_pointer(node) << " "; + if (node->is_public()) { + os << format_annotation("public "); + } + os << format_identifier(node->get_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_annotations()) { + i->accept(this); + } + if (!node->has_return_value() && !node->implemented()) { + indent.set_last(); + } + indent.dump(); + os << format_expr("Parameter ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_parameter_list()) { + if (i == node->get_parameter_list().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + if (!node->implemented()) { + indent.set_last(); + } + if (node->has_return_value()) { + node->get_return_type()->accept(this); + } + if (node->implemented()) { + indent.set_last(); + node->get_code_block()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_enum_decl(enum_decl* node) { + indent.dump(); + os << format_decl("EnumDecl ") << format_pointer(node) << " "; + os << format_identifier(node->get_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_member()) { + if (i == node->get_member().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_var_decl(var_decl* node) { + indent.dump(); + os << format_decl("VariableDecl ") << format_pointer(node); + os << " " << format_identifier(node->get_var_name()->get_name()); + if (node->has_declared_type()) { + const auto type_node = node->get_type(); + os << " " << format_string( + (type_node->is_set()? 
"\"*":"\"") + + type_node->get_full_name() + "\"" + ); + } + os << format_resolve(node) << format_location(node->get_location()); + + indent.push(); + if (node->has_init_value()) { + indent.set_last(); + node->get_init_value()->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_impl_block(impl_block* node) { + indent.dump(); + os << format_decl("Implement ") << format_pointer(node) << " "; + os << format_identifier(node->get_impl_schema_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + if (!node->get_functions().size()) { + indent.set_last(); + } + + for(auto i : node->get_functions()) { + if (i == node->get_functions().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); + return true; +} + +bool ast_dumper::visit_query_column(query_column* node) { + indent.dump(); + os << format_decl("QueryColumn ") << format_pointer(node); + if (node->has_column_name()) { + os << " " << format_identifier(node->get_column_name()->get_name()); + } + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + indent.set_last(); + node->get_column_value()->accept(this); + indent.pop(); + return true; +} + +void ast_dumper::dump_query_from_list(query_decl* node) { + indent.dump(); + os << format_decl("QueryFromList ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_from_list()) { + if (i == node->get_from_list().back()) { + indent.set_last(); + } + i->accept(this); + } + indent.pop(); +} + +void ast_dumper::dump_query_select_list(query_decl* node) { + indent.dump(); + os << format_decl("QuerySelectList ") << format_pointer(node); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + for(auto i : node->get_select_list()) { + if (i == node->get_select_list().back()) { + indent.set_last(); + } + i->accept(this); + } + 
indent.pop(); +} + +bool ast_dumper::visit_query_decl(query_decl* node) { + indent.dump(); + os << format_decl("QueryDecl ") << format_pointer(node); + os << " " << format_identifier(node->get_name()->get_name()); + os << format_resolve(node) << format_location(node->get_location()); + indent.push(); + dump_query_from_list(node); + if (node->has_condition()) { + node->get_where_condition()->accept(this); + } + indent.set_last(); + dump_query_select_list(node); + indent.pop(); + return true; +} + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/ast_dumper.h b/godel-script/godel-frontend/src/ast/ast_dumper.h new file mode 100644 index 00000000..ee17d233 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_dumper.h @@ -0,0 +1,133 @@ +#pragma once + +#include "godel-frontend/src/ast/ast_visitor.h" +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/util/util.h" + +#include +#include +#include +#include + +namespace godel { + +using report::span; +using util::reset; +using util::light_yellow; +using util::light_green; +using util::light_grey; +using util::light_cyan; +using util::light_red; +using util::cyan; +using util::green; +using util::purple; +using util::red; + +class indentation { +private: + std::ostream &os; + std::vector indent; + std::vector last; + +public: + indentation(std::ostream& out): os(out) {} + void dump() { + if (indent.size() && last.size()) { + indent.back() = last.back() ? "+-" : "|-"; + } + for(const auto& i : indent) { + os << i; + } + if (indent.size() && last.size()) { + indent.back() = last.back() ? 
" " : "| "; + } + } + + void push() { + indent.push_back("| "); + last.push_back(false); + } + + void pop() { + indent.pop_back(); + last.pop_back(); + } + + void set_last() { + last.back() = true; + } +}; + +class ast_dumper: public ast_visitor { +private: + std::ostream &os; + indentation indent; + +private: + std::string format_pointer(ast_node*); + std::string format_resolve(ast_node*); + std::string format_location(const span&); + std::string format_string(const std::string&); + std::string format_identifier(const std::string&); + std::string format_annotation(const std::string&); + std::string format_number(const int64_t); + std::string format_number(const float); + std::string format_decl(const std::string&); + std::string format_expr(const std::string&); + std::string format_stmt(const std::string&); + std::string format_operator(const std::string&); + std::string format_lambda(const std::string&); + +public: + ast_dumper(std::ostream &out): os(out), indent(out) {} + + bool visit_ast_root(ast_root*) override; + bool visit_ast_null(ast_null*) override; + + bool visit_number_literal(number_literal*) override; + bool visit_string_literal(string_literal*) override; + bool visit_boolean_literal(boolean_literal*) override; + bool visit_identifier(identifier*) override; + bool visit_unary_operator(unary_operator*) override; + bool visit_binary_operator(binary_operator*) override; + bool visit_func_call(func_call*) override; + bool visit_initializer_pair(initializer_pair*) override; + bool visit_spread_expr(spread_expr*) override; + bool visit_initializer(initializer*) override; + bool visit_call_head(call_head*) override; + bool visit_call_expr(call_expr*) override; + bool visit_call_root(call_root*) override; + + bool visit_block_stmt(block_stmt*) override; + bool visit_multi_use_stmt(multi_use_stmt*) override; + bool visit_use_stmt(use_stmt*) override; + bool visit_let_stmt(let_stmt*) override; + bool visit_if_stmt(if_stmt*) override; + bool 
visit_cond_stmt(cond_stmt*) override; + bool visit_for_stmt(for_stmt*) override; + bool visit_match_pair(match_pair*) override; + bool visit_match_stmt(match_stmt*) override; + bool visit_ret_stmt(ret_stmt*) override; + bool visit_fact_data(fact_data*) override; + bool visit_fact_stmt(fact_stmt*) override; + bool visit_in_block_expr(in_block_expr*) override; + + bool visit_annotation(annotation*) override; + bool visit_type_def(type_def*) override; + bool visit_database_table(database_table*) override; + bool visit_database_decl(database_decl*) override; + bool visit_schema_field(schema_field*) override; + bool visit_schema_decl(schema_decl*) override; + bool visit_function_decl(function_decl*) override; + bool visit_enum_decl(enum_decl*) override; + bool visit_var_decl(var_decl*) override; + bool visit_impl_block(impl_block*) override; + bool visit_query_column(query_column*) override; + +public: + void dump_query_from_list(query_decl*); + void dump_query_select_list(query_decl*); + bool visit_query_decl(query_decl*) override; +}; + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/ast_node.h b/godel-script/godel-frontend/src/ast/ast_node.h new file mode 100644 index 00000000..e8c514cb --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_node.h @@ -0,0 +1,169 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/symbol.h" + +#include +#include +#include +#include + +namespace godel { + +using report::span; + +enum class ast_class { + ac_root, // ast root + + ac_null, // null/empty node + ac_number_literal, // number literal + ac_string_literal, // string literal + ac_boolean_literal, // boolean literal + ac_identifier, // identifier + ac_unary_operator, // unary operator + ac_binary_operator, // binary operator + ac_call_expr, // call chain node expression + ac_call_head, // first node if whole call expression + ac_call_root, // call expression ast root + 
ac_func_call, // function call + ac_initializer_pair, // pair of initializer + ac_spread_expr, // spread expression, like: `..identifier` + ac_initializer, // initializer/constructor + + ac_block_stmt, // code block + ac_multi_use, // multiple(explicit symbol import) use statement + ac_use_stmt, // use statement + ac_let_stmt, // let statement + ac_if_stmt, // if statement + ac_cond_stmt, // condition statement + ac_for_stmt, // for statement + ac_match_pair, // pair of match expression + ac_match_stmt, // match statement + ac_ret_stmt, // return statement + ac_fact_data, // fact data + ac_fact_stmt, // fact statement + ac_in_block_expr, // in block expression + + ac_annotation, // annotation + ac_type_def, // type definition + ac_database_table, // database table + ac_database_decl, // database declaration + ac_schema_field, // schema field + ac_schema_decl, // schema declaration + ac_function_decl, // function declaration + ac_enum_decl, // enum declaration + ac_var_decl, // variable declaration + ac_impl_block, // implementation block + ac_query_column, // query column + ac_query_decl // query declaration +}; + +// basic visitor class +class ast_visitor; + +// basic ast node class +class ast_node { +public: + ast_node(const ast_node&) = delete; + ast_node& operator=(const ast_node&) = delete; + +protected: + ast_class ac; + span loc; + infer resolve; + +protected: + // report fatal error in ast structure + void fatal_error(const std::string& info) { + std::cerr << "fatal error at file " << loc.file << ":"; + std::cerr << loc.start_line << ":" << loc.start_column << ":\n"; + std::cerr << " " << info << "\n\n"; + std::exit(-1); + } + +public: + ast_node(ast_class ast_class, const span& location): + ac(ast_class), loc(location), resolve(infer::error()) {} + virtual ~ast_node() = default; + + auto get_ast_class() const { return ac; } + const auto& get_location() const { return loc; } + const auto& get_file() const { return loc.file; } + const auto& get_resolve() 
const { return resolve; } + + void update_location(const span& l) { + if (loc.start_line>l.start_line) { + loc.start_line = l.start_line; + loc.start_column = l.start_column; + } else if (loc.start_line==l.start_line && + loc.start_column>l.start_column) { + loc.start_column = l.start_column; + } + if (loc.end_line +#include +#include +#include + +namespace godel { + +ast_root::~ast_root() { + for(auto i : use_statements) { + delete i; + } + for(auto i : declarations) { + delete i; + } +} + +void ast_root::accept(ast_visitor* visitor) { + visitor->visit_ast_root(this); +} + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/ast_root.h b/godel-script/godel-frontend/src/ast/ast_root.h new file mode 100644 index 00000000..2b2e75ae --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_root.h @@ -0,0 +1,41 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" + +#include "godel-frontend/src/ast/ast_node.h" +#include "godel-frontend/src/ast/decl.h" +#include "godel-frontend/src/ast/stmt.h" +#include "godel-frontend/src/ast/expr.h" + +#include +#include +#include + +namespace godel { + +using report::span; + +// ast root node +class ast_root: public ast_node { +public: + ast_root(const ast_root&) = delete; + ast_root& operator=(const ast_root&) = delete; + +private: + std::vector use_statements; + std::vector declarations; + +public: + ast_root(const span& location): ast_node(ast_class::ac_root, location) {} + ~ast_root() override; + + void add_use_statement(use_stmt* p) { use_statements.push_back(p); } + void add_declaration(decl* p) { declarations.push_back(p); } + + auto& get_use_statements() { return use_statements; } + auto& get_declarations() { return declarations; } + + void accept(ast_visitor* visitor) override; +}; + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/ast_visitor.cpp b/godel-script/godel-frontend/src/ast/ast_visitor.cpp new file 
mode 100644 index 00000000..33eda0a9 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_visitor.cpp @@ -0,0 +1,340 @@ +#include "godel-frontend/src/ast/ast_visitor.h" +#include + +namespace godel { + +bool ast_visitor::visit(ast_node* node) { + assert(node != nullptr); + node->accept(this); + return true; +} + +bool ast_visitor::visit_ast_root(ast_root* node) { + for(auto i : node->get_use_statements()) { + i->accept(this); + } + for(auto i : node->get_declarations()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_ast_null([[maybe_unused]] ast_null* node) { + return true; +} + +bool ast_visitor::visit_expr(expr* node) { + node->accept(this); + return true; +} + +bool ast_visitor::visit_number_literal([[maybe_unused]] number_literal* node) { + return true; +} + +bool ast_visitor::visit_string_literal([[maybe_unused]] string_literal* node) { + return true; +} + +bool ast_visitor::visit_boolean_literal([[maybe_unused]] boolean_literal* node) { + return true; +} + +bool ast_visitor::visit_identifier([[maybe_unused]] identifier* node) { + return true; +} + +bool ast_visitor::visit_unary_operator(unary_operator* node) { + node->get_child()->accept(this); + return true; +} + +bool ast_visitor::visit_binary_operator(binary_operator* node) { + node->get_left()->accept(this); + node->get_right()->accept(this); + return true; +} + +bool ast_visitor::visit_func_call(func_call* node) { + for(auto i : node->get_arguments()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_initializer_pair(initializer_pair* node) { + node->get_field_name()->accept(this); + node->get_field_value()->accept(this); + return true; +} + +bool ast_visitor::visit_initializer(initializer* node) { + for(auto i : node->get_field_pairs()) { + i->accept(this); + } + for(auto i : node->get_spread_exprs()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_spread_expr(spread_expr* node) { + node->get_child()->accept(this); + return true; +} 
+ +bool ast_visitor::visit_call_head(call_head* node) { + node->get_first_expression()->accept(this); + if (node->has_func_call()) { + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + return true; +} + +bool ast_visitor::visit_call_expr(call_expr* node) { + node->get_field_name()->accept(this); + if (node->is_generic()) { + node->get_generic_type()->accept(this); + } + if (node->has_func_call()) { + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + return true; +} + +bool ast_visitor::visit_call_root(call_root* node) { + node->get_call_head()->accept(this); + for(auto i : node->get_call_chain()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_stmt(stmt* node) { + node->accept(this); + return true; +} + +bool ast_visitor::visit_block_stmt(block_stmt* node) { + for(auto i : node->get_statement()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_multi_use_stmt(multi_use_stmt* node) { + for(auto i : node->get_import_symbol()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_use_stmt(use_stmt* node) { + for(auto i : node->get_path()) { + i->accept(this); + } + if (!node->is_use_all()) { + node->get_multi_use()->accept(this); + } + return true; +} + +bool ast_visitor::visit_let_stmt(let_stmt* node) { + for(auto i : node->get_symbols()) { + i->accept(this); + } + if (node->has_statement()) { + node->get_code_block()->accept(this); + } + return true; +} + +bool ast_visitor::visit_if_stmt(if_stmt* node) { + if (node->has_condition()) { + node->get_condition()->accept(this); + } + if (node->has_statement()) { + node->get_code_block()->accept(this); + } + return true; +} + +bool ast_visitor::visit_cond_stmt(cond_stmt* node) { + node->get_if_stmt()->accept(this); + for(auto i : node->get_elsif_stmt()) { + i->accept(this); + } + if (node->has_else_stmt()) { + 
node->get_else_stmt()->accept(this); + } + return true; +} + +bool ast_visitor::visit_for_stmt(for_stmt* node) { + for(auto i : node->get_symbols()) { + i->accept(this); + } + if (node->has_statement()) { + node->get_code_block()->accept(this); + } + return true; +} + +bool ast_visitor::visit_match_pair(match_pair* node) { + node->get_literal()->accept(this); + node->get_statement()->accept(this); + return true; +} + +bool ast_visitor::visit_match_stmt(match_stmt* node) { + node->get_match_condition()->accept(this); + for(auto i : node->get_match_pair_list()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_ret_stmt(ret_stmt* node) { + node->get_return_value()->accept(this); + return true; +} + +bool ast_visitor::visit_fact_data(fact_data* node) { + for(auto i : node->get_literals()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_fact_stmt(fact_stmt* node) { + for(auto i : node->get_facts()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_in_block_expr(in_block_expr* node) { + node->get_expr()->accept(this); + return true; +} + +bool ast_visitor::visit_decl(decl* node) { + node->accept(this); + return true; +} + +bool ast_visitor::visit_annotation([[maybe_unused]] annotation* node) { + return true; +} + +bool ast_visitor::visit_type_def([[maybe_unused]] type_def* node) { + return true; +} + +bool ast_visitor::visit_database_table(database_table* node) { + node->get_name()->accept(this); + node->get_type()->accept(this); + if (node->has_real_name()) { + node->get_real_name()->accept(this); + } + return true; +} + +bool ast_visitor::visit_database_decl(database_decl* node) { + node->get_name()->accept(this); + for(auto i : node->get_tables()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_schema_field(schema_field* node) { + node->get_annotation()->accept(this); + node->get_identifier()->accept(this); + node->get_field_type()->accept(this); + return true; +} + +bool 
ast_visitor::visit_schema_decl(schema_decl* node) { + node->get_name()->accept(this); + if (node->has_parent()) { + node->get_parent_name()->accept(this); + } + for(auto i : node->get_fields()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_function_decl(function_decl* node) { + for(auto i : node->get_annotations()) { + i->accept(this); + } + node->get_name()->accept(this); + for(auto i : node->get_parameter_list()) { + i->accept(this); + } + if (node->has_return_value()) { + node->get_return_type()->accept(this); + } + if (node->implemented()) { + node->get_code_block()->accept(this); + } + return true; +} + +bool ast_visitor::visit_enum_decl(enum_decl* node) { + node->get_name()->accept(this); + for(auto i : node->get_member()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_var_decl(var_decl* node) { + node->get_var_name()->accept(this); + if (node->has_declared_type()) { + node->get_type()->accept(this); + } + if (node->has_init_value()) { + node->get_init_value()->accept(this); + } + return true; +} + +bool ast_visitor::visit_impl_block(impl_block* node) { + node->get_impl_schema_name()->accept(this); + for(auto i : node->get_functions()) { + i->accept(this); + } + return true; +} + +bool ast_visitor::visit_query_column(query_column* node) { + if (node->has_column_name()) { + node->get_column_name()->accept(this); + } + node->get_column_value()->accept(this); + return true; +} + +bool ast_visitor::visit_query_decl(query_decl* node) { + for(auto i : node->get_from_list()) { + i->accept(this); + } + if (node->has_condition()) { + node->get_where_condition()->accept(this); + } + for(auto i : node->get_select_list()) { + i->accept(this); + } + return true; +} + +} // end namespace godel diff --git a/godel-script/godel-frontend/src/ast/ast_visitor.h b/godel-script/godel-frontend/src/ast/ast_visitor.h new file mode 100644 index 00000000..56ce1372 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/ast_visitor.h @@ 
-0,0 +1,70 @@ +#pragma once + +#include "godel-frontend/src/ast/ast_node.h" +#include "godel-frontend/src/ast/decl.h" +#include "godel-frontend/src/ast/expr.h" +#include "godel-frontend/src/ast/stmt.h" +#include "godel-frontend/src/ast/ast_root.h" + +namespace godel { + +class ast_visitor { +public: + ast_visitor(ast_visitor const &) = delete; + ast_visitor& operator=(ast_visitor const &) = delete; + +public: + ast_visitor() {} + virtual ~ast_visitor() {} + + bool visit(ast_node*); + + virtual bool visit_ast_root(ast_root*); + virtual bool visit_ast_null(ast_null*); + + virtual bool visit_expr(expr*); + virtual bool visit_number_literal(number_literal*); + virtual bool visit_string_literal(string_literal*); + virtual bool visit_boolean_literal(boolean_literal*); + virtual bool visit_identifier(identifier*); + virtual bool visit_unary_operator(unary_operator*); + virtual bool visit_binary_operator(binary_operator*); + virtual bool visit_func_call(func_call*); + virtual bool visit_initializer_pair(initializer_pair*); + virtual bool visit_spread_expr(spread_expr*); + virtual bool visit_initializer(initializer*); + virtual bool visit_call_head(call_head*); + virtual bool visit_call_expr(call_expr*); + virtual bool visit_call_root(call_root*); + + virtual bool visit_stmt(stmt*); + virtual bool visit_block_stmt(block_stmt*); + virtual bool visit_multi_use_stmt(multi_use_stmt*); + virtual bool visit_use_stmt(use_stmt*); + virtual bool visit_let_stmt(let_stmt*); + virtual bool visit_if_stmt(if_stmt*); + virtual bool visit_cond_stmt(cond_stmt*); + virtual bool visit_for_stmt(for_stmt*); + virtual bool visit_match_pair(match_pair*); + virtual bool visit_match_stmt(match_stmt*); + virtual bool visit_ret_stmt(ret_stmt*); + virtual bool visit_fact_data(fact_data*); + virtual bool visit_fact_stmt(fact_stmt*); + virtual bool visit_in_block_expr(in_block_expr*); + + virtual bool visit_decl(decl*); + virtual bool visit_annotation(annotation*); + virtual bool 
visit_type_def(type_def*); + virtual bool visit_database_table(database_table*); + virtual bool visit_database_decl(database_decl*); + virtual bool visit_schema_field(schema_field*); + virtual bool visit_schema_decl(schema_decl*); + virtual bool visit_function_decl(function_decl*); + virtual bool visit_enum_decl(enum_decl*); + virtual bool visit_var_decl(var_decl*); + virtual bool visit_impl_block(impl_block*); + virtual bool visit_query_column(query_column*); + virtual bool visit_query_decl(query_decl*); +}; // end class ast_visitor + +} // end namespace diff --git a/godel-script/godel-frontend/src/ast/decl.cpp b/godel-script/godel-frontend/src/ast/decl.cpp new file mode 100644 index 00000000..948f2825 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/decl.cpp @@ -0,0 +1,133 @@ +#include "godel-frontend/src/ast/decl.h" +#include "godel-frontend/src/ast/ast_visitor.h" + +namespace godel { + +void decl::accept(ast_visitor* visitor) { + visitor->visit_decl(this); +} + +void annotation::accept(ast_visitor* visitor) { + visitor->visit_annotation(this); +} + +void type_def::accept(ast_visitor* visitor) { + visitor->visit_type_def(this); +} + +database_table::~database_table() { + delete name; + delete type_name; + delete as_name; +} + +void database_table::accept(ast_visitor* visitor) { + visitor->visit_database_table(this); +} + +database_decl::~database_decl() { + delete name; + for(auto i : tables) { + delete i; + } +} + +void database_decl::accept(ast_visitor* visitor) { + visitor->visit_database_decl(this); +} + +schema_field::~schema_field() { + delete anno; + delete name; + delete type_name; +} + +void schema_field::accept(ast_visitor* visitor) { + visitor->visit_schema_field(this); +} + +schema_decl::~schema_decl() { + delete name; + delete parent_name; + for(auto i : pairs) { + delete i; + } +} + +void schema_decl::accept(ast_visitor* visitor) { + visitor->visit_schema_decl(this); +} + +function_decl::~function_decl() { + for(auto i : annotations) { + 
delete i; + } + delete name; + for(auto i : param) { + delete i; + } + delete return_type; + delete block; +} + +void function_decl::accept(ast_visitor* visitor) { + visitor->visit_function_decl(this); +} + +enum_decl::~enum_decl() { + delete name; + for(auto i : member) { + delete i; + } +} + +void enum_decl::accept(ast_visitor* visitor) { + visitor->visit_enum_decl(this); +} + +var_decl::~var_decl() { + delete name; + delete type_name; + delete init_value; +} + +void var_decl::accept(ast_visitor* visitor) { + visitor->visit_var_decl(this); +} + +impl_block::~impl_block() { + delete impl_schema_name; + for(auto i : method) { + delete i; + } +} + +void impl_block::accept(ast_visitor* visitor) { + visitor->visit_impl_block(this); +} + +query_column::~query_column() { + delete column_name; + delete column_value; +} + +void query_column::accept(ast_visitor* visitor) { + visitor->visit_query_column(this); +} + +query_decl::~query_decl() { + delete query_name; + for(auto i : from_list) { + delete i; + } + delete where_condition; + for(auto i : select_list) { + delete i; + } +} + +void query_decl::accept(ast_visitor* visitor) { + visitor->visit_query_decl(this); +} + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/decl.h b/godel-script/godel-frontend/src/ast/decl.h new file mode 100644 index 00000000..5407858d --- /dev/null +++ b/godel-script/godel-frontend/src/ast/decl.h @@ -0,0 +1,416 @@ +#pragma once + +#include "godel-frontend/src/ast/ast_node.h" +#include "godel-frontend/src/ast/expr.h" +#include "godel-frontend/src/ast/stmt.h" + +#include +#include +#include +#include + +namespace godel { + +// decl class +class decl: public ast_node { +public: + decl(const decl&) = delete; + decl& operator=(const decl&) = delete; + +public: + decl(ast_class ast_class, const span &location): + ast_node(ast_class, location) {} + virtual ~decl() override = default; + void accept(ast_visitor* visitor) override; +}; // end class 
decl + + +// decl class annotation +class annotation: public decl { +public: + annotation(const annotation&) = delete; + annotation& operator=(const annotation&) = delete; + +private: + /* the name/content of annotation */ + std::string annotation_content; + + /* be aware that if writing @annot("") */ + /* the string literal here is not "" but "\"\"" */ + /* so it is still not empty */ + std::string property_string; + + /* map of properties, for example @annot(a = "xxx", b = "xxx") */ + std::vector> ordered_properties; + +public: + annotation(const span& location, const std::string& content): + decl(ast_class::ac_annotation, location), + annotation_content(content), + property_string(""), + ordered_properties({}) {} + ~annotation() override = default; + void set_property_string(const std::string& s) { property_string = s; } + void add_property(const std::string& key, const std::string& value) { + ordered_properties.push_back({key, value}); + } + + const auto& get_annotation() const { return annotation_content; } + const auto& get_property_string() const { return property_string; } + const auto& get_ordered_properties() const { return ordered_properties; } + + void accept(ast_visitor* visitor) override; +}; // end class annotation + + +// decl class type_def +class type_def: public decl { +public: + type_def(const type_def&) = delete; + type_def& operator=(const type_def&) = delete; + +private: + bool flag_is_set; + std::vector name_path; + +public: + type_def(const span& location): + decl(ast_class::ac_type_def, location), + flag_is_set(false), name_path({}) {} + type_def(const span& location, const std::string& name): + decl(ast_class::ac_type_def, location), + flag_is_set(false), name_path({name}) {} + ~type_def() override = default; + + void set_dataset_flag(bool f) { flag_is_set = f; } + bool is_set() const { return flag_is_set; } + bool is_full_path() const { return name_path.size() > 1; } + void add_path(const std::string& path) { name_path.push_back(path); } 
+ + std::string get_full_name() const { + std::string type_name = ""; + for(const auto& i : name_path) { + type_name += i + "::"; + } + if (type_name.length()) { + type_name = type_name.substr(0, type_name.length() - 2); + } + return type_name; + } + + const std::string& get_short_name() const { + return name_path.back(); + } + + std::string get_path() const { + if (!name_path.size()) { + return ""; + } + std::string path = ""; + for(size_t i = 0; i < name_path.size() - 1; i++) { + path += name_path[i] + "::"; + } + if (path.length()) { + path = path.substr(0, path.length() - 2); + } + return path; + } + + void accept(ast_visitor* visitor) override; +}; // end class type_def + + +// decl class database_table +class database_table: public decl { +public: + database_table(const database_table&) = delete; + database_table& operator=(const database_table&) = delete; + +private: + identifier* name; + type_def* type_name; + string_literal* as_name; + +public: + database_table(const span& location): + decl(ast_class::ac_database_table, location), + name(nullptr), type_name(nullptr), as_name(nullptr) {} + ~database_table() override; + void set_name(identifier* node) { name = node; } + void set_type(type_def* node) { type_name = node; } + void set_real_name(string_literal* node) { as_name = node; } + + identifier* get_name() { return name; } + type_def* get_type() { return type_name; } + bool has_real_name() const { return as_name!=nullptr; } + string_literal* get_real_name() { return as_name; } + + void accept(ast_visitor* visitor) override; +}; // end class database_table + + +// decl class database_decl +class database_decl: public decl { +public: + database_decl(const database_decl&) = delete; + database_decl& operator=(const database_decl&) = delete; + +private: + identifier* name; + std::vector tables; + +public: + database_decl(const span& location): + decl(ast_class::ac_database_decl, location), + name(nullptr) {} + ~database_decl() override; + void 
set_name(identifier* node) { name = node; } + void add_table(database_table* node) { tables.push_back(node); } + + identifier* get_name() { return name; } + auto& get_tables() { return tables; } + + void accept(ast_visitor* visitor) override; +}; // end class database_decl + + +// decl class schema_field +class schema_field: public decl { +public: + schema_field(const schema_field&) = delete; + schema_field& operator=(const schema_field&) = delete; + +private: + annotation* anno; + identifier* name; + type_def* type_name; + +public: + schema_field(const span& location): + decl(ast_class::ac_schema_field, location), + anno(nullptr), name(nullptr), type_name(nullptr) {} + ~schema_field() override; + void set_annotation(annotation* node) { anno = node; } + void set_identifier(identifier* node) { name = node; } + void set_field_type(type_def* node) { type_name = node; } + + annotation* get_annotation() { return anno; } + identifier* get_identifier() { return name; } + type_def* get_field_type() { return type_name; } + + void accept(ast_visitor* visitor) override; +}; // end class schema_field + + +// decl class schema_decl +class schema_decl: public decl { +public: + schema_decl(const schema_decl&) = delete; + schema_decl& operator=(const schema_decl&) = delete; + +private: + identifier* name; + type_def* parent_name; + std::vector pairs; + +public: + schema_decl(const span& location): + decl(ast_class::ac_schema_decl, location), + name(nullptr), parent_name(nullptr) {} + ~schema_decl() override; + void set_name(identifier* node) { name = node; } + void set_parent_name(type_def* node) { parent_name = node; } + void add_field(schema_field* node) { pairs.push_back(node); } + + identifier* get_name() { return name; } + bool has_parent() const { return parent_name!=nullptr; } + type_def* get_parent_name() { return parent_name; } + auto& get_fields() { return pairs; } + + void accept(ast_visitor* visitor) override; +}; // end class schema_decl + + +// decl class 
function_decl +class function_decl: public decl { +public: + function_decl(const function_decl&) = delete; + function_decl& operator=(const function_decl&) = delete; + +private: + std::vector annotations; + identifier* name; + std::vector param; + type_def* return_type; + block_stmt* block; + bool is_public_flag; + + // maybe used in ungrounded check pass and codegen, for optimization + bool can_disable_self_check; + +public: + function_decl(const span& location): + decl(ast_class::ac_function_decl, location), + name(nullptr), return_type(nullptr), + block(nullptr), is_public_flag(false), + can_disable_self_check(false) {} + ~function_decl() override; + void add_annotation(annotation* node) { annotations.push_back(node); } + void set_name(identifier* node) { name = node; } + void add_parameter(var_decl* node) { param.push_back(node); } + void set_return_type(type_def* node) { return_type = node;} + void set_code_block(block_stmt* node) { block = node; } + void set_public() { is_public_flag = true; } + void set_can_disable_self_check(bool flag) { can_disable_self_check = flag; } + + auto& get_annotations() { return annotations; } + identifier* get_name() { return name; } + auto& get_parameter_list() { return param; } + bool has_return_value() const { return return_type!=nullptr; } + type_def* get_return_type() { return return_type; } + block_stmt* get_code_block() { return block; } + bool implemented() const { return block!=nullptr; } + bool is_public() const { return is_public_flag; } + bool can_disable_self_check_flag() const { return can_disable_self_check; } + + void accept(ast_visitor* visitor) override; +}; // end class function_decl + + +// decl class enum_decl +class enum_decl: public decl { +public: + enum_decl(const enum_decl&) = delete; + enum_decl& operator=(const enum_decl&) = delete; + +private: + identifier* name; + std::vector member; + +public: + enum_decl(const span& location): + decl(ast_class::ac_enum_decl, location), + name(nullptr) {} + 
~enum_decl() override; + void set_name(identifier* node) { name = node; } + void add_member(identifier* node) { member.push_back(node); } + + identifier* get_name() { return name; } + auto& get_member() { return member; } + + void accept(ast_visitor* visitor) override; +}; // end class enum_decl + + +// decl class var_decl +class var_decl: public decl { +public: + var_decl(const var_decl&) = delete; + var_decl& operator=(const var_decl&) = delete; + +private: + identifier* name; + type_def* type_name; + expr* init_value; + +public: + var_decl(const span& location): + decl(ast_class::ac_var_decl, location), + name(nullptr), type_name(nullptr), init_value(nullptr) {} + ~var_decl() override; + void set_var_name(identifier* node) { name = node; } + void set_type(type_def* node) { type_name = node; } + void set_init_value(expr* node) { init_value = node; } + + bool has_declared_type() const { return type_name!=nullptr; } + bool has_init_value() const { return init_value!=nullptr; } + identifier* get_var_name() { return name; } + type_def* get_type() { return type_name; } + expr* get_init_value() { return init_value; } + + void accept(ast_visitor* visitor) override; +}; // end class var_decl + + +// decl class impl_block +class impl_block: public decl { +public: + impl_block(const impl_block&) = delete; + impl_block& operator=(const impl_block&) = delete; + +private: + identifier* impl_schema_name; + std::vector method; + +public: + impl_block(const span& location): + decl(ast_class::ac_impl_block, location), + impl_schema_name(nullptr) {} + ~impl_block() override; + void set_impl_schema_name(identifier* node) { impl_schema_name = node; } + void add_function(function_decl* node) { method.push_back(node); } + + identifier* get_impl_schema_name() { return impl_schema_name; } + auto& get_functions() { return method; } + + void accept(ast_visitor* visitor) override; +}; // end class impl_block + +// decl class query_column +class query_column: public decl { +public: + 
query_column(const query_column&) = delete; + query_column& operator=(const query_column&) = delete; + +private: + identifier* column_name; + expr* column_value; + +public: + query_column(const span& location): + decl(ast_class::ac_query_column, location), + column_name(nullptr), column_value(nullptr) {} + ~query_column() override; + void set_column_name(identifier* node) { column_name = node; } + void set_column_value(expr* node) { column_value = node; } + + bool has_column_name() const { return column_name != nullptr; } + identifier* get_column_name() { return column_name; } + expr* get_column_value() { return column_value; } + + void accept(ast_visitor* visitor) override; +}; // end class query_column + +// decl class query_decl +class query_decl: public decl { +public: + query_decl(const query_decl&) = delete; + query_decl& operator=(const query_decl&) = delete; + +private: + identifier* query_name; + std::vector from_list; + expr* where_condition; + std::vector select_list; + +public: + query_decl(const span& location): + decl(ast_class::ac_query_decl, location), + query_name(nullptr), + where_condition(nullptr) {} + ~query_decl() override; + void set_name(identifier* node) { query_name = node; } + void add_var_decl(var_decl* node) { from_list.push_back(node); } + void set_condition(expr* node) { where_condition = node; } + void add_output_column(query_column* node) { select_list.push_back(node); } + + bool has_condition() const { return where_condition != nullptr; } + identifier* get_name() { return query_name; } + auto& get_from_list() { return from_list; } + expr* get_where_condition() { return where_condition; } + auto& get_select_list() { return select_list; } + + void accept(ast_visitor* visitor) override; +}; // end class query_decl + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/expr.cpp b/godel-script/godel-frontend/src/ast/expr.cpp new file mode 100644 index 00000000..6d6d9b33 --- /dev/null +++ 
b/godel-script/godel-frontend/src/ast/expr.cpp @@ -0,0 +1,120 @@ +#include "godel-frontend/src/ast/expr.h" +#include "godel-frontend/src/ast/ast_visitor.h" + +#include + +namespace godel { + +void expr::accept(ast_visitor* visitor) { + visitor->visit_expr(this); +} + +void ast_null::accept(ast_visitor* visitor) { + visitor->visit_ast_null(this); +} + +void number_literal::accept(ast_visitor* visitor) { + visitor->visit_number_literal(this); +} + +void string_literal::accept(ast_visitor* visitor) { + visitor->visit_string_literal(this); +} + +void boolean_literal::accept(ast_visitor* visitor) { + visitor->visit_boolean_literal(this); +} + +void identifier::accept(ast_visitor* visitor) { + visitor->visit_identifier(this); +} + +unary_operator::~unary_operator() { + delete child; +} + +void unary_operator::accept(ast_visitor* visitor) { + visitor->visit_unary_operator(this); +} + +binary_operator::~binary_operator() { + delete left; + delete right; +} + +void binary_operator::accept(ast_visitor* visitor) { + visitor->visit_binary_operator(this); +} + +func_call::~func_call() { + for(auto i : args) { + delete i; + } +} + +void func_call::accept(ast_visitor* visitor) { + visitor->visit_func_call(this); +} + +initializer_pair::~initializer_pair() { + delete field_name; + delete field_value; +} + +void initializer_pair::accept(ast_visitor* visitor) { + visitor->visit_initializer_pair(this); +} + +spread_expr::~spread_expr() { + delete child; +} + +void spread_expr::accept(ast_visitor* visitor) { + visitor->visit_spread_expr(this); +} + +initializer::~initializer() { + for(auto i : pairs) { + delete i; + } + for(auto i : spread_exprs) { + delete i; + } +} + +void initializer::accept(ast_visitor* visitor) { + visitor->visit_initializer(this); +} + +call_head::~call_head() { + delete first; + delete call; + delete ini; +} + +void call_head::accept(ast_visitor* visitor) { + visitor->visit_call_head(this); +} + +call_expr::~call_expr() { + delete name; + delete call; + delete 
ini; +} + +void call_expr::accept(ast_visitor* visitor) { + visitor->visit_call_expr(this); +} + +call_root::~call_root() { + delete head; + for(auto i : call_chain) { + delete i; + } +} + +void call_root::accept(ast_visitor* visitor) { + visitor->visit_call_root(this); +} + +} diff --git a/godel-script/godel-frontend/src/ast/expr.h b/godel-script/godel-frontend/src/ast/expr.h new file mode 100644 index 00000000..3f61de6f --- /dev/null +++ b/godel-script/godel-frontend/src/ast/expr.h @@ -0,0 +1,493 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/ast/ast_node.h" + +#include +#include +#include +#include +#include + +namespace godel { + +using report::span; + +// expr class +class expr: public ast_node { +public: + expr(const expr&) = delete; + expr& operator=(const expr&) = delete; + +public: + expr(ast_class ast_class, const span &location): + ast_node(ast_class, location) {} + virtual ~expr() override = default; + + void accept(ast_visitor* visitor) override; +}; // end class expr + + +// expr class ast_null +class ast_null: public expr { +public: + ast_null(const ast_null&) = delete; + ast_null& operator=(const ast_null&) = delete; + ~ast_null() override = default; + + ast_null(const span &location): expr(ast_class::ac_null, location) {} + void accept(ast_visitor* visitor) override; +}; // end class ast_null + + +// expr class integer_literal +class number_literal: public expr { +public: + number_literal(const number_literal&) = delete; + number_literal& operator=(const number_literal&) = delete; + +private: + bool is_integer_flag; + int64_t int_literal; + float float_literal; + +public: + number_literal(const span &location, int64_t l): + expr(ast_class::ac_number_literal, location), + is_integer_flag(true), int_literal(l) {} + number_literal(const span &location, float l): + expr(ast_class::ac_number_literal, location), + is_integer_flag(false), float_literal(l) {} + ~number_literal() override = default; + + int64_t 
get_integer() const { return int_literal; } + float get_float() const { return float_literal; } + bool is_integer() const { return is_integer_flag; } + + void accept(ast_visitor* visitor) override; +}; // end class integer_literal + + +// expr class string_literal +class string_literal: public expr { +public: + string_literal(const string_literal&) = delete; + string_literal& operator=(const string_literal&) = delete; + +private: + std::string literal; + // mark this literal is a input database + std::string input_database_full_path; + +public: + string_literal(const span& location, const std::string& s): + expr(ast_class::ac_string_literal, location), + literal(s), input_database_full_path("") {} + ~string_literal() override = default; + + void set_input_database(const std::string& full_path) { + input_database_full_path = full_path; + } + bool is_input_database_path() const { return !input_database_full_path.empty(); } + const std::string& get_literal() const { return literal; } + const std::string& get_database_full_path() const { return input_database_full_path; } + + void accept(ast_visitor* visitor) override; +}; // end class string_literal + +// expr class boolean_literal +class boolean_literal: public expr { +public: + boolean_literal(const boolean_literal&) = delete; + boolean_literal& operator=(const boolean_literal&) = delete; + +private: + bool flag; + +public: + boolean_literal(const span& location, bool _flag): + expr(ast_class::ac_boolean_literal, location), flag(_flag) {} + ~boolean_literal() override = default; + + bool get_flag() const { return flag; } + void accept(ast_visitor* visitor) override; +}; // end class boolean_literal + +// expr class identifier +class identifier: public expr { +public: + identifier(const identifier&) = delete; + identifier& operator=(const identifier&) = delete; + +private: + std::string name; + +public: + identifier(const span& location, const std::string& n): + expr(ast_class::ac_identifier, location), name(n) {} + 
~identifier() override = default; + + const std::string& get_name() const { return name; } + + void accept(ast_visitor* visitor) override; +}; // end class identifier + + +// expr class unary_operator +class unary_operator: public expr { +public: + enum class type { + logical_negation, // unary operator ! + arithmetic_negation // unary operator - + }; + unary_operator(const unary_operator&) = delete; + unary_operator& operator=(const unary_operator&) = delete; + +private: + type operator_type; + expr* child; + +public: + unary_operator(const span& location): + expr(ast_class::ac_unary_operator, location), + child(nullptr) {} + ~unary_operator() override; + void set_operator(type t) { operator_type = t; } + void set_child(expr* node) { child = node; } + + type get_operator_type() { return operator_type; } + expr* get_child() { return child; } + + void accept(ast_visitor* visitor) override; +}; // end class unary_operator + + +// expr class binary_operator +class binary_operator: public expr { +public: + enum class type { + logical_or, // binary operator || + logical_and, // binary operator && + compare_equal, // binary operator = + compare_not_equal, // binary operator != + compare_less, // binary operator < + compare_less_equal, // binary operator <= + compare_great, // binary operator > + compare_great_equal, // binary operator >= + in, // binary operator in + add, // binary operator + + sub, // binary operator - + mult, // binary operator * + div // binary operator / + }; + binary_operator(const binary_operator&) = delete; + binary_operator& operator=(const binary_operator&) = delete; + +private: + type operator_type; + expr* left; + expr* right; + +public: + binary_operator(const span& location): + expr(ast_class::ac_binary_operator, location), + left(nullptr), right(nullptr) {} + ~binary_operator() override; + void set_operator(type t) { operator_type = t; } + void set_left(expr* node) { left = node; } + void set_right(expr* node) { right = node; } + + type 
get_operator_type() const { return operator_type; } + expr* get_left() { return left; } + expr* get_right() { return right; } + + void accept(ast_visitor* visitor) override; +}; // end class binary_operator + + +// expr class func_call +class func_call: public expr { +public: + func_call(const func_call&) = delete; + func_call& operator=(const func_call&) = delete; + +private: + std::vector args; + +public: + func_call(const span& location): + expr(ast_class::ac_func_call, location) {} + ~func_call() override; + void add_argument(expr* node) { args.push_back(node); } + + auto& get_arguments() { return args; } + + void accept(ast_visitor* visitor) override; +}; // end class func_call + + +// expr class initializer_pair +class initializer_pair: public expr { +public: + initializer_pair(const initializer_pair&) = delete; + initializer_pair& operator=(const initializer_pair&) = delete; + +private: + identifier* field_name; + expr* field_value; + +public: + initializer_pair(const span& location): + expr(ast_class::ac_initializer_pair, location), + field_name(nullptr), field_value(nullptr) {} + ~initializer_pair() override; + void set_field_name(identifier* node) { field_name = node; } + void set_field_value(expr* node) { field_value = node; } + + identifier* get_field_name() { return field_name; } + expr* get_field_value() { return field_value; } + + void accept(ast_visitor* visitor) override; +}; // end class initializer_pair + +// expr class spread_expr +class spread_expr: public expr { +public: + spread_expr(const spread_expr&) = delete; + spread_expr& operator=(const spread_expr&) = delete; + +private: + expr* child; + +public: + spread_expr(const span& location, expr* node): + expr(ast_class::ac_spread_expr, location), + child(node) {} + ~spread_expr() override; + + auto get_child() { return child; } + + void accept(ast_visitor* visitor) override; +}; // end class spread_expr + + +// expr class initializer +class initializer: public expr { +public: + 
initializer(const initializer&) = delete; + initializer& operator=(const initializer&) = delete; + +private: + std::vector pairs; + std::vector spread_exprs; + +public: + initializer(const span& location): + expr(ast_class::ac_initializer, location) {} + ~initializer() override; + void add_field_pair(initializer_pair* node) { pairs.push_back(node); } + void add_spread_expr(spread_expr* node) { spread_exprs.push_back(node); } + + auto& get_field_pairs() { return pairs; } + auto& get_spread_exprs() { return spread_exprs; } + + void accept(ast_visitor* visitor) override; +}; // end class initializer + + +// expr class call_head +class call_head: public expr { +public: + call_head(const call_head&) = delete; + call_head& operator=(const call_head&) = delete; + +private: + expr* first; + func_call* call; + initializer* ini; + + // mark schema(xxx) is schema::__all__(xxx) + bool schema_loader; + +private: + void check_call_and_init() { + if (!call || !ini) { + return; + } + fatal_error("call_head: func_call and initializer both exist."); + } + +public: + call_head(const span& location): + expr(ast_class::ac_call_head, location), + first(nullptr), call(nullptr), ini(nullptr), + schema_loader(false) {} + ~call_head() override; + void set_first_expression(expr* node) { first = node; } + void set_func_call(func_call* node) { call = node; check_call_and_init(); } + void set_initializer(initializer* node) { ini = node; check_call_and_init(); } + void set_is_schema_loader() { schema_loader = true; } + + expr* get_first_expression() { return first; } + bool has_func_call() const { return call!=nullptr; } + func_call* get_func_call() { return call; } + bool is_initializer() const { return ini!=nullptr; } + initializer* get_initializer() { return ini; } + bool is_schema_loader() const { return schema_loader; } + + void accept(ast_visitor* visitor) override; +}; + + +// expr class call_expr +class call_expr: public expr { +public: + enum class type { + get_field, + get_path + }; + 
call_expr(const call_expr&) = delete; + call_expr& operator=(const call_expr&) = delete; + +public: + struct aggregator_find_info { + std::string set_name; + std::string set_key; + std::string schema_key; + }; + +private: + type node_call_type; + identifier* name; // specify field or path name + func_call* call; // call function + initializer* ini; // construct structure + type_def* generic_type; // generic type `.field()` `::field()` + + // mark schema(xxx) is schema::__all__(xxx) + bool schema_loader; + + // mark aggregator find + bool flag_is_aggregator_find; + + // mark this is aggregator + bool flag_is_aggregator; + + // mark .to_set() + bool to_set_method; + std::string to_set_type_full_name; + + // mark .key_eq() + bool key_eq_method; + // mark .key_neq() + bool key_not_eq_method; + std::string left_primary_key; + std::string right_primary_key; + + // mark .to() + bool generic_to_method; + // mark .is() + bool generic_is_method; + std::string source_full_name; + std::string generic_full_name; + +private: + void check_call_and_init() { + if (!call || !ini) { + return; + } + fatal_error("call_expr: func_call and initializer both exist."); + } + +public: + call_expr(const span& location): + expr(ast_class::ac_call_expr, location), + name(nullptr), call(nullptr), ini(nullptr), + generic_type(nullptr), schema_loader(false), + flag_is_aggregator_find(false), flag_is_aggregator(false), + to_set_method(false), key_eq_method(false), key_not_eq_method(false), + generic_to_method(false), generic_is_method(false) {} + ~call_expr() override; + void set_call_type(type t) { node_call_type = t; } + void set_field_name(identifier* node) { name = node; } + void set_func_call(func_call* node) { call = node; check_call_and_init(); } + void set_initializer(initializer* node) { ini = node; check_call_and_init(); } + void set_generic_type(type_def* node) { generic_type = node; } + void set_schema_loader() { schema_loader = true; } + void set_aggregator_find() { 
flag_is_aggregator_find = true; } + void set_is_aggregator() { flag_is_aggregator = true; } + void set_to_set_method(const std::string& full_name) { + to_set_method = true; + to_set_type_full_name = full_name; + } + void set_key_eq_method(const std::string& left_key_name, + const std::string& right_key_name) { + key_eq_method = true; + left_primary_key = left_key_name; + right_primary_key = right_key_name; + } + void set_key_not_eq_method(const std::string& left_key_name, + const std::string& right_key_name) { + key_not_eq_method = true; + left_primary_key = left_key_name; + right_primary_key = right_key_name; + } + void set_generic_to() { generic_to_method = true; } + void set_generic_is() { generic_is_method = true; } + void set_generic_input_full_name(const std::string& full_name) { source_full_name = full_name; } + void set_generic_output_full_name(const std::string& full_name) { generic_full_name = full_name; } + + type get_call_type() const { return node_call_type; } + identifier* get_field_name() { return name; } + bool has_func_call() const { return call!=nullptr; } + func_call* get_func_call() { return call; } + bool is_initializer() const { return ini!=nullptr; } + initializer* get_initializer() { return ini; } + bool is_generic() const { return generic_type!=nullptr; } + type_def* get_generic_type() { return generic_type; } + + bool is_schema_loader() const { return schema_loader; } + + bool is_aggregator_find() const { return flag_is_aggregator_find; } + bool is_aggregator() const { return flag_is_aggregator; } + + bool is_to_set_method() const { return to_set_method; } + const auto& get_to_set_schema_name() const { return to_set_type_full_name; } + + bool is_key_eq_method() const { return key_eq_method; } + bool is_key_not_eq_method() const { return key_not_eq_method; } + const auto& get_left_key_name() const { return left_primary_key; } + const auto& get_right_key_name() const { return right_primary_key; } + void accept(ast_visitor* visitor) override; 
+ + bool is_generic_to() const { return generic_to_method; } + bool is_generic_is() const { return generic_is_method; } + const auto& get_generic_input_full_name() const { return source_full_name; } + const auto& get_generic_output_full_name() const { return generic_full_name; } +}; // end class call_expr + + +// expr class call_root +class call_root: public expr { +public: + call_root(const call_root&) = delete; + call_root& operator=(const call_root&) = delete; + +private: + call_head* head; + std::vector call_chain; + +public: + call_root(const span& location): + expr(ast_class::ac_call_root, location), + head(nullptr) {} + ~call_root() override; + void set_call_head(call_head* node) { head = node; } + void add_call_chain(call_expr* node) { call_chain.push_back(node); } + + call_head* get_call_head() { return head; } + auto& get_call_chain() { return call_chain; } + + void accept(ast_visitor* visitor) override; +}; // end class call_root + +} // end namespace godel diff --git a/godel-script/godel-frontend/src/ast/stmt.cpp b/godel-script/godel-frontend/src/ast/stmt.cpp new file mode 100644 index 00000000..59c0f19d --- /dev/null +++ b/godel-script/godel-frontend/src/ast/stmt.cpp @@ -0,0 +1,140 @@ +#include "godel-frontend/src/ast/stmt.h" +#include "godel-frontend/src/ast/ast_visitor.h" + +namespace godel { + +void stmt::accept(ast_visitor* visitor) { + visitor->visit_stmt(this); +} + +block_stmt::~block_stmt() { + for(auto i : statements) { + delete i; + } +} + +void block_stmt::accept(ast_visitor* visitor) { + visitor->visit_block_stmt(this); +} + +multi_use_stmt::~multi_use_stmt() { + for(auto i : symbols) { + delete i; + } +} + +void multi_use_stmt::accept(ast_visitor* visitor) { + visitor->visit_multi_use_stmt(this); +} + +use_stmt::~use_stmt() { + for(auto i : path) { + delete i; + } + delete multi_use; +} + +void use_stmt::accept(ast_visitor* visitor) { + visitor->visit_use_stmt(this); +} + +let_stmt::~let_stmt() { + for(auto i : symbol) { + delete i; + } + 
delete block; +} + +void let_stmt::accept(ast_visitor* visitor) { + visitor->visit_let_stmt(this); +} + +if_stmt::~if_stmt() { + delete condition; + delete block; +} + +void if_stmt::accept(ast_visitor* visitor) { + visitor->visit_if_stmt(this); +} + +cond_stmt::~cond_stmt() { + delete if_statement; + for(auto i : elsif_statement) { + delete i; + } + delete else_statement; +} + +void cond_stmt::accept(ast_visitor* visitor) { + visitor->visit_cond_stmt(this); +} + +for_stmt::~for_stmt() { + for(auto i : symbol) { + delete i; + } + delete block; +} + +void for_stmt::accept(ast_visitor* visitor) { + visitor->visit_for_stmt(this); +} + +match_pair::~match_pair() { + delete literal; + delete statement; +} + +void match_pair::accept(ast_visitor* visitor) { + visitor->visit_match_pair(this); +} + +match_stmt::~match_stmt() { + delete condition; + for(auto i : match_context) { + delete i; + } +} + +void match_stmt::accept(ast_visitor* visitor) { + visitor->visit_match_stmt(this); +} + +ret_stmt::~ret_stmt() { + delete return_value; +} + +void ret_stmt::accept(ast_visitor* visitor) { + visitor->visit_ret_stmt(this); +} + +fact_data::~fact_data() { + for(auto i : literals) { + delete i; + } +} + +void fact_data::accept(ast_visitor* visitor) { + visitor->visit_fact_data(this); +} + +fact_stmt::~fact_stmt() { + for(auto i : data) { + delete i; + } +} + +void fact_stmt::accept(ast_visitor* visitor) { + visitor->visit_fact_stmt(this); +} + +in_block_expr::~in_block_expr() { + delete expression; +} + +void in_block_expr::accept(ast_visitor* visitor) { + visitor->visit_in_block_expr(this); +} + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/stmt.h b/godel-script/godel-frontend/src/ast/stmt.h new file mode 100644 index 00000000..a2358053 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/stmt.h @@ -0,0 +1,350 @@ +#pragma once + +#include "godel-frontend/src/ast/ast_node.h" +#include "godel-frontend/src/ast/expr.h" + 
+#include +#include +#include +#include +#include + +namespace godel { + +// stmt class +class stmt: public ast_node { +public: + stmt(const stmt&) = delete; + stmt& operator=(const stmt&) = delete; + +public: + stmt(ast_class ast_class, const span &location): + ast_node(ast_class, location) {} + virtual ~stmt() override = default; + void accept(ast_visitor* visitor) override; +}; // end class stmt + + +// stmt class block_stmt +class block_stmt: public stmt { +public: + block_stmt(const block_stmt&) = delete; + block_stmt& operator=(const block_stmt&) = delete; + +private: + std::vector statements; + +public: + block_stmt(const span& location): + stmt(ast_class::ac_block_stmt, location) {} + ~block_stmt() override; + void add_statement(stmt* node) { statements.push_back(node); } + + auto& get_statement() { return statements; } + + void accept(ast_visitor* visitor) override; +}; // end class block_stmt + + +// stmt class multi_use_stmt +class multi_use_stmt: public stmt { +public: + multi_use_stmt(const multi_use_stmt&) = delete; + multi_use_stmt& operator=(const multi_use_stmt&) = delete; + +private: + std::vector symbols; + +public: + multi_use_stmt(const span& location): + stmt(ast_class::ac_multi_use, location) {} + ~multi_use_stmt() override; + void add_import_symbol(identifier* node) { symbols.push_back(node); } + + auto& get_import_symbol() { return symbols; } + + void accept(ast_visitor* visitor) override; +}; // end class multi_use_stmt + + +// stmt class use_stmt +class use_stmt: public stmt { +public: + use_stmt(const use_stmt&) = delete; + use_stmt& operator=(const use_stmt&) = delete; + +private: + std::vector path; + bool use_all; + multi_use_stmt* multi_use; + +public: + use_stmt(const span& location): + stmt(ast_class::ac_use_stmt, location), + use_all(false), multi_use(nullptr) {} + ~use_stmt() override; + void add_path(identifier* node) { path.push_back(node); } + void set_use_all() { use_all = true; } + void set_multi_use(multi_use_stmt* node) { 
multi_use = node; } + + auto& get_path() { return path; } + bool is_use_all() const { return use_all; } + multi_use_stmt* get_multi_use() { return multi_use; } + + void accept(ast_visitor* visitor) override; +}; // end class use_stmt + + +// stmt class let_stmt +class let_stmt: public stmt { +public: + let_stmt(const let_stmt&) = delete; + let_stmt& operator=(const let_stmt&) = delete; + +private: + std::vector symbol; + block_stmt* block; + +public: + let_stmt(const span& location): + stmt(ast_class::ac_let_stmt, location), + block(nullptr) {} + ~let_stmt() override; + void add_symbol(var_decl* node) { symbol.push_back(node); } + void set_code_block(block_stmt* node) { block = node; } + + auto& get_symbols() { return symbol; } + bool has_statement() const { return block!=nullptr; } + block_stmt* get_code_block() { return block; } + + void accept(ast_visitor* visitor) override; +}; // end class let_stmt + + +// stmt class if_stmt +class if_stmt: public stmt { +public: + enum class type { + cond_if, + cond_elsif, + cond_else + }; + +public: + if_stmt(const if_stmt&) = delete; + if_stmt& operator=(const if_stmt&) = delete; + +private: + type ctype; + expr* condition; + block_stmt* block; + +public: + if_stmt(const span& location, type cond_type): + stmt(ast_class::ac_if_stmt, location), + ctype(cond_type), condition(nullptr), block(nullptr) {} + ~if_stmt() override; + void set_condition(expr* node) { condition = node; } + void set_code_block(block_stmt* node) { block = node; } + + type get_cond_type() const { return ctype; } + bool has_condition() const { return condition!=nullptr; } + expr* get_condition() { return condition; } + bool has_statement() const { return block!=nullptr; } + block_stmt* get_code_block() { return block; } + + void accept(ast_visitor* visitor) override; +}; // end class if_stmt + + +// stmt class cond_stmt +class cond_stmt: public stmt { +public: + cond_stmt(const cond_stmt&) = delete; + cond_stmt& operator=(const cond_stmt&) = delete; + 
+private: + if_stmt* if_statement; + std::vector elsif_statement; + if_stmt* else_statement; + +public: + cond_stmt(const span& location): + stmt(ast_class::ac_cond_stmt, location), + if_statement(nullptr), else_statement(nullptr) {} + ~cond_stmt() override; + void set_if_stmt(if_stmt* node) { if_statement = node; } + void add_elsif_stmt(if_stmt* node) { elsif_statement.push_back(node); } + void set_else_stmt(if_stmt* node) { else_statement = node; } + + if_stmt* get_if_stmt() { return if_statement; } + auto& get_elsif_stmt() { return elsif_statement; } + bool has_else_stmt() const { return else_statement!=nullptr; } + if_stmt* get_else_stmt() { return else_statement; } + + void accept(ast_visitor* visitor) override; +}; // end class condition_stmt + + +// stmt class for_stmt +class for_stmt: public stmt { +public: + for_stmt(const for_stmt&) = delete; + for_stmt& operator=(const for_stmt&) = delete; + +private: + std::vector symbol; + block_stmt* block; + +public: + for_stmt(const span& location): + stmt(ast_class::ac_for_stmt, location), + block(nullptr) {} + ~for_stmt() override; + void add_symbol(var_decl* node) { symbol.push_back(node); } + void set_code_block(block_stmt* node) { block = node; } + + auto& get_symbols() { return symbol; } + bool has_statement() const { return block!=nullptr; } + block_stmt* get_code_block() { return block; } + + void accept(ast_visitor* visitor) override; +}; // end class for_stmt + + +// stmt class match_pair +class match_pair: public stmt { +public: + match_pair(const match_pair&) = delete; + match_pair& operator=(const match_pair&) = delete; + +private: + expr* literal; + stmt* statement; + +public: + match_pair(const span& location): + stmt(ast_class::ac_match_pair, location), + literal(nullptr), statement(nullptr) {} + ~match_pair() override; + void set_literal(expr* node) { literal = node; } + void set_statement(stmt* node) { statement = node; } + + expr* get_literal() { return literal; } + stmt* get_statement() { return 
statement; } + + void accept(ast_visitor* visitor) override; +}; // end class match_pair + + +// stmt class match_stmt +class match_stmt: public stmt { +public: + match_stmt(const match_stmt&) = delete; + match_stmt& operator=(const match_stmt&) = delete; + +private: + expr* condition; + std::vector match_context; + +public: + match_stmt(const span& location): + stmt(ast_class::ac_match_stmt, location), + condition(nullptr) {} + ~match_stmt() override; + void set_match_condition(expr* node) { condition = node; } + void add_match_pair(match_pair* node) { match_context.push_back(node); } + + expr* get_match_condition() { return condition; } + auto& get_match_pair_list() { return match_context; } + + void accept(ast_visitor* visitor) override; +}; // end class match_stmt + + +// stmt class ret_stmt +class ret_stmt: public stmt { +public: + ret_stmt(const ret_stmt&) = delete; + ret_stmt& operator=(const ret_stmt&) = delete; + +private: + bool flag_is_yield; + expr* return_value; + +public: + ret_stmt(const span& location): + stmt(ast_class::ac_ret_stmt, location), + flag_is_yield(false), return_value(nullptr) {} + ~ret_stmt() override; + void set_is_yield() { flag_is_yield = true; } + void set_return_value(expr* node) { return_value = node; } + + bool is_yield() const { return flag_is_yield; } + expr* get_return_value() { return return_value; } + + void accept(ast_visitor* visitor) override; +}; // end class ret_stmt + + +// stmt class fact_data +class fact_data: public stmt { +public: + fact_data(const fact_data&) = delete; + fact_data& operator=(const fact_data&) = delete; + +private: + std::vector literals; + +public: + fact_data(const span& location): stmt(ast_class::ac_fact_data, location) {} + ~fact_data() override; + void add_literal(expr* node) { literals.push_back(node); } + + auto& get_literals() { return literals; } + + void accept(ast_visitor* visitor) override; +}; // end class fact_data + + +// stmt class fact_stmt +class fact_stmt: public stmt { +public: 
+ fact_stmt(const fact_stmt&) = delete; + fact_stmt& operator=(const fact_stmt&) = delete; + +private: + std::vector data; + +public: + fact_stmt(const span& location): stmt(ast_class::ac_fact_stmt, location) {} + ~fact_stmt() override; + void add_fact(fact_data* node) { data.push_back(node); } + + auto& get_facts() { return data; } + + void accept(ast_visitor* visitor) override; +}; // end class fact_stmt + +// stmt class in_block_expr +class in_block_expr: public stmt { +public: + in_block_expr(const in_block_expr&) = delete; + in_block_expr& operator=(const in_block_expr&) = delete; + +private: + expr* expression; + +public: + in_block_expr(const span& location, expr* wraped_expr): + stmt(ast_class::ac_in_block_expr, location), + expression(wraped_expr) { + assert(wraped_expr != nullptr); + } + ~in_block_expr() override; + expr* get_expr() { return expression; } + + void accept(ast_visitor* visitor) override; +}; + +} // end namespace godel \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/template_extractor.cpp b/godel-script/godel-frontend/src/ast/template_extractor.cpp new file mode 100644 index 00000000..a8c6cfb3 --- /dev/null +++ b/godel-script/godel-frontend/src/ast/template_extractor.cpp @@ -0,0 +1,620 @@ +#include "godel-frontend/src/ast/template_extractor.h" + +#include + +namespace godel { + +bool template_extractor::check_call_is_literal(call_root* node) { + auto type = node->get_call_head()->get_first_expression()->get_ast_class(); + return type==ast_class::ac_number_literal || type==ast_class::ac_string_literal; +} + +bool template_extractor::check_unary_operator_literal_childs(unary_operator* node) { + if (node->get_child()->get_ast_class()==ast_class::ac_call_root) { + return check_call_is_literal((call_root*)node->get_child()); + } + return false; +} + +bool template_extractor::check_binary_operator_literal_childs(binary_operator* node) { + auto left = node->get_left(); + auto right = node->get_right(); + auto left_flag 
= false, right_flag = false; + if (left->get_ast_class()==ast_class::ac_binary_operator) { + left_flag = check_binary_operator_literal_childs((binary_operator*)left); + } else if (left->get_ast_class()==ast_class::ac_unary_operator) { + left_flag = check_unary_operator_literal_childs((unary_operator*)left); + } else if (left->get_ast_class()==ast_class::ac_call_root) { + left_flag = check_call_is_literal((call_root*)left); + } + if (right->get_ast_class()==ast_class::ac_binary_operator) { + right_flag = check_binary_operator_literal_childs((binary_operator*)right); + } else if (right->get_ast_class()==ast_class::ac_unary_operator) { + right_flag = check_unary_operator_literal_childs((unary_operator*)right); + } else if (right->get_ast_class()==ast_class::ac_call_root) { + right_flag = check_call_is_literal((call_root*)right); + } + return left_flag || right_flag; +} + +bool template_extractor::visit_ast_root(ast_root* node) { + for (auto i : node->get_use_statements()) { + i->accept(this); + if (i!=node->get_use_statements().back()) { + result += " "; + } + } + if (node->get_use_statements().size() && node->get_declarations().size()) { + result += " "; + } + for (auto i : node->get_declarations()) { + i->accept(this); + if (i!=node->get_declarations().back()) { + result += " "; + } + } + return true; +} + +bool template_extractor::visit_ast_null([[maybe_unused]] ast_null* node) { + result += ""; + return true; +} + +bool template_extractor::visit_number_literal([[maybe_unused]] number_literal* node) { + result += "LITERAL"; + return true; +} + +bool template_extractor::visit_string_literal([[maybe_unused]] string_literal* node) { + result += "LITERAL"; + return true; +} + +bool template_extractor::visit_boolean_literal(boolean_literal* node) { + result += node->get_flag()? 
"true":"false"; + return true; +} + +bool template_extractor::visit_identifier(identifier* node) { + result += node->get_name(); + return true; +} + +bool template_extractor::visit_unary_operator(unary_operator* node) { + switch (node->get_operator_type()) { + case unary_operator::type::logical_negation: result += "! "; break; + case unary_operator::type::arithmetic_negation: result += "- "; break; + } + result += "("; + node->get_child()->accept(this); + result += ")"; + return true; +} + +bool template_extractor::visit_binary_operator(binary_operator* node) { + if (check_binary_operator_literal_childs(node)) { + result += "COMPLETELY_LITERAL_DOMINATED_EXPRESSION"; + return true; + } + result += "("; + node->get_left()->accept(this); + switch (node->get_operator_type()) { + case binary_operator::type::logical_or: result += " || "; break; + case binary_operator::type::logical_and: result += " && "; break; + case binary_operator::type::compare_equal: result += " = "; break; + case binary_operator::type::compare_not_equal: result += " != "; break; + case binary_operator::type::compare_less: result += " < "; break; + case binary_operator::type::compare_less_equal: result += " <= "; break; + case binary_operator::type::compare_great: result += " > "; break; + case binary_operator::type::compare_great_equal: result += " >= "; break; + case binary_operator::type::in: result += " in "; break; + case binary_operator::type::add: result += " + "; break; + case binary_operator::type::sub: result += " - "; break; + case binary_operator::type::mult: result += " * "; break; + case binary_operator::type::div: result += " / "; break; + } + node->get_right()->accept(this); + result += ")"; + return true; +} + +bool template_extractor::visit_func_call(func_call* node) { + result += "("; + for (auto i : node->get_arguments()) { + i->accept(this); + if (i!=node->get_arguments().back()) { + result += ", "; + } + } + result += ")"; + return true; +} + +bool 
template_extractor::visit_initializer_pair(initializer_pair* node) { + node->get_field_name()->accept(this); + result += ": "; + node->get_field_value()->accept(this); + return true; +} + +bool template_extractor::visit_spread_expr(spread_expr* node) { + result += ".."; + node->get_child()->accept(this); + return true; +} + +bool template_extractor::visit_initializer(initializer* node) { + result += " {"; + for (auto i : node->get_field_pairs()) { + i->accept(this); + if (i!=node->get_field_pairs().back()) { + result += ", "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_call_head(call_head* node) { + node->get_first_expression()->accept(this); + if (node->has_func_call()) { + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + return true; +} + +bool template_extractor::visit_call_expr(call_expr* node) { + switch (node->get_call_type()) { + case call_expr::type::get_field: result += "."; break; + case call_expr::type::get_path: result += "::"; break; + } + node->get_field_name()->accept(this); + if (node->has_func_call()) { + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + return true; +} + +bool template_extractor::visit_call_root(call_root* node) { + node->get_call_head()->accept(this); + for (auto i : node->get_call_chain()) { + i->accept(this); + } + return true; +} + +bool template_extractor::visit_block_stmt(block_stmt* node) { + result += "{"; + bool check_all_yield = true; + for (auto i : node->get_statement()) { + if (i->get_ast_class()!=ast_class::ac_ret_stmt || + !static_cast(i)->is_yield()) { + check_all_yield = false; + } + } + if (check_all_yield) { + result += "yield @@yieldFields@@}"; + return true; + } + for (auto i : node->get_statement()) { + i->accept(this); + if (i!=node->get_statement().back()) { + result += " "; + } + } + result += "}"; + return true; +} + +bool 
template_extractor::visit_multi_use_stmt(multi_use_stmt* node) { + result += "{"; + for (auto i : node->get_import_symbol()) { + i->accept(this); + if (i!=node->get_import_symbol().back()) { + result += ", "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_use_stmt(use_stmt* node) { + result += "use "; + for (auto i : node->get_path()) { + i->accept(this); + result += "::"; + } + if (node->is_use_all()) { + result += "*"; + } else { + node->get_multi_use()->accept(this); + } + return true; +} + +bool template_extractor::visit_let_stmt(let_stmt* node) { + result += "let("; + for (auto i : node->get_symbols()) { + i->accept(this); + if (i!=node->get_symbols().back()) { + result += ", "; + } + } + result += ") "; + if (node->has_statement()) { + node->get_code_block()->accept(this); + } else { + result += "{}"; + } + return true; +} + +bool template_extractor::visit_if_stmt(if_stmt* node) { + switch (node->get_cond_type()) { + case if_stmt::type::cond_if: result += "if"; break; + case if_stmt::type::cond_elsif: result += "else if"; break; + case if_stmt::type::cond_else: result += "else"; break; + } + if (node->has_condition()) { + result += " ("; + node->get_condition()->accept(this); + result += ") "; + } + if (node->has_statement()) { + node->get_code_block()->accept(this); + } else { + result += "{}"; + } + return true; +} + +bool template_extractor::visit_cond_stmt(cond_stmt* node) { + node->get_if_stmt()->accept(this); + for (auto i : node->get_elsif_stmt()) { + i->accept(this); + } + if (node->has_else_stmt()) { + node->get_else_stmt()->accept(this); + } + return true; +} + +bool template_extractor::visit_for_stmt(for_stmt* node) { + result += "for("; + for (auto i : node->get_symbols()) { + i->accept(this); + if (i!=node->get_symbols().back()) { + result += ", "; + } + } + result += ") "; + if (node->has_statement()) { + node->get_code_block()->accept(this); + } else { + result += "{}"; + } + return true; +} + +bool 
template_extractor::visit_match_pair(match_pair* node) { + node->get_literal()->accept(this); + result += " => "; + node->get_statement()->accept(this); + return true; +} + +bool template_extractor::visit_match_stmt(match_stmt* node) { + result += "match("; + node->get_match_condition()->accept(this); + result += ") {"; + for (auto i : node->get_match_pair_list()) { + i->accept(this); + if (i!=node->get_match_pair_list().back()) { + result += ", "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_ret_stmt(ret_stmt* node) { + if (node->is_yield()) { + result += "yield "; + } else { + result += "return "; + } + node->get_return_value()->accept(this); + return true; +} + +bool template_extractor::visit_fact_stmt([[maybe_unused]] fact_stmt* node) { + result += "[COMPLETELY_LITERAL_DOMINATED_EXPRESSION]"; + return true; +} + +bool template_extractor::visit_in_block_expr(in_block_expr* node) { + node->get_expr()->accept(this); + return true; +} + +bool template_extractor::visit_annotation(annotation* node) { + result += node->get_annotation(); + if (node->get_property_string().length()) { + result += "(LITERAL)"; + } + return true; +} + +bool template_extractor::visit_type_def(type_def* node) { + result += (node->is_set()? 
"*":"") + node->get_full_name(); + return true; +} + +bool template_extractor::visit_database_table(database_table* node) { + node->get_name()->accept(this); + result += ": "; + node->get_type()->accept(this); + if (node->has_real_name()) { + result += " as "; + node->get_real_name()->accept(this); + } + return true; +} + +bool template_extractor::visit_database_decl(database_decl* node) { + result += "database "; + node->get_name()->accept(this); + result += " {"; + for (auto i : node->get_tables()) { + i->accept(this); + if (i!=node->get_tables().back()) { + result += "; "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_schema_field(schema_field* node) { + node->get_annotation()->accept(this); + if (node->get_annotation()->get_annotation().length()) { + result += " "; + } + node->get_identifier()->accept(this); + result += ": "; + node->get_field_type()->accept(this); + return true; +} + +bool template_extractor::visit_schema_decl(schema_decl* node) { + result += "schema "; + node->get_name()->accept(this); + if (node->has_parent()) { + result += " extends "; + node->get_parent_name()->accept(this); + } + result += " {"; + for (auto i : node->get_fields()) { + i->accept(this); + if (i!=node->get_fields().back()) { + result += ", "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_function_decl(function_decl* node) { + for (auto i : node->get_annotations()) { + i->accept(this); + result += " "; + } + result += "fn "; + node->get_name()->accept(this); + result += "("; + for (auto i : node->get_parameter_list()) { + i->accept(this); + if (i!=node->get_parameter_list().back()) { + result += ", "; + } + } + result += ")"; + if (node->has_return_value()) { + result += " -> "; + node->get_return_type()->accept(this); + } + if (node->implemented()) { + node->get_code_block()->accept(this); + } else { + result += ";"; + } + return true; +} + +bool template_extractor::visit_enum_decl(enum_decl* node) { + result += 
"enum "; + node->get_name()->accept(this); + result += " {"; + for (auto i : node->get_member()) { + i->accept(this); + if (i!=node->get_member().back()) { + result += ", "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_var_decl(var_decl* node) { + node->get_var_name()->accept(this); + if (node->has_declared_type()) { + result += ": "; + node->get_type()->accept(this); + } + if (node->has_init_value()) { + result += " = "; + node->get_init_value()->accept(this); + } + return true; +} + +bool template_extractor::visit_impl_block(impl_block* node) { + result += "impl "; + node->get_impl_schema_name()->accept(this); + result += " {"; + for (auto i : node->get_functions()) { + i->accept(this); + if (i!=node->get_functions().back()) { + result += " "; + } + } + result += "}"; + return true; +} + +bool template_extractor::visit_query_column(query_column* node) { + node->get_column_value()->accept(this); + if (node->has_column_name()) { + result += " as "; + node->get_column_name()->accept(this); + } + return true; +} + +void template_extractor::dump_query_from_list(query_decl* node) { + result += "from "; + for (auto i : node->get_from_list()) { + i->accept(this); + if (i!=node->get_from_list().back()) { + result += ", "; + } + } +} + +void template_extractor::dump_query_select_list(query_decl* node) { + result += "select "; + for (auto i : node->get_select_list()) { + i->accept(this); + if (i!=node->get_select_list().back()) { + result += ", "; + } + } +} + +bool template_extractor::visit_query_decl(query_decl* node) { + result += "query "; + node->get_name()->accept(this); + result += " "; + dump_query_from_list(node); + if (node->has_condition()) { + result += " where "; + node->get_where_condition()->accept(this); + } + result += " "; + dump_query_select_list(node); + return true; +} + +bool location_extractor::visit_impl_block(impl_block* node) { + impls.push_back(node); + schema_name = node->get_impl_schema_name()->get_name(); + 
node->get_impl_schema_name()->accept(this); + for(auto i : node->get_functions()) { + i->accept(this); + } + schema_name = ""; + return true; +} + +bool location_extractor::visit_schema_decl(schema_decl* node) { + schemas.push_back(node); + node->get_name()->accept(this); + if (node->has_parent()) { + node->get_parent_name()->accept(this); + } + for(auto i : node->get_fields()) { + i->accept(this); + } + return true; +} + +bool location_extractor::visit_function_decl(function_decl* node) { + funcs.push_back({schema_name, node}); + for(auto i : node->get_annotations()) { + i->accept(this); + } + node->get_name()->accept(this); + for(auto i : node->get_parameter_list()) { + i->accept(this); + } + if (node->has_return_value()) { + node->get_return_type()->accept(this); + } + if (node->implemented()) { + node->get_code_block()->accept(this); + } + return true; +} + +std::string location_extractor::location_dump(const span& loc) { + std::string res = "{\"filename\":\"" + loc.file + "\","; + res += "\"begin_line\":" + std::to_string(loc.start_line) + ","; + res += "\"begin_column\":" + std::to_string(loc.start_column) + ","; + res += "\"end_line\":" + std::to_string(loc.end_line) + ","; + res += "\"end_column\":" + std::to_string(loc.end_column) + "}"; + return res; +} + +void location_extractor::output(ast_root* root, const std::string& file) { + schema_name = ""; + impls = {}; + funcs = {}; + root->accept(this); + + std::string res = "{"; + res += "\"impl\": ["; + for(auto i : impls) { + res += "{"; + res += "\"name\":\"" + i->get_impl_schema_name()->get_name() + "\","; + res += "\"location\":" + location_dump(i->get_location()); + res += "},"; + } + if (res.back()==',') { + res.pop_back(); + } + res += "],"; + res += "\"schema\": ["; + for(auto i : schemas) { + res += "{"; + res += "\"name\":\"" + i->get_name()->get_name() + "\","; + res += "\"location\":" + location_dump(i->get_location()); + res += "},"; + } + if (res.back()==',') { + res.pop_back(); + } + res += 
"],"; + res += "\"funcs\": ["; + for(auto i : funcs) { + res += "{"; + res += "\"name\":\"" + i.second->get_name()->get_name() + "\","; + res += "\"schema\":\"" + i.first + "\","; + res += "\"location\":" + location_dump(i.second->get_location()); + res += "},"; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]}"; + + std::ofstream(file) << res; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ast/template_extractor.h b/godel-script/godel-frontend/src/ast/template_extractor.h new file mode 100644 index 00000000..a50a391a --- /dev/null +++ b/godel-script/godel-frontend/src/ast/template_extractor.h @@ -0,0 +1,93 @@ +#pragma once + +#include "godel-frontend/src/ast/ast_visitor.h" + +#include +#include +#include + +namespace godel { + +// extract probable template from godel script +class template_extractor: public ast_visitor { +private: + std::string result; + +private: + bool check_call_is_literal(call_root*); + bool check_unary_operator_literal_childs(unary_operator*); + bool check_binary_operator_literal_childs(binary_operator*); + +private: + bool visit_ast_root(ast_root*) override; + bool visit_ast_null(ast_null*) override; + + bool visit_number_literal(number_literal*) override; + bool visit_string_literal(string_literal*) override; + bool visit_boolean_literal(boolean_literal*) override; + bool visit_identifier(identifier*) override; + bool visit_unary_operator(unary_operator*) override; + bool visit_binary_operator(binary_operator*) override; + bool visit_func_call(func_call*) override; + bool visit_initializer_pair(initializer_pair*) override; + bool visit_spread_expr(spread_expr*) override; + bool visit_initializer(initializer*) override; + bool visit_call_head(call_head*) override; + bool visit_call_expr(call_expr*) override; + bool visit_call_root(call_root*) override; + + bool visit_block_stmt(block_stmt*) override; + bool visit_multi_use_stmt(multi_use_stmt*) override; + bool visit_use_stmt(use_stmt*) 
override; + bool visit_let_stmt(let_stmt*) override; + bool visit_if_stmt(if_stmt*) override; + bool visit_cond_stmt(cond_stmt*) override; + bool visit_for_stmt(for_stmt*) override; + bool visit_match_pair(match_pair*) override; + bool visit_match_stmt(match_stmt*) override; + bool visit_ret_stmt(ret_stmt*) override; + bool visit_fact_stmt(fact_stmt*) override; + bool visit_in_block_expr(in_block_expr*) override; + + bool visit_annotation(annotation*) override; + bool visit_type_def(type_def*) override; + bool visit_database_table(database_table*) override; + bool visit_database_decl(database_decl*) override; + bool visit_schema_field(schema_field*) override; + bool visit_schema_decl(schema_decl*) override; + bool visit_function_decl(function_decl*) override; + bool visit_enum_decl(enum_decl*) override; + bool visit_var_decl(var_decl*) override; + bool visit_impl_block(impl_block*) override; + bool visit_query_column(query_column*) override; + + void dump_query_from_list(query_decl*); + void dump_query_select_list(query_decl*); + bool visit_query_decl(query_decl*) override; + +public: + const std::string& to_string(ast_root* root) { + result = ""; + visit_ast_root(root); + return result; + } +}; + +class location_extractor: public ast_visitor { +private: + std::string schema_name; + std::vector impls; + std::vector schemas; + std::vector> funcs; + +private: + bool visit_impl_block(impl_block*) override; + bool visit_schema_decl(schema_decl*) override; + bool visit_function_decl(function_decl*) override; + std::string location_dump(const span&); + +public: + void output(ast_root*, const std::string&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/cli.cpp b/godel-script/godel-frontend/src/cli.cpp new file mode 100644 index 00000000..2e7709f8 --- /dev/null +++ b/godel-script/godel-frontend/src/cli.cpp @@ -0,0 +1,220 @@ +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/util/util.h" +#include "cli.h" + 
+#include + +namespace godel { +namespace cli { + +std::ostream& welcome(std::ostream& out) { + using util::green; + using util::reset; + + out + << green << "\n" + << " _____ /_\\_/_\\ _____ _____ ____\n" + << "/ __\\/ _ \\| _ \\/ __\\/ _/\n" + << "| |_ || | || | || __|| |---\n" + << "\\_____/\\_____/|_____/\\_____/\\_____/\n" + << " _____ _____ _____ ___ _____ ____ \n" + << " / ___>/ \\/ _ \\/___\\/ _ \\/ \\\n" + << " |___ || |--|| _ <| || __/\\- -/\n" + << " <_____/\\_____/\\__|\\_/\\___/\\__/ |__| \n" + << reset << "\n" + << green << "GodelScript " << util::generate_version_from_time_macro(__DATE__) + << reset << " (" << __DATE__ << " " << __TIME__ << ")\n\n" + << "Input " << green << "-h" << reset << " to get help.\n\n"; + return out; +} + +std::ostream& version(std::ostream& out) { + out << "GodelScript " << util::generate_version_from_time_macro(__DATE__); + out << " (" << __DATE__ << " " << __TIME__ << ")\n"; + return out; +} + +std::ostream& help(std::ostream& out) { + using util::reset; + using util::green; + + out + << reset << "\nUsage: ./godel " + << green << "[options] \n\n" + << reset << "Compile options:\n" + << green << " -s, --souffle " + << reset << "Output generated souffle to file.\n" + << green << " -r, --run-souffle " + << reset << "Run compiled godel script program directly.\n" + << green << " -p, --package-path " + << reset << "Give godelscript package root path.\n" + << green << " -f, --fact " + << reset << "Specify souffle fact data path.\n" + << green << " -e, --extract-template " + << reset << "Extract probable script template.\n" + << green << " -l, --location-extract " + << reset << "Extract all functions and methods location into json.\n"; + out + << reset << "\nInformation dump options:\n" + << green << " -h, --help " + << reset << "Show help message.\n" + << green << " -v, --verbose " + << reset << "Show verbose message.\n" + << green << " -V, --version " + << reset << "Show version info.\n" + << green << " -u, --used-module " + << 
reset << "Show used modules.\n" + << green << " -d, --dump " + << reset << "Show abstract syntax tree.\n" + << green << " --color-off " + << reset << "Show abstract syntax tree without color.\n" + << green << " --dump-sema " + << reset << "Show semantic analysis result.\n" + << green << " --dump-resolve " + << reset << "Show resolved abstract syntax tree.\n" + << green << " --dump-global " + << reset << "Show global symbol.\n" + << green << " --dump-local " + << reset << "Show local symbol.\n"; + out + << reset << "\nLanguage server options:\n" + << green << " --dump-lsp " + << reset << "Show semantic result in json format.\n" + << green << " --lsp-dump-use-indexed-file " + << reset << "Use file index instead of string.\n"; + out + << reset << "\nLexical analysis dump options:\n" + << green << " --lexer-dump-token " + << reset << "Dump analysed tokens.\n" + << green << " --lexer-dump-comment " + << reset << "Dump collected comments.\n"; + out + << reset << "\nSemantic checker options:\n" + << green << " --semantic-only " + << reset << "Only do semantic analysis and exit.\n" + << green << " --semantic-pub-check " + << reset << "Enable semantic public access authority checker.\n" + << green << " --semantic-no-else " + << reset << "Enable semantic no else branch checker.\n"; + out + << reset << "\nSouffle code generation options:\n" + << green << " -Of, --opt-for " + << reset << "Enable souffle code generator for statement optimizer.\n" + << green << " -Ol, --opt-let " + << reset << "Enable souffle code generator let statement optimizer(not suggested).\n" + << green << " -Oim, --opt-ir-merge " + << reset << "Enable souffle inst combine pass (Experimental).\n" + << green << " -Osc, --opt-self-constraint " + << reset << "Enable self data constraint optimizer in souffle code generator.\n" + << green << " --disable-remove-unused " + << reset << "Disable unused method deletion pass.\n" + << green << " --disable-do-schema-opt " + << reset << "Disable DO Schema data 
constraint __all__ method optimization.\n" + << green << " --souffle-debug " + << reset << "Dump generated souffle code by stdout.\n" + << green << " --souffle-slow-transformers " + << reset << "Enable Souffle slow transformers.\n" + << green << " --enable-souffle-profiling " + << reset << "Enable Souffle profiling log, output to .\n" + << green << " --enable-souffle-cache " + << reset << "Enable Souffle cache (experimental).\n" + << green << " --clean-souffle-cache " + << reset << "Clean Souffle cache (experimental).\n" + << green << " --output-json " + << reset << "Redirect stdout souffle execution result into json.\n" + << green << " --output-csv " + << reset << "Redirect stdout souffle execution result into csv.\n" + << green << " --output-sqlite " + << reset << "Redirect stdout souffle execution result into sqlite.\n"; + + out << "\n"; + return out; +} + +void report_invalid_argument(const std::string& arg) { + struct distance_info { + size_t distance; + std::string content; + }; + std::vector possible_choise; + for(const auto& i : settings) { + auto distance = util::levenshtein_distance(i.first, arg); + possible_choise.push_back({distance, i.first}); + } + for(const auto& i : options) { + auto distance = util::levenshtein_distance(i.first, arg); + possible_choise.push_back({distance, i.first}); + } + + std::sort( + possible_choise.begin(), + possible_choise.end(), + [](const distance_info& left, const distance_info& right) { + return left.distance < right.distance; + } + ); + auto info = "invalid argument <" + arg + ">, possible arguments: "; + size_t count = 0; + for(const auto& i : possible_choise) { + info += "<" + i.content + ">"; + ++count; + if (count>2) { + info += "."; + break; + } else { + info += ", "; + } + } + report::error().fatal(info); +} + +configure process_args(const std::vector& vec) { + configure config = { + {option::cli_executable_path, vec[0]} // load executable path here + }; + + report::error err; + for(size_t i = 1; i=vec.size() || 
vec[i][0]=='-') { + err.fatal( + "expected " + settings.at(arg).error_info + + " after <" + arg + ">." + ); + } + config[settings.at(arg).command_type] = vec[i]; + } else if (arg[0]!='-' && !config.count(option::cli_input_path)) { + config[option::cli_input_path] = arg; + } else { + report_invalid_argument(arg); + } + } + + // enable global color off mode + if (config.count(option::cli_color_off)) { + util::enable_color_off_mode(); + } + + if (config.count(option::cli_help)) { + std::clog << help; + std::exit(0); + } + + if (config.count(option::cli_version)) { + std::clog << version; + std::exit(0); + } + + if (!config.count(option::cli_input_path)) { + err.fatal("input file is required."); + } + + return config; +} + +} +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/cli.h b/godel-script/godel-frontend/src/cli.h new file mode 100644 index 00000000..041be21d --- /dev/null +++ b/godel-script/godel-frontend/src/cli.h @@ -0,0 +1,129 @@ +#pragma once + +#include +#include +#include +#include + +namespace godel { +namespace cli { + +enum class option { + cli_executable_path, // executable path of godel + cli_input_path, // input(to be analysed) file path + cli_package_path, // library directory path + cli_fact_path, // fact data path(directory) + cli_extract_template, // extract template + cli_extract_location, // extract location + cli_run_souffle, // generate souffle and run + cli_dump_souffle_file, // generate souffle and dump + + cli_help, // get help + cli_verbose, // verbose output information + cli_version, // get version + cli_used_module, // get used modules + cli_dump_ast, // abstract syntax tree dump + cli_color_off, // switch ast dump color mode off + cli_dump_semantic, // get godel frontend dump + cli_dump_resolve, // show resolved ast + cli_dump_global, // get global symbol information + cli_dump_local, // get local variables' information + + cli_dump_lsp, // get godel frontend json dump + cli_dump_lsp_file_indexed, // use 
indexed file name in json dump + + cli_lexer_dump_token, // dump tokens + cli_lexer_dump_comment, // dump comments + cli_semantic_only, // only do semantic analysis and exit + cli_semantic_pub_check, // switch pub-access check on + cli_semantic_no_else, // switch no-else check on + + cli_enable_for_opt, // switch for optimization on + cli_enable_let_opt, // switch let optimization on + cli_enable_ir_merge, // switch ir merge on + cli_enable_self_constraint_opt, // switch self constraint optimization on + cli_disable_remove_unused, // switch unused method deletion off + cli_disable_do_schema_opt, // switch do schema optimization off + cli_souffle_debug_dump, // switch souffle debug mode on + cli_souffle_slow_transformers, // switch souffle slow transformers on + cli_enable_souffle_cache, // switch souffle cache on + cli_clean_souffle_cache, // switch clean souffle cache on + cli_enable_souffle_profiling, // switch souffle profiling on + cli_souffle_json_output, // switch souffle json output on + cli_souffle_csv_output, // switch souffle csv output on + cli_souffle_sqlite_output // switch souffle sqlite output on +}; + +struct info_setting { + option command_type; + std::string error_info; +}; + +const std::unordered_map settings = { + {"--package-path", {option::cli_package_path, "package root path"}}, + {"-p", {option::cli_package_path, "package root path"}}, + {"--location-extract", {option::cli_extract_location, "location file output path"}}, + {"-l", {option::cli_extract_location, "location file output path"}}, + {"--souffle", {option::cli_dump_souffle_file, "souffle dump output path"}}, + {"-s", {option::cli_dump_souffle_file, "souffle dump output path"}}, + {"--fact", {option::cli_fact_path, "souffle fact path"}}, + {"-f", {option::cli_fact_path, "souffle fact path"}}, + {"--output-json", {option::cli_souffle_json_output, "souffle json output path"}}, + {"--output-csv", {option::cli_souffle_csv_output, "souffle csv output path"}}, + {"--output-sqlite", 
{option::cli_souffle_sqlite_output, "souffle sqlite output path"}} +}; + +const std::unordered_map options = { + {"--help", option::cli_help}, + {"-h", option::cli_help}, + {"--verbose", option::cli_verbose}, + {"-v", option::cli_verbose}, + {"--version", option::cli_version}, + {"-V", option::cli_version}, + {"--run", option::cli_run_souffle}, + {"-r", option::cli_run_souffle}, + {"--used-module", option::cli_used_module}, + {"-u", option::cli_used_module}, + {"--dump", option::cli_dump_ast}, + {"-d", option::cli_dump_ast}, + {"--extract-template", option::cli_extract_template}, + {"-e", option::cli_extract_template}, + {"--dump-sema", option::cli_dump_semantic}, + {"--dump-resolve", option::cli_dump_resolve}, + {"--dump-global", option::cli_dump_global}, + {"--dump-local", option::cli_dump_local}, + {"--dump-lsp", option::cli_dump_lsp}, + {"--lsp-dump-use-indexed-file", option::cli_dump_lsp_file_indexed}, + {"--color-off", option::cli_color_off}, + {"--lexer-dump-token", option::cli_lexer_dump_token}, + {"--lexer-dump-comment", option::cli_lexer_dump_comment}, + {"--semantic-only", option::cli_semantic_only}, + {"--semantic-pub-check", option::cli_semantic_pub_check}, + {"--semantic-no-else", option::cli_semantic_no_else}, + {"--opt-for", option::cli_enable_for_opt}, + {"-Of", option::cli_enable_for_opt}, + {"--opt-let", option::cli_enable_let_opt}, + {"-Ol", option::cli_enable_let_opt}, + {"--opt-ir-merge", option::cli_enable_ir_merge}, + {"-Oim", option::cli_enable_ir_merge}, + {"--opt-self-constraint", option::cli_enable_self_constraint_opt}, + {"-Osc", option::cli_enable_self_constraint_opt}, + {"--disable-remove-unused", option::cli_disable_remove_unused}, + {"--disable-do-schema-opt", option::cli_disable_do_schema_opt}, + {"--souffle-debug", option::cli_souffle_debug_dump}, + {"--souffle-slow-transformers", option::cli_souffle_slow_transformers}, + {"--enable-souffle-profiling", option::cli_enable_souffle_profiling}, + {"--enable-souffle-cache", 
option::cli_enable_souffle_cache}, + {"--clean-souffle-cache", option::cli_clean_souffle_cache} +}; + +typedef std::unordered_map configure; + +std::ostream& welcome(std::ostream&); +std::ostream& version(std::ostream&); +std::ostream& help(std::ostream&); +void report_invalid_argument(const std::string&); +configure process_args(const std::vector&); + +} +} diff --git a/godel-script/godel-frontend/src/engine.cpp b/godel-script/godel-frontend/src/engine.cpp new file mode 100644 index 00000000..ad995112 --- /dev/null +++ b/godel-script/godel-frontend/src/engine.cpp @@ -0,0 +1,546 @@ +#include "godel-frontend/src/engine.h" +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/util/util.h" +#include "godel-frontend/src/package/package.h" +#include "godel-frontend/src/ast/ast_dumper.h" +#include "godel-frontend/src/ast/template_extractor.h" + +#include "godel-frontend/src/ir/ir_gen.h" + +#include +#include +#include +#include +#include +#include + +namespace godel { + +std::string engine::dump_json_token() const { + std::string res = "["; + for (const auto& i : lexical_analyser.result()) { + if (i.type==tok::tok_id) { + res += i.to_json() + ","; + } + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_comment() const { + std::string res = "["; + for (const auto& i : lexical_analyser.extract_comments()) { + res += i.to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_basic() const { + std::string res = "\"basic\":["; + for(const auto& i : global().get_basics()) { + res += i.second.to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_enum() const { + std::string res = "\"enum\":["; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::enumerate) { + continue; + } + res += global().get_enum(mapper().at(i.first)).to_json() 
+ ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_schema() const { + std::string res = "\"schema\":["; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::schema) { + continue; + } + res += global().get_schema(mapper().at(i.first)).to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_database() const { + std::string res = "\"database\":["; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::database) { + continue; + } + res += global().get_database(mapper().at(i.first)).to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_trait() const { + // FIXME: this is deprecated + // but delete this will cause error in language server + return "\"trait\":[]"; +} + +std::string engine::dump_json_fn() const { + std::string res = "\"function\":["; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::function) { + continue; + } + res += global().get_func(mapper().at(i.first)).to_json() + ","; + } + for(const auto& i : global().get_native()) { + res += i.second.to_json() + ","; + } + for(const auto& i : global().get_aggregator()) { + res += i.second.to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_query() const { + std::string res = "\"query\":["; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::query) { + continue; + } + res += global().get_query(mapper().at(i.first)).to_json() + ","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} + +std::string engine::dump_json_package() const { + std::string res = "\"package\":["; + for(const auto& i : global().get_packages()) { + res += "\"" + i.first + "\","; + } + if (res.back()==',') { + res.pop_back(); + } + res += "]"; + return res; +} 
+ +std::string engine::dump_json_local() const { + std::string res = "["; + for(const auto& record : global().get_locals()) { + // avoid local symbols that are not in this file + if (record.location.file != this_file_name) { + continue; + } + // we do not need to output this local variable + if (record.name=="self") { + continue; + } + res += "{\"name\":\"" + record.name + "\","; + res += "\"location\":" + record.location.to_json() + ","; + res += "\"type\":" + record.type.to_json() + "},"; + } + for(const auto& record : global().get_reserved_id()) { + res += "{\"name\":\"" + record.first + "\","; + res += "\"location\":" + record.second.location.to_json() + ","; + res += "\"type\":" + record.second.to_json() + "},"; + } + if (res.back() == ',') { + res.pop_back(); + } + return res + "]"; +} + +std::string engine::dump_json_infer() const { + std::string res = "["; + for(const auto& record : global().get_infers()) { + // avoid unnecessary infers that are not in this file + if (record.location.file != this_file_name) { + continue; + } + res += "{\"location\":" + record.location.to_json() + ","; + res += "\"type\":" + record.type.to_json() + "},"; + } + if (res.back() == ',') { + res.pop_back(); + } + return res + "]"; +} + +std::string engine::dump_json_used_files() const { + std::string res = "["; + + std::vector used_files; + used_files.resize(span::get_location_file_mapper().size()); + for(const auto& i : span::get_location_file_mapper()) { + used_files[i.second] = i.first; + } + + for(const auto& i : used_files) { + res += "\"" + i + "\""; + if (i!=used_files.back()) { + res += ","; + } + } + + return res + "]"; +} + +void engine::dump_json(std::ostream& out) const { + out << "{"; + + out << "\"semantic\":{"; + out << dump_json_basic() << ","; + out << dump_json_enum() << ","; + out << dump_json_database() << ","; + out << dump_json_schema() << ","; + + // FIXME: this is deprecated, but cannot be moved now + // if moved, language server will fail + out << 
dump_json_trait() << ","; + + out << dump_json_fn() << ","; + out << dump_json_query() << ","; + out << dump_json_package() << "},"; + + out << "\"tokens\":"; + out << dump_json_token() << ","; + out << "\"comments\":"; + out << dump_json_comment() << ","; + out << "\"locals\":"; + out << dump_json_local() << ","; + out << "\"infers\":"; + out << dump_json_infer() << ","; + out << "\"files\":"; + out << dump_json_used_files(); + + out << "}"; +} + +void engine::dump_used_modules() const { + const auto instance = godel_module::instance(); + for(const auto& i : instance->get_all_module_status()) { + if (i.second!=godel_module::module_status::analysed) { + continue; + } + std::cout << i.first << " "; + std::cout << instance->find_file_by_module_path(i.first) << "\n"; + } +} + +bool engine::language_server_dump(const configure& config) { + if (!config.count(option::cli_dump_lsp)) { + return false; + } + if (config.count(option::cli_dump_lsp_file_indexed)) { + span::set_flag_lsp_dump_use_file_index(true); + } + dump_json(std::cout); + error::json_output_stderr(); + return true; +} + +void engine::scan_package_root(const std::string& package_path, bool verbose) { + util::time_stamp duration; + duration.stamp(); + godel_module::instance()->set_compiled_file_path(this_file_name); + if (godel_module::instance()->scanpkg(package_path).get_error()) { + error::json_output_stderr(); + flag_execution_terminated = true; + return; + } + + // in verbose mode, dump + if (!verbose) { + return; + } + auto scan_time = duration.duration(); + std::clog << util::format_time(scan_time) << " package scan\n"; + godel_module::instance()->dump(); + godel_module::instance()->dump_module_tree(); + return; +} + +void engine::do_lexical_analysis(const configure& config) { + util::time_stamp duration; + duration.stamp(); + if (lexical_analyser.scan(this_file_name).get_error()) { + language_server_dump(config); + flag_execution_terminated = true; + return; + } + prof.lexical_analysis_time = 
duration.duration(); +} + +void engine::do_syntax_analysis(const configure& config) { + util::time_stamp duration; + duration.stamp(); + if (syntax_parser.analyse(lexical_analyser.result()).get_error()) { + language_server_dump(config); + flag_execution_terminated = true; + return; + } + prof.syntax_parse_time = duration.duration(); +} + +void engine::do_semantic_analysis(const configure& config) { + util::time_stamp duration; + duration.stamp(); + if (semantic_analyser.analyse(config, syntax_parser.result()).get_error()) { + language_server_dump(config); + flag_execution_terminated = true; + return; + } + prof.semantic_analysis_time = duration.duration(); +} + +void engine::ast_structure_dump() { + auto dumper = ast_dumper(std::cout); + syntax_parser.result()->accept(&dumper); +} + +void engine::template_extract() { + auto dumper = template_extractor(); + std::cout << dumper.to_string(syntax_parser.result()); + return; +} + +void engine::run_souffle(const configure& config) { + const auto souffle_content = ir_gen::get_mutable_context().str_output(config); + + // extra arguments to be passed to souffle + std::vector argv = {}; + + // disable souffle slow transformers, some works of these transformers + // have been done by godel semantic analysis and IR analysis passes + // so we can skip them + if (!config.count(option::cli_souffle_slow_transformers)) { + argv.push_back( + "--disable-transformers=" + "SubsumptionQualifierTransformer," + "SemanticChecker," + "MinimiseProgramTransformer" + ); + } + + // enable souffle profiling, souffle will run slower in this mode + // and generate a prof file named "souffle.prof.log", + // and then we can analyze it to find the performance bottlenecks + if (config.count(option::cli_enable_souffle_profiling)) { + argv.push_back("--profile=souffle.prof.log"); + argv.push_back("--profile-frequency"); + } + + // null terminator + argv.push_back(nullptr); + + // souffle fact path + const auto fact_path = 
config.count(option::cli_fact_path)? + config.at(option::cli_fact_path).c_str():""; + + // execute souffle and get exitcode + const auto exitcode = souffle_engine::souffle_entry( + config.at(option::cli_executable_path).c_str(), + // program path + "", // query file name, set it as "" + souffle_content.c_str(), // souffle program content + fact_path, // fact(database) search path + "", // library search path + "", // library name + 0, // enable warning + config.count(option::cli_verbose), + // verbose info + argv.data() // extra arguments + ); + + // if exits with non-zero value, exit with the same value + if (exitcode) { + std::exit(exitcode); + } + + // merge json files generated by souffle + const auto& ictx = ir_gen::get_context(); + const auto& vec = ictx.souffle_output; + // merge json files if multiple output files are generated + if (vec.size()>1 && ictx.json_output_path.size()) { + const auto temp = std::filesystem::temp_directory_path(); + std::ofstream out(ictx.json_output_path); + out << "{"; + for(const auto& i : vec) { + // get temp file path + const auto tmpfile = temp / ("godel_script_" + i + ".json"); + if (config.count(option::cli_verbose)) { + std::clog << "Merge json output ["; + std::clog << ictx.souffle_output_real_name.at(i) << "] into "; + std::clog << ictx.json_output_path << "\n"; + std::clog << " - Temporary file path: " << tmpfile << "\n"; + } + + std::ifstream in(tmpfile); + std::stringstream ss; + ss << in.rdbuf(); + out << "\"" << ictx.souffle_output_real_name.at(i) << "\":"; + out << ss.str(); + + // delete temp file + std::filesystem::remove(tmpfile); + if (i!=vec.back()) { + out << ","; + } + } + out << "}"; + } +} + +const error& engine::run(const configure& config) { + util::time_stamp total_time_duration; + total_time_duration.stamp(); + + this_file_name = config.at(option::cli_input_path); + if (!this_file_name.length()) { + return err; + } + + if (config.count(option::cli_dump_lsp)) { + report::error::set_json_out(); + } + + 
if (config.count(option::cli_package_path)) { + scan_package_root( + config.at(option::cli_package_path), + config.count(option::cli_verbose) + ); + if (flag_execution_terminated) { + return err; + } + } + + // error module load file here + err.load(this_file_name); + + // do lexical analysis + do_lexical_analysis(config); + if (flag_execution_terminated) { + return err; + } + if (config.count(option::cli_lexer_dump_token)) { + lexical_analyser.dump(); + } + if (config.count(option::cli_lexer_dump_comment)) { + lexical_analyser.dump_comments(); + } + + // do syntax analysis + do_syntax_analysis(config); + if (flag_execution_terminated) { + return err; + } + if (config.count(option::cli_dump_ast)) { + ast_structure_dump(); + } + if (config.count(option::cli_extract_template)) { + template_extract(); + return err; + } + if (config.count(option::cli_extract_location)) { + location_extractor().output( + syntax_parser.result(), + config.at(option::cli_extract_location) + ); + return err; + } + + // do semantic analysis (also do souffle code generation) + do_semantic_analysis(config); + if (flag_execution_terminated) { + return err; + } + if (config.count(option::cli_dump_semantic)) { + semantic_analyser.dump_detail_semantic_info(); + } + // dump ast, but this time the ast has resolved type info + if (config.count(option::cli_dump_resolve)) { + ast_structure_dump(); + } + if (config.count(option::cli_dump_global)) { + semantic_analyser.get_context().global.dump(); + } + if (config.count(option::cli_dump_local)) { + semantic_analyser.dump_local_variables(); + } + + // transfer analysed result to language server in json format + // and return immediately to avoid changing output file + if (language_server_dump(config)) { + return err; + } + + // record total compile time + prof.total_time = total_time_duration.duration(); + + // if only semantic analysis is required, return + // also used when analysing a module + if (config.count(option::cli_semantic_only)) { + return 
err; + } + + // dump profiling info + if (config.count(option::cli_verbose)) { + prof.dump(this_file_name); + } + + // dump used modules + if (config.count(option::cli_used_module)) { + dump_used_modules(); + } + + // dump generated souffle code to file + if (config.count(option::cli_dump_souffle_file)) { + ir_gen::get_mutable_context().file_output( + config.at(option::cli_dump_souffle_file), + config + ); + } + + // dump generated souffle code to console(stdout) + if (config.count(option::cli_souffle_debug_dump)) { + const auto souffle_content = ir_gen::get_mutable_context().str_output(config); + std::cout << souffle_content << "\n"; + } + + // directly run souffle program + if (config.count(option::cli_run_souffle)) { + run_souffle(config); + } + return err; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/engine.h b/godel-script/godel-frontend/src/engine.h new file mode 100644 index 00000000..4ab06fd0 --- /dev/null +++ b/godel-script/godel-frontend/src/engine.h @@ -0,0 +1,131 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/package/package.h" +#include "cli.h" +#include "lexer.h" +#include "parse.h" +#include "semantic.h" + +#include +#include +#include +#include +#include + +namespace godel { + +namespace souffle_engine { + +// souffle main C API +extern "C" int souffle_main(int argc, char** argv); + +// souffle program entry used by godel +// program_path : program path(aka argv[0]), Soufflé use this path +// : to find library and header files +// query_file : query file path +// query_content : [default nullptr] query content, if not null, +// : souffle does not read the query file +// fact_search_path : [default nullptr] fact file search path +// library_search_path : [default nullptr] library search path +// library_name : [default nullptr] library name +// enable_warning : enable warning, 1 for enabled, 0 for disabled +// verbose : use verbose info +// extra_args : [default 
nullptr] extra arguments, array ends with nullptr +extern "C" int souffle_entry(const char* program_path, + const char* query_file, + const char* query_content, + const char* fact_search_path, + const char* library_search_path, + const char* library_name, + int enable_warning, + int verbose, + const char* extra_args[]); + +} + +using report::error; +using report::span; +using package::godel_module; + +struct profile_data { + double lexical_analysis_time = -1; + double syntax_parse_time = -1; + double semantic_analysis_time = -1; + double total_time = -1; + + void dump(const std::string& file_name) const { + std::clog << util::format_time(total_time); + std::clog << " analyse <" << file_name << ">\n"; + std::clog << util::format_time(lexical_analysis_time); + std::clog << " lexical analysis\n"; + std::clog << util::format_time(syntax_parse_time); + std::clog << " syntax parse\n"; + std::clog << util::format_time(semantic_analysis_time); + std::clog << " semantic analysis\n\n"; + } +}; + +class engine { +private: + error err; + lexer lexical_analyser; + parse syntax_parser; + semantic semantic_analyser; + + // store compiled file name + std::string this_file_name; + + // data for profiling module compilation + profile_data prof; + + // location of import statement + span import_location; + + // flag mark execution terminated + bool flag_execution_terminated = false; + +private: + std::string dump_json_token() const; + std::string dump_json_comment() const; + std::string dump_json_basic() const; + std::string dump_json_enum() const; + std::string dump_json_schema() const; + std::string dump_json_database() const; + std::string dump_json_trait() const; + std::string dump_json_fn() const; + std::string dump_json_query() const; + std::string dump_json_package() const; + std::string dump_json_local() const; + std::string dump_json_infer() const; + std::string dump_json_used_files() const; + void dump_json(std::ostream&) const; + void dump_used_modules() const; + bool 
language_server_dump(const configure&); + +private: + void scan_package_root(const std::string&, bool); + void do_lexical_analysis(const configure&); + void do_syntax_analysis(const configure&); + void do_semantic_analysis(const configure&); + void ast_structure_dump(); + void template_extract(); + void run_souffle(const configure&); + +public: + const auto& name_space() const { return semantic_analyser.get_context().this_name_space; } + const auto& mapper() const { return semantic_analyser.get_context().mapper; } + const auto& global() const { return semantic_analyser.get_context().global; } + const auto& get_profiling_data() const { return prof; } + +public: + engine(): lexical_analyser(err), + syntax_parser(err), + semantic_analyser(err), + this_file_name(""), + import_location(span::null()) {} + void set_import_location(const span& loc) { import_location = loc; } + const error& run(const configure&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/error/error.cpp b/godel-script/godel-frontend/src/error/error.cpp new file mode 100644 index 00000000..e3a69bd9 --- /dev/null +++ b/godel-script/godel-frontend/src/error/error.cpp @@ -0,0 +1,268 @@ +#include "error.h" +#include "godel-frontend/src/util/util.h" + +#include +#include +#include +#include +#include + +namespace report { + +namespace fs = std::filesystem; +using util::reset; +using util::red; +using util::yellow; +using util::white; +using util::cyan; + +std::string span::to_json() const { + std::string res = "["; + + // replace all '\' with '\\', for JSON string + auto tmp = file; + size_t pos = 0; + while((pos = tmp.find_first_of("\\", pos))!=std::string::npos) { + tmp.replace(pos, 1, "\\\\"); + pos += 2; + } + + // insert used file into the mapper, and give index to it + if (!location_file_mapper.count(tmp)) { + location_file_mapper[tmp] = location_file_mapper.size(); + } + + if (flag_lsp_dump_use_file_index) { + res += std::to_string(location_file_mapper.at(tmp)) + 
","; + } else { + res += "\"" + tmp + "\","; + } + res += std::to_string(start_line) + ","; + res += std::to_string(start_column) + ","; + res += std::to_string(end_line) + ","; + res += std::to_string(end_column) + "]"; + return res; +} + +void error::load(const std::string& path) { + if (filename==path) { + return; + } else if (!fs::exists(path)) { + fatal("file <" + path + "> does not exist."); + } else if (!fs::is_regular_file(path)) { + fatal("file <" + path + "> is not a regular file."); + } + filename = path; + src.clear(); + std::ifstream in(path, std::ios::binary); + std::string line; + while (!in.eof()) { + std::getline(in, line); + src.push_back(line); + } +} + +void error::err(const std::string& info) { + ++count; + if (json_output) { + lsp_errors.push_back({info, "", {0, 0, 0, 0, ""}}); + return; + } + std::cerr << red << "Error: " << white << info << reset << "\n\n"; +} + +void error::warn(const std::string& info) const { + if (json_output) { + lsp_warnings.push_back({info, "", {0, 0, 0, 0, ""}}); + return; + } + std::clog << yellow << "Warning: " << white << info << reset << "\n\n"; +} + +void error::fatal(const std::string& info) { + err(info); + json_output_stderr(); + std::exit(-1); +} + +void error::json_output_stderr() { + if (!json_output) { + return; + } + std::cerr << "{\"error\":["; + for(size_t i = 0; i " << reset; + out << filename << ":" << line << ":" << column+1 << std::endl; +} + +void error::report_context(const span& loc, + bool is_error, + const std::string& tips) { + auto start_line = loc.start_line; + auto start_column = loc.start_column; + auto end_line = loc.end_line; + auto end_column = loc.end_column; + const auto& file = loc.file; + load(file); + + auto& out = is_error? std::cerr:std::clog; + auto& color = is_error? 
red:yellow; + + const char* underscore[] = {"^", "^^^^"}; + std::string indent = util::indentation(end_line); + + report_location(out, start_line, start_column); + + // report single line error info + if (start_line == end_line) { + out << cyan << indent << " | " << reset << std::endl; + out << util::rightpad(std::to_string(start_line), indent.size()); + out << cyan << " | " << reset << src[start_line-1] << std::endl; + out << cyan << indent << " | " << reset; + + for (size_t i = 0; i < start_column; ++i) { + out << char(" \t"[src[start_line-1][i]=='\t']); + } + for (size_t i = start_column; i < end_column; ++i) { + out << color << underscore[src[start_line-1][i]=='\t']; + } + if (start_column == end_column) { + out << color << "^"; + } + + // give error info and tips + out << reset << std::endl; + if (tips.length()) { + out << cyan << indent << " +- "; + out << white << "note: " << reset << tips << std::endl; + } + return; + } + + // invalid location, return directly + if (!start_line || + !end_line || + end_line < start_line || + start_line > src.size()) { + return; + } + // report multiple lines error info + // draw start line info + out << cyan << indent << " | " << reset << std::endl; + out << util::rightpad(std::to_string(start_line), indent.size()); + out << cyan << " | " << reset << src[start_line-1] << std::endl; + if (start_column > src[start_line-1].size()) { + start_column = src[start_line-1].size(); + } + out << cyan << indent << " | " << reset; + for (size_t i = 0; i < start_column; ++i) { + out << char(" \t"[src[start_line-1][i]=='\t']); + } + if (start_column == src[start_line-1].size()) { + out << color << "^"; + } else { + for (size_t i = start_column; i < src[start_line-1].size(); ++i) { + out << color << underscore[src[start_line-1][i]=='\t']; + } + } + out << reset << std::endl; + + // print ... if having multiple lines between the start and the end line + if (end_line != start_line + 1) { + out << cyan << indent << " | " << reset << "..." 
<< std::endl; + } + + // draw end line info + out << util::rightpad(std::to_string(end_line),indent.size()); + out << cyan << " | " << reset << src[end_line-1] << std::endl; + if (end_column > src[end_line-1].size()) { + end_column = src[end_line-1].size(); + } + out << cyan << indent << " | " << reset; + for (size_t i = 0; i < end_column; ++i) { + out << color << underscore[src[end_line-1][i]=='\t']; + } + + // give error info and tips + out << reset << std::endl; + if (tips.length()) { + out << cyan << indent << " +- "; + out << white << "note: " << reset << tips << std::endl; + } +} + +void error::warn_report_ignored_DO_schema(const std::vector>& vec) { + if (json_output) { + return; + } + auto info = std::to_string(vec.size()); + info += " \"__all__\" methods of DO schemas are ignored:"; + report_head_info(info, false); + size_t ignored_count = 0; + for(const auto& i : vec) { + ++ignored_count; + if (ignored_count > 4) { + break; + } + report_context(i.second, false, ""); + } + if (vec.size() > 4) { + std::clog << cyan << " --> " << reset << "...(" << vec.size()-4 << ")\n"; + } + std::clog << std::endl; +} + +void error::err(const span& loc, const std::string& info, const std::string& tips) { + ++count; + if (json_output) { + lsp_errors.push_back({info, tips, loc}); + return; + } + report_head_info(info, true); + report_context(loc, true, tips); + std::cerr << std::endl; +} + +void error::warn(const span& loc, const std::string& info, const std::string& tips) { + if (json_output) { + lsp_warnings.push_back({info, tips, loc}); + return; + } + report_head_info(info, false); + report_context(loc, false, tips); + std::clog << std::endl; +} + +void error::fatal(const span& loc, const std::string& info, const std::string& tips) { + err(loc, info, tips); + json_output_stderr(); + std::exit(-1); +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/error/error.h b/godel-script/godel-frontend/src/error/error.h new file mode 100644 index 
00000000..1199cc7c
--- /dev/null
+++ b/godel-script/godel-frontend/src/error/error.h
@@ -0,0 +1,158 @@
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace report {
+
+// Source range (start line/column to end line/column) inside `file`.
+struct span {
+    uint32_t start_line = 0;
+    uint32_t start_column = 0;
+    uint32_t end_line = 0;
+    uint32_t end_column = 0;
+    std::string file;
+
+    span() = default;
+    span(uint32_t sl,
+         uint32_t sc,
+         uint32_t el,
+         uint32_t ec,
+         const std::string& f):
+        start_line(sl), start_column(sc),
+        end_line(el), end_column(ec),
+        file(f) {}
+    // serialise as [file-or-index, start_line, start_column, end_line, end_column]
+    std::string to_json() const;
+
+    // print as "file:start_line:start_column end_line:end_column"
+    friend std::ostream& operator<<(std::ostream& out, const span& loc) {
+        out << loc.file << ":" << loc.start_line << ":" << loc.start_column;
+        out << " " << loc.end_line << ":" << loc.end_column;
+        return out;
+    }
+
+    bool operator==(const span& other) const {
+        return (start_line == other.start_line &&
+                start_column == other.start_column &&
+                end_line == other.end_line &&
+                end_column == other.end_column &&
+                file == other.file);
+    }
+
+    bool operator!=(const span& other) const {
+        return !(*this == other);
+    }
+
+    // a span without a file name is treated as "no location"
+    bool is_null() const {
+        return file.empty();
+    }
+    static const span& null() {
+        static const span result = span(0, 0, 0, 0, "");
+        return result;
+    }
+
+private:
+    // file name -> index, filled by to_json() while locations are dumped
+    static inline std::unordered_map<std::string, size_t> location_file_mapper;
+    // when true, to_json() emits the file index instead of the file name
+    static inline bool flag_lsp_dump_use_file_index = false;
+
+public:
+    static void set_flag_lsp_dump_use_file_index(bool v) {
+        flag_lsp_dump_use_file_index = v;
+    }
+    static const auto& get_location_file_mapper() {
+        return location_file_mapper;
+    }
+};
+
+// One diagnostic record buffered for the language-server JSON report.
+struct lsp_error_info {
+    std::string info;
+    std::string tips;
+    span location;
+
+    // Escape `src` so it can be embedded between double quotes in JSON.
+    // Besides '\\' and '"', control characters (< 0x20) are escaped too,
+    // because raw control characters are not allowed inside JSON strings.
+    static std::string to_raw(const std::string& src) {
+        static const char hex_digits[] = "0123456789abcdef";
+        std::string res;
+        res.reserve(src.size());
+        for (const char c : src) {
+            switch (c) {
+                case '\\': res += "\\\\"; break;
+                case '"': res += "\\\""; break;
+                case '\n': res += "\\n"; break;
+                case '\r': res += "\\r"; break;
+                case '\t': res += "\\t"; break;
+                default:
+                    if (static_cast<unsigned char>(c) < 0x20) {
+                        res += "\\u00";
+                        res += hex_digits[(c >> 4) & 0xf];
+                        res += hex_digits[c & 0xf];
+                    } else {
+                        res += c;
+                    }
+                    break;
+            }
+        }
+        return res;
+    }
+
+    std::string to_json() const {
+        std::string res = "{\"info\":\"" + to_raw(info) + "\",";
+        res += "\"tips\":\"" + to_raw(tips) + "\",";
+        res += "\"location\":" + location.to_json() + "}";
+        return res;
+    }
+};
+
+class error {
+private:
+    int64_t count = 0;
+    std::string filename = "";
+    std::vector<std::string> src;
+
+private:
+    // for language server error report, use json format output
+    static inline bool json_output = false;
+    // record generated errors
+    static inline std::vector<lsp_error_info> lsp_errors = {};
+    // record generated warnings
+    static inline std::vector<lsp_error_info> lsp_warnings = {};
+
+private:
+    // print the report head info, format: {"Error: "/"Warning: "}{info}
+    void report_head_info(const std::string&, bool) const;
+    // print the report location, format: {filename}:{line}:{column}
+    void report_location(std::ostream&, uint32_t, uint32_t) const;
+    // print the context of given location
+    void report_context(const span&, bool, const std::string&);
+
+public:
+    // NOTE(review): the pair's first type is assumed to be std::string; confirm against callers
+    void warn_report_ignored_DO_schema(const std::vector<std::pair<std::string, span>>&);
+
+public:
+    void load(const std::string&);
+    void err(const std::string&);
+    void warn(const std::string&) const;
+    void fatal(const std::string&);
+    void err(const span&, const std::string&, const std::string& tips = "");
+    void warn(const span&, const std::string&, const std::string& tips = "");
+    void fatal(const span&, const std::string&, const std::string& tips = "");
+
+public:
+    auto get_error() const { return count; }
+    static void set_json_out() { json_output = true; }
+    static void json_output_stderr();
+};
+
+}
\ No newline at end of file
diff --git a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp
new file mode 100644
index 00000000..1733c276
---
/dev/null +++ b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp @@ -0,0 +1,72 @@ +#include "godel-frontend/src/ir/aggregator_inline_remark.h" + +namespace godel { + +void aggregator_inline_remark::visit_call(lir::call* node) { + if (!in_aggregator) { + return; + } + switch(node->get_func_kind()) { + case lir::call::kind::function: + case lir::call::kind::method: + if (inline_rules.count(replace_colon(node->get_function_name()))) { + err.warn(node->get_location(), + "inline function \"" + + node->get_function_name() + + "\" used in aggregator.", + "will generate as a normal function." + ); + } + need_remark.insert(replace_colon(node->get_function_name())); + break; + default: break; + } +} + +void aggregator_inline_remark::visit_aggregator(lir::aggregator* node) { + if (in_aggregator) { + err.err(node->get_location(), + "detect nested aggregator, please check generated code." + ); + return; + } + + in_aggregator = true; + node->get_body()->accept(this); + in_aggregator = false; +} + +bool aggregator_inline_remark::run() { + for(const auto& decl : ctx->rule_decls) { + if (decl->is_inline()) { + inline_rules.insert(replace_colon(decl->get_rule_raw_name())); + } + } + + for(auto impl : ctx->rule_impls) { + impl->get_block()->accept(this); + } + for(auto impl : ctx->database_get_table) { + impl->get_block()->accept(this); + } + for(auto impl : ctx->schema_get_field) { + impl->get_block()->accept(this); + } + for(auto impl : ctx->schema_data_constraint_impls) { + impl->get_block()->accept(this); + } + + if (err.get_error()) { + return false; + } + + // remark inline to false + for(auto& decl : ctx->rule_decls) { + if (need_remark.count(replace_colon(decl->get_rule_raw_name()))) { + decl->set_is_inline_rule(false); + } + } + return true; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h new file mode 100644 index 00000000..fd349797 
--- /dev/null +++ b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h @@ -0,0 +1,30 @@ +#pragma once + +#include "godel-frontend/src/ir/pass.h" + +#include +#include +#include + +namespace godel { + +class aggregator_inline_remark: public pass { +private: + std::unordered_set inline_rules; + std::unordered_set need_remark; + bool in_aggregator = false; + +private: + void visit_call(lir::call*) override; + void visit_aggregator(lir::aggregator*) override; + +public: + aggregator_inline_remark(ir_context& c): + pass(pass_kind::ps_aggregator_inline_remark, c) {} + const char* get_name() const override { + return "[Transform] Aggregator Inline Remark"; + } + bool run() override; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/flatten_block.cpp b/godel-script/godel-frontend/src/ir/flatten_block.cpp new file mode 100644 index 00000000..595a5a83 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/flatten_block.cpp @@ -0,0 +1,190 @@ +#include "godel-frontend/src/ir/flatten_block.h" + +namespace godel { + +void flatten_nested_block::visit_block(lir::block* node) { + if (node->get_use_comma()) { + blk.back()->set_use_comma(); + } + if (node->get_use_semicolon()) { + blk.back()->set_use_semicolon(); + } + + for(auto i : node->get_content()) { + if (i->get_kind()!=lir::inst_kind::inst_block) { + i->accept(this); + continue; + } + + auto new_blk = new lir::block(i->get_location()); + blk.back()->add_new_content(new_blk); + blk.push_back(new_blk); + i->accept(this); + blk.pop_back(); + } + + flatten_block(); +} + +void flatten_nested_block::visit_not_operand(lir::not_operand* node) { + auto new_not = new lir::not_operand(node->get_location()); + auto new_blk = new lir::block(node->get_body()->get_location()); + new_not->set_body(new_blk); + blk.back()->add_new_content(new_not); + + blk.push_back(new_blk); + node->get_body()->accept(this); + blk.pop_back(); +} + +void flatten_nested_block::visit_and_operand(lir::and_operand* 
node) { + // do lowering if the two operands are blocks using `,` as the separator + // and we flatten them into a single block, replacing this and_operand node + if (node->get_left_block()->get_use_comma() && + node->get_right_block()->get_use_comma()) { + flatten_and_operand(node); + return; + } + + auto new_and = new lir::and_operand(node->get_location()); + auto new_left = new lir::block(node->get_left_block()->get_location()); + auto new_right = new lir::block(node->get_right_block()->get_location()); + new_and->set_left(new_left); + new_and->set_right(new_right); + blk.back()->add_new_content(new_and); + + blk.push_back(new_left); + node->get_left_block()->accept(this); + blk.pop_back(); + + blk.push_back(new_right); + node->get_right_block()->accept(this); + blk.pop_back(); +} + +void flatten_nested_block::visit_or_operand(lir::or_operand* node) { + auto new_and = new lir::or_operand(node->get_location()); + auto new_left = new lir::block(node->get_left_block()->get_location()); + auto new_right = new lir::block(node->get_right_block()->get_location()); + new_and->set_left(new_left); + new_and->set_right(new_right); + blk.back()->add_new_content(new_and); + + blk.push_back(new_left); + node->get_left_block()->accept(this); + blk.pop_back(); + + blk.push_back(new_right); + node->get_right_block()->accept(this); + blk.pop_back(); +} + +void flatten_nested_block::visit_aggregator(lir::aggregator* node) { + auto new_aggr = new lir::aggregator(*node); + auto new_blk = new lir::block(node->get_body()->get_location()); + new_aggr->set_body(new_blk); + blk.back()->add_new_content(new_aggr); + + blk.push_back(new_blk); + node->get_body()->accept(this); + blk.pop_back(); +} + +void flatten_nested_block::flatten_block() { + if (blk.back()->get_content().size()==1 && + blk.back()->get_content().front()->get_kind()==lir::inst_kind::inst_block) { + auto nested = reinterpret_cast(blk.back()->get_content().front()); + blk.back()->get_mutable_content() = 
nested->get_content(); + if (nested->get_use_comma()) { + blk.back()->set_use_comma(); + } + if (nested->get_use_semicolon()) { + blk.back()->set_use_semicolon(); + } + + nested->get_mutable_content().clear(); + delete nested; + } + if (!blk.back()->get_use_comma()) { + return; + } + + std::vector flat_content; + for(auto i : blk.back()->get_content()) { + if (i->get_kind()!=lir::inst_kind::inst_block) { + flat_content.push_back(i); + continue; + } + + // merge block which uses the same separator + auto tmp = reinterpret_cast(i); + if (!tmp->get_use_comma()) { + flat_content.push_back(i); + continue; + } + for(auto j : tmp->get_content()) { + flat_content.push_back(j); + } + + // clear content to avoid double free + tmp->get_mutable_content().clear(); + delete tmp; + } + + // replace content + blk.back()->get_mutable_content() = flat_content; +} + +void flatten_nested_block::flatten_and_operand(lir::and_operand* node) { + // and operand is translated as: + // + // (left, left), (right, right) + // + // if left and right block all use `,` as the separator, then we can + // flatten it into a single block + // + // (left, left), (right, right) => (left, left, right, right) + // + auto new_and = new lir::block(node->get_location()); + blk.back()->add_new_content(new_and); + + blk.push_back(new_and); + node->get_left_block()->accept(this); + node->get_right_block()->accept(this); + blk.pop_back(); +} + +void flatten_nested_block::copy(souffle_rule_impl* impl) { + auto impl_blk = new lir::block(impl->get_block()->get_location()); + + blk.push_back(impl_blk); + impl->get_block()->accept(this); + blk.pop_back(); + + if (impl_blk->get_use_comma()) { + impl->get_block()->set_use_comma(); + } + if (impl_blk->get_use_semicolon()) { + impl->get_block()->set_use_semicolon(); + } + impl->get_block()->get_mutable_content().swap(impl_blk->get_mutable_content()); + delete impl_blk; +} + +bool flatten_nested_block::run() { + for(auto impl : ctx->rule_impls) { + copy(impl); + } + 
for(auto impl : ctx->database_get_table) { + copy(impl); + } + for(auto impl : ctx->schema_get_field) { + copy(impl); + } + for(auto impl : ctx->schema_data_constraint_impls) { + copy(impl); + } + return true; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/flatten_block.h b/godel-script/godel-frontend/src/ir/flatten_block.h new file mode 100644 index 00000000..a2593ee8 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/flatten_block.h @@ -0,0 +1,63 @@ +#pragma once + +#include "godel-frontend/src/ir/pass.h" + +#include + +namespace godel { + +class flatten_nested_block: public pass { +private: + std::vector blk; + +private: + void visit_boolean(lir::boolean* node) override { + blk.back()->add_new_content(new lir::boolean(*node)); + } + void visit_store(lir::store* node) override { + blk.back()->add_new_content(new lir::store(*node)); + } + void visit_call(lir::call* node) override { + blk.back()->add_new_content(new lir::call(*node)); + } + void visit_constructor(lir::constructor* node) override { + blk.back()->add_new_content(new lir::constructor(*node)); + } + void visit_record(lir::record* node) override { + blk.back()->add_new_content(new lir::record(*node)); + } + void visit_unary(lir::unary* node) override { + blk.back()->add_new_content(new lir::unary(*node)); + } + void visit_binary(lir::binary* node) override { + blk.back()->add_new_content(new lir::binary(*node)); + } + void visit_compare(lir::compare* node) override { + blk.back()->add_new_content(new lir::compare(*node)); + } + void visit_block(lir::block*) override; + void visit_fact(lir::fact* node) override { + blk.back()->add_new_content(new lir::fact(*node)); + } + void visit_not_operand(lir::not_operand*) override; + void visit_and_operand(lir::and_operand*) override; + void visit_or_operand(lir::or_operand*) override; + void visit_aggregator(lir::aggregator*) override; + +private: + void flatten_block(); + void flatten_and_operand(lir::and_operand*); + 
+private: + void copy(souffle_rule_impl*); + +public: + flatten_nested_block(ir_context& c): + pass(pass_kind::ps_flatten_nested_block, c) {} + const char* get_name() const override { + return "[Transform] Flatten Nested Block"; + } + bool run() override; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/inst_combine.cpp b/godel-script/godel-frontend/src/ir/inst_combine.cpp new file mode 100644 index 00000000..426d7bc7 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/inst_combine.cpp @@ -0,0 +1,340 @@ +#include "godel-frontend/src/ir/inst_combine.h" + +#include + +namespace godel { + +void inst_combine_pass::visit_store(lir::store* s) { + const auto& src = s->get_source(); + const auto& dst = s->get_destination(); + + // record this case: + // + // ( + // ssa_temp_0 = a, + // b = ssa_temp_1, + // call(ssa_temp_2, ssa_temp_0, ssa_temp_1) + // ) + // + // and optimize this case to: + // + // (call(ssa_temp_2, a, b)) + // + if (dst.kind==lir::inst_value_kind::variable && + src.kind==lir::inst_value_kind::variable) { + variable_reference_graph[dst.content].insert({src.content, s}); + variable_reference_graph[src.content].insert({dst.content, s}); + } + + // record this case: + // + // ( + // ssa_temp_0 = 1, + // ssa_temp_1 = 2, + // call(ssa_temp_2, ssa_temp_0, ssa_temp_1) + // ) + // + // and optimize this case to: + // + // (call(ssa_temp_2, 1, 2)) + // + if (dst.kind==lir::inst_value_kind::variable && + src.kind==lir::inst_value_kind::literal) { + variable_reference_graph[dst.content].insert({src.content, s}); + } +} + +void inst_combine_pass::visit_compare(lir::compare* c) { + if (c->get_operator()!=lir::compare::kind::op_eq) { + return; + } + + const auto& left = c->get_left(); + const auto& right = c->get_right(); + + // record this case: + // + // ( + // call(ssa_temp_0, ...), + // ssa_temp_0 = a + // ) + // + // and optimize this case to: + // + // (call(a, ...)) + // + if (left.kind==lir::inst_value_kind::variable && + 
right.kind==lir::inst_value_kind::variable) { + variable_reference_graph[left.content].insert({right.content, c}); + variable_reference_graph[right.content].insert({left.content, c}); + } + + // record this case: + // + // ( + // call(ssa_temp_0, ...), + // ssa_temp_0 = 1234567890 + // ) + // + // and optimize this case to: + // + // (call(1234567890, ...)) + // + if (left.kind==lir::inst_value_kind::variable && + right.kind==lir::inst_value_kind::literal) { + variable_reference_graph[left.content].insert({right.content, c}); + } +} + +bool inst_combine_pass::run() { + for(auto impl : ctx->rule_impls) { + scan(impl); + inst_elimination_worker().copy(impl); + } + for(auto impl : ctx->database_get_table) { + scan(impl); + inst_elimination_worker().copy(impl); + } + for(auto impl : ctx->schema_get_field) { + scan(impl); + inst_elimination_worker().copy(impl); + } + for(auto impl : ctx->schema_data_constraint_impls) { + scan(impl); + inst_elimination_worker().copy(impl); + } + return true; +} + +void inst_combine_pass::scan(souffle_rule_impl* b) { + variable_reference_graph.clear(); + b->get_block()->accept(this); + if (variable_reference_graph.empty()) { + return; + } + + // delete circle in reference graph: + // + // ssa_temp_0 <--> ssa_temp_1 + // + // we need to delete one of them, only rest one is enough. + // + // ssa_temp_0 ---> ssa_temp_1 + // + // for example: + // + // get_field_coref__java__File_element_hash_id(ssa_temp_0, f), + // rule_coref__java__Location__getFileHashId(ssa_temp_1, self), + // ssa_temp_0 = ssa_temp_1 + // + // this circle will mark ssa_temp_0 and ssa_temp_1, + // and then we will remove ssa_temp_0 and ssa_temp_1 and change the code to: + // + // get_field_coref__java__File_element_hash_id(ssa_temp_1, f), + // ^^^^^^^^^| + // rule_coref__java__Location__getFileHashId(ssa_temp_0, self) + // ^^^^^^^^^| + // + // 0 and 1 swap their place, and cause unexpected behavior. 
+ // by removing the circle and reserve only one edge, the result is correct: + // + // get_field_coref__java__File_element_hash_id(ssa_temp_0, f), + // rule_coref__java__Location__getFileHashId(ssa_temp_0, self) + // + for(const auto& i : variable_reference_graph) { + const auto& name = i.first; + if (i.second.size()!=1) { + continue; + } + const auto& to = i.second.begin()->first; + if (!variable_reference_graph.count(to)) { + continue; + } + if (variable_reference_graph.at(to).size()!=1) { + continue; + } + const auto& from = variable_reference_graph.at(to).begin()->first; + if (from==name && to.find("ssa_temp")==0 && from.find("ssa_temp")==0) { + variable_reference_graph.at(to).clear(); + } + } + + combine_worker(variable_reference_graph).mark(b); +} + +void combine_worker::visit_call(lir::call* node) { + // change result destination + if (is_single_ref_ssa_temp(node->get_mutable_result().content)) { + const auto& ref = get_single_ref(node->get_mutable_result().content); + node->get_mutable_result().content = ref.first; + ref.second->set_flag_eliminated(true); + } + // change argument list + for(auto& i : node->get_mutable_arguments()) { + if (is_single_ref_ssa_temp(i.content)) { + const auto& ref = get_single_ref(i.content); + i.content = ref.first; + ref.second->set_flag_eliminated(true); + } + } +} + +void combine_worker::visit_constructor(lir::constructor* node) { + // change result destination + // temporary variable with `ssa_temp` prefix + if (is_single_ref_ssa_temp(node->get_mutable_result().content)) { + const auto& ref = get_single_ref(node->get_mutable_result().content); + node->get_mutable_result().content = ref.first; + ref.second->set_flag_eliminated(true); + } + // change argument list + for(auto& i : node->get_mutable_fields()) { + if (is_single_ref_ssa_temp(i.content)) { + const auto& ref = get_single_ref(i.content); + i.content = ref.first; + ref.second->set_flag_eliminated(true); + } + } +} + +void combine_worker::visit_record(lir::record* 
node) { + // change result destination + // temporary variable with `ssa_temp` prefix + if (is_single_ref_ssa_temp(node->get_mutable_result().content)) { + const auto& ref = get_single_ref(node->get_mutable_result().content); + node->get_mutable_result().content = ref.first; + ref.second->set_flag_eliminated(true); + } + // change argument list + for(auto& i : node->get_mutable_fields()) { + if (is_single_ref_ssa_temp(i.content)) { + const auto& ref = get_single_ref(i.content); + i.content = ref.first; + ref.second->set_flag_eliminated(true); + } + } +} + +void combine_worker::visit_unary(lir::unary* node) { + const auto& dst = node->get_destination(); + if (is_single_ref_ssa_temp(dst.content)) { + const auto& ref = get_single_ref(dst.content); + node->get_mutable_destination().content = ref.first; + ref.second->set_flag_eliminated(true); + } +} + +void combine_worker::visit_binary(lir::binary* node) { + const auto& dst = node->get_destination(); + if (is_single_ref_ssa_temp(dst.content)) { + const auto& ref = get_single_ref(dst.content); + node->get_mutable_destination().content = ref.first; + ref.second->set_flag_eliminated(true); + } +} + +void combine_worker::mark(souffle_rule_impl* b) { + b->get_block()->accept(this); +} + +void inst_elimination_worker::visit_block(lir::block* node) { + if (node->get_use_comma()) { + blk.back()->set_use_comma(); + } + if (node->get_use_semicolon()) { + blk.back()->set_use_semicolon(); + } + + for(auto i : node->get_content()) { + // skip eliminated instruction + if (i->get_flag_eliminated()) { + continue; + } + + if (i->get_kind()!=lir::inst_kind::inst_block) { + i->accept(this); + continue; + } + + auto new_blk = new lir::block(i->get_location()); + blk.back()->add_new_content(new_blk); + blk.push_back(new_blk); + i->accept(this); + blk.pop_back(); + } +} + +void inst_elimination_worker::visit_not_operand(lir::not_operand* node) { + auto new_not = new lir::not_operand(node->get_location()); + auto new_blk = new 
lir::block(node->get_body()->get_location()); + new_not->set_body(new_blk); + blk.back()->add_new_content(new_not); + + blk.push_back(new_blk); + node->get_body()->accept(this); + blk.pop_back(); +} + +void inst_elimination_worker::visit_and_operand(lir::and_operand* node) { + auto new_and = new lir::and_operand(node->get_location()); + auto new_left = new lir::block(node->get_left_block()->get_location()); + auto new_right = new lir::block(node->get_right_block()->get_location()); + new_and->set_left(new_left); + new_and->set_right(new_right); + blk.back()->add_new_content(new_and); + + blk.push_back(new_left); + node->get_left_block()->accept(this); + blk.pop_back(); + + blk.push_back(new_right); + node->get_right_block()->accept(this); + blk.pop_back(); +} + +void inst_elimination_worker::visit_or_operand(lir::or_operand* node) { + auto new_and = new lir::or_operand(node->get_location()); + auto new_left = new lir::block(node->get_left_block()->get_location()); + auto new_right = new lir::block(node->get_right_block()->get_location()); + new_and->set_left(new_left); + new_and->set_right(new_right); + blk.back()->add_new_content(new_and); + + blk.push_back(new_left); + node->get_left_block()->accept(this); + blk.pop_back(); + + blk.push_back(new_right); + node->get_right_block()->accept(this); + blk.pop_back(); +} + +void inst_elimination_worker::visit_aggregator(lir::aggregator* node) { + auto new_aggr = new lir::aggregator(*node); + auto new_blk = new lir::block(node->get_body()->get_location()); + new_aggr->set_body(new_blk); + blk.back()->add_new_content(new_aggr); + + blk.push_back(new_blk); + node->get_body()->accept(this); + blk.pop_back(); +} + +void inst_elimination_worker::copy(souffle_rule_impl* impl) { + auto impl_blk = new lir::block(impl->get_block()->get_location()); + + blk.push_back(impl_blk); + impl->get_block()->accept(this); + blk.pop_back(); + + if (impl_blk->get_use_comma()) { + impl->get_block()->set_use_comma(); + } + if 
(impl_blk->get_use_semicolon()) { + impl->get_block()->set_use_semicolon(); + } + impl->get_block()->get_mutable_content().swap(impl_blk->get_mutable_content()); + delete impl_blk; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/inst_combine.h b/godel-script/godel-frontend/src/ir/inst_combine.h new file mode 100644 index 00000000..e14abc7c --- /dev/null +++ b/godel-script/godel-frontend/src/ir/inst_combine.h @@ -0,0 +1,115 @@ +#pragma once + +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/pass.h" + +#include +#include + +namespace godel { + +class inst_combine_pass: public pass { +public: + typedef std::unordered_map> ref_graph; + +private: + ref_graph variable_reference_graph; + +private: + void visit_store(lir::store*) override; + void visit_compare(lir::compare*) override; + +private: + void scan(souffle_rule_impl*); + +public: + inst_combine_pass(ir_context& c): pass(pass_kind::ps_inst_combine, c) {} + const char* get_name() const override { + return "[Transform] Instruction Combine"; + } + bool run() override; +}; + +// replace optimized temporary variable and mark some instructions as `eliminated` +class combine_worker: public lir::inst_visitor { +private: + const inst_combine_pass::ref_graph& vg; + +public: + // used to check if a variable is temporary ssa variable + // and only referenced by one other variable(not ssa) + bool is_single_ref_ssa_temp(const std::string& n) { + return n.find("ssa_temp")==0 && vg.count(n) && vg.at(n).size() == 1; + } + const auto& get_single_ref(const std::string& n) const { + return *vg.at(n).begin(); + } + +private: + void visit_call(lir::call*) override; + void visit_constructor(lir::constructor*) override; + void visit_record(lir::record*) override; + void visit_unary(lir::unary*) override; + void visit_binary(lir::binary*) override; + +public: + combine_worker(const inst_combine_pass::ref_graph& g): vg(g) {} + void 
mark(souffle_rule_impl*); +}; + +// copy all the instructions, delete the `eliminated` instructions +// and flatten nested blocks/and operands +class inst_elimination_worker: public lir::inst_visitor { +private: + std::vector blk; + +private: + void visit_boolean(lir::boolean* node) override { + // 1 = 1 is always true, there's no need to copy it + if (node->get_flag()) { + return; + } + blk.back()->add_new_content(new lir::boolean(*node)); + } + void visit_store(lir::store* node) override { + blk.back()->add_new_content(new lir::store(*node)); + } + void visit_call(lir::call* node) override { + // undetermined ungrounded temp variables not needed after combine pass + if (node->get_function_name() == "int::__undetermined_all__" || + node->get_function_name() == "string::__undetermined_all__") { + return; + } + blk.back()->add_new_content(new lir::call(*node)); + } + void visit_constructor(lir::constructor* node) override { + blk.back()->add_new_content(new lir::constructor(*node)); + } + void visit_record(lir::record* node) override { + blk.back()->add_new_content(new lir::record(*node)); + } + void visit_unary(lir::unary* node) override { + blk.back()->add_new_content(new lir::unary(*node)); + } + void visit_binary(lir::binary* node) override { + blk.back()->add_new_content(new lir::binary(*node)); + } + void visit_compare(lir::compare* node) override { + blk.back()->add_new_content(new lir::compare(*node)); + } + void visit_block(lir::block*) override; + void visit_fact(lir::fact* node) override { + blk.back()->add_new_content(new lir::fact(*node)); + } + void visit_not_operand(lir::not_operand*) override; + void visit_and_operand(lir::and_operand*) override; + void visit_or_operand(lir::or_operand*) override; + void visit_aggregator(lir::aggregator*) override; + +public: + void copy(souffle_rule_impl*); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/ir_context.cpp b/godel-script/godel-frontend/src/ir/ir_context.cpp new file 
mode 100644 index 00000000..2a7618d4 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/ir_context.cpp @@ -0,0 +1,510 @@ +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/pass_manager.h" +#include "godel-frontend/src/ir/remove_unused.h" +#include "godel-frontend/src/ir/inst_combine.h" + +#include +#include +#include +#include + +#include + +namespace godel { + +void souffle_functor::dump(std::ostream& out) const { + out << ".functor " << name; + out << "(" << params << "): " << return_type << "\n"; +} + +void souffle_type_alias::dump(std::ostream& out) const { + out << ".type " << alias << " = " << real << "\n"; +} + +void souffle_schema::dump(std::ostream& out) const { + out << ".decl schema_" << replace_colon(name) << "("; + out << "result: " << replace_colon(name) << ", db_id: DBIndex"; + if (fields.size()) { + out << ", "; + } + for(const auto& i : fields) { + out << i.first << ": " << replace_colon(i.second); + if (i!=fields.back()) { + out << ", "; + } + } + out << ")\n"; +} + +void souffle_input_decl::dump(std::ostream& os) const { + os << ".decl " << decl_name << "("; + for(const auto& i : fields) { + os << i.first << ": " << replace_colon(i.second); + if (i!=fields.back()) { + os << ", "; + } + } + os << ")\n"; +} + +void souffle_input_impl::dump(std::ostream& os) const { + os << ".input " << decl_name; + os << "(IO=\"sqlite\", dbname=" << input_db_path << ", name=\""; + os << table_name << "\")\n"; +} + +void souffle_rule_decl::dump(std::ostream& out) const { + out << ".decl " << replace_colon(name) << "("; + if (return_type.length() && return_type!="bool") { + out << "result: " << replace_colon(return_type); + if (params.size()) { + out << ", "; + } + } + for(const auto& i : params) { + out << i.first << ": " << replace_colon(i.second); + if (i!=params.back()) { + out << ", "; + } + } + out << ")"; + if (flag_is_inline_rule) { + out << " inline"; + } + out << "\n"; +} + +void souffle_rule_impl::dump(std::ostream& out) 
const { + out << replace_colon(func_name) << "("; + if (!params.empty()) { + auto it = params.begin(); + out << *it++; + while(it != params.end()) { + out << ", " << *it++; + } + } + + // empty rule block should be treated as a fact: xx(x, x). + // else dump the body of the rule: xx(x, x) :- ... + if (block.get_content().empty()) { + out << ").\n"; + } else { + out << ") :- "; + block.dump(out, ""); + out << ".\n"; + } +} + +ir_context::ir_context() { + generate_functors(); + generate_type_alias(); + generate_all_data_database(); + db_path = {}; +} + +ir_context::~ir_context() { + for(auto i : rule_decls) { + delete i; + } + for(auto i : rule_impls) { + delete i; + } + for(auto i : database_get_table) { + delete i; + } + for(auto i : schema_get_field) { + delete i; + } + for(auto i : schema_data_constraint_impls) { + delete i; + } +} + +void ir_context::dump_souffle_functors(std::ostream& out) const { + for(const auto& i : functors) { + i.dump(out); + } + if (functors.size()) { + out << "\n"; + } +} + +void ir_context::dump_type_alias(std::ostream& out) const { + for(const auto& i : type_alias) { + i.dump(out); + } + if (type_alias.size()) { + out << "\n"; + } +} + +void ir_context::dump_schema_data_constraint_decls(std::ostream& out) const { + for(const auto& i : schema_data_constraint_decls) { + i.dump(out); + } + if (schema_data_constraint_decls.size()) { + out << "\n"; + } +} + +void ir_context::dump_rule_decls(std::ostream& out) const { + for(auto i : rule_decls) { + i->dump(out); + } + if (rule_decls.size()) { + out << "\n"; + } +} + +void ir_context::dump_input_decls(std::ostream& out) const { + for(const auto& i : input_decls) { + i.dump(out); + } + if (input_decls.size()) { + out << "\n"; + } +} + +bool ir_context::cache_input_impl(std::ostream& out, const std::string& fn) const { + const auto tempfile = cache_directory / "cache.db"; + if (!enable_souffle_cache || !std::filesystem::exists(tempfile)) { + return false; + } + if (check_cache_table_exists(fn)) 
{ + out << ".input " << fn; + out << "(IO=\"sqlite-cache\", dbname=\"" << tempfile.string() << "\", "; + out << "name=\"_" << fn << "\")\n"; + return true; + } + return false; +} + +void ir_context::dump_rule_impls(std::ostream& out, + const std::unordered_set& cache_decl) const { + for(auto i : rule_impls) { + const auto name = replace_colon(i->get_func_name()); + // cache input + if (cache_decl.count(name) && cache_input_impl(out, name)) { + continue; + } + i->dump(out); + } + if (rule_impls.size()) { + out << "\n"; + } +} + +void ir_context::dump_database_get_table(std::ostream& out) const { + for(auto i : database_get_table) { + i->dump(out); + } + if (database_get_table.size()) { + out << "\n"; + } +} + +void ir_context::dump_database_data_constraint(std::ostream& out) const { + for(const auto& i : database_all_data) { + out << "all_data_DBIndex(" << i << ").\n"; + } + if (database_all_data.size()) { + out << "\n"; + } +} + +void ir_context::dump_schema_data_constraint_impls(std::ostream& out) const { + for(auto i : schema_data_constraint_impls) { + i->dump(out); + } + if (schema_data_constraint_impls.size()) { + out << "\n"; + } +} + +void ir_context::dump_schema_get_field(std::ostream& out) const { + for(auto i : schema_get_field) { + i->dump(out); + } + if (schema_get_field.size()) { + out << "\n"; + } +} + +void ir_context::dump_input_impls(std::ostream& out) const { + for(const auto& i : input_impls) { + i.dump(out); + } + if (input_impls.size()) { + out << "\n"; + } +} + +void ir_context::dump_souffle_annotated_input(std::ostream& out) const { + if (annotated_input.empty()) { + return; + } + for(const auto& i : annotated_input) { + out << ".input " << replace_colon(i.rule_name); + if (i.format=="\"json\"") { + out << "(IO=\"jsonfile\", filename="; + out << i.file_path << ", format=\"object\")"; + } else if (i.format=="\"csv\"") { + out << "(IO=\"file\", filename="; + out << i.file_path << ", format=\"object\", rfc4180=true)"; + } else if 
(i.format=="\"sqlite\"") { + out << "(IO=\"sqlite\", filename="; + out << i.file_path << ", format=\"object\")"; + } + out << "\n"; + } + out << "\n"; +} + +void ir_context::dump_souffle_multi_json_output(std::ostream& out) const { + const auto temp = std::filesystem::temp_directory_path(); + if (temp.string().empty()) { + assert(false && "cannot find temp directory or path is empty"); + } + // dump output rules + for(const auto& i : souffle_output) { + const auto temp_file = temp / ("godel_script_" + i + ".json"); + out << ".output " << replace_colon(i); + out << "(IO=\"jsonfile\", filename=\""; + out << temp_file.string() << "\", format=\"object\")" << "\n"; + } + if (souffle_output.size()) { + out << "\n"; + } +} + +void ir_context::dump_souffle_output(std::ostream& out) const { + if (souffle_output.empty()) { + return; + } + // multi output for json + if (souffle_output.size()>1 && json_output_path.size()) { + dump_souffle_multi_json_output(out); + return; + } + // generate io format + auto io_format = std::string("(IO=\"stdout\")"); + if (json_output_path.size()) { + io_format = "(IO=\"jsonfile\", filename=\"" + + json_output_path + + "\", format=\"object\")"; + } + if (csv_output_path.size()) { + io_format = "(IO=\"file\", filename=\"" + + csv_output_path + + "\", format=\"object\", rfc4180=true)"; + } + if (sqlite_output_path.size()) { + io_format = "(IO=\"sqlite\", filename=\"" + + sqlite_output_path + + "\", format=\"object\")"; + } + // dump output rules + for(const auto& i : souffle_output) { + out << ".output " << replace_colon(i) << io_format << "\n"; + } + if (souffle_output.size()) { + out << "\n"; + } +} + +void ir_context::dump_souffle_annotated_output(std::ostream& out) const { + if (annotated_output.empty()) { + return; + } + for(const auto& i : annotated_output) { + out << ".output " << replace_colon(i.rule_name); + if (i.format=="\"json\"") { + out << "(IO=\"jsonfile\", filename="; + out << i.file_path << ", format=\"object\")"; + } else if 
(i.format=="\"csv\"") { + out << "(IO=\"file\", filename="; + out << i.file_path << ", format=\"object\", rfc4180=true)"; + } else if (i.format=="\"sqlite\"") { + out << "(IO=\"sqlite\", filename="; + out << i.file_path << ", format=\"object\")"; + } + out << "\n"; + } + out << "\n"; +} + +void ir_context::dump(std::ostream& out, const cli::configure& conf) { + // only run the passes once + if (!flag_ir_pass_already_executed) { + pass_manager().run(*this, conf); + flag_ir_pass_already_executed = true; + } + + // inline rules cannot be used as IO + std::unordered_set inline_decl; + std::unordered_set cache_decl; + for(auto i : rule_decls) { + if (i->is_inline()) { + inline_decl.insert(replace_colon(i->get_rule_raw_name())); + } + if (i->need_cache()) { + cache_decl.insert(replace_colon(i->get_rule_raw_name())); + } + } + + if (conf.count(cli::option::cli_enable_souffle_cache)) { + enable_souffle_cache = true; + } + if (conf.count(cli::option::cli_clean_souffle_cache)) { + check_cache_directory_and_clean(); + } + + // dump maybe-used souffle functors + dump_souffle_functors(out); + // dump godel -> souffle type alias + dump_type_alias(out); + + // dump data constraints' declarations for schema + dump_schema_data_constraint_decls(out); + // dump rule declarations + dump_rule_decls(out); + // dump input declaration + dump_input_decls(out); + + // dump rule implementation + dump_rule_impls(out, cache_decl); + // dump database get table + dump_database_get_table(out); + // dump database all data + dump_database_data_constraint(out); + // dump schema data constraint + dump_schema_data_constraint_impls(out); + // dump schema get field + dump_schema_get_field(out); + // dump souffle fact input + dump_input_impls(out); + dump_souffle_annotated_input(out); + // dump souffle output + dump_souffle_output(out); + dump_souffle_annotated_output(out); + + if (enable_souffle_cache) { + dump_cache_output(out, inline_decl, cache_decl); + } +} + +bool 
ir_context::check_cache_table_exists(const std::string& rule) const { + // Report whether table "_<rule>" is already stored in cache.db under + // cache_directory. A missing database file, a failed open or a failed + // query all count as "not cached". + check_cache_directory_and_create(); + const auto tempfile = cache_directory / "cache.db"; + if (!std::filesystem::exists(tempfile)) { + return false; + } + + sqlite3* db = nullptr; + if (sqlite3_open(tempfile.string().c_str(), &db) != SQLITE_OK) { + // a handle is normally returned even when opening fails and must + // still be released; sqlite3_close accepts a null handle as a no-op + sqlite3_close(db); + return false; + } + + auto cmd = std::string("SELECT name FROM sqlite_master WHERE type='table'"); + cmd += " AND name='_" + rule + "';"; + + sqlite3_stmt* stmt = nullptr; + const char* tail = nullptr; + const auto rc = sqlite3_prepare_v2(db, cmd.c_str(), -1, &stmt, &tail); + // a result row means the table exists + const auto found = rc == SQLITE_OK && sqlite3_step(stmt)==SQLITE_ROW; + + // single cleanup path, finalizing a null statement is harmless + sqlite3_finalize(stmt); + sqlite3_close(db); + return found; +} + +void ir_context::dump_cache_output(std::ostream& out, + const std::unordered_set& inline_decl, + const std::unordered_set& cache_decl) const { + check_cache_directory_and_create(); + for(const auto& i : cache_decl) { + if (inline_decl.count(i) || // should not be inline rule + check_cache_table_exists(i)) { // should not exist in cache + continue; + } + out << ".output " << i << "(IO=\"sqlite\", filename=\""; + out << (cache_directory / "cache.db").string(); + out << "\")\n"; + } +} + +void ir_context::file_output(const std::string& output, + const cli::configure& conf) { + std::ofstream out(output); + dump(out, conf); +} + +std::string ir_context::str_output(const cli::configure& conf) { + std::stringstream ss; + dump(ss, conf); + return ss.str(); +} + +void ir_context::generate_functors() { + functors.push_back({ + "get_field_by_index", + "self: number, total: number, index: number", + "number stateful" + }); + functors.push_back({ + "godel_lang_builtin_string_getMatchResult", + "self: symbol, pattern: symbol, index: number", + "symbol" + }); + functors.push_back({ + "godel_lang_builtin_string_to_upper", + "self: 
symbol", + "symbol" + }); + functors.push_back({ + "godel_lang_builtin_string_to_lower", + "self: symbol", + "symbol" + }); + functors.push_back({ + "godel_lang_builtin_string_replace_once", + "self: symbol, pattern: symbol, replacement: symbol, index: number", + "symbol" + }); + functors.push_back({ + "godel_lang_builtin_string_replace_all", + "self: symbol, pattern: symbol, replacement: symbol", + "symbol" + }); +} + +void ir_context::generate_type_alias() { + type_alias.push_back({ + "DBIndex", + "[type_id: number, load_path: number]", + {"number", "number"} + }); + type_alias.push_back({"int", "number", {}}); + type_alias.push_back({"string", "symbol", {}}); +} + +void ir_context::generate_all_data_database() { + auto all_data_database = new souffle_rule_decl("all_data_DBIndex"); + all_data_database->add_param("db_id", "DBIndex"); + rule_decls.push_back(all_data_database); + + // default database index is [-1, -1] + database_all_data.insert("[-1, -1]"); +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/ir_context.h b/godel-script/godel-frontend/src/ir/ir_context.h new file mode 100644 index 00000000..ce98629a --- /dev/null +++ b/godel-script/godel-frontend/src/ir/ir_context.h @@ -0,0 +1,248 @@ +#pragma once + +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/sema/context.h" +#include "godel-frontend/src/cli.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace godel { + +struct souffle_functor { + std::string name; + std::string params; + std::string return_type; + + void dump(std::ostream&) const; +}; + +struct souffle_type_alias { + std::string alias; + std::string real; + + std::vector structure_type_list; + + void dump(std::ostream&) const; +}; + +struct souffle_schema { + std::string name; + std::vector> fields; + + void dump(std::ostream&) const; +}; + +// declaration of database input +struct souffle_input_decl { + std::string database_name; + std::string 
table_type; + uint64_t load_times; + std::vector> fields; + +private: + std::string decl_name; + +public: + souffle_input_decl(const std::string& dbn, + const std::string& tt, + uint64_t lt): + database_name(dbn), table_type(tt), load_times(lt) { + decl_name = "input_" + replace_colon(database_name) + "_" + + replace_colon(table_type) + "_" + + std::to_string(load_times); + } + void dump(std::ostream&) const; + const auto& get_decl_name() const { return decl_name; } +}; + +// implementation of database input +struct souffle_input_impl { + std::string name; + std::string table_name; + std::string table_type; + uint64_t path_id; + std::string input_db_path; + +private: + std::string decl_name; + +public: + souffle_input_impl(const std::string& n, + const std::string& tn, + const std::string& tt, + uint64_t pid, + const std::string& idb): + name(n), table_name(tn), table_type(tt), + path_id(pid), input_db_path(idb) { + decl_name = "input_" + replace_colon(name) + "_" + + replace_colon(table_type) + "_" + + std::to_string(path_id); + } + void dump(std::ostream&) const; + const auto& get_decl_name() const { return decl_name; } +}; + +class souffle_rule_decl { +private: + std::string name; + std::string return_type; + std::vector> params; + +private: + bool flag_is_inline_rule; + bool flag_need_cache; + +public: + souffle_rule_decl(const std::string& n): + name(n), return_type(""), + flag_is_inline_rule(false), + flag_need_cache(false) {} + void dump(std::ostream&) const; + +public: + void set_return_type(const std::string& t) { return_type = t; } + void set_is_inline_rule(bool flag) { flag_is_inline_rule = flag; } + void set_need_cache(bool flag) { flag_need_cache = flag; } + void add_param(const std::string& pn, const std::string& pt) { + params.push_back({pn, pt}); + } + +public: + const auto& get_rule_raw_name() const { return name; } + const auto& get_params() const { return params; } + const auto& get_return_type() const { return return_type; } + auto is_inline() 
const { return flag_is_inline_rule; } + auto need_cache() const { return flag_need_cache;} +}; + +class souffle_rule_impl { +private: + std::string func_name; + std::vector params; + lir::block block; + +public: + souffle_rule_impl(const std::string& c, const report::span& loc): + func_name(c), block(loc) { + block.set_use_semicolon(); + } + void add_param(const std::string& p) { + params.push_back(p); + } + auto get_block() { return █ } + const auto& get_params() const { return params; } + const auto& get_func_name() const { return func_name; } + + void dump(std::ostream&) const; +}; + +struct souffle_annotated_file_output { + std::string format; + std::string file_path; + std::string rule_name; +}; + +// alias to annotated_file_output +using souffle_annotated_file_input = souffle_annotated_file_output; + +struct ir_context { + std::vector functors; + std::vector type_alias; + + // souffle stdout output, can be redirected to file output + std::vector souffle_output; + // mapper stores real name of mangled output rule, + // used for merging output files into one file + std::unordered_map souffle_output_real_name; + + std::vector annotated_output; + std::vector annotated_input; + + std::vector schema_data_constraint_decls; + std::vector input_decls; + std::vector input_impls; + + // record all database index in this vector + std::unordered_set database_all_data; + + // declarations for rules + std::vector rule_decls; + + // implementations for rules + std::vector rule_impls; + std::vector database_get_table; + std::vector schema_get_field; + std::vector schema_data_constraint_impls; + + // database index and path index + std::unordered_map db_index; + std::unordered_map db_path; + + // for stdout redirect output + std::string json_output_path = ""; + std::string csv_output_path = ""; + std::string sqlite_output_path = ""; + +private: + bool flag_ir_pass_already_executed = false; + +private: + void generate_functors(); + void generate_type_alias(); + void 
generate_all_data_database(); + +private: + void dump_souffle_functors(std::ostream&) const; + void dump_type_alias(std::ostream&) const; + void dump_schema_data_constraint_decls(std::ostream&) const; + void dump_rule_decls(std::ostream&) const; + void dump_input_decls(std::ostream&) const; + bool cache_input_impl(std::ostream&, const std::string&) const; + void dump_rule_impls(std::ostream&, + const std::unordered_set&) const; + void dump_database_get_table(std::ostream&) const; + void dump_database_data_constraint(std::ostream&) const; + void dump_schema_data_constraint_impls(std::ostream&) const; + void dump_schema_get_field(std::ostream&) const; + void dump_input_impls(std::ostream&) const; + void dump_souffle_annotated_input(std::ostream&) const; + void dump_souffle_multi_json_output(std::ostream&) const; + void dump_souffle_output(std::ostream&) const; + void dump_souffle_annotated_output(std::ostream&) const; + void dump(std::ostream&, const cli::configure&); + +private: + static inline bool enable_souffle_cache = false; + const std::filesystem::path cache_directory = std::filesystem::current_path() / "godel-script-cache"; + void check_cache_directory_and_create() const { + if (std::filesystem::exists(cache_directory)) { + return; + } + std::filesystem::create_directory(cache_directory); + } + void check_cache_directory_and_clean() const { + if (!std::filesystem::exists(cache_directory)) { + return; + } + std::filesystem::remove(cache_directory / "cache.db"); + } + bool check_cache_table_exists(const std::string&) const; + void dump_cache_output(std::ostream&, + const std::unordered_set&, + const std::unordered_set&) const; + +public: + ir_context(); + ~ir_context(); + void file_output(const std::string&, const cli::configure&); + std::string str_output(const cli::configure&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/ir_gen.cpp b/godel-script/godel-frontend/src/ir/ir_gen.cpp new file mode 100644 index 
00000000..b813ab43 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/ir_gen.cpp @@ -0,0 +1,2489 @@ +#include "godel-frontend/src/ir/ir_gen.h" +#include "godel-frontend/src/ir/name_mangling.h" +#include "godel-frontend/src/ir/inst_combine.h" + +namespace godel { + +lir::inst_value_t ir_gen::value_data::to_inst_value() const { + switch(kind) { + case data_kind::literal: return lir::inst_value_t::literal(content); + case data_kind::variable: return lir::inst_value_t::variable(content); + default: break; + } + + // unreachable + return lir::inst_value_t::null(); +} + +void ir_gen::emit_type_alias_for_database() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(const auto& sc : ns.databases) { + // generate symbol to get the full path name of the database + // for example: + // + // coref::java::JavaDB => coref__java__JavaDB + // + const auto sym = symbol { + .type_name = sc.second.name, + .type_loc = sc.second.location + }; + const auto name = replace_colon(sym.full_path_name()); + + // insert type alias into the context + // for example: + // + // coref::java::JavaDB + // => + // .type coref__java__JavaDB = DBIndex + // + irc.type_alias.push_back(souffle_type_alias { + .alias = name, + .real = "DBIndex", + .structure_type_list = {} + }); + } +} + +void ir_gen::emit_type_alias_for_schema_with_primary_key(const schema& sc) { + // generate full path name of the schema + const auto sym = symbol({ + .type_name = sc.name, + .type_loc = sc.location + }); + + // get full path name of the schema's primary key, this is the real type + const auto& real_type = sc.get_primary_key_type(); + + // insert type alias into the context + // for example: + // + // schema Example { @primary id: int, name: string} + // => + // .type Example = int + // + irc.type_alias.push_back(souffle_type_alias { + .alias = replace_colon(sym.full_path_name()), + .real = replace_colon(real_type.full_path_name_without_set()), + 
.structure_type_list = {} + }); +} + +void ir_gen::emit_type_alias_for_schema_without_primary_key(const schema& sc) { + // generate full path name of the schema + const auto sym = symbol { + .type_name = sc.name, + .type_loc = sc.location + }; + + // generate alias real type + std::vector structure_type_list; + auto real = std::string("["); + for(const auto& i : sc.ordered_fields) { + real += i + ": "; + const auto& type = sc.fields.at(i); + const auto name = replace_colon(type.full_path_name_without_set()); + real += name + ", "; + structure_type_list.push_back(name); + } + if (real.back()==' ') { + real = real.substr(0, real.length()-2); + } + real += "]"; + + // insert type alias into the context + // for example: + // + // schema Example { id: int, name: string} + // => + // .type Example = [id: int, name: string] + // + irc.type_alias.push_back(souffle_type_alias { + .alias = replace_colon(sym.full_path_name()), + .real = real, + .structure_type_list = structure_type_list + }); +} + +void ir_gen::emit_type_alias_for_schema() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(const auto& sc : ns.schemas) { + if (sc.second.has_primary_key()) { + emit_type_alias_for_schema_with_primary_key(sc.second); + } else { + emit_type_alias_for_schema_without_primary_key(sc.second); + } + } +} + +void ir_gen::emit_type_alias_for_enum() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(const auto& e : ns.enums) { + const auto sym = symbol { + .type_name = e.second.name, + .type_loc = e.second.location + }; + irc.type_alias.push_back(souffle_type_alias { + .alias = replace_colon(sym.full_path_name()), + .real = "int", + .structure_type_list = {} + }); + } +} + +void ir_gen::emit_used_database_input_decl(const std::string& db_type_name, + const std::string& db_path, + const std::string& table_type, + const schema& sc) { + 
irc.input_decls.push_back(souffle_input_decl( + db_type_name, + table_type, + irc.db_path.at(db_path) + )); + auto& input_decl = irc.input_decls.back(); + for(const auto& field : sc.ordered_fields) { + const auto& type = sc.fields.at(field); + input_decl.fields.push_back({field, type.full_path_name_without_set()}); + } +} + +void ir_gen::emit_used_database_get_table_impl(const std::string& db_type_name, + const std::string& db_path, + const std::string& db_id, + const std::string& table_name, + const std::string& table_type, + const schema& sc) { + // generate implementation of get table rule + auto get_table_impl = new souffle_rule_impl( + "get_table_" + db_type_name + "_" + table_name, + report::span::null() + ); + get_table_impl->add_param("result"); + get_table_impl->add_param(db_id); + get_table_impl->get_block()->set_use_comma(); + + // generate input call + auto input_rule_name = "input_" + db_type_name + "_"; + input_rule_name += table_type + "_"; + input_rule_name += std::to_string(irc.db_path.at(db_path)); + auto input_call = new lir::call(input_rule_name, report::span::null()); + + // generate arguments of input call + if (sc.has_primary_key()) { + // only need primary key + for(const auto& f : sc.ordered_fields) { + input_call->add_arg(sc.fields.at(f).primary? + lir::inst_value_t::variable("result"): + lir::inst_value_t::default_value() + ); + } + } else { + for(const auto& f : sc.ordered_fields) { + input_call->add_arg(lir::inst_value_t::variable(f)); + } + } + + // if schema does not have a primary key, generate schema literal + if (!sc.has_primary_key()) { + // generate schema literal from input data + auto literal = std::string("["); + for(const auto& f : sc.ordered_fields) { + literal += f; + if (f!=sc.ordered_fields.back()) { + literal += ", "; + } + } + literal += "]"; + + // generate result = [ f1, f2, ...] 
+ get_table_impl->get_block()->add_new_content(new lir::store( + lir::inst_value_t::literal(literal), + lir::inst_value_t::variable("result"), + report::span::null() + )); + } + + get_table_impl->get_block()->add_new_content(input_call); + irc.database_get_table.push_back(get_table_impl); +} + +void ir_gen::emit_schema_data_constraint_impl(const std::string& db_type_name, + const std::string& db_path, + const std::string& db_id, + const std::string& table_type, + const schema& sc) { + auto input_to_schema = new souffle_rule_impl( + "schema_" + table_type, + report::span::null() + ); + + // load parameter + if (sc.has_primary_key()) { + // schema with primary key, just set the result = primary key + input_to_schema->add_param(sc.get_primary_key()); + } else { + // schema without primary key, need to generate result = [...] + input_to_schema->add_param("result"); + } + + // database index + input_to_schema->add_param(db_id); + // schema field + for(const auto& field : sc.ordered_fields) { + input_to_schema->add_param(field); + } + + // load result, generate result = [...] 
+ if (!sc.has_primary_key()) { + auto literal = std::string("["); + for(const auto& f : sc.ordered_fields) { + literal += f; + if (f!=sc.ordered_fields.back()) { + literal += ", "; + } + } + literal += "]"; + input_to_schema->get_block()->add_new_content(new lir::store( + lir::inst_value_t::literal(literal), + lir::inst_value_t::variable("result"), + report::span::null() + )); + } + + // load fact data input call + auto input_call = new lir::call( + "input_" + db_type_name + "_" + table_type + "_" + + std::to_string(irc.db_path.at(db_path)), + report::span::null() + ); + input_to_schema->get_block()->add_new_content(input_call); + input_to_schema->get_block()->set_use_comma(); + for(const auto& field : sc.ordered_fields) { + input_call->add_arg(lir::inst_value_t::variable(field)); + } + + irc.schema_data_constraint_impls.push_back(input_to_schema); +} + +void ir_gen::emit_used_database(const std::string& db_type_name, + const std::string& db_file_path) { + // update db index and load time counter + if (!irc.db_index.count(db_type_name)) { + irc.db_index.insert({db_type_name, irc.db_index.size()}); + } + if (!irc.db_path.count(db_file_path)) { + irc.db_path.insert({db_file_path, irc.db_path.size()}); + } + + // get load time counter and generate db id + const auto path_id = irc.db_path.at(db_file_path); + const auto db_id = "[" + std::to_string(irc.db_index.at(db_type_name)) + + ", " + std::to_string(path_id) + "]"; + // check if this db_id exists in all_data + if (irc.database_all_data.count(db_id)) { + return; + } + irc.database_all_data.insert(db_id); + + // add input table into input impl + const auto index = ctx->global.get_index(db_type_name); + const auto& db = ctx->global.get_database(index); + for(const auto& table : db.tables) { + const auto& name = table.first; + const auto& type = table.second.full_path_name_without_set(); + + // generate input impl + if (db.real_name.count(name)) { + const auto& real = db.real_name.at(name); + irc.input_impls.push_back( 
+ souffle_input_impl(db_type_name, real, type, path_id, db_file_path) + ); + } else { + irc.input_impls.push_back( + souffle_input_impl(db_type_name, name, type, path_id, db_file_path) + ); + } + + auto sc_index = ctx->global.get_index(type); + const auto& sc = ctx->global.get_schema(sc_index); + emit_used_database_input_decl(db_type_name, db_file_path, type, sc); + + // get table rule impl + emit_used_database_get_table_impl(db_type_name, db_file_path, db_id, name, type, sc); + + // input data load to schema data constraint + emit_schema_data_constraint_impl(db_type_name, db_file_path, db_id, type, sc); + } +} + +void ir_gen::emit_database_get_table_decl() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(auto& db : ns.databases) { + const auto sym = symbol({ + .type_name = db.second.name, + .type_loc = db.second.location + }); + + for(auto& table : db.second.tables) { + auto new_decl = new souffle_rule_decl( + "get_table_" + sym.full_path_name_without_set() + + "_" + table.first + ); + + // generate this: + // + // .decl get_table_database_table(return: table_type, db_id: DBIndex) + // + new_decl->set_return_type(table.second.full_path_name_without_set()); + new_decl->add_param("db_id", "DBIndex"); + irc.rule_decls.push_back(new_decl); + } + } +} + +void ir_gen::emit_schema_decl() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + // load schema structure and methods + for(auto& sc : ns.schemas) { + const auto sym = symbol({ + .type_name = sc.second.name, + .type_loc = sc.second.location + }); + + std::vector> fields; + // schema data constraint decl + irc.schema_data_constraint_decls.push_back(souffle_schema { + .name = sym.full_path_name(), + .fields = fields + }); + + // generate name:type pair in order + for(const auto& field_name : sc.second.ordered_fields) { + const auto& field_type = sc.second.fields.at(field_name); + 
irc.schema_data_constraint_decls.back().fields.push_back({ + field_name, + field_type.full_path_name_without_set() + }); + } + + // load schema methods + for(const auto& method : sc.second.methods) { + auto name = "rule_" + sym.full_path_name() + "::" + method.first; + emit_schema_method_decl(method.second, name); + emit_schema_inherit_method(sc.second, method.second, name); + } + } +} + +void ir_gen::emit_schema_method_decl(const function& method, + const std::string& self_rule_name) { + auto method_decl = new souffle_rule_decl(self_rule_name); + irc.rule_decls.push_back(method_decl); + + const auto& ret_type = method.return_type; + + // set return type of the method, this determines + // whether it is a rule or a predicate: + // rule: return type is not void, for example: name(result, ...) + // predicate: with no return type, for example: name(...) + method_decl->set_return_type(ret_type.full_path_name_without_set()); + + // inherit method and method with inline annotation + // should all be declared as `inline`, for optimization purpose + method_decl->set_is_inline_rule( + method.has_annotation("@inline") || + method.inherit + ); + + // load parameters of the method + for(const auto& arg_name : method.ordered_parameter_list) { + const auto& arg_type = method.parameter_list.at(arg_name); + method_decl->add_param(arg_name, arg_type.full_path_name_without_set()); + } +} + +void ir_gen::emit_schema_inherit_method(const schema& sc, + const function& method, + const std::string& self_rule_name) { + // do not emit if is not inherited + if (!method.inherit) { + return; + } + + const auto parent_type = symbol({ + .type_name = sc.parent->name, + .type_loc = sc.parent->location + }); + + auto impl = new souffle_rule_impl(self_rule_name, report::span::null()); + auto call = new lir::call( + "rule_" + parent_type.full_path_name() + "::" + method.name, + report::span::null() + ); + impl->get_block()->add_new_content(call); + + if (method.return_type!=symbol::null() && + 
method.return_type!=symbol::boolean()) { + impl->add_param("result"); + call->add_arg(lir::inst_value_t::variable("result")); + } + for(auto& arg : method.ordered_parameter_list) { + impl->add_param(arg); + call->add_arg(lir::inst_value_t::variable(arg)); + } + irc.rule_impls.push_back(impl); +} + +void ir_gen::emit_schema_type_check() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(const auto& sc : ns.schemas) { + const auto sym = symbol({ + .type_name = sc.second.name, + .type_loc = sc.second.location + }); + + // typecheck + emit_schema_type_check_decl(sym); + + // typecheck impl + emit_schema_type_check_impl(sym, sc.second); + } +} + +void ir_gen::emit_schema_type_check_decl(const symbol& sym) { + auto typecheck_decl = new souffle_rule_decl("typecheck_" + sym.full_path_name()); + irc.rule_decls.push_back(typecheck_decl); + typecheck_decl->add_param("self", sym.full_path_name()); + typecheck_decl->set_is_inline_rule(true); +} + +// Emit the implementation of typecheck_<schema>(self): a single call to +// schema_<schema> that binds only `self` and passes a default value for +// the database index and for every field of the schema. +void ir_gen::emit_schema_type_check_impl(const symbol& sym, const schema& sc) { + // generate typecheck impl + auto typecheck_impl = new souffle_rule_impl( + "typecheck_" + sym.full_path_name(), + report::span::null() + ); + typecheck_impl->add_param("self"); + + // generate inner data constraint call + auto data_constraint_call = new lir::call( + "schema_" + sym.full_path_name(), + report::span::null() + ); + data_constraint_call->add_arg(lir::inst_value_t::variable("self")); + data_constraint_call->add_arg(lir::inst_value_t::default_value()); + // one default argument per schema field, the schema rule takes + // every ordered field after the database index + for(size_t i = 0; i<sc.ordered_fields.size(); ++i) { + data_constraint_call->add_arg(lir::inst_value_t::default_value()); + } + + // insert data constraint call into typecheck impl block + typecheck_impl->get_block()->add_new_content(data_constraint_call); + + // insert typecheck impl into IR context + irc.rule_impls.push_back(typecheck_impl); +} + +void ir_gen::emit_schema_get_field() { + const auto& glb = context::global; + const auto& ns = 
glb.get_all_namespace().at(ctx->this_file_name); + + for(const auto& sc : ns.schemas) { + const auto sym = symbol { + .type_name = sc.second.name, + .type_loc = sc.second.location + }; + + for(const auto& field : sc.second.ordered_fields) { + auto name = "get_field_" + sym.full_path_name() + "_" + field; + auto rule = new souffle_rule_decl(name); + rule->set_return_type( + sc.second.fields.at(field).full_path_name_without_set() + ); + + // must be inline, it's a necessary optimization + rule->set_is_inline_rule(true); + rule->add_param("self", sym.full_path_name()); + irc.rule_decls.push_back(rule); + + // implementation of get field method + auto rule_impl = new souffle_rule_impl(name, report::span::null()); + rule_impl->add_param("result"); + rule_impl->add_param("self"); + + // necessary optimization, if the field is primary key, + // we can directly store self in result to avoid extra join + if (sc.second.fields.at(field).primary) { + auto assign = new lir::store( + lir::inst_value_t::variable("self"), + lir::inst_value_t::variable("result"), + report::span::null() + ); + rule_impl->get_block()->add_new_content(assign); + } else { + // if the field is not a primary key, then generate this: + // + // get_field_name(result, self) :- + // schema_name(self, _, ..., result, ...). + // + auto call = new lir::call( + "schema_" + sym.full_path_name(), + report::span::null() + ); + call->add_arg(lir::inst_value_t::variable("self")); + call->add_arg(lir::inst_value_t::default_value()); + for(const auto& f : sc.second.ordered_fields) { + call->add_arg(f==field? 
+ lir::inst_value_t::variable("result"): + lir::inst_value_t::default_value() + ); + } + rule_impl->get_block()->add_new_content(call); + } + + // insert the rule implementation into IR context + irc.schema_get_field.push_back(rule_impl); + } + } +} + +void ir_gen::emit_DO_schema_default_constructor() { + const auto& glb = context::global; + const auto& ns = glb.get_all_namespace().at(ctx->this_file_name); + for(auto& sc : ns.schemas) { + if (!sc.second.methods.count("__all__")) { + continue; + } + if (sc.second.methods.at("__all__").implemented) { + continue; + } + + const auto sym = symbol({ + .type_name = sc.second.name, + .type_loc = sc.second.location + }); + + // generate this method as a rule implementation: + // rule_name(result, ...) :- schema_name(result, db, ...). + const auto function_name = replace_colon(sym.full_path_name() + "::__all__"); + auto func_impl = new souffle_rule_impl( + "rule_" + function_name, + report::span::null() + ); + func_impl->add_param("result"); + func_impl->add_param("db"); + irc.rule_impls.push_back(func_impl); + + auto call = new lir::call( + "schema_" + replace_colon(sym.full_path_name()), + report::span::null() + ); + func_impl->get_block()->add_new_content(call); + call->add_arg(lir::inst_value_t::variable("result")); + call->add_arg(lir::inst_value_t::variable("db")); + for(size_t i = 0; iadd_arg(lir::inst_value_t::default_value()); + } + } +} + +void ir_gen::emit_func_decl() { + const auto& ns_name = godel_module::instance()->find_module_by_file(ctx->this_file_name); + const auto& ns = context::global.get_all_namespace().at(ctx->this_file_name); + for(const auto& fn : ns.functions) { + if (fn.first=="main") { + continue; + } + auto rule = new souffle_rule_decl( + "rule_" + (ns_name.length()? 
ns_name + "::":"") + fn.first + ); + + if (!fn.second.return_type.is_null()) { + rule->set_return_type(fn.second.return_type.full_path_name_without_set()); + } + rule->set_is_inline_rule(fn.second.has_annotation("@inline")); + rule->set_need_cache(fn.second.has_annotation("@cache")); + for(const auto& arg : fn.second.ordered_parameter_list) { + const auto& arg_type = fn.second.parameter_list.at(arg); + rule->add_param(arg, arg_type.full_path_name_without_set()); + } + irc.rule_decls.push_back(rule); + } +} + +void ir_gen::generate_basic_symbol(identifier* node) { + path_infer_result = {path_kind::basic, node->get_name()}; +} + +void ir_gen::generate_database_symbol(identifier* node) { + const auto sym = symbol({ + .type_name = node->get_name(), + .type_loc = ctx->find_global_location(node->get_name()) + }); + path_infer_result = {path_kind::database, sym.full_path_name_without_set()}; +} + +void ir_gen::generate_schema_symbol(identifier* node) { + const auto sym = symbol({ + .type_name = node->get_name(), + .type_loc = ctx->find_global_location(node->get_name()) + }); + path_infer_result = {path_kind::schema, sym.full_path_name_without_set()}; +} + +void ir_gen::generate_enum_symbol(identifier* node) { + const auto sym = symbol({ + .type_name = node->get_name(), + .type_loc = ctx->find_global_location(node->get_name()) + }); + path_infer_result = {path_kind::enumerate, sym.full_path_name_without_set()}; +} + +void ir_gen::generate_function_symbol(identifier* node) { + const auto loc = ctx->find_global_location(node->get_name()); + // native function is not allowed to be used in souffle codegen + if (loc.is_null()) { + report_compiler_bug(node, + "\"" + node->get_name() + "\" is a native function." 
+ ); + return; + } + + const auto sym = symbol({ + .type_name = node->get_name(), + .type_loc = loc + }); + func_stack.push_back({ + func_kind::function, + "rule_" + sym.full_path_name_without_set(), + node->get_resolve() + }); +} + +void ir_gen::generate_package_symbol(identifier* node) { + path_infer_result = {path_kind::module_path, node->get_name()}; +} + +void ir_gen::generate_symbol_call(identifier* node) { + if (node->get_name()=="Self") { + path_infer_result = { + path_kind::schema, + node->get_resolve().type.full_path_name_without_set() + }; + return; + } + + switch(ctx->find_global_kind(node->get_name())) { + case symbol_kind::basic: generate_basic_symbol(node); break; + case symbol_kind::database: generate_database_symbol(node); break; + case symbol_kind::enumerate: generate_enum_symbol(node); break; + case symbol_kind::function: generate_function_symbol(node); break; + case symbol_kind::package: generate_package_symbol(node); break; + case symbol_kind::query: + report_compiler_bug(node, "query call is not allowed in codegen."); + break; + case symbol_kind::schema: generate_schema_symbol(node); break; + default: + // not found, means this is not a global symbol + if (node->get_name()=="__all_data__") { + value_stack.push_back({ + data_kind::variable, + "_", + node->get_resolve() + }); + } else { + value_stack.push_back({ + data_kind::variable, + node->get_name(), + node->get_resolve() + }); + } + break; + } +} + +void ir_gen::get_path_from_basic(call_expr* node) { + func_stack.push_back({ + func_kind::basic_static, + path_infer_result.content + "::" + node->get_field_name()->get_name() + }); + path_infer_result.kind = path_kind::null; +} + +void ir_gen::get_path_from_database(call_expr* node) { + if (node->get_field_name()->get_name()=="load") { + func_stack.push_back({ + func_kind::database_load, + path_infer_result.content + "::load", + node->get_resolve() + }); + path_infer_result.kind = path_kind::null; + return; + } + report_compiler_bug(node, 
"unreachable."); +} + +void ir_gen::get_path_from_schema(call_expr* node) { + func_stack.push_back({ + func_kind::function, + "rule_" + path_infer_result.content + "::" + node->get_field_name()->get_name(), + node->get_resolve() + }); + path_infer_result.kind = path_kind::null; +} + +void ir_gen::get_path_from_enum(call_expr* node) { + const auto name = node->get_resolve().type.full_path_name_without_set(); + const auto index = ctx->global.get_index(name); + const auto& en = ctx->global.get_enum(index); + value_stack.push_back({ + data_kind::literal, + std::to_string(en.pairs.at(node->get_field_name()->get_name())), + node->get_resolve() + }); + path_infer_result.kind = path_kind::null; +} + +void ir_gen::get_path_from_package(call_expr* node) { + auto path = path_infer_result.content + "::" + node->get_field_name()->get_name(); + auto index = ctx->global.get_index(path); + switch(ctx->global.get_kind(index)) { + case symbol_kind::package: + path_infer_result = {path_kind::module_path, path}; break; + case symbol_kind::database: + path_infer_result = {path_kind::database, path}; break; + case symbol_kind::enumerate: + path_infer_result = {path_kind::enumerate, path}; break; + case symbol_kind::schema: + path_infer_result = {path_kind::schema, path}; break; + case symbol_kind::function: + func_stack.push_back({ + func_kind::function, + "rule_" + path, + node->get_resolve() + }); + break; + default: + report_compiler_bug(node, "get unknown symbol from package."); + break; + } +} + +void ir_gen::get_path_from_infer(call_expr* node) { + if (path_infer_result.is_null()) { + report_compiler_bug(node, "path infer is null."); + return; + } + switch(path_infer_result.kind) { + case path_kind::null: break; + case path_kind::database: get_path_from_database(node); break; + case path_kind::enumerate: get_path_from_enum(node); break; + case path_kind::schema: get_path_from_schema(node); break; + case path_kind::basic: get_path_from_basic(node); break; + case 
path_kind::module_path: get_path_from_package(node); break; + } +} + +void ir_gen::get_field_from_database(call_expr* node) { + const auto name = field_infer_result.type.full_path_name_without_set(); + auto lir_call = new lir::call( + "get_table_" + name + "_" + node->get_field_name()->get_name(), + node->get_location() + ); + lir_call->set_call_type(lir::call::kind::method); + if (value_stack.empty()) { + report_empty_value_stack(node->get_field_name()); + } + lir_call->add_arg(value_stack.back().to_inst_value()); + value_stack.pop_back(); + + const auto temp_var = generate_temp_variable(); + lir_call->set_return(lir::inst_value_t::variable(temp_var)); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + + blocks.back()->add_new_content(lir_call); +} + +void ir_gen::get_field_from_schema(call_expr* node) { + const auto name = field_infer_result.type.full_path_name_without_set(); + const auto index = ctx->global.get_index(name); + const auto& sch = ctx->global.get_schema(index); + if (sch.fields.count(node->get_field_name()->get_name())) { + auto lir_call = new lir::call( + "get_field_" + name + "_" + node->get_field_name()->get_name(), + node->get_location() + ); + lir_call->set_call_type(lir::call::kind::method); + if (value_stack.empty()) { + report_empty_value_stack(node->get_field_name()); + } + lir_call->add_arg(value_stack.back().to_inst_value()); + value_stack.pop_back(); + + const auto temp_var = generate_temp_variable(); + lir_call->set_return(lir::inst_value_t::variable(temp_var)); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + blocks.back()->add_new_content(lir_call); + return; + } + if (sch.methods.count(node->get_field_name()->get_name())) { + func_stack.push_back({ + func_kind::method, + "rule_" + name + "::" + node->get_field_name()->get_name(), + node->get_resolve() + }); + return; + } + if (node->get_field_name()->get_name()=="key_eq" || + node->get_field_name()->get_name()=="key_neq") 
{ + func_stack.push_back({ + func_kind::key_cmp, + node->get_field_name()->get_name(), + node->get_resolve() + }); + return; + } + if (node->get_field_name()->get_name()=="to_set") { + func_stack.push_back({ + func_kind::to_set, + node->get_field_name()->get_name(), + node->get_resolve() + }); + return; + } + if (node->is_generic_is()) { + func_stack.push_back({ + func_kind::schema_is, + "is", + node->get_resolve() + }); + func_stack.back().generic_type = node->get_generic_output_full_name(); + return; + } + if (node->is_generic_to()) { + func_stack.push_back({ + func_kind::schema_to, + "to", + node->get_resolve() + }); + func_stack.back().generic_type = node->get_generic_output_full_name(); + return; + } + report_compiler_bug(node, "unreachable."); +} + +void ir_gen::get_field_from_basic(call_expr* node) { + const auto tn = field_infer_result.type.full_path_name_without_set(); + + // bool native methods is not supported now + if (field_infer_result.type==symbol::boolean()) { + err.err(node->get_location(), + "generation for bool method is not supported yet." + ); + } + + // generate real name of native methods, for example: + // + // "1".add("2") + // => + // string::add("1", "2") + func_stack.push_back({ + func_kind::basic_method, + tn + "::" + node->get_field_name()->get_name() + }); +} + +void ir_gen::get_field_from_infer(call_expr* node) { + if (field_infer_result.type.is_null()) { + report_compiler_bug(node, "field infer is empty."); + return; + } + if (field_infer_result.type.is_set) { + if (node->get_field_name()->get_name()!="find") { + report_compiler_bug(node, + "expected \"find\" but get \"" + + node->get_field_name()->get_name() + "\"." 
+ ); + } + func_stack.push_back({ + func_kind::find, + node->get_field_name()->get_name() + }); + return; + } + const auto name = field_infer_result.type.full_path_name_without_set(); + const auto index = ctx->global.get_index(name); + switch(ctx->global.get_kind(index)) { + case symbol_kind::basic: get_field_from_basic(node); break; + case symbol_kind::database: get_field_from_database(node); break; + case symbol_kind::schema: get_field_from_schema(node); break; + default: report_compiler_bug(node, "unreachable."); break; + } +} + +void ir_gen::generate_single_output(call_root* node) { + auto id = reinterpret_cast(node->get_call_head()->get_first_expression()); + const auto& first_id = id->get_name(); + auto name = first_id; + for(auto i : node->get_call_chain()) { + name += "::" + i->get_field_name()->get_name(); + } + + // if it's a global function + if (name.find(":")==std::string::npos && ctx->find_global(name)) { + const auto sym = symbol { + .type_name = name, + .type_loc = ctx->find_global_location(name) + }; + const auto func_name = sym.full_path_name(); + irc.souffle_output.push_back("rule_" + func_name); + irc.souffle_output_real_name.insert({ + "rule_" + func_name, + func_name + }); + return; + } + + // if it's a module path or schema + if (name.find(":")!=std::string::npos) { + if (ctx->find_global_kind(first_id)==symbol_kind::package) { + irc.souffle_output.push_back("rule_" + name); + irc.souffle_output_real_name.insert({"rule_" + name, name}); + } else if (ctx->find_global_kind(first_id)==symbol_kind::schema) { + const auto sym = symbol { + .type_name = first_id, + .type_loc = ctx->find_global_location(first_id) + }; + auto func_name = sym.full_path_name(); + for(auto i : node->get_call_chain()) { + func_name += "::" + i->get_field_name()->get_name(); + } + irc.souffle_output.push_back("rule_" + func_name); + irc.souffle_output_real_name.insert({ + "rule_" + func_name, + func_name + }); + } else { + // unreachable + err.err(node->get_location(), + 
"cannot get output from \"" + name + "\".", + "failed to find the type of \"" + first_id + "\"." + ); + } + } +} + +void ir_gen::generate_output(block_stmt* node) { + if (!node) { + return; + } + for(auto i : node->get_statement()) { + auto tmp = reinterpret_cast(i); + auto call = reinterpret_cast(tmp->get_expr()); + auto args = call->get_call_head()->get_func_call(); + generate_single_output(reinterpret_cast(args->get_arguments()[0])); + } +} + +bool ir_gen::is_stdout_output_annotation(annotation* anno) { + const auto& props = anno->get_ordered_properties(); + if (props.empty()) { + return true; + } + if (props.size()==1 && + props[0].first=="format" && + props[0].second=="\"stdout\"") { + return true; + } + return false; +} + +void ir_gen::generate_annotated_output(annotation* anno, function_decl* node) { + auto name = node->get_name()->get_name(); + if (impl_schema_name.length()) { + name = impl_schema_name + "::" + name; + } else { + const auto& ns = godel_module::instance() + ->find_module_by_file(ctx->this_file_name); + name = (ns.length()? 
ns + "::":"") + name; + } + + if (is_stdout_output_annotation(anno)) { + irc.souffle_output.push_back("rule_" + name); + irc.souffle_output_real_name.insert({ + "rule_" + name, + name + }); + } else { + auto format = std::string(""); + auto file_path = std::string(""); + for(const auto& i : anno->get_ordered_properties()) { + if (i.first=="format") { + format = i.second; + } else if (i.first=="file") { + file_path = i.second; + } + } + irc.annotated_output.push_back({ + .format = format, + .file_path = file_path, + .rule_name = "rule_" + name + }); + } +} + +void ir_gen::generate_annotated_input(annotation* anno, function_decl* node) { + auto name = node->get_name()->get_name(); + // annotated input must be used on global function + if (impl_schema_name.length()) { + report_compiler_bug(anno, "unreachable."); + } else { + const auto& ns = godel_module::instance() + ->find_module_by_file(ctx->this_file_name); + name = (ns.length()? ns + "::":"") + name; + } + + auto format = std::string(""); + auto file_path = std::string(""); + for(const auto& i : anno->get_ordered_properties()) { + if (i.first=="format") { + format = i.second; + } else if (i.first=="file") { + file_path = i.second; + } + } + irc.annotated_input.push_back({ + .format = format, + .file_path = file_path, + .rule_name = "rule_" + name + }); +} + +void ir_gen::generate_function_call(func_call* node, lir::call* c) { + std::vector args; + for(size_t i = 0; iget_arguments().size(); ++i) { + if (value_stack.empty()) { + report_empty_value_stack(node); + break; + } + args.push_back(value_stack.back()); + value_stack.pop_back(); + } + for(auto i = args.rbegin(); i!=args.rend(); ++i) { + c->add_arg(i->to_inst_value()); + } +} + +void ir_gen::generate_method_call(func_call* node, lir::call* c) { + std::vector args; + for(size_t i = 0; iget_arguments().size()+1; ++i) { + if (value_stack.empty()) { + report_empty_value_stack(node); + break; + } + args.push_back(value_stack.back()); + value_stack.pop_back(); + } 
+ for(auto i = args.rbegin(); i!=args.rend(); ++i) { + c->add_arg(i->to_inst_value()); + } +} + +void ir_gen::report_ignored_DO_schema_data_constraint() { + if (ignored_DO_schema.empty()) { + return; + } + err.warn_report_ignored_DO_schema(ignored_DO_schema); +} + +bool ir_gen::visit_number_literal(number_literal* node) { + value_stack.push_back({ + data_kind::literal, + node->is_integer()? std::to_string(node->get_integer()): + std::to_string(node->get_float()), + node->get_resolve() + }); + return true; +} + +bool ir_gen::visit_string_literal(string_literal* node) { + if (node->is_input_database_path()) { + const auto& db_type_name = node->get_database_full_path(); + emit_used_database(db_type_name, node->get_literal()); + auto db_object = std::string("["); + db_object += std::to_string(irc.db_index.at(db_type_name)); + db_object += ", "; + db_object += std::to_string(irc.db_path.at(node->get_literal())); + db_object += "]"; + + // load database index instead of string literal + // so the stack should be: + // +----------+----------+ + // | value | func | + // +----------+----------+ + // | [0, 0] | db::load | + // +----------+----------+ + value_stack.push_back({ + data_kind::literal, + db_object, + node->get_resolve() + }); + return true; + } + value_stack.push_back({ + data_kind::literal, + node->get_literal(), + node->get_resolve() + }); + return true; +} + +bool ir_gen::visit_boolean_literal(boolean_literal* node) { + blocks.back()->add_new_content( + new lir::boolean(node->get_flag(), node->get_location()) + ); + return true; +} + +bool ir_gen::visit_unary_operator(unary_operator* node) { + switch(node->get_operator_type()) { + case unary_operator::type::arithmetic_negation: { + node->get_child()->accept(this); + if (value_stack.empty()) { + report_empty_value_stack(node); + } else { + auto top = value_stack.back(); + value_stack.pop_back(); + auto temp = generate_temp_variable(); + blocks.back()->add_new_content(new lir::unary( + lir::unary::kind::op_neg, 
+ top.to_inst_value(), + lir::inst_value_t::variable(temp), + node->get_child()->get_location() + )); + value_stack.push_back({ + data_kind::variable, + temp, + node->get_resolve() + }); + } + } + break; + case unary_operator::type::logical_negation: { + auto new_not = new lir::not_operand(node->get_location()); + blocks.back()->add_new_content(new_not); + auto new_blk = new lir::block(node->get_child()->get_location()); + new_not->set_body(new_blk); + blocks.push_back(new_blk); + node->get_child()->accept(this); + blocks.pop_back(); + } + break; + } + return true; +} + +bool ir_gen::visit_binary_operator(binary_operator* node) { + if (node->get_operator_type()==binary_operator::type::logical_and) { + auto new_and = new lir::and_operand(node->get_location()); + auto new_left_blk = new lir::block(node->get_left()->get_location()); + auto new_right_blk = new lir::block(node->get_right()->get_location()); + new_and->set_left(new_left_blk); + new_and->set_right(new_right_blk); + blocks.back()->add_new_content(new_and); + + blocks.push_back(new_left_blk); + node->get_left()->accept(this); + blocks.pop_back(); + + blocks.push_back(new_right_blk); + node->get_right()->accept(this); + blocks.pop_back(); + return true; + } + + if (node->get_operator_type()==binary_operator::type::logical_or) { + auto new_or = new lir::or_operand(node->get_location()); + auto new_left_blk = new lir::block(node->get_left()->get_location()); + auto new_right_blk = new lir::block(node->get_right()->get_location()); + new_or->set_left(new_left_blk); + new_or->set_right(new_right_blk); + blocks.back()->add_new_content(new_or); + + blocks.push_back(new_left_blk); + node->get_left()->accept(this); + blocks.pop_back(); + + blocks.push_back(new_right_blk); + node->get_right()->accept(this); + blocks.pop_back(); + return true; + } + + node->get_left()->accept(this); + node->get_right()->accept(this); + + const auto temp_var = generate_temp_variable(); + if (value_stack.empty()) { + 
report_empty_value_stack(node); + } + const auto r = value_stack.back(); + value_stack.pop_back(); + if (value_stack.empty()) { + report_empty_value_stack(node); + } + const auto l = value_stack.back(); + value_stack.pop_back(); + + switch(node->get_operator_type()) { + case binary_operator::type::add: + if (l.resolve.type==symbol::str() && r.resolve.type==symbol::str()) { + auto cat_call = new lir::call("string::add", node->get_location()); + cat_call->set_call_type(lir::call::kind::basic_method); + cat_call->set_return(lir::inst_value_t::variable(temp_var)); + cat_call->add_arg(l.to_inst_value()); + cat_call->add_arg(r.to_inst_value()); + blocks.back()->add_new_content(cat_call); + } else { + blocks.back()->add_new_content(new lir::binary( + l.to_inst_value(), + r.to_inst_value(), + lir::inst_value_t::variable(temp_var), + lir::binary::kind::op_add, + node->get_location() + )); + } + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + break; + case binary_operator::type::compare_equal: + case binary_operator::type::in: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + lir::compare::kind::op_eq, + node->get_location() + )); + break; + case binary_operator::type::compare_great: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + lir::compare::kind::op_gt, + node->get_location() + )); + break; + case binary_operator::type::compare_great_equal: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + lir::compare::kind::op_ge, + node->get_location() + )); + break; + case binary_operator::type::compare_less: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + lir::compare::kind::op_lt, + node->get_location() + )); + break; + case binary_operator::type::compare_less_equal: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + 
lir::compare::kind::op_le, + node->get_location() + )); + break; + case binary_operator::type::compare_not_equal: + blocks.back()->add_new_content(new lir::compare( + l.to_inst_value(), + r.to_inst_value(), + lir::compare::kind::op_neq, + node->get_location() + )); + break; + case binary_operator::type::div: + blocks.back()->add_new_content(new lir::binary( + l.to_inst_value(), + r.to_inst_value(), + lir::inst_value_t::variable(temp_var), + lir::binary::kind::op_div, + node->get_location() + )); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + break; + case binary_operator::type::mult: + blocks.back()->add_new_content(new lir::binary( + l.to_inst_value(), + r.to_inst_value(), + lir::inst_value_t::variable(temp_var), + lir::binary::kind::op_mul, + node->get_location() + )); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + break; + case binary_operator::type::sub: + blocks.back()->add_new_content(new lir::binary( + l.to_inst_value(), + r.to_inst_value(), + lir::inst_value_t::variable(temp_var), + lir::binary::kind::op_sub, + node->get_location() + )); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + break; + default: break; + } + return true; +} + +bool ir_gen::visit_impl_block(impl_block* node) { + impl_schema_name = full_name(node->get_impl_schema_name()); + for(auto i : node->get_functions()) { + i->accept(this); + } + impl_schema_name = ""; + return true; +} + +bool ir_gen::has_self_typecheck_free_annotation(function_decl* node) { + for(auto i : node->get_annotations()) { + if (i->get_annotation()=="@self_typecheck_free") { + return true; + } + } + return false; +} + +void ir_gen::add_self_type_check() { + // note this line to enable self type check generation + auto typecheck = new lir::call( + "typecheck_" + impl_schema_name, + report::span::null() + ); + + blocks.back()->add_new_content(typecheck); + typecheck->add_arg(lir::inst_value_t::variable("self")); +} + 
+void ir_gen::add_parameter_type_check(function_decl* node) { + for(auto i : node->get_parameter_list()) { + const auto& type = i->get_resolve().type; + if (type==symbol::null()) { + continue; + } + + const auto index = ctx->global.get_index(type.full_path_name_without_set()); + if (index==global_symbol_table::npos) { + continue; + } + + if (ctx->global.get_kind(index)!=symbol_kind::schema) { + continue; + } + auto typecheck = new lir::call( + "typecheck_" + type.full_path_name_without_set(), + i->get_location() + ); + typecheck->add_arg({ + lir::inst_value_kind::variable, + i->get_var_name()->get_name() + }); + + blocks.back()->add_new_content(typecheck); + } +} + +bool ir_gen::check_need_parameter_type_check(function_decl* node) { + for(auto i : node->get_parameter_list()) { + const auto& type = i->get_resolve().type; + if (type==symbol::null()) { + continue; + } + const auto index = ctx->global.get_index(type.full_path_name_without_set()); + if (index==global_symbol_table::npos) { + continue; + } + if (ctx->global.get_kind(index)==symbol_kind::schema) { + return true; + } + } + return false; +} + +void ir_gen::not_data_constraint_func_decl(const std::string& function_name, + function_decl* node) { + // generate rule implementation + current_rule = new souffle_rule_impl( + "rule_" + function_name, + node->get_location() + ); + if (node->has_return_value() && + node->get_return_type()->get_full_name()!="bool") { + current_rule->add_param("result"); + } + for(auto i : node->get_parameter_list()) { + current_rule->add_param(i->get_var_name()->get_name()); + } + irc.rule_impls.push_back(current_rule); + + // push a new block + blocks.push_back(current_rule->get_block()); + // self type check flag + bool need_self_typecheck = + node->get_parameter_list().size() && + node->get_parameter_list().front()->get_var_name()->get_name()=="self" && + ( + !flag_enable_self_constraint_optimization || + ( + !has_self_typecheck_free_annotation(node) && + 
!node->can_disable_self_check_flag() + ) + ); + // parameter type check flag + bool need_parameter_typecheck = check_need_parameter_type_check(node); + // if need type check, generate a new block to wrap the code block up + if (need_self_typecheck || need_parameter_typecheck) { + // + // warp block ( + // a; b; c + // ), + // typecheck d, + // typecheck e, + // typecheck f + // + auto warp_block = new lir::block(node->get_location()); + warp_block->set_use_semicolon(); // this is the inside block + blocks.back()->set_use_comma(); + blocks.back()->add_new_content(warp_block); + blocks.push_back(warp_block); + } + // generate code block + if (node->get_code_block()) { + ssa_temp_id = 0; + for(auto i : node->get_code_block()->get_statement()) { + i->accept(this); + } + } + // generate type check + if (need_self_typecheck || need_parameter_typecheck) { + blocks.pop_back(); + if (need_self_typecheck) add_self_type_check(); + if (need_parameter_typecheck) add_parameter_type_check(node); + } + // pop code block + blocks.pop_back(); +} + +void ir_gen::data_constraint_func_decl(const std::string& function_name, + function_decl* node) { + // generate data constraint for schema + const auto sc_index = ctx->global.get_index(impl_schema_name); + const auto& sc = ctx->global.get_schema(sc_index); + + // get name of database type parameter + std::string database_param_name = ""; + for(auto i : node->get_parameter_list()) { + const auto type = i->get_resolve().type.full_path_name_without_set(); + const auto index = ctx->global.get_index(type); + const auto kind = ctx->global.get_kind(index); + if (kind==symbol_kind::database) { + database_param_name = i->get_var_name()->get_name(); + break; + } + } + + if (sc.referenced_by_database_table && + flag_ignore_do_schema_data_constraint) { + // DO schema's __all__ does not need to be generated to data constraint + ignored_DO_schema.push_back({ + impl_schema_name, + node->get_name()->get_location() + }); + } else { + current_rule = new 
souffle_rule_impl( + "schema_" + impl_schema_name, + node->get_location() + ); + current_rule->add_param("result"); + // add database name into parameter + current_rule->add_param( + database_param_name.empty()? + "[-1, -1]": + database_param_name + ); + // add field name into parameter, doing mangling + for(const auto& f : sc.ordered_fields) { + const auto type = sc.fields.at(f); + const auto name_mangled_field = field_name_mangling(f, type); + current_rule->add_param(name_mangled_field); + } + irc.rule_impls.push_back(current_rule); + + // generate data constraint impl + blocks.push_back(current_rule->get_block()); + // parameter typecheck + bool need_self_typecheck = + node->get_parameter_list().size() && + node->get_parameter_list().front()->get_var_name()->get_name()=="self"; + if (need_self_typecheck) { + auto new_block = new lir::block(node->get_location()); + new_block->set_use_semicolon(); + blocks.back()->set_use_comma(); + blocks.back()->add_new_content(new_block); + blocks.push_back(new_block); + } + if (node->get_code_block()) { + ssa_temp_id = 0; + for(auto i : node->get_code_block()->get_statement()) { + i->accept(this); + } + } + if (need_self_typecheck) { + blocks.pop_back(); + add_self_type_check(); + } + blocks.pop_back(); + } + + // generate this method as a rule implementation: + // rule_name(result, ...) :- schema_name(result, db, ...). + auto fn_impl = new souffle_rule_impl( + "rule_" + function_name, + node->get_location() + ); + fn_impl->add_param("result"); + for(auto i : node->get_parameter_list()) { + fn_impl->add_param(i->get_var_name()->get_name()); + } + auto call = new lir::call("schema_" + impl_schema_name, node->get_location()); + fn_impl->get_block()->add_new_content(call); + call->add_arg(lir::inst_value_t::variable("result")); + call->add_arg(database_param_name.empty()? 
+ lir::inst_value_t::default_value(): + lir::inst_value_t::variable(database_param_name) + ); + for(size_t i = 0; iadd_arg(lir::inst_value_t::default_value()); + } + irc.rule_impls.push_back(fn_impl); +} + +bool ir_gen::visit_function_decl(function_decl* node) { + // check name if the function is in impl block of a schema + // if name is "__all__", then it is a data constraint + if (impl_schema_name.length() && node->get_name()->get_name()=="__all__") { + in_data_constraint = true; + } + + for(auto i : node->get_annotations()) { + if (i->get_annotation()=="@output") { + generate_annotated_output(i, node); + } + if (i->get_annotation()=="@input") { + generate_annotated_input(i, node); + } + } + + // generate function name + auto name = node->get_name()->get_name(); + if (name=="main" && impl_schema_name.empty()) { + generate_output(node->get_code_block()); + return true; + } + if (impl_schema_name.length()) { + name = impl_schema_name + "::" + name; + } else { + const auto& ns = godel_module::instance() + ->find_module_by_file(ctx->this_file_name); + name = (ns.length()? 
ns + "::":"") + name; + } + + if (!in_data_constraint) { + not_data_constraint_func_decl(name, node); + } else { + data_constraint_func_decl(name, node); + } + + in_data_constraint = false; + return true; +} + +bool ir_gen::visit_query_decl(query_decl* node) { + // generate full path name of the query + const auto sym = symbol({ + .type_name = node->get_name()->get_name(), + .type_loc = node->get_location() + }); + const auto name = sym.full_path_name(); + const auto index = ctx->global.get_index(name); + const auto& query_self = ctx->global.get_query(index); + + // generate query declaration rule + auto query_decl = new souffle_rule_decl("rule_" + name); + for(const auto& i : query_self.ordered_output_list) { + query_decl->add_param(i, + query_self.output_list.at(i).full_path_name_without_set() + ); + } + irc.rule_decls.push_back(query_decl); + irc.souffle_output.push_back("rule_" + name); + irc.souffle_output_real_name.insert({ + "rule_" + name, + name + }); + + // generate query implementation rule + auto query_impl = new souffle_rule_impl( + "rule_" + name, + node->get_location() + ); + for(const auto& i : query_self.ordered_output_list) { + query_impl->add_param(i); + } + query_impl->get_block()->set_use_comma(); + blocks.push_back(query_impl->get_block()); + // generate from list(let-eq) behind all statements + for(auto i : node->get_from_list()) { + if (i->get_init_value()->get_resolve().type.is_set) { + continue; + } + i->get_init_value()->accept(this); + if (value_stack.empty()) { + report_empty_value_stack(i->get_init_value()); + } + blocks.back()->add_new_content(new lir::store( + value_stack.back().to_inst_value(), + lir::inst_value_t::variable(i->get_var_name()->get_name()), + i->get_location() + )); + value_stack.pop_back(); + } + // generate condition + if (node->has_condition()) { + node->get_where_condition()->accept(this); + } + // generate select list + for(size_t i = 0; iget_select_list().size(); ++i) { + 
node->get_select_list()[i]->get_column_value()->accept(this); + if (value_stack.empty()) { + report_empty_value_stack(node->get_select_list()[i]); + } + blocks.back()->add_new_content(new lir::store( + value_stack.back().to_inst_value(), + lir::inst_value_t::variable(query_self.ordered_output_list[i]), + node->get_select_list()[i]->get_location() + )); + value_stack.pop_back(); + } + // generate from list(for-in) behind all statements + for(auto i : node->get_from_list()) { + if (!i->get_init_value()->get_resolve().type.is_set) { + continue; + } + i->get_init_value()->accept(this); + if (value_stack.empty()) { + report_empty_value_stack(i->get_init_value()); + } + blocks.back()->add_new_content(new lir::store( + value_stack.back().to_inst_value(), + lir::inst_value_t::variable(i->get_var_name()->get_name()), + i->get_location() + )); + value_stack.pop_back(); + } + blocks.pop_back(); + irc.rule_impls.push_back(query_impl); + return true; +} + +bool ir_gen::visit_block_stmt(block_stmt* node) { + if (node->get_statement().size()>1) { + auto new_test_blk = new lir::block(node->get_location()); + new_test_blk->set_use_semicolon(); + blocks.back()->add_new_content(new_test_blk); + blocks.push_back(new_test_blk); + } + for(auto i : node->get_statement()) { + i->accept(this); + } + if (node->get_statement().size()>1) { + blocks.pop_back(); + } + return true; +} + +lir::call* ir_gen::typecheck_defined_variable(const infer& infer_result, + const std::string& value) { + const auto name = infer_result.type.full_path_name_without_set(); + const auto index = ctx->global.get_index(name); + if (ctx->global.get_kind(index)!=symbol_kind::schema) { + return nullptr; + } + // generate typecheck for schema type variable + auto typecheck = new lir::call("typecheck_" + name, report::span::null()); + typecheck->add_arg(lir::inst_value_t::variable(value)); + return typecheck; +} + +void ir_gen::generate_definition(const std::vector& var, + std::vector& vec, + bool is_for_loop) { + 
for(auto i : var) { + i->get_init_value()->accept(this); + // check value stack, should not be empty + if (value_stack.empty()) { + // unreachable + report_compiler_bug(i, + "internal error: for/let statement should have a value on stack" + ); + continue; + } + // get value + const auto top = value_stack.back(); + value_stack.pop_back(); + // varibale name + const auto& dst = i->get_var_name()->get_name(); + // type check variable if defined in let statement + if (!is_for_loop) { + vec.push_back(typecheck_defined_variable( + i->get_init_value()->get_resolve(), + dst + )); + } + // store value to variable + blocks.back()->add_new_content(new lir::store( + top.to_inst_value(), + lir::inst_value_t::variable(dst), + i->get_location() + )); + } +} + +bool ir_gen::visit_for_stmt(for_stmt* node) { + auto new_test_blk = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_test_blk); + blocks.push_back(new_test_blk); + if (flag_enable_for_optimization) { + optimized_for_stmt_gen(node); + } else { + unoptimized_for_stmt_gen(node); + } + blocks.pop_back(); + return true; +} + +void ir_gen::optimized_for_stmt_gen(for_stmt* node) { + if (node->get_code_block()) { + node->get_code_block()->accept(this); + } + // place initialization after the block to avoid join + // in most cases it works but some special cases it does not work + std::vector vec; + generate_definition(node->get_symbols(), vec, true); + for(auto i : vec) { + if (i) { + blocks.back()->add_new_content(i); + } + } +} + +void ir_gen::unoptimized_for_stmt_gen(for_stmt* node) { + std::vector vec; + generate_definition(node->get_symbols(), vec, true); + if (node->get_code_block()) { + node->get_code_block()->accept(this); + } + for(auto i : vec) { + if (i) { + blocks.back()->add_new_content(i); + } + } +} + +bool ir_gen::visit_let_stmt(let_stmt* node) { + auto new_test_blk = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_test_blk); + blocks.push_back(new_test_blk); + if 
(flag_enable_let_optimization) { + optimized_let_stmt_gen(node); + } else { + unoptimized_let_stmt_gen(node); + } + blocks.pop_back(); + return true; +} + +void ir_gen::optimized_let_stmt_gen(let_stmt* node) { + if (node->get_code_block()) { + node->get_code_block()->accept(this); + } + // place initialization after the block to avoid join + // in most cases it works but some special cases it does not work + std::vector vec; + generate_definition(node->get_symbols(), vec, false); + for(auto i : vec) { + if (i) { + blocks.back()->add_new_content(i); + } + } +} + +void ir_gen::unoptimized_let_stmt_gen(let_stmt* node) { + std::vector vec; + generate_definition(node->get_symbols(), vec, false); + if (node->get_code_block()) { + node->get_code_block()->accept(this); + } + for(auto i : vec) { + if (i) { + blocks.back()->add_new_content(i); + } + } +} + +bool ir_gen::visit_cond_stmt(cond_stmt* node) { + node->get_if_stmt()->accept(this); + if (node->get_elsif_stmt().size() || node->has_else_stmt()) { + err.err(node->get_location(), + "elsif or else clause is not allowed to be translated to souffle.", + "please use if clause instead." 
+ ); + return false; + } + return true; +} + +bool ir_gen::visit_if_stmt(if_stmt* node) { + auto new_test_blk = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_test_blk); + blocks.push_back(new_test_blk); + if (node->get_condition()) { + node->get_condition()->accept(this); + } + if (node->get_code_block()) { + node->get_code_block()->accept(this); + } + blocks.pop_back(); + return true; +} + +bool ir_gen::visit_ret_stmt(ret_stmt* node) { + auto new_blk = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_blk); + blocks.push_back(new_blk); + + node->get_return_value()->accept(this); + + // do not generate anything if the resolved return type is boolean + if (node->get_return_value()->get_resolve().type==symbol::boolean()) { + blocks.pop_back(); + return true; + } + + // by default, the return value will be stored into `result` variable + if (value_stack.size()) { + blocks.back()->add_new_content(new lir::store( + value_stack.back().to_inst_value(), + lir::inst_value_t::variable("result"), + node->get_return_value()->get_location() + )); + value_stack.pop_back(); + } + blocks.pop_back(); + return true; +} + +bool ir_gen::visit_fact_data(fact_data* node) { + auto new_fact = new lir::fact(node->get_location()); + blocks.back()->add_new_content(new_fact); + + const auto& params = current_rule->get_params(); + size_t c = 0; + for(auto i : node->get_literals()) { + i->accept(this); + + // value stack should not be empty + if (value_stack.empty()) { + report_empty_value_stack(node); + break; + } + + // size of literal list should match with rule's parameter + if (c>=params.size()) { + report_compiler_bug(node, + "fatal error occurred when generating fact data." 
+ ); + break; + } + + // inst_value_t here should be inst_value_t::variable + new_fact->add_pair(params[c], value_stack.back().to_inst_value()); + value_stack.pop_back(); + c++; + } + return true; +} +/* Wraps all facts of the statement in a new lir::block marked with set_use_semicolon; each fact is visited while that block is on top of the blocks stack. */ +bool ir_gen::visit_fact_stmt(fact_stmt* node) { + auto new_blk = new lir::block(node->get_location()); + new_blk->set_use_semicolon(); + blocks.back()->add_new_content(new_blk); + blocks.push_back(new_blk); + for(auto i : node->get_facts()) { + i->accept(this); + } + blocks.pop_back(); + return true; +} +/* Lowers one match arm in its own lir::block: copies the current top of value_stack, visits the literal, emits a lir::store built from the literal value and the copied top, pops one entry, then visits the statement of the arm. NOTE(review): value_stack.back() is read without an emptiness check, and the single pop below presumably removes the literal that was just pushed rather than the copied top, despite the wording of the existing comment - confirm. */ +bool ir_gen::visit_match_pair(match_pair* node) { + auto new_block = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_block); + blocks.push_back(new_block); + + const auto stack_top = value_stack.back(); + node->get_literal()->accept(this); + blocks.back()->add_new_content(new lir::store( + value_stack.back().to_inst_value(), + stack_top.to_inst_value(), + node->get_literal()->get_location() + )); + + // pop stack_top + value_stack.pop_back(); + node->get_statement()->accept(this); + + blocks.pop_back(); + return true; +} +/* Lowers a match statement as two nested blocks: an outer block in which the match condition is visited, and an inner block marked with set_use_semicolon that receives every match pair. Both blocks are popped before returning. */ +bool ir_gen::visit_match_stmt(match_stmt* node) { + auto new_match_uplevel_block = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_match_uplevel_block); + blocks.push_back(new_match_uplevel_block); + node->get_match_condition()->accept(this); + + // generate match block + auto new_match_block = new lir::block(node->get_location()); + new_match_block->set_use_semicolon(); + blocks.back()->add_new_content(new_match_block); + blocks.push_back(new_match_block); + for(auto i : node->get_match_pair_list()) { + i->accept(this); + } + // pop match block + blocks.pop_back(); + // pop match uplevel block + blocks.pop_back(); + return true; +} +/* Always reports a compiler bug first (see the comment inside), then still visits the inner expression in a fresh block and returns true. */ +bool ir_gen::visit_in_block_expr(in_block_expr* node) { + // should be unreachable + // will be reported in semantic analysis: ungrounded check + // but we add this report here to avoid unexpected error + // because we will change ungrounded check pass from
ast-check to lir-check + // in future versions + report_compiler_bug(node, "return value is not grounded."); + + auto new_blk = new lir::block(node->get_location()); + blocks.back()->add_new_content(new_blk); + blocks.push_back(new_blk); + node->get_expr()->accept(this); + blocks.pop_back(); + return true; +} +/* Lowers the head of a call chain. An identifier head goes through generate_symbol_call, anything else is visited normally. When a function call follows and path_infer_result names a schema, a func_stack entry for rule_ + schema + ::__all__ is pushed before the call is visited. An attached initializer is visited as well, and field_infer_result is set to the resolved type of the node. */ +bool ir_gen::visit_call_head(call_head* node) { + if (node->get_first_expression()->get_ast_class()==ast_class::ac_identifier) { + auto fe = node->get_first_expression(); + auto id = reinterpret_cast(fe); /* NOTE(review): the cast target type is missing in this copy of the text; presumably a pointer to identifier, given the argument type of generate_symbol_call - confirm against the upstream file. */ + generate_symbol_call(id); + } else { + node->get_first_expression()->accept(this); + } + if (node->has_func_call()) { + if (path_infer_result.kind==path_kind::schema) { + func_stack.push_back({ + func_kind::function, + "rule_" + path_infer_result.content + "::__all__", + node->get_resolve() + }); + } + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + field_infer_result = node->get_resolve(); + return true; +} +/* Lowers a later link of a call chain: get_field_from_infer or get_path_from_infer runs first depending on the call type, the generic type is visited when present, and the function call, initializer and field_infer_result handling then mirror visit_call_head. */ +bool ir_gen::visit_call_expr(call_expr* node) { + switch (node->get_call_type()) { + case call_expr::type::get_field: get_field_from_infer(node); break; + case call_expr::type::get_path: get_path_from_infer(node); break; + } + if (node->is_generic()) { + node->get_generic_type()->accept(this); + } + if (node->has_func_call()) { + if (path_infer_result.kind==path_kind::schema) { + func_stack.push_back({ + func_kind::function, + "rule_" + path_infer_result.content + "::__all__", + node->get_resolve() + }); + } + node->get_func_call()->accept(this); + } + if (node->is_initializer()) { + node->get_initializer()->accept(this); + } + field_infer_result = node->get_resolve(); + return true; +} +/* Resets both infer results, then either hands the whole root to handle_aggregator_call_root (when some link is an aggregator other than find) or visits the head and every link in order, clearing path_infer_result before each get_field link. */ +bool ir_gen::visit_call_root(call_root* node) { + path_infer_result = {path_kind::null, ""}; + field_infer_result = infer::null(); + + // if the call chain has aggregator, we need to handle it specially + bool flag_call_chain_has_aggregator = false; + for(auto i : node->get_call_chain()) { +
if (i->is_aggregator() && !i->is_aggregator_find()) { + flag_call_chain_has_aggregator = true; + break; + } + } + if (flag_call_chain_has_aggregator) { + handle_aggregator_call_root(node); + return true; + } + + node->get_call_head()->accept(this); + for(auto i : node->get_call_chain()) { + if (i->get_call_type()==call_expr::type::get_field) { + path_infer_result = {path_kind::null, ""}; + } + i->accept(this); + } + return true; +} +/* Emits a lir::aggregator for aggregator_node applied to head followed by chain. Base case (chain holds no aggregator other than find): head and chain are lowered into a detached lir::block that becomes the aggregator body, and the result is a fresh temp variable pushed on value_stack. Otherwise the function first recurses on the links that precede the last inner aggregator. */ +void ir_gen::generate_aggregator(const std::vector& chain, + call_head* head, + call_expr* aggregator_node) { + std::vector subchain; + call_expr* mark_node = nullptr; + // find the last aggregator in the call chain + for(auto i = chain.rbegin(); i!=chain.rend(); ++i) { + if ((*i)->is_aggregator() && !(*i)->is_aggregator_find()) { + mark_node = *i; + break; + } + } + // if there is no aggregator, doing generation + if (!mark_node) { + auto new_block = new lir::block(head->get_location()); + blocks.push_back(new_block); + head->accept(this); + for(auto i : chain) { + if (i->get_call_type()==call_expr::type::get_field) { + path_infer_result = {path_kind::null, ""}; + } + i->accept(this); + } + blocks.pop_back(); + + auto temp_var = generate_temp_variable(); + const auto& name = aggregator_node->get_field_name()->get_name(); + auto new_aggr_call = new lir::aggregator( + name, + name!="len"?
value_stack.back().to_inst_value():lir::inst_value_t::null(), + lir::inst_value_t::variable(temp_var), + aggregator_node->get_location() + ); + // aggregator has a return value, pop it from the stack + value_stack.pop_back(); /* NOTE(review): back() and pop_back() are used without an emptiness check; presumably the visits above always leave a value on the stack - confirm. */ + value_stack.push_back({ + data_kind::variable, + temp_var, + aggregator_node->get_resolve() + }); + new_aggr_call->set_body(new_block); + blocks.back()->add_new_content(new_aggr_call); + field_infer_result = aggregator_node->get_resolve(); + return; + } /* From here on chain contains an inner aggregator (mark_node): the links before it are lowered by the recursive call below, whose aggregator is added to blocks.back() before the body block of the outer aggregator is pushed. Only the links after mark_node are visited inside that body block. */ + // otherwise, generate the call chain + for(auto i : chain) { + if (i==mark_node) { + break; + } + subchain.push_back(i); + } + // and generate the sub chain's aggregator + generate_aggregator(subchain, head, mark_node); + + // then generate the outer aggregator + auto new_block = new lir::block(aggregator_node->get_location()); + blocks.push_back(new_block); + bool reached_mark_node = false; + for(auto i : chain) { + if (i->get_call_type()==call_expr::type::get_field) { + path_infer_result = {path_kind::null, ""}; + } + if (reached_mark_node) { + i->accept(this); + } + if (i==mark_node) { + reached_mark_node = true; + } + } + blocks.pop_back(); + + auto temp_var = generate_temp_variable(); + const auto& name = aggregator_node->get_field_name()->get_name(); + auto new_aggr_call = new lir::aggregator( + name, + name!="len"?
value_stack.back().to_inst_value():lir::inst_value_t::null(), + lir::inst_value_t::variable(temp_var), + aggregator_node->get_location() + ); + // aggregator has a return value, pop it from the stack + value_stack.pop_back(); + value_stack.push_back({ + data_kind::variable, + temp_var, + aggregator_node->get_resolve() + }); + new_aggr_call->set_body(new_block); + blocks.back()->add_new_content(new_aggr_call); + field_infer_result = aggregator_node->get_resolve(); +} +/* Entry point used by visit_call_root for chains that contain an aggregator: locates the last aggregator link, lowers everything up to and including it through generate_aggregator, then visits the remaining links. NOTE(review): the search below accepts any aggregator including find, while visit_call_root only triggers on aggregators other than find; a chain whose last aggregator is find would end in the compiler-bug report below - confirm this cannot happen. */ +void ir_gen::handle_aggregator_call_root(call_root* node) { + std::vector chain; + call_expr* mark_node = nullptr; + // find the last aggregator in the call chain + for(auto i = node->get_call_chain().rbegin(); i!=node->get_call_chain().rend(); ++i) { + if ((*i)->is_aggregator()) { + mark_node = *i; + break; + } + } + // if there is no aggregator, just return + if (!mark_node) { + return; + } + // this condition is unreachable, but we add it anyway for safety + if (mark_node->is_aggregator_find()) { + report_compiler_bug(mark_node, + "recognized aggregator find as souffle aggregator."
+ ); + return; + } + // otherwise, generate the call chain + for(auto i : node->get_call_chain()) { + if (i==mark_node) { + break; + } + chain.push_back(i); + } + // and generate the aggregator + generate_aggregator(chain, node->get_call_head(), mark_node); + + // generate remained call chain + /* NOTE(review): visit_call_root clears path_infer_result before each get_field link; this loop does not - confirm that is intended. */ + bool reach_mark_node = false; + for(auto i : node->get_call_chain()) { + if (i==mark_node) { + reach_mark_node = true; + continue; + } + if (!reach_mark_node) { + continue; + } + i->accept(this); + } +} +/* Lowers a function call. Arguments are visited first; the callee description is taken from the top of func_stack (when it is empty a report is issued and nothing is emitted). The schema_to and schema_is kinds are lowered to typecheck_ calls; every other kind becomes a lir::call whose arguments are filled in by generate_function_call or generate_method_call. NOTE(review): the generic argument appears to have been lost from the examples in the two lowering comments below; the lowered form suggests the calls were written with Dst as their generic type - confirm against the upstream file. */ +bool ir_gen::visit_func_call(func_call* node) { + for(auto i : node->get_arguments()) { + i->accept(this); + } + if (func_stack.empty()) { + report_empty_func_stack(node); + return true; + } + + // do lowering for `S.to()`, for example: + // + // src.to() + // => + // (typecheck_Dst(src), ssa_temp_0 = src) + // + if (func_stack.back().kind==func_kind::schema_to) { + auto schema_to_block = new lir::block(node->get_location()); + auto typecheck_call = new lir::call( + "typecheck_" + replace_colon(func_stack.back().generic_type), + node->get_location() + ); + const auto source = value_stack.back(); + value_stack.pop_back(); + + typecheck_call->set_call_type(lir::call::kind::function); + typecheck_call->add_arg(source.to_inst_value()); + + schema_to_block->add_new_content(typecheck_call); + blocks.back()->add_new_content(schema_to_block); + func_stack.pop_back(); + + const auto temp_var = generate_temp_variable(); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + schema_to_block->add_new_content(new lir::store( + source.to_inst_value(), + lir::inst_value_t::variable(temp_var), + node->get_location() + )); + return true; + } + + // do lowering for `S.is()`, for example: + // + // src.is() + // => + // typecheck_Dst(src) + // + if (func_stack.back().kind==func_kind::schema_is) { + auto typecheck_call = new lir::call( + "typecheck_" + replace_colon(func_stack.back().generic_type), + node->get_location() + ); + const auto source = value_stack.back();
+ value_stack.pop_back(); + + typecheck_call->set_call_type(lir::call::kind::function); + typecheck_call->add_arg(source.to_inst_value()); + blocks.back()->add_new_content(typecheck_call); /* NOTE(review): unlike the schema_to branch and the general path below, this branch returns without func_stack.pop_back() - confirm whether the entry is meant to stay on the stack. */ + return true; + } + /* General path: the first switch copies the function kind onto the lir::call, the second one fills in the arguments from value_stack (function style or method style). An unknown kind is reported in both switches and the call is still emitted. */ + auto call = new lir::call(func_stack.back().content, node->get_location()); + switch(func_stack.back().kind) { + case func_kind::function: call->set_call_type(lir::call::kind::function); break; + case func_kind::database_load: call->set_call_type(lir::call::kind::database_load); break; + case func_kind::basic_static: call->set_call_type(lir::call::kind::basic_static); break; + case func_kind::method: call->set_call_type(lir::call::kind::method); break; + case func_kind::find: call->set_call_type(lir::call::kind::find); break; + case func_kind::key_cmp: call->set_call_type(lir::call::kind::key_cmp); break; + case func_kind::to_set: call->set_call_type(lir::call::kind::to_set); break; + case func_kind::basic_method: call->set_call_type(lir::call::kind::basic_method); break; + default: report_compiler_bug(node, "unknown function kind."); break; + } + // generate arguments(from value stack) of the call + switch(func_stack.back().kind) { + case func_kind::function: + case func_kind::database_load: + case func_kind::basic_static: generate_function_call(node, call); break; + case func_kind::method: + case func_kind::find: + case func_kind::key_cmp: + case func_kind::to_set: + case func_kind::basic_method: generate_method_call(node, call); break; + default: report_compiler_bug(node, "unknown function kind."); break; + } + func_stack.pop_back(); + /* a call whose resolved type is not boolean gets a fresh temp variable as its return slot, which is also pushed on value_stack */ + if (node->get_resolve().type!=symbol::boolean()) { + const auto temp_var = generate_temp_variable(); + call->set_return(lir::inst_value_t::variable(temp_var)); + value_stack.push_back({data_kind::variable, temp_var, node->get_resolve()}); + } + blocks.back()->add_new_content(call); + return true; +} +/* Visits the value expression of one initializer pair, records the resulting top of value_stack under the field name and pops it. insert leaves an already recorded name untouched. */ +void ir_gen::generate_field_pair( + initializer_pair* node, + std::unordered_map& fields) { + +
node->get_field_value()->accept(this); + fields.insert({node->get_field_name()->get_name(), value_stack.back()}); + value_stack.pop_back(); +} +/* Expands a spread expression: the child is visited, then for every field of the resolved schema (in ordered_fields order) a method call named get_field_ + schema + _ + field on the child value is emitted and its temp result recorded in fields. insert does not overwrite, so names that are already recorded keep their earlier value. The child value is popped at the end. */ +void ir_gen::generate_spread_expr( + spread_expr* node, + std::unordered_map& fields) { + + node->get_child()->accept(this); + + const auto& infer_result = node->get_resolve(); + const auto full_name = infer_result.type.full_path_name_without_set(); + const auto index = ctx->global.get_index(full_name); + const auto& infer_schema = ctx->global.get_schema(index); + + for(const auto& field : infer_schema.ordered_fields) { + const auto name = "get_field_" + full_name + "_" + field; + + // generate call + auto call = new lir::call(name, node->get_location()); + call->set_call_type(lir::call::kind::method); + call->add_arg(value_stack.back().to_inst_value()); + + // generate call result + const auto result = generate_temp_variable(); + call->set_return(lir::inst_value_t::variable(result)); + blocks.back()->add_new_content(call); + + fields.insert({ + field, + value_data { + data_kind::variable, + result, + infer {false, infer_schema.fields.at(field)} + } + }); + } + + value_stack.pop_back(); +} +/* Lowers a schema initializer. Field pairs are collected before spread expressions, so explicitly written fields win over spread ones. Outside a data constraint, or when the initialized schema is not impl_schema_name, a lir::constructor writing to a fresh temp variable is emitted; otherwise each field is stored into its mangled field variable and result is bound to the primary key or to a lir::record. */ +bool ir_gen::visit_initializer(initializer* node) { + std::unordered_map fields; + for(auto i : node->get_field_pairs()) { + generate_field_pair(i, fields); + } + for(auto i : node->get_spread_exprs()) { + generate_spread_expr(i, fields); + } + + // get schema + const auto index = ctx->global.get_index( + node->get_resolve().type.full_path_name_without_set() + ); + const auto& sc = ctx->global.get_schema(index); + + // check this initializer is initializing itself, + // otherwise this initializer will not be used as a data constraint + bool self_initializer = node->get_resolve() + .type + .full_path_name_without_set()==impl_schema_name; + if (!in_data_constraint || !self_initializer) { + // generate result temp variable + const auto temp_var = generate_temp_variable(); + value_stack.push_back({ + data_kind::variable, + temp_var, +
node->get_resolve() + }); + auto ctor = new lir::constructor( + node->get_resolve().type.full_path_name_without_set(), + lir::inst_value_t::variable(temp_var), + node->get_location() + ); + for(const auto& i : sc.ordered_fields) { + ctor->add_field(fields.at(i).to_inst_value()); /* at() throws std::out_of_range for a field that was never recorded; presumably semantic analysis guarantees every field is given - confirm. */ + } + blocks.back()->add_new_content(ctor); + } else { + // generate construct code in data constraint block + for(const auto& f : sc.ordered_fields) { + const auto name_mangled_field = field_name_mangling(f, sc.fields.at(f)); + blocks.back()->add_new_content(new lir::store( + fields.at(f).to_inst_value(), + lir::inst_value_t::variable(name_mangled_field), + node->get_location() + )); + } + // generate result variable + if (sc.has_primary_key()) { + const auto& key = sc.get_primary_key(); + const auto name_mangled_field = field_name_mangling(key, sc.fields.at(key)); + blocks.back()->add_new_content(new lir::store( + lir::inst_value_t::variable(name_mangled_field), + lir::inst_value_t::variable("result"), + node->get_location() + )); + } else { + auto record = new lir::record( + lir::inst_value_t::variable("result"), + node->get_location() + ); + blocks.back()->add_new_content(record); + for(const auto& f : sc.ordered_fields) { + record->add_field(fields.at(f).to_inst_value()); + } + } + } + return true; +} +/* Driver of the IR generation: reads the optimization switches and the souffle output paths from config into the flags and the shared irc, reports when more than one output format is chosen, emits the automatically generated declarations, then visits the AST root and finally reports the ignored DO schema data constraints. */ +void ir_gen::generate(const cli::configure& config, ast_root* root) { + // for/let optimization switch + flag_enable_for_optimization = config.count(cli::option::cli_enable_for_opt); + flag_enable_let_optimization = config.count(cli::option::cli_enable_let_opt); + // self constraint switch + flag_enable_self_constraint_optimization = config.count(cli::option::cli_enable_self_constraint_opt); + if (config.count(cli::option::cli_disable_do_schema_opt)) { + flag_ignore_do_schema_data_constraint = false; + } + + // souffle result output format + if (config.count(cli::option::cli_souffle_json_output)) { + irc.json_output_path = config.at(cli::option::cli_souffle_json_output); + } + if
(config.count(cli::option::cli_souffle_csv_output)) { + irc.csv_output_path = config.at(cli::option::cli_souffle_csv_output); + } + if (config.count(cli::option::cli_souffle_sqlite_output)) { + irc.sqlite_output_path = config.at(cli::option::cli_souffle_sqlite_output); + } + // count chosen output format + size_t output_kind_count = config.count(cli::option::cli_souffle_json_output) + + config.count(cli::option::cli_souffle_csv_output) + + config.count(cli::option::cli_souffle_sqlite_output); + // only one output format is allowed now + if (output_kind_count>1) { + auto output_kind = std::string("too many souffle output kind specified:"); + if (config.count(cli::option::cli_souffle_json_output)) { + output_kind += " json"; + } + if (config.count(cli::option::cli_souffle_csv_output)) { + output_kind += " csv"; + } + if (config.count(cli::option::cli_souffle_sqlite_output)) { + output_kind += " sqlite"; + } + output_kind += "."; + err.err(output_kind); /* the conflict is only reported here; there is no early return, so the generation steps below still run */ + } + + // generate type alias, declarations + // and automatically generated rules (not written by user) + emit_type_alias_for_database(); + emit_type_alias_for_enum(); + emit_type_alias_for_schema(); + emit_database_get_table_decl(); + emit_schema_decl(); + emit_schema_type_check(); + emit_schema_get_field(); + emit_DO_schema_default_constructor(); + emit_func_decl(); + + // generate rule implementation (written by user) + ignored_DO_schema.clear(); + root->accept(this); + + report_ignored_DO_schema_data_constraint(); +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/ir_gen.h b/godel-script/godel-frontend/src/ir/ir_gen.h new file mode 100644 index 00000000..a647ccda --- /dev/null +++ b/godel-script/godel-frontend/src/ir/ir_gen.h @@ -0,0 +1,267 @@ +#pragma once + +#include "godel-frontend/src/cli.h" +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/ast/ast_visitor.h" +#include "godel-frontend/src/sema/context.h"
+#include "godel-frontend/src/symbol.h" +#include "godel-frontend/src/package/package.h" +#include "godel-frontend/src/error/error.h" + +#include +#include + +namespace godel { + +using package::godel_module; +/* ir_gen: AST visitor that lowers the AST into lir instructions. Generation state lives in the members declared below (the blocks stack, value_stack, func_stack and the two infer results); the ir_context irc is an inline static member and is therefore shared by all ir_gen objects. */ +class ir_gen: public ast_visitor { +private: + report::error& err; + bool flag_enable_for_optimization = false; + bool flag_enable_let_optimization = false; + bool flag_enable_self_constraint_optimization = false; + bool flag_ignore_do_schema_data_constraint = true; + +private: + inline static ir_context irc; + souffle_rule_impl* current_rule = nullptr; + std::vector blocks; /* stack of open lir blocks; the visitors add new instructions to blocks.back() */ + const context* ctx; + + bool in_data_constraint = false; + std::string impl_schema_name = ""; + +private: /* reports msg at the location of node, prefixed as an internal compiler error */ + void report_compiler_bug(ast_node* node, const std::string& msg) { + err.err(node->get_location(), + "internal compiler error: " + msg, + "this is a compiler bug, please report it to us." + ); + } + +private: + void emit_type_alias_for_database(); + void emit_type_alias_for_schema_with_primary_key(const schema&); + void emit_type_alias_for_schema_without_primary_key(const schema&); + void emit_type_alias_for_schema(); + void emit_type_alias_for_enum(); + +private: + void emit_used_database_input_decl(const std::string&, + const std::string&, + const std::string&, + const schema&); + void emit_used_database_get_table_impl(const std::string&, + const std::string&, + const std::string&, + const std::string&, + const std::string&, + const schema&); + void emit_schema_data_constraint_impl(const std::string&, + const std::string&, + const std::string&, + const std::string&, + const schema&); + void emit_used_database(const std::string&, const std::string&); + void emit_database_get_table_decl(); + void emit_schema_decl(); + void emit_schema_method_decl(const function&, const std::string&); + void emit_schema_inherit_method(const schema&, + const function&, + const std::string&); + void emit_schema_type_check(); + void emit_schema_type_check_decl(const symbol&); + void
emit_schema_type_check_impl(const symbol&, const schema&); + void emit_schema_get_field(); + void emit_DO_schema_default_constructor(); + void emit_func_decl(); + /* Builds a symbol from the name and location of the identifier and returns its full path name. NOTE(review): this uses designated initializers, a C++20 feature that some compilers accept as an extension in C++17 mode - confirm the supported toolchains. */ + std::string full_name(identifier* node) const { + return symbol({ + .type_name = node->get_name(), + .type_loc = node->get_location() + }).full_path_name(); + } + +private: /* the three overrides below do nothing and return true */ + bool visit_use_stmt(use_stmt*) override { return true; } + bool visit_database_decl(database_decl*) override { return true; } + bool visit_schema_decl(schema_decl*) override { return true; } + +private: + enum class path_kind { + null = 0, // mark this infer is null + database, // global symbol: database + enumerate, // global symbol: enumeration + schema, // global symbol: schema + basic, // global symbol: basic type + module_path // global symbol: module + }; + struct infer_data { + path_kind kind = path_kind::null; + std::string content = ""; + bool is_null() const { return kind==path_kind::null; } + }; + // record infer result of get path + infer_data path_infer_result; + + // methods for generating symbol path + void generate_basic_symbol(identifier*); + void generate_database_symbol(identifier*); + void generate_schema_symbol(identifier*); + void generate_enum_symbol(identifier*); + void generate_function_symbol(identifier*); + void generate_package_symbol(identifier*); + // used in call_head when type is identifier* + void generate_symbol_call(identifier*); + + void get_path_from_basic(call_expr*); + void get_path_from_database(call_expr*); + void get_path_from_schema(call_expr*); + void get_path_from_enum(call_expr*); + void get_path_from_package(call_expr*); + void get_path_from_infer(call_expr*); + +private: + // record infer result of get field + infer field_infer_result = infer::null(); + + void get_field_from_database(call_expr*); + void get_field_from_schema(call_expr*); + void get_field_from_basic(call_expr*); + void get_field_from_infer(call_expr*); + +private: + enum class data_kind { + null = 0, + literal, //
integer literal + variable, // generated or declared variable + }; + struct value_data { + data_kind kind; + std::string content; + infer resolve; + + lir::inst_value_t to_inst_value() const; + }; + std::vector value_stack; /* values produced by visited expressions; consumers read back() and then pop */ + void report_empty_value_stack(ast_node* node) { + err.err(node->get_location(), + "internal compiler error: value stack is empty.", + "this is a compiler bug, please report it to us." + ); + } + +private: + enum class func_kind { + null = 0, + function, // function (static call) + method, // schema method (method call) + database_load, // database loader static method (static call) + find, // aggregator find (method call) + key_cmp, // key_eq & key_neq method (method call) + to_set, // to_set method (method call) + basic_method, // basic method (method call) + basic_static, // basic static method (static call) + schema_is, // schema is method (method call) + schema_to // schema to method (method call) + }; + struct func_data { + func_kind kind; + std::string content; + infer resolve = infer::null(); + // used in is/to method + std::string generic_type = ""; + }; + std::vector func_stack; /* pending callee descriptions; visit_func_call reads the top entry to decide how the call is lowered */ + void report_empty_func_stack(ast_node* node) { + err.err(node->get_location(), + "internal compiler error: function stack is empty.", + "this is a compiler bug, please report it to us."
+ ); + } + +private: + size_t ssa_temp_id = 0; /* returns ssa_temp_N, where N is the per-object counter above, and post-increments it */ + std::string generate_temp_variable() { + return "ssa_temp_" + std::to_string(ssa_temp_id++); + } + +private: + void generate_single_output(call_root*); + void generate_output(block_stmt*); + bool is_stdout_output_annotation(annotation*); + void generate_annotated_output(annotation*, function_decl*); + void generate_annotated_input(annotation*, function_decl*); + +private: + void generate_function_call(func_call*, lir::call*); + void generate_method_call(func_call*, lir::call*); + +private: /* ignored_DO_schema is cleared by generate before the AST root is visited; the report function is called right after that visit */ + std::vector> ignored_DO_schema; + void report_ignored_DO_schema_data_constraint(); + +private: + bool visit_number_literal(number_literal*) override; + bool visit_string_literal(string_literal*) override; + bool visit_unary_operator(unary_operator*) override; + bool visit_binary_operator(binary_operator*) override; + bool visit_boolean_literal(boolean_literal*) override; + bool visit_impl_block(impl_block*) override; + + bool has_self_typecheck_free_annotation(function_decl*); + void add_self_type_check(); + void add_parameter_type_check(function_decl*); + bool check_need_parameter_type_check(function_decl*); + void not_data_constraint_func_decl(const std::string&, function_decl*); + void data_constraint_func_decl(const std::string&, function_decl*); + bool visit_function_decl(function_decl*) override; + bool visit_query_decl(query_decl*) override; + bool visit_block_stmt(block_stmt*) override; + + lir::call* typecheck_defined_variable(const infer&, const std::string&); + void generate_definition(const std::vector&, + std::vector&, + bool); + bool visit_for_stmt(for_stmt*) override; + void optimized_for_stmt_gen(for_stmt*); + void unoptimized_for_stmt_gen(for_stmt*); + bool visit_let_stmt(let_stmt*) override; + void optimized_let_stmt_gen(let_stmt*); + void unoptimized_let_stmt_gen(let_stmt*); + + bool visit_cond_stmt(cond_stmt*) override; + bool visit_if_stmt(if_stmt*) override; + bool visit_ret_stmt(ret_stmt*) override; + bool
visit_fact_data(fact_data*) override; + bool visit_fact_stmt(fact_stmt*) override; + bool visit_match_pair(match_pair*) override; + bool visit_match_stmt(match_stmt*) override; + bool visit_in_block_expr(in_block_expr*) override; + bool visit_call_head(call_head*) override; + bool visit_call_expr(call_expr*) override; + bool visit_call_root(call_root*) override; + void generate_aggregator(const std::vector&, + call_head*, + call_expr*); + // handle all default aggregators except `*T.find` + void handle_aggregator_call_root(call_root*); + bool visit_func_call(func_call*) override; + +public: + void generate_field_pair(initializer_pair*, + std::unordered_map&); + void generate_spread_expr(spread_expr*, + std::unordered_map&); + bool visit_initializer(initializer*) override; + +public: /* the constructor stores the context pointer and the error reporter reference without copying; both context accessors return the shared static irc */ + ir_gen(const context* c, report::error& e): err(e), ctx(c) {} + static auto& get_mutable_context() { return irc; } + static const auto& get_context() { return irc; } + void generate(const cli::configure&, ast_root*); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/lir.cpp b/godel-script/godel-frontend/src/ir/lir.cpp new file mode 100644 index 00000000..e9e36245 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/lir.cpp @@ -0,0 +1,422 @@ +#include "godel-frontend/src/ir/lir.h" + +#include +#include +#include + +namespace godel { +/* Returns a copy of input in which every colon character is replaced by an underscore, so a::b becomes a__b. */ +std::string replace_colon(const std::string& input) { + auto result = input; + auto colon_pos = result.find(':'); + while(colon_pos!=std::string::npos) { + result.replace(colon_pos, 1, "_"); + colon_pos = result.find(':', colon_pos+1); + } + return result; +} + +namespace lir { +/* Streams only the content string of the value. */ +std::ostream& operator<<(std::ostream& os, const inst_value_t& ivt) { + os << ivt.content; + return os; +} +/* Default traversal: forwards the visitor to every instruction stored in the block. */ +void inst_visitor::visit_block(block* i) { + for(auto s : i->get_content()) { + s->accept(this); + } +} +/* Default traversal: forwards the visitor to the body of the operand. */ +void inst_visitor::visit_not_operand(not_operand* i) { + i->get_body()->accept(this); +} + +void
inst_visitor::visit_or_operand(or_operand* i) { + i->get_left_block()->accept(this); + i->get_right_block()->accept(this); +} +/* Default traversal: visits the left block, then the right block. */ +void inst_visitor::visit_and_operand(and_operand* i) { + i->get_left_block()->accept(this); + i->get_right_block()->accept(this); +} +/* Default traversal: forwards the visitor to the aggregator body. */ +void inst_visitor::visit_aggregator(aggregator* i) { + i->get_body()->accept(this); +} +/* Prints the constraint 1 = 1 when flag is set and 1 = 0 otherwise. */ +void boolean::dump(std::ostream& os, const std::string& indent) const { + os << indent << (flag? "1 = 1":"1 = 0"); +} +/* Prints destination = source. */ +void store::dump(std::ostream& os, const std::string& indent) const { + os << indent << destination << " = " << source; +} +/* key_eq prints an equality between the first two arguments; any other function name prints an inequality. */ +void call::generate_key_cmp(std::ostream& os) const { + if (function_name=="key_eq") { + os << arguments[0] << " = " << arguments[1]; + } else { + os << arguments[0] << " != " << arguments[1]; + } +} +/* Prints destination = first argument. */ +void call::generate_schema_to_set(std::ostream& os) const { + os << destination << " = " << arguments[0]; +} +/* Prints destination = first argument. */ +void call::generate_database_load(std::ostream& os) const { + os << destination << " = " << arguments[0]; +} +/* Prints the text emitted for a builtin int method, selected by looking function_name up in int_basic_methods. The arithmetic and bit operation cases assign an expression over the arguments to destination. NOTE(review): at() presumably throws for a name that is not in the table (as the standard map containers do) - callers appear to check count() first; confirm. */ +void call::generate_int_basic_method(std::ostream& os) const { + switch(int_basic_methods.at(function_name)) { + case int_method_kind::int_add: + os << destination << " = " << arguments[0] << " + " << arguments[1]; + break; + case int_method_kind::int_sub: + os << destination << " = " << arguments[0] << " - " << arguments[1]; + break; + case int_method_kind::int_div: + os << destination << " = " << arguments[0] << " / " << arguments[1]; + break; + case int_method_kind::int_mul: + os << destination << " = " << arguments[0] << " * " << arguments[1]; + break; + case int_method_kind::int_rem: + os << destination << " = " << arguments[0] << " % " << arguments[1]; + break; + case int_method_kind::int_pow: + os << destination << " = " << arguments[0] << " ^ " << arguments[1]; + break; + case int_method_kind::int_bitand: + os << destination << " = " << arguments[0] << " band " << arguments[1]; + break; + case int_method_kind::int_bitor: + os << destination << " =
" << arguments[0] << " bor " << arguments[1]; + break; + case int_method_kind::int_bitxor: + os << destination << " = " << arguments[0] << " bxor " << arguments[1]; + break; + case int_method_kind::int_bitnot: + os << destination << " = bnot " << arguments[0]; break; + case int_method_kind::int_neg: + os << destination << " = - " << arguments[0]; break; + case int_method_kind::int_eq: + os << arguments[1] << " = " << arguments[0]; break; + case int_method_kind::int_ne: + os << arguments[1] << " != " << arguments[0]; break; + case int_method_kind::int_lt: + os << arguments[1] << " < " << arguments[0]; break; + case int_method_kind::int_le: + os << arguments[1] << " <= " << arguments[0]; break; + case int_method_kind::int_gt: + os << arguments[1] << " > " << arguments[0]; break; + case int_method_kind::int_ge: + os << arguments[1] << " >= " << arguments[0]; break; + case int_method_kind::int_to_string: + os << destination << " = to_string(" << arguments[0] << ")"; break; + case int_method_kind::int_to_set: + os << destination << " = " << arguments[0]; break; + } +} + +void call::generate_string_basic_method(std::ostream& os) const { + switch(string_basic_methods.at(function_name)) { + case string_method_kind::string_substr: + os << destination << " = substr("; + os << arguments[0] << ", " << arguments[1] << ", " << arguments[2] << ")"; + break; + case string_method_kind::string_get_regex_match_result: + os << destination << " = @godel_lang_builtin_string_getMatchResult("; + os << arguments[0] << ", " << arguments[1] << ", " << arguments[2] << ")"; + break; + case string_method_kind::string_matches: + os << "match("; + os << arguments[1] << ", " << arguments[0] << ")"; + break; + case string_method_kind::string_contains: + os << "contains("; + os << arguments[1] << ", " << arguments[0] << ")"; + break; + case string_method_kind::string_add: + os << destination << " = cat("; + os << arguments[0] << ", " << arguments[1] << ")"; + break; + case 
string_method_kind::string_len: + os << destination << " = strlen(" << arguments[0] << ")"; break; + case string_method_kind::string_to_int: + os << destination << " = to_number(" << arguments[0] << ")"; break; /* the two comparison cases below print arguments[1] on the left of the operator and arguments[0] on the right */ + case string_method_kind::string_eq: + os << arguments[1] << " = " << arguments[0]; break; + case string_method_kind::string_ne: + os << arguments[1] << " != " << arguments[0]; break; + case string_method_kind::string_to_set: + os << destination << " = " << arguments[0]; break; + case string_method_kind::string_to_upper: + os << destination << " = @godel_lang_builtin_string_to_upper("; + os << arguments[0] << ")"; + break; + case string_method_kind::string_to_lower: + os << destination << " = @godel_lang_builtin_string_to_lower("; + os << arguments[0] << ")"; + break; + case string_method_kind::string_replace_all: + os << destination << " = @godel_lang_builtin_string_replace_all("; + os << arguments[0] << ", " << arguments[1] << ", " << arguments[2] << ")"; + break; + case string_method_kind::string_replace_once: + os << destination << " = @godel_lang_builtin_string_replace_once("; + os << arguments[0] << ", " << arguments[1] << ", "; + os << arguments[2] << ", " << arguments[3] << ")"; + break; + } +} +/* Dispatches on function_name: the int method table is tried first, then the string method table. An unknown name is logged to std::cerr and trips the assert; when NDEBUG is defined the assert expands to nothing and the function returns after logging without printing any code. */ +void call::generate_basic_method(std::ostream& os) const { + if (int_basic_methods.count(function_name)) { + generate_int_basic_method(os); + return; + } + if (string_basic_methods.count(function_name)) { + generate_string_basic_method(os); + return; + } + + std::cerr << "unimplemented basic method: " << function_name; + std::cerr << ", please report a bug to us.\n"; + assert(false && "unimplemented"); +} +/* Prints the text for builtin static calls: int::range and the two __undetermined_all__ helpers. The function-local static counter below gives every __undetermined_all__ use a distinct numeric suffix; being static, it is shared by all call objects and keeps counting across invocations. */ +void call::generate_basic_static(std::ostream& os) const { + // record undetermined variable, use singleton in souffle + static size_t undetermined_all_counter = 0; + + if (function_name=="int::range") { + os << destination << " = range("; + os << arguments[0] << ", " << arguments[1] << ")"; + return; + } + if
(function_name=="int::__undetermined_all__" || + function_name=="string::__undetermined_all__") { + os << destination << " = __undetermined_all__"; + os << undetermined_all_counter; + undetermined_all_counter++; + return; + } + + std::cerr << "unimplemented basic static: " << function_name; + std::cerr << ", please report a bug to us.\n"; + assert(false && "unimplemented"); +} + +void call::generate_find(std::ostream& os) const { + // only `*T.find` is generated here, others are handled in lir::aggregator + // + // class_instance_set.find(class_or_interface_instance) -> + // + // souffle: ( + // destination = class_instance_set, + // class_instance_set = class_or_interface_instance + // ) + // + if (function_name=="find") { + os << "(" << destination << " = " << arguments[0] << ", "; + os << arguments[0] << " = " << arguments[1] << ")"; + return; + } + + std::cerr << "unimplemented find method: " << function_name; + std::cerr << ", please report a bug to us.\n"; + assert(false && "unimplemented"); +} + +void call::dump(std::ostream& os, const std::string& indent) const { + os << indent; + switch(type) { + case kind::database_load: generate_database_load(os); return; + case kind::find: generate_find(os); return; + case kind::key_cmp: generate_key_cmp(os); return; + case kind::to_set: generate_schema_to_set(os); return; + case kind::basic_method: generate_basic_method(os); return; + case kind::basic_static: generate_basic_static(os); return; + default: break; + } + + // normal function and method call + os << replace_colon(function_name) << "("; + if (destination.content.size()) { + os << destination << (arguments.size()? 
", ":""); + } + size_t s = arguments.size(); + size_t c = 0; + for(const auto& i : arguments) { + os << i; + c++; + if (c!=s) { + os << ", "; + } + } + os << ")"; +} + +void constructor::dump(std::ostream& os, const std::string& indent) const { + os << indent << "schema_" << replace_colon(schema_name); + os << "(" << destination << ", _" << (fields_value.size()? ", ":""); + + size_t s = fields_value.size(); + size_t c = 0; + for(const auto& i : fields_value) { + os << i; + c++; + if (c!=s) { + os << ", "; + } + } + os << ")"; +} + +void record::dump(std::ostream& os, const std::string& indent) const { + os << indent << destination << " = " << "["; + + size_t s = fields_value.size(); + size_t c = 0; + for(const auto& i : fields_value) { + os << i; + c++; + if (c!=s) { + os << ", "; + } + } + os << "]"; +} + +void unary::dump(std::ostream& os, const std::string& indent) const { + os << indent << destination << " = "; + switch(operand) { + case kind::op_neg: os << "-"; break; + default: assert(false && "unreachable"); break; + } + os << " " << source ; +} + +void binary::dump(std::ostream& os, const std::string& indent) const { + os << indent<< destination << " = "; + os << left << " "; + switch(operator_kind) { + case kind::op_add: os << "+"; break; + case kind::op_sub: os << "-"; break; + case kind::op_mul: os << "*"; break; + case kind::op_div: os << "/"; break; + default: assert(false && "unreachable"); break; + } + os << " " << right; +} + +void compare::dump(std::ostream& os, const std::string& indent) const { + os << indent << left << " "; + switch(operator_kind) { + case kind::op_eq: os << "="; break; + case kind::op_neq: os << "!="; break; + case kind::op_gt: os << ">"; break; + case kind::op_ge: os << ">="; break; + case kind::op_lt: os << "<"; break; + case kind::op_le: os << "<="; break; + default: assert(false && "unreachable"); break; + } + os << " " << right; +} + +void block::dump(std::ostream& os, const std::string& indent) const { + if 
(content.empty()) { + os << indent << "(1 = 1)"; + return; + } + // only one statement in the block, dump without curves + if (content.size()==1) { + content[0]->dump(os, indent); + return; + } + // multiple statements + os << indent << "(\n"; + for(auto i : content) { + i->dump(os, indent + " "); + if (i!=content.back()) { + os << (flag_use_semi? ";":","); + } + os << "\n"; + } + os << indent << ")"; +} + +void fact::dump(std::ostream& os, const std::string& indent) const { + os << indent << "("; + size_t s = literals.size(); + size_t c = 0; + for(const auto& i : literals) { + os << i.first << " = " << i.second; + c++; + if (c!=s) { + os << ", "; + } + } + os << ")"; +} + +void not_operand::dump(std::ostream& os, const std::string& indent) const { + // only one statement in the block + if (body->get_content().size()==1) { + os << indent << "!("; + body->get_content()[0]->dump(os, ""); + os << ")"; + return; + } + // multiple statements + os << indent << "!(\n"; + for(auto i : body->get_content()) { + i->dump(os, indent + " "); + if (i!=body->get_content().back()) { + os << ",\n"; + } else { + os << "\n"; + } + } + os << indent << ")"; +} + +void and_operand::dump(std::ostream& os, const std::string& indent) const { + os << indent << "(\n"; + left->dump(os, indent + " "); + os << ",\n"; // `,` means `and` + right->dump(os, indent + " "); + os << "\n" << indent << ")"; +} + +void or_operand::dump(std::ostream& os, const std::string& indent) const { + os << indent << "(\n"; + left->dump(os, indent + " "); + os << ";\n"; // `;` means `or` + right->dump(os, indent + " "); + os << "\n" << indent << ")"; +} + +void aggregator::dump(std::ostream& os, const std::string& indent) const { + os << indent; + os << destination << " = "; + if (aggregate_name=="len") { + os << "count "; + } else { + os << aggregate_name << " "; + } + if (source.content.length()) { + os << source; + } + os << ": {\n"; + body->dump(os, indent + " "); + os << "\n" << indent << "}"; +} + +} +} \ No 
// Category of an operand carried by a lir instruction.
enum class inst_value_kind {
    null = 0, // reserved, useless
    literal,  // literal
    variable, // variable
    defltval, // default value, in souffle it is `_`
};

// An instruction operand: its category plus the text that stands for it.
// This stays a plain aggregate so it can be brace-initialized as
// `{kind, content}`.
struct inst_value_t {
    inst_value_kind kind;
    std::string content;

    // Operand of kind `null` with empty content.
    static inst_value_t null() {
        return inst_value_t{inst_value_kind::null, std::string()};
    }

    // Literal operand whose content is `s`.
    static inst_value_t literal(const std::string& s) {
        return inst_value_t{inst_value_kind::literal, s};
    }

    // Variable operand named `s`.
    static inst_value_t variable(const std::string& s) {
        return inst_value_t{inst_value_kind::variable, s};
    }

    // Default-value operand; its content is `_`.
    static inst_value_t default_value() {
        return inst_value_t{inst_value_kind::defltval, std::string("_")};
    }
};
inst_block, // | block | [inst*] | + // +---------+----------+ + inst_fact, // | fact | [[src]] | + // +---------+----------+ + inst_not, // | not | block | + // +---------+----------+-------+ + inst_and, // | and | block | block | + // +---------+----------+-------+ + inst_or, // | or | block | block | + // +---------+----------+-------+ + inst_aggr, // | aggr | block | (for aggregator without dst) + // +---------+----------+-------+ + // | aggr | block | dst | (for aggregator with dst) + // +---------+----------+-------+ +}; + +class boolean; +class store; +class call; +class constructor; +class record; +class unary; +class binary; +class compare; +class block; +class fact; +class not_operand; +class and_operand; +class or_operand; +class aggregator; + +class inst_visitor { +public: + virtual void visit_boolean(boolean*) {} + virtual void visit_store(store*) {} + virtual void visit_call(call*) {} + virtual void visit_constructor(constructor*) {} + virtual void visit_record(record*) {} + virtual void visit_unary(unary*) {} + virtual void visit_binary(binary*) {} + virtual void visit_compare(compare*) {} + virtual void visit_block(block*); + virtual void visit_fact(fact*) {} + virtual void visit_not_operand(not_operand*); + virtual void visit_and_operand(and_operand*); + virtual void visit_or_operand(or_operand*); + virtual void visit_aggregator(aggregator*); +}; + +class inst { +private: + inst_kind kind; + report::span location; + bool flag_eliminated; + +public: + inst(inst_kind k, const report::span& loc): + kind(k), location(loc), flag_eliminated(false) {} + virtual ~inst() = default; + + auto get_kind() const { return kind; } + const auto& get_location() const { return location; } + virtual void dump(std::ostream&, const std::string&) const {} + virtual void accept(inst_visitor*) {} + +public: + void set_flag_eliminated(bool flag) { flag_eliminated = flag; } + auto get_flag_eliminated() const { return flag_eliminated; } +}; + +class boolean: public inst { 
+private: + bool flag; + +public: + boolean(bool f, const report::span& loc): + inst(inst_kind::inst_bool, loc), flag(f) {} + boolean(const boolean& b): + inst(inst_kind::inst_bool, b.get_location()), flag(b.flag) {} + ~boolean() override = default; + auto get_flag() const { return flag; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_boolean(this); + } +}; + +class store: public inst { +private: + inst_value_t source; + inst_value_t destination; + +public: + store(const inst_value_t& src, + const inst_value_t& dst, + const report::span& loc): + inst(inst_kind::inst_store, loc), source(src), destination(dst) {} + store(const store& s): + inst(inst_kind::inst_store, s.get_location()), + source(s.source), destination(s.destination) {} + ~store() override = default; + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_store(this); + } + + const auto& get_source() const { return source; } + const auto& get_destination() const { return destination; } +}; + +class call: public inst { +public: + enum class kind { + null = 0, // usually used as a placeholder, when generating a call + // manually, we don't need to specify the kind of call + function, // function (static call) + method, // schema method (method call) + database_load, // database loader static method (static call) + find, // (method call) `find` method + key_cmp, // key_eq & key_neq method (method call) + to_set, // schema to_set method (method call) + basic_method, // basic method (method call) + basic_static, // basic static method (static call) + }; + + // native method for int type + enum class int_method_kind { + int_add, + int_sub, + int_div, + int_mul, + int_rem, + int_pow, + int_bitand, + int_bitor, + int_bitxor, + int_bitnot, + int_neg, + int_eq, + int_ne, + int_gt, + int_ge, + int_lt, + int_le, + int_to_string, + int_to_set + }; + // mapper for method name -> int 
method kind + std::unordered_map int_basic_methods = { + {"int::add", int_method_kind::int_add}, + {"int::sub", int_method_kind::int_sub}, + {"int::div", int_method_kind::int_div}, + {"int::mul", int_method_kind::int_mul}, + {"int::rem", int_method_kind::int_rem}, + {"int::pow", int_method_kind::int_pow}, + {"int::bitand", int_method_kind::int_bitand}, + {"int::bitor", int_method_kind::int_bitor}, + {"int::bitxor", int_method_kind::int_bitxor}, + {"int::bitnot", int_method_kind::int_bitnot}, + {"int::neg", int_method_kind::int_neg}, + {"int::eq", int_method_kind::int_eq}, + {"int::ne", int_method_kind::int_ne}, + {"int::gt", int_method_kind::int_gt}, + {"int::ge", int_method_kind::int_ge}, + {"int::lt", int_method_kind::int_lt}, + {"int::le", int_method_kind::int_le}, + {"int::to_string", int_method_kind::int_to_string}, + {"int::to_set", int_method_kind::int_to_set} + }; + + // native method for string type + enum class string_method_kind { + string_substr, + string_get_regex_match_result, + string_matches, + string_contains, + string_add, + string_len, + string_to_int, + string_eq, + string_ne, + string_to_set, + string_to_upper, + string_to_lower, + string_replace_all, + string_replace_once + }; + // mapper for method name -> string method kind + std::unordered_map string_basic_methods = { + {"string::substr", string_method_kind::string_substr}, + {"string::get_regex_match_result", string_method_kind::string_get_regex_match_result}, + {"string::matches", string_method_kind::string_matches}, + {"string::contains", string_method_kind::string_contains}, + {"string::add", string_method_kind::string_add}, + {"string::len", string_method_kind::string_len}, + {"string::to_int", string_method_kind::string_to_int}, + {"string::eq", string_method_kind::string_eq}, + {"string::ne", string_method_kind::string_ne}, + {"string::to_set", string_method_kind::string_to_set}, + {"string::to_upper", string_method_kind::string_to_upper}, + {"string::to_lower", 
string_method_kind::string_to_lower}, + {"string::replace_all", string_method_kind::string_replace_all}, + {"string::replace_once", string_method_kind::string_replace_once} + }; + +private: + kind type; + std::string function_name; + std::vector arguments; + inst_value_t destination; + +private: + // used in generic is/to method + std::string generic_type; + +private: + void generate_key_cmp(std::ostream&) const; + void generate_schema_to_set(std::ostream&) const; + void generate_database_load(std::ostream&) const; + void generate_int_basic_method(std::ostream&) const; + void generate_string_basic_method(std::ostream&) const; + void generate_basic_method(std::ostream&) const; + void generate_basic_static(std::ostream&) const; + // aggregator generation is now only used for `*T.find` method; + // `len`, `sum`, `min`, `max` is generated by lir::aggregator + void generate_find(std::ostream&) const; + +public: + call(const std::string& func_name, const report::span& loc): + inst(inst_kind::inst_call, loc), type(kind::null), + function_name(func_name), + destination({inst_value_kind::null, ""}) {} + call(const call& c): + inst(inst_kind::inst_call, c.get_location()), type(c.type), + function_name(c.function_name), + arguments(c.arguments), + destination(c.destination), + generic_type(c.generic_type) {} + ~call() override = default; + void dump(std::ostream&, const std::string&) const override; + void add_arg(const inst_value_t& arg) { arguments.push_back(arg); } + void set_return(const inst_value_t& dst) { destination = dst; } + void set_call_type(kind t) { type = t; } + void set_generic_type(const std::string& t) { generic_type = t; } + const auto& get_generic_type() const { return generic_type; } + const auto& get_function_name() const { return function_name; } + auto get_func_kind() const { return type; } + void accept(inst_visitor* v) override { + v->visit_call(this); + } + + const auto& get_arguments() const { return arguments; } + const auto& get_return() const { 
return destination; } + auto& get_mutable_arguments() { return arguments; } + auto& get_mutable_result() { return destination; } +}; + +class constructor: public inst { +private: + std::string schema_name; + std::vector fields_value; + inst_value_t destination; + +public: + constructor(const std::string& name, + const inst_value_t& dst, + const report::span& loc): + inst(inst_kind::inst_ctor, loc), + schema_name(name), destination(dst) {} + constructor(const constructor& c): + inst(inst_kind::inst_ctor, c.get_location()), + schema_name(c.schema_name), + fields_value(c.fields_value), + destination(c.destination) {} + ~constructor() override = default; + void dump(std::ostream&, const std::string&) const override; + void add_field(const inst_value_t& source) { + fields_value.push_back(source); + } + const auto& get_fields() const { return fields_value; } + const auto& get_schema_name() const { return schema_name; } + void accept(inst_visitor* v) override { + v->visit_constructor(this); + } + + auto& get_mutable_fields() { return fields_value; } + auto& get_mutable_result() { return destination; } +}; + +class record: public inst { +private: + std::vector fields_value; + inst_value_t destination; + +public: + record(const inst_value_t& dst, + const report::span& loc): + inst(inst_kind::inst_record, loc), destination(dst) {} + record(const record& c): + inst(inst_kind::inst_record, c.get_location()), + fields_value(c.fields_value), + destination(c.destination) {} + ~record() override = default; + void dump(std::ostream&, const std::string&) const override; + void add_field(const inst_value_t& source) { + fields_value.push_back(source); + } + const auto& get_fields() const { return fields_value; } + void accept(inst_visitor* v) override { + v->visit_record(this); + } + + auto& get_mutable_fields() { return fields_value; } + auto& get_mutable_result() { return destination; } +}; + +class unary: public inst { +public: + enum class kind { + op_neg, // - + }; + +private: + 
kind operand; + inst_value_t source; + inst_value_t destination; + +public: + unary(const kind op, + const inst_value_t& src, + const inst_value_t& dst, + const report::span& loc): + inst(inst_kind::inst_unary, loc), operand(op), + source(src), destination(dst) {} + unary(const unary& u): + inst(inst_kind::inst_unary, u.get_location()), operand(u.operand), + source(u.source), destination(u.destination) {} + ~unary() override = default; + +public: + const auto& get_destination() const { return destination; } + auto& get_mutable_destination() { return destination; } + +public: + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_unary(this); + } +}; + +class binary: public inst { +public: + enum class kind { + op_add, // + + op_sub, // - + op_mul, // * + op_div // / + }; + +private: + inst_value_t left; + inst_value_t right; + inst_value_t destination; + kind operator_kind; + +public: + binary(const inst_value_t& l, + const inst_value_t& r, + const inst_value_t& dst, + const kind op, + const report::span& loc): + inst(inst_kind::inst_binary, loc), left(l), right(r), + destination(dst), operator_kind(op) {} + binary(const binary& b): + inst(inst_kind::inst_binary, b.get_location()), + left(b.left), right(b.right), + destination(b.destination), operator_kind(b.operator_kind) {} + ~binary() override = default; + +public: + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_binary(this); + } + +public: + auto get_operator() const { return operator_kind; } + const auto& get_left() const { return left; } + const auto& get_right() const { return right; } + const auto& get_destination() const { return destination; } + auto& get_mutable_destination() { return destination; } +}; + +class compare: public inst { +public: + enum class kind { + op_eq, // = + op_neq, // != + op_gt, // > + op_ge, // >= + op_lt, // < + op_le // <= + }; + +private: + 
inst_value_t left; + inst_value_t right; + kind operator_kind; + +public: + compare(const inst_value_t& l, + const inst_value_t& r, + const kind op, + const report::span& loc): + inst(inst_kind::inst_cmp, loc), + left(l), right(r), operator_kind(op) {} + compare(const compare& c): + inst(inst_kind::inst_cmp, c.get_location()), + left(c.left), right(c.right), + operator_kind(c.operator_kind) {} + ~compare() override = default; + +public: + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_compare(this); + } + auto get_operator() const { return operator_kind; } + const auto& get_left() const { return left; } + const auto& get_right() const { return right; } +}; + +class block: public inst { +private: + std::vector content; + // when doing output, if there are more than one instruction, + // we need to use semicolon to separate them if this switch is true, + // otherwise use comma instead + // in souffle: + // - `;` means `or` + // - `,` means `and` + bool flag_use_semi; + +public: + block(const report::span& loc): + inst(inst_kind::inst_block, loc), flag_use_semi(false) {} + ~block() override { + for(auto i : content) { + delete i; + } + } + void add_new_content(inst* i) { content.push_back(i); } + void set_use_semicolon() { flag_use_semi = true; } + void set_use_comma() { flag_use_semi = false; } + auto get_use_semicolon() const { return flag_use_semi; } + auto get_use_comma() const { return !flag_use_semi; } + const auto& get_content() const { return content; } + auto& get_mutable_content() { return content; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_block(this); + } +}; + +class fact: public inst { +private: + std::vector> literals; + +public: + fact(const report::span& loc): inst(inst_kind::inst_fact, loc) {} + fact(const fact& f): + inst(inst_kind::inst_fact, f.get_location()), literals(f.literals) {} + ~fact() override = 
default; + void add_pair(const std::string& name, const inst_value_t& literal) { + literals.push_back({name, literal}); + } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_fact(this); + } +}; + +class not_operand: public inst { +private: + block* body; + +public: + not_operand(const report::span& loc): + inst(inst_kind::inst_not, loc), body(nullptr) {} + ~not_operand() override { + delete body; + } + void set_body(block* b) { body = b; } + auto get_body() { return body; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_not_operand(this); + } +}; + +class and_operand: public inst { +private: + block* left; + block* right; + +public: + and_operand(const report::span& loc): + inst(inst_kind::inst_and, loc), left(nullptr), right(nullptr) {} + ~and_operand() override { + delete left; + delete right; + } + void set_left(block* b) { left = b; } + void set_right(block* b) { right = b; } + auto get_left_block() { return left; } + auto get_right_block() { return right; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_and_operand(this); + } +}; + +class or_operand: public inst { +private: + block* left; + block* right; + +public: + or_operand(const report::span& loc): + inst(inst_kind::inst_or, loc), left(nullptr), right(nullptr) {} + ~or_operand() override { + delete left; + delete right; + } + void set_left(block* b) { left = b; } + void set_right(block* b) { right = b; } + auto get_left_block() { return left; } + auto get_right_block() { return right; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_or_operand(this); + } +}; + +class aggregator: public inst { +private: + std::string aggregate_name; + inst_value_t source; + inst_value_t destination; + block* body; + +public: + aggregator(const 
std::string& agg_name, + const inst_value_t& src, + const inst_value_t& dest, + const report::span& loc): + inst(inst_kind::inst_aggr, loc), + aggregate_name(agg_name), + source(src), destination(dest), body(nullptr) {} + aggregator(const aggregator& a): + inst(inst_kind::inst_aggr, a.get_location()), + aggregate_name(a.aggregate_name), + source(a.source), destination(a.destination), body(nullptr) {} + ~aggregator() override { + delete body; + } + void set_body(block* b) { body = b; } + auto get_body() { return body; } + void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_aggregator(this); + } +}; + +} +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/name_mangling.cpp b/godel-script/godel-frontend/src/ir/name_mangling.cpp new file mode 100644 index 00000000..377aae75 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/name_mangling.cpp @@ -0,0 +1,41 @@ +#include "godel-frontend/src/ir/name_mangling.h" + +namespace godel { + +std::string field_name_mangling(const std::string& name, const symbol& type) { + // by field name mangling, we try to avoid variable name conflictions + // for example: here's a schema `schema test {a: int}` + // + // impl test { + // pub fn __all__() -> *test { + // for(a in int::range(0, 10)) { yield test{a: a}; } + // } + // } + // + // if we do not do the name mangling, then will generate this: + // + // schema_test(result, [-1, -1], a) :- ( + // a = a, + // ^ this is the field of test: `test.a` + // ^ this is the variable `a` + // result = [a], + // a = range(0, 10) + // ). + // + // and this will cause `ungrounded error` or `empty result`. 
+ // but if we do the name mangling, then it will be like this: + // + // schema_test(result, [-1, -1], field_0x6669656c64_a_int) :- ( + // field_0x6669656c64_a_int = a, + // ^^^^^^^^^^^^^^^^^^^^^^^^ this is the field of test: `test.a` + // ^ this is the variable `a` + // result = [field_0x6669656c64_a_int], + // a = range(0, 10) + // ). + // + return "field_0x6669656c64_" + name + "_" + replace_colon( + type.full_path_name_without_set() + ); +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/name_mangling.h b/godel-script/godel-frontend/src/ir/name_mangling.h new file mode 100644 index 00000000..466c03d9 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/name_mangling.h @@ -0,0 +1,11 @@ +#pragma once + +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/symbol.h" + +namespace godel { + +std::string field_name_mangling(const std::string&, const symbol&); + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/pass.cpp b/godel-script/godel-frontend/src/ir/pass.cpp new file mode 100644 index 00000000..31331b1b --- /dev/null +++ b/godel-script/godel-frontend/src/ir/pass.cpp @@ -0,0 +1,3 @@ +#include "godel-frontend/src/ir/pass.h" + +namespace godel {} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/pass.h b/godel-script/godel-frontend/src/ir/pass.h new file mode 100644 index 00000000..4ff43e3a --- /dev/null +++ b/godel-script/godel-frontend/src/ir/pass.h @@ -0,0 +1,42 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/ir/ir_context.h" + +#include +#include +#include + +namespace godel { + +enum class pass_kind { + ps_remove_unused, + ps_remove_unused_type, + ps_inst_combine, + ps_flatten_nested_block, + ps_aggregator_inline_remark +}; + +// there are three types of passes: +// +// - Analysis: analyse and report possible errors +// - Transform: perform optimizations or 
rewrite IR +// - Utility: provide extra functionality +// +class pass: public lir::inst_visitor { +protected: + pass_kind kind; + report::error err; + ir_context* ctx; + +public: + pass(pass_kind k, ir_context& c): kind(k), ctx(&c) {} + virtual ~pass() = default; + +public: + auto get_kind() const { return kind; } + virtual const char* get_name() const = 0; + virtual bool run() = 0; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/pass_manager.cpp b/godel-script/godel-frontend/src/ir/pass_manager.cpp new file mode 100644 index 00000000..0c4bcf16 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/pass_manager.cpp @@ -0,0 +1,56 @@ +#include "godel-frontend/src/ir/pass_manager.h" +#include "godel-frontend/src/ir/inst_combine.h" +#include "godel-frontend/src/ir/remove_unused.h" +#include "godel-frontend/src/ir/flatten_block.h" +#include "godel-frontend/src/ir/aggregator_inline_remark.h" + +namespace godel { + +pass_manager::~pass_manager() { + for(auto p : ordered_pass_list) { + delete p; + } +} + +void pass_manager::run(ir_context& ctx, const cli::configure& conf) { + // load all needed passes by running order + // because transform pass may change the IR + // and the next pass may use the changed IR + // + // [analyse] -> [transform] -> [transform] -> [analyse] -> [transform] + // + // so be aware of the order of passes + ordered_pass_list = {}; + if (!conf.count(cli::option::cli_disable_remove_unused)) { + ordered_pass_list.push_back(new unused_remove_pass(ctx)); + ordered_pass_list.push_back(new unused_type_alias_remove_pass(ctx)); + } + if (conf.count(cli::option::cli_enable_ir_merge)) { + ordered_pass_list.push_back(new inst_combine_pass(ctx)); + } + ordered_pass_list.push_back(new flatten_nested_block(ctx)); + ordered_pass_list.push_back(new aggregator_inline_remark(ctx)); + + bool verbose_info = conf.count(cli::option::cli_verbose); + + // must run in order, stop on first failure + for(auto p : ordered_pass_list) { + // 
print info + if (verbose_info) { + std::clog << "IR Pass Running: "; + std::clog << p->get_name() << "\n"; + } + + // run pass + if (!p->run()) { + err.err("failed to run pass: " + std::string(p->get_name())); + break; + } + } + + if (verbose_info) { + std::clog << "\n"; + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/pass_manager.h b/godel-script/godel-frontend/src/ir/pass_manager.h new file mode 100644 index 00000000..4e696fda --- /dev/null +++ b/godel-script/godel-frontend/src/ir/pass_manager.h @@ -0,0 +1,21 @@ +#pragma once + +#include "godel-frontend/src/ir/pass.h" +#include "godel-frontend/src/cli.h" +#include "godel-frontend/src/error/error.h" + +namespace godel { + +class pass_manager { +private: + report::error err; + std::vector ordered_pass_list; + +public: + pass_manager() = default; + pass_manager(pass_manager&&) = delete; + ~pass_manager(); + void run(ir_context&, const cli::configure&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/remove_unused.cpp b/godel-script/godel-frontend/src/ir/remove_unused.cpp new file mode 100644 index 00000000..00d5584e --- /dev/null +++ b/godel-script/godel-frontend/src/ir/remove_unused.cpp @@ -0,0 +1,296 @@ +#include "godel-frontend/src/ir/remove_unused.h" + +namespace godel { + +void call_graph_generator::check_inst(lir::inst* stmt, + std::queue& bfs, + used_dict& dict) const { + switch(stmt->get_kind()) { + case lir::inst_kind::inst_call: + dict.insert(replace_colon( + reinterpret_cast(stmt)->get_function_name() + )); + break; + case lir::inst_kind::inst_ctor: + dict.insert(replace_colon( + "schema_" + + reinterpret_cast(stmt)->get_schema_name() + )); + break; + case lir::inst_kind::inst_block: + bfs.push(reinterpret_cast(stmt)); + break; + case lir::inst_kind::inst_not: + bfs.push(reinterpret_cast(stmt)->get_body()); + break; + case lir::inst_kind::inst_and: + bfs.push(reinterpret_cast(stmt)->get_left_block()); + 
bfs.push(reinterpret_cast(stmt)->get_right_block()); + break; + case lir::inst_kind::inst_or: + bfs.push(reinterpret_cast(stmt)->get_left_block()); + bfs.push(reinterpret_cast(stmt)->get_right_block()); + break; + case lir::inst_kind::inst_aggr: + bfs.push(reinterpret_cast(stmt)->get_body()); + default: break; + } +} + +void call_graph_generator::scan_call(souffle_rule_impl* impl, + used_dict& dict) const { + // recursively search used rules + // but we use bfs queue to avoid stack overflow + // so visitor(dfs) is not needed here + std::queue bfs; + bfs.push(impl->get_block()); + while(!bfs.empty()) { + auto block = bfs.front(); + bfs.pop(); + for(auto stmt : block->get_content()) { + check_inst(stmt, bfs, dict); + } + } +} + +void call_graph_generator::initialize_call_graph_root(const std::vector& output, + call_graph& cg) const { + for(const auto& i : output) { + const auto name = replace_colon(i); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + } +} + +void call_graph_generator::initialize_call_graph_root( + const std::vector& output, + call_graph& cg) const { + for(const auto& i : output) { + const auto name = replace_colon(i.rule_name); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + } +} + +void call_graph_generator::initialize_call_graph(const std::vector& impls, + call_graph& cg) const { + for(auto i : impls) { + const auto name = replace_colon(i->get_func_name()); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + // construct the call graph and mark all used rules + scan_call(i, cg.at(name)); + } +} + +const used_dict& call_graph_generator::apply(const ir_context& ctx) { + // create call graph data structure + call_graph cg; + + // construct call graph by scanning the IR + initialize_call_graph_root(ctx.souffle_output, cg); + initialize_call_graph_root(ctx.annotated_output, cg); + initialize_call_graph(ctx.rule_impls, cg); + initialize_call_graph(ctx.database_get_table, cg); + initialize_call_graph(ctx.schema_get_field, cg); + 
initialize_call_graph(ctx.schema_data_constraint_impls, cg); + + // use bfs to find all used rules + std::queue bfs; + // clear used set + used.clear(); + // data constraint for database, this must be used + used.insert("all_data_DBIndex"); + // start from souffle output, the root of call graph + for(const auto& i : ctx.souffle_output) { + bfs.push(replace_colon(i)); + used.insert(replace_colon(i)); + } + for(const auto& i : ctx.annotated_output) { + bfs.push(replace_colon(i.rule_name)); + used.insert(replace_colon(i.rule_name)); + } + + // use bfs to find all used rules + while(!bfs.empty()) { + const auto curr = bfs.front(); + bfs.pop(); + // rule name not found, check next rule impl in queue + if (!cg.count(curr)) { + continue; + } + // add all used rules into the queue + for(const auto& i : cg.at(curr)) { + // do not push used rule into the queue, to avoid infinite loop + if (used.count(i)) { + continue; + } + bfs.push(i); + used.insert(i); + } + } + + // return the result + return used; +} + +void unused_remove_pass::remove_unused_schema_data_constraint_decl(const used_dict& used_rule) { + std::vector used; + for(const auto& i : ctx->schema_data_constraint_decls) { + if (used_rule.count("schema_" + replace_colon(i.name))) { + used.push_back(i); + } + } + ctx->schema_data_constraint_decls = used; +} + +void unused_remove_pass::remove_unused_schema_data_constraint_impl(const used_dict& used_rule) { + std::vector used; + for(auto i : ctx->schema_data_constraint_impls) { + if (used_rule.count(replace_colon(i->get_func_name()))) { + used.push_back(i); + } else { + delete i; + } + } + ctx->schema_data_constraint_impls = used; +} + +void unused_remove_pass::remove_unused_schema_get_field(const used_dict& used_rule) { + std::vector used; + for(auto i : ctx->schema_get_field) { + if (used_rule.count(replace_colon(i->get_func_name()))) { + used.push_back(i); + } else { + delete i; + } + } + ctx->schema_get_field = used; +} + +void 
unused_remove_pass::remove_unused_rule_decl(const used_dict& used_rule) { + std::vector used; + for(auto i : ctx->rule_decls) { + if (used_rule.count(replace_colon(i->get_rule_raw_name()))) { + used.push_back(i); + } else { + delete i; + } + } + ctx->rule_decls = used; +} + +void unused_remove_pass::remove_unused_rule_impl(const used_dict& used_rule) { + std::vector used; + for(auto i : ctx->rule_impls) { + if (used_rule.count(replace_colon(i->get_func_name()))) { + used.push_back(i); + } else { + delete i; + } + } + ctx->rule_impls = used; +} + +void unused_remove_pass::remove_unused_input_decl(const used_dict& used_rule) { + std::vector used; + for(const auto& i : ctx->input_decls) { + if (used_rule.count(i.get_decl_name())) { + used.push_back(i); + } + } + ctx->input_decls = used; +} + +void unused_remove_pass::remove_unused_input_impl(const used_dict& used_rule) { + std::vector used; + for(const auto& i : ctx->input_impls) { + if (used_rule.count(i.get_decl_name())) { + used.push_back(i); + } + } + ctx->input_impls = used; +} + +void unused_remove_pass::remove_unused_annotated_input(const used_dict& used_rule) { + std::vector used; + for(const auto& i : ctx->annotated_input) { + if (used_rule.count(i.rule_name)) { + used.push_back(i); + } + } + ctx->annotated_input = used; +} + +void unused_remove_pass::remove_unused_database_get_table(const used_dict& used_rule) { + std::vector used; + for(auto i : ctx->database_get_table) { + if (used_rule.count(replace_colon(i->get_func_name()))) { + used.push_back(i); + } else { + delete i; + } + } + ctx->database_get_table = used; +} + +bool unused_remove_pass::run() { + call_graph_generator cgg; + const auto& used_rule = cgg.apply(*ctx); + remove_unused_schema_data_constraint_decl(used_rule); + remove_unused_schema_data_constraint_impl(used_rule); + remove_unused_schema_get_field(used_rule); + remove_unused_rule_decl(used_rule); + remove_unused_rule_impl(used_rule); + remove_unused_input_decl(used_rule); + 
remove_unused_input_impl(used_rule); + remove_unused_annotated_input(used_rule); + remove_unused_database_get_table(used_rule); + return true; +} + +bool unused_type_alias_remove_pass::run() { + std::unordered_set used_type = {"int", "string"}; + for(const auto& i : ctx->schema_data_constraint_decls) { + for(const auto& field : i.fields) { + used_type.insert(replace_colon(field.second)); + } + } + for(const auto& i : ctx->input_decls) { + for(const auto& field : i.fields) { + used_type.insert(replace_colon(field.second)); + } + } + for(auto i : ctx->rule_decls) { + for(const auto& param : i->get_params()) { + used_type.insert(replace_colon(param.second)); + } + used_type.insert(replace_colon(i->get_return_type())); + } + // add their real type into the used type too + for(const auto& i : ctx->type_alias) { + if (used_type.count(i.alias)) { + if (i.structure_type_list.size()) { + for(const auto& j : i.structure_type_list) { + used_type.insert(j); + } + } else { + used_type.insert(i.real); + } + } + } + + std::vector used_type_alias; + for(const auto& i : ctx->type_alias) { + if (used_type.count(i.alias)) { + used_type_alias.push_back(i); + } + } + ctx->type_alias = used_type_alias; + return true; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/remove_unused.h b/godel-script/godel-frontend/src/ir/remove_unused.h new file mode 100644 index 00000000..220b6d86 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/remove_unused.h @@ -0,0 +1,66 @@ +#pragma once + +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/pass.h" + +#include +#include +#include +#include +#include + +namespace godel { + +typedef std::unordered_set used_dict; +typedef std::unordered_map call_graph; + +class call_graph_generator { +private: + used_dict used; + +private: + void check_inst(lir::inst*, std::queue&, used_dict&) const; + void scan_call(souffle_rule_impl*, used_dict&) const; + void 
initialize_call_graph_root(const std::vector&, + call_graph&) const; + void initialize_call_graph_root(const std::vector&, + call_graph&) const; + void initialize_call_graph(const std::vector&, + call_graph&) const; + +public: + const used_dict& apply(const ir_context&); +}; + +class unused_remove_pass: public pass { +private: + void remove_unused_schema_data_constraint_decl(const used_dict&); + void remove_unused_schema_data_constraint_impl(const used_dict&); + void remove_unused_schema_get_field(const used_dict&); + void remove_unused_rule_decl(const used_dict&); + void remove_unused_rule_impl(const used_dict&); + void remove_unused_input_decl(const used_dict&); + void remove_unused_input_impl(const used_dict&); + void remove_unused_annotated_input(const used_dict&); + void remove_unused_database_get_table(const used_dict&); + +public: + unused_remove_pass(ir_context& c): pass(pass_kind::ps_remove_unused, c) {} + const char* get_name() const override { + return "[Transform] Remove Unused Rule"; + } + bool run() override; +}; + +class unused_type_alias_remove_pass: public pass { +public: + unused_type_alias_remove_pass(ir_context& c): + pass(pass_kind::ps_remove_unused_type, c) {} + const char* get_name() const override { + return "[Transform] Remove Unused Type Alias"; + } + bool run() override; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/lexer.cpp b/godel-script/godel-frontend/src/lexer.cpp new file mode 100644 index 00000000..53950b98 --- /dev/null +++ b/godel-script/godel-frontend/src/lexer.cpp @@ -0,0 +1,503 @@ +#include "godel-frontend/src/util/util.h" +#include "godel-frontend/src/error/error.h" +#include "lexer.h" + +#include +#include +#include +#include + +namespace godel { + +using report::span; +using report::error; +namespace fs = std::filesystem; + +void lexer::open(const std::string& path) { + if (!fs::exists(path)) { + err.fatal("file <" + path + "> does not exist."); + } else if (!fs::is_regular_file(path)) { 
+ err.fatal("file <" + path + "> is not regular file."); + } + filename = path; + std::ifstream in(path, std::ios::binary); + std::stringstream ss; + ss << in.rdbuf(); + source = ss.str(); +} + +bool lexer::should_skip(char c) const { + return (c==' ') || (c=='\t') || (c=='\n') || (c=='\r') || (c<=0); +} + +bool lexer::is_identifier_head(char c) const { + return (c=='_') || std::isalpha(c); +} + +bool lexer::is_identifier(char c) const { + return is_identifier_head(c) || std::isdigit(c); +} + +bool lexer::is_number_head(char c) const { + return std::isdigit(c); +} + +bool lexer::is_dec_number(char c) const { + return std::isdigit(c); +} + +bool lexer::is_oct_number(char c) const { + return ('0'<=c && c<='7'); +} + +bool lexer::is_hex_number(char c) const { + return std::isxdigit(c); +} + +bool lexer::is_string_head(char c) const { + return c=='\"'; +} + +bool lexer::is_single_opr(char c) const { + return c=='(' || c==')' || c=='[' || c==']' || c=='{' || c=='}' || + c==',' || c=='+' || c=='*' || c=='/' || c==';'; +} + +bool lexer::is_dot(char c) const { + return c=='.'; +} + +bool lexer::is_cmp_opr_head(char c) const { + return c=='<' || c=='>' || c=='!'; +} + +bool lexer::is_logic_opr(char c) const { + return c=='&' || c=='|'; +} + +bool lexer::is_annotation(char c) const { + return c=='@'; +} + +bool lexer::is_colon(char c) const { + return c==':'; +} + +bool lexer::is_note() const { + return source[ptr]=='/' && ptr+1=source.size()) { + return {start_line, start_column, line, column, tok::tok_num, res, filename}; + } + + if (source[ptr]=='x' || source[ptr]=='X') { + res += source[ptr]; + ++column; + ++ptr; + while (ptr1) { + err.warn({start_line, start_column, line, column, filename}, + "decimal literal should not begin with `0`." 
+ ); + } + } + // float number beginning with "0" + else if (res=="0" && source[ptr]=='.') { + res += source[ptr]; + ++column; + ++ptr; + while (ptr=source.size() && str.back()!='\"') { + err.err({start_line, start_column, line, column, filename}, + "get when generating strings.", "check if `\"` is in pair." + ); + } + ++column; + ++ptr; + return {start_line, start_column, line, column, tok::tok_str, str, filename}; +} + +token lexer::single_operator() { + auto tmp = std::string(1, source[ptr]); + const auto start_line = line; + const auto start_column = column; + ++column; + ++ptr; + return {start_line, start_column, line, column, type_mapper.at(tmp), tmp, filename}; +} + +token lexer::dot_operator() { + auto tmp = std::string(1, source[ptr]); + const auto start_line = line; + const auto start_column = column; + if (ptr+1') { + tmp += source[ptr]; + ++column; + ++ptr; + } + return {start_line, start_column, line, column, type_mapper.at(tmp), tmp, filename}; +} + +void lexer::notes() { + auto comment = std::string(""); + const auto start_line = line; + const auto start_column = column; + while (ptr", filename + }); + } + return err; +} + +void lexer::dump() const { + size_t max_length = 0; + for(const auto& i : toks) { + max_length = max_length32? 
32:max_length; + for(const auto& i : toks) { + std::cout + << util::rightpad(i.content, max_length) + << " (" << i.location.file + << ":" << i.location.start_line + << ":" << i.location.start_column + 1 + << " -> " << i.location.file + << ":" << i.location.end_line + << ":" << i.location.end_column + 1 + << ")\n"; + } +} + +void lexer::dump_comments() const { + for(const auto& i : comments) { + std::cout << i.location.file << ":" << i.location.start_line << ":\n"; + std::cout << i.content << "\n"; + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/lexer.h b/godel-script/godel-frontend/src/lexer.h new file mode 100644 index 00000000..17bae34d --- /dev/null +++ b/godel-script/godel-frontend/src/lexer.h @@ -0,0 +1,219 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" + +#include +#include +#include +#include +#include + +namespace godel { + +using report::error; +using report::span; + +enum class tok { + tok_err = 0, // empty token + tok_comment, // comment token + tok_use, // keyword use + tok_pub, // keyword pub + tok_schema, // keyword schema + tok_extends, // keyword extends + tok_database, // keyword database + tok_fn, // keyword fn + tok_enum, // keyword enum + tok_impl, // keyword impl + tok_for, // keyword for + tok_let, // keyword let + tok_if, // keyword if + tok_else, // keyword else + tok_true, // keyword true + tok_false, // keyword false + tok_match, // keyword match + tok_yield, // keyword yield + tok_ret, // keyword return + tok_query, // keyword query + tok_from, // keyword from + tok_where, // keyword where + tok_select, // keyword select + tok_id, // identifier + tok_num, // number literal + tok_str, // string literal + tok_anno, // annotation like @annotation_example + tok_in, // operator in + tok_as, // operator as + tok_lcurve, // ( + tok_rcurve, // ) + tok_lbrkt, // [ + tok_rbrkt, // ] + tok_lbrace, // { + tok_rbrace, // } + tok_semi, // ; + tok_comma, // , + tok_colon, // : + tok_plink, // :: path 
link + tok_and, // && + tok_or, // || + tok_not, // ! + tok_equal, // = + tok_add, // + + tok_sub, // - + tok_mult, // * + tok_div, // / + tok_grt, // > + tok_geq, // >= + tok_less, // < + tok_leq, // <= + tok_neq, // != + tok_dot, // . + tok_ellipsis, // .. ellipsis + tok_arrow, // -> used to declare return value type + tok_marrow, // => used in match statement + tok_eof // end of file token +}; + +struct token { + span location; + tok type; + std::string content; + + token(uint32_t sline, uint32_t scol, uint32_t eline, uint32_t ecol, + tok ttype, const std::string& str, const std::string& file): + location(sline, scol, eline, ecol, file), + type(ttype), content(str) {} + std::string to_json() const { + if (type==tok::tok_comment) { + std::string raw = ""; + for(const char c: content) { + if (c=='\\') { + raw += "\\\\"; + } else if (c=='\"') { + raw += "\\\""; + } else if (c=='\n') { + raw += "\\n"; + } else if (c=='\r') { + raw += "\\r"; + } else if (c=='\t') { + raw += "\\t"; + } else { + raw += c; + } + } + return "{\"content\":\"" + raw + "\"," + + "\"location\":" + location.to_json() + "}"; + } + return "{\"content\":\"" + content + "\"," + + "\"location\":" + location.to_json() + "}"; + } +}; + +class lexer { +private: + error& err; + uint32_t ptr; + uint32_t line; + uint32_t column; + std::string source; + std::string filename; + std::vector toks; + std::vector comments; + // mapper for string -> tok kind + const std::unordered_map type_mapper = { + {"use", tok::tok_use }, + {"pub", tok::tok_pub }, + {"schema", tok::tok_schema }, + {"extends", tok::tok_extends }, + {"database", tok::tok_database }, + {"fn", tok::tok_fn }, + {"enum", tok::tok_enum }, + {"impl", tok::tok_impl }, + {"for", tok::tok_for }, + {"let", tok::tok_let }, + {"if", tok::tok_if }, + {"else", tok::tok_else }, + {"true", tok::tok_true }, + {"false", tok::tok_false }, + {"match", tok::tok_match }, + {"yield", tok::tok_yield }, + {"return", tok::tok_ret }, + {"query", tok::tok_query }, + 
{"from", tok::tok_from }, + {"where", tok::tok_where }, + {"select", tok::tok_select }, + {"in", tok::tok_in }, + {"as", tok::tok_as }, + {"(", tok::tok_lcurve }, + {")", tok::tok_rcurve }, + {"[", tok::tok_lbrkt }, + {"]", tok::tok_rbrkt }, + {"{", tok::tok_lbrace }, + {"}", tok::tok_rbrace }, + {",", tok::tok_comma }, + {";", tok::tok_semi }, + {":", tok::tok_colon }, + {"::", tok::tok_plink }, + {"&&", tok::tok_and }, + {"||", tok::tok_or }, + {"!", tok::tok_not }, + {"=", tok::tok_equal }, + {"+", tok::tok_add }, + {"-", tok::tok_sub }, + {"*", tok::tok_mult }, + {"/", tok::tok_div }, + {">", tok::tok_grt }, + {">=", tok::tok_geq }, + {"<", tok::tok_less }, + {"<=", tok::tok_leq }, + {"!=", tok::tok_neq }, + {".", tok::tok_dot }, + {"..", tok::tok_ellipsis }, + {"->", tok::tok_arrow }, + {"=>", tok::tok_marrow } + }; + + void open(const std::string&); + + bool should_skip(char)const; + bool is_identifier_head(char) const; + bool is_identifier(char) const; + bool is_number_head(char) const; + bool is_dec_number(char) const; + bool is_oct_number(char) const; + bool is_hex_number(char) const; + bool is_string_head(char) const; + bool is_single_opr(char) const; + bool is_dot(char) const; + bool is_cmp_opr_head(char) const; + bool is_logic_opr(char) const; // && || + bool is_annotation(char) const; // @identifier + bool is_colon(char) const; // : :: + bool is_note() const; // //note + bool is_multi_line_note() const; // /*note*/ + bool is_opr_or_arrow(char) const; // - -> = => + + void skip(); + token identifier(); + token annotation(); + token num_gen(); + token str_gen(); + token single_operator(); + token dot_operator(); + token logic_operator(); + token multi_char_operator(); + token colons(); + token opr_or_arrow(); + void notes(); + void multi_line_notes(); + void invalid_character(); + +public: + lexer(error& err_module): err(err_module), ptr(0), line(0), column(0) {} + [[nodiscard]] const error& scan(const std::string&); + const auto& result() const { return 
toks; } + const auto& extract_comments() const { return comments; } + void dump() const; + void dump_comments() const; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/main.cpp b/godel-script/godel-frontend/src/main.cpp new file mode 100644 index 00000000..bd3c3243 --- /dev/null +++ b/godel-script/godel-frontend/src/main.cpp @@ -0,0 +1,17 @@ +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/util/util.h" +#include "godel-frontend/src/cli.h" +#include "godel-frontend/src/engine.h" + +#include + +int main(int argc, const char* argv[]) { + if (argc == 1) { + std::clog << godel::cli::welcome; + return 0; + } + + const auto config = godel::cli::process_args({argv, argv + argc}); + const auto error_count = godel::engine().run(config).get_error(); + return error_count? -1:0; +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/package/module_tree.cpp b/godel-script/godel-frontend/src/package/module_tree.cpp new file mode 100644 index 00000000..47b193df --- /dev/null +++ b/godel-script/godel-frontend/src/package/module_tree.cpp @@ -0,0 +1,85 @@ +#include "module_tree.h" + +#include +#include + +namespace package { + +void module_tree::insert(const std::string& full_path) { + std::vector paths; + size_t last = 0, pos = full_path.find("::", 0); + while(pos!=std::string::npos) { + paths.push_back(full_path.substr(last, pos - last)); + last = pos + 2; + pos = full_path.find("::", last); + } + if (last!=full_path.length()) { + paths.push_back(full_path.substr(last, pos - last)); + } + insert_core(paths); +} + +void module_tree::insert_core(const std::vector& paths) { + if (!paths.size()) { + return; + } + module_tree_node* ptr = nullptr; + auto gen_path = paths[0]; + if (!root.count(paths[0])) { + ptr = new module_tree_node(paths[0], gen_path); + root[paths[0]] = ptr; + } else { + ptr = root.at(paths[0]); + } + for(size_t i = 1; i < paths.size(); i++) { + gen_path += "::" + paths[i]; + if 
(!ptr->next.count(paths[i])) { + auto tmp = new module_tree_node(paths[i], gen_path); + ptr->next[paths[i]] = tmp; + ptr = tmp; + } else { + ptr = ptr->next.at(paths[i]); + } + } +} + +void module_tree::dump_node(module_tree_node* node) { + std::clog << " "; + for(const auto& i : indent) { + std::clog << i; + } + if (node->next.empty()) { + std::clog << "[module] "; + } else { + std::clog << "[package] "; + } + std::clog << node->node_name; + if (node->full_path.length()) { + std::clog << " => " << node->full_path; + } + std::clog << "\n"; + if (indent.back()=="+--") { + indent.back() = " "; + } else if (indent.back()=="|--") { + indent.back() = "| "; + } + indent.push_back(" "); + size_t index = 0; + for(const auto& i : node->next) { + indent.back() = (index==node->next.size()-1)? "+--":"|--"; + dump_node(i.second); + ++index; + } + indent.pop_back(); +} + +void module_tree::dump() { + size_t index = 0; + for(const auto& i : root) { + indent = {index==root.size()-1? "+--":"|--"}; + dump_node(i.second); + ++index; + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/package/module_tree.h b/godel-script/godel-frontend/src/package/module_tree.h new file mode 100644 index 00000000..d6b03073 --- /dev/null +++ b/godel-script/godel-frontend/src/package/module_tree.h @@ -0,0 +1,53 @@ + +#pragma once + +#include +#include +#include +#include + +namespace package { + +struct module_tree_node { + // node short name, for example: godelscript + std::string node_name; + // node full path, for example: coref::godelscript + std::string full_path; + // child nodes/packages + std::unordered_map next; + + module_tree_node(const std::string& name, const std::string& path): + node_name(name), full_path(path) {} + ~module_tree_node() { + // recursively delete all child nodes + for(auto& i : next) { + delete i.second; + } + } +}; + +class module_tree { +private: + // tree root + std::unordered_map root; + // used for dump + std::vector indent; + +private: 
+ void insert_core(const std::vector&); + void dump_node(module_tree_node*); + +public: + ~module_tree() { + // recursively delete all child nodes + for(auto& i : root) { + delete i.second; + } + } + const auto& get_root() const { return root; } + bool empty() const { return root.empty(); } + void insert(const std::string&); + void dump(); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/package/package.cpp b/godel-script/godel-frontend/src/package/package.cpp new file mode 100644 index 00000000..7d7bd782 --- /dev/null +++ b/godel-script/godel-frontend/src/package/package.cpp @@ -0,0 +1,367 @@ +#include "package.h" +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/util/util.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace package { + +namespace fs = std::filesystem; + +using report::error; +using util::red; +using util::white; +using util::reset; + +/** + * @brief generate format module path from file name, + * for example coref.java.gdl will be convert to: coref::java + * @param filename +*/ +std::string godel_module::mod_path_gen(const std::string& filename) const { + std::string res = ""; + size_t last = 0, pos = filename.find(".", 0); + while(pos!=std::string::npos) { + res += filename.substr(last, pos - last) + "::"; + last = pos + 1; + pos = filename.find(".", last); + } + if (last!=filename.length()) { + res += filename.substr(last, pos - last); + } + return res; +} + +/** + * @brief split path by '/' or '\\' + * @param p +*/ +std::vector godel_module::pathvec(const fs::path& p) const { + std::vector res; + for (const auto& i : fs::canonical(p)) { + res.push_back(i); + } + return res; +} + +/** + * @brief insert a format module path into package table + * @param pack package root directory path + * @param fp file canonical path + */ +void godel_module::insert(const std::vector& pack, + const fs::path& file_path) { + auto file_path_vec = pathvec(file_path); + 
std::string mod_path = ""; + + // use i = pack.size() to skip path root before + for(size_t i = pack.size(); i conflicts:\n --> " + + modules.at(mod_path) + "\n --> " + file_path.string() + ); + return; + } + + if (check_path_has_reserve_word(mod_path) || + check_path_has_invalid_character(mod_path)) { + invalid_path.push_back(file_path); + return; + } + + if (check_path_is_sub_path(file_path, mod_path)) { + return; + } + + // store module path, module name as the key, and value is the file path + modules[mod_path] = file_path; + convert_table[file_path] = mod_path; + analysed[mod_path] = module_status::unused; +} + +void godel_module::ignored_path_report() const { + if (error_path.size()) { + std::string info = "ignore package path "; + info += "including \".\", \"-\" or numbers:"; + for(auto& i : error_path) { + info += "\n - " + i.string(); + } + err.warn(info); + } + + if (invalid_path.size()) { + std::string info = "ignore package path "; + info += "including reserved words or invalid characters:"; + for(auto& i : invalid_path) { + info += "\n - " + i.string(); + } + err.warn(info); + } + + if (conflict_path.size()) { + std::string info = "ignore package path "; + info += "including other path inside or is the sub path:"; + for(const auto& i : conflict_path) { + info += "\n - " + i.file_path.string(); + info += " => [" + i.module_full_name + "]"; + info += " X [" + i.conflict_module_full_name + "]"; + } + err.warn(info); + } +} + +bool godel_module::check_path_is_sub_path(const fs::path& file_path, + const std::string& module_full_name) { + for(const auto& p : modules) { + if (p.first==module_full_name) { + continue; + } + // sub-path check, not just simple prefix check, for example: + // coref::java is not the sub-path of coref::javascript + // ``````````` ```````````^^^^^^ not just prefix + // coref::java is the sub-path of coref::java::script + // ``````````` ```````````^^ + // path is separated by "::", so simple prefix check is not enough. 
+ if (p.first.find(module_full_name)==0 && + p.first.length()>module_full_name.length() && + p.first[module_full_name.length()]==':') { + conflict_path.push_back({ + .file_path = file_path, + .module_full_name = module_full_name, + .conflict_module_full_name = p.first + }); + return true; + } + if (module_full_name.find(p.first)==0 && + module_full_name.length()>p.first.length() && + module_full_name[p.first.length()]==':') { + conflict_path.push_back({ + .file_path = file_path, + .module_full_name = module_full_name, + .conflict_module_full_name = p.first + }); + return true; + } + } + return false; +} + +bool godel_module::check_path_has_reserve_word(const std::string& path) const { + const std::unordered_set reserved = { + "int", "bool", "string", "float", "true", "false", "use", + "Self", "self", "enum", "schema", "extends", "database", + "impl", "fn", "for", "let", "as", "query", "from", "where", "select", + "if", "else", "match", "return", "in", "yield" + }; + size_t last = 0, pos = path.find("::", 0); + std::string tmp = ""; + while(pos!=std::string::npos) { + tmp = path.substr(last, pos - last); + last = pos + 2; + pos = path.find("::", last); + if (reserved.count(tmp)) { + return true; + } + } + if (last!=path.length()) { + tmp = path.substr(last, pos - last); + if (reserved.count(tmp)) { + return true; + } + } + return false; +} + +bool godel_module::check_path_has_invalid_character(const std::string& path) const { + const auto invalid_char = "[](){}!~`'\"?-+=*&^%$#@!|\\;:,.<>/"; + size_t last = 0, pos = path.find("::", 0); + std::string tmp = ""; + while(pos!=std::string::npos) { + tmp = path.substr(last, pos - last); + last = pos + 2; + pos = path.find("::", last); + if (tmp.find_first_of(invalid_char)!=std::string::npos) { + return true; + } + } + if (last!=path.length()) { + tmp = path.substr(last, pos - last); + if (tmp.find_first_of(invalid_char)!=std::string::npos) { + return true; + } + } + return false; +} + +[[nodiscard]] const error& 
godel_module::scanpkg(const std::string& directory) { + if (scanned) { + return err; + } + + scanned = true; + error_path.clear(); + invalid_path.clear(); + conflict_path.clear(); + + if (!fs::exists(directory)) { + err.err("package root <" + directory + "> does not exist."); + return err; + } + if (!fs::is_directory(directory)) { + err.err("package root <" + directory + "> is not a directory."); + return err; + } + + auto package_root = pathvec(directory); + auto canonical_package_root = fs::canonical(directory); + std::vector maybe_used_lib_file; + for(const auto& entry : fs::recursive_directory_iterator(canonical_package_root)) { + if (fs::is_directory(entry) || entry.path().extension()!=".gdl") { + continue; + } + maybe_used_lib_file.push_back(entry.path()); + } + std::sort( + maybe_used_lib_file.begin(), + maybe_used_lib_file.end(), + [](const fs::path& lhs, const fs::path& rhs) { + if (lhs.string().length()==rhs.string().length()) { + return lhs.string() < rhs.string(); + } + return lhs.string().length() < rhs.string().length(); + } + ); + for(const auto& i : maybe_used_lib_file) { + insert(package_root, i); + } + + // generate module tree structure + for(const auto& i : modules) { + tree.insert(i.first); + } + return err; +} + +void godel_module::dump() const { + ignored_path_report(); + + std::vector ordered_module_paths; + size_t maxlen = 0; + for(const auto& i : modules) { + ordered_module_paths.push_back(i.first); + maxlen = i.first.length()>maxlen? i.first.length():maxlen; + } + maxlen = maxlen>16? 
16:maxlen; + std::sort( + ordered_module_paths.begin(), + ordered_module_paths.end(), + [](const std::string& lhs, const std::string& rhs) { + if (lhs.length() == rhs.length()) { + return lhs < rhs; + } + return lhs.length() < rhs.length(); + } + ); + + if (ordered_module_paths.empty()) { + return; + } + std::clog << "\nmodules:\n"; + for(const auto& i : ordered_module_paths) { + std::clog << " " << util::rightpad(i, maxlen); + std::clog << " => " << modules.at(i) << "\n"; + } + std::clog << "\n"; +} + +void godel_module::dump_module_tree() { + if (tree.empty()) { + return; + } + + std::clog << "module tree structure:\n"; + tree.dump(); + std::clog << "\n"; +} + +const std::string& godel_module::find_file_by_module_path(const std::string& mod_name) const { + if (modules.count(mod_name)) { + return modules.at(mod_name); + } + static const std::string null_return_string = ""; + return null_return_string; +} + +const std::string& godel_module::find_module_by_file(const std::string& file_path) const { + static const std::string null_return_string = ""; + // sometimes the compiled file is in the package directory + // but we could not recognize it as a module, so the module path should be empty + if (file_path == compiled_file_path) { + return null_return_string; + } + if (convert_table.count(file_path)) { + return convert_table.at(file_path); + } + return null_return_string; +} + +void godel_module::mark_analysing(const std::string& path) { + if (analysed.count(path)) { + analysed.at(path) = module_status::analysing; + } +} + +void godel_module::mark_analysed(const std::string& path) { + if (analysed.count(path)) { + analysed.at(path) = module_status::analysed; + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/package/package.h b/godel-script/godel-frontend/src/package/package.h new file mode 100644 index 00000000..cc59c698 --- /dev/null +++ b/godel-script/godel-frontend/src/package/package.h @@ -0,0 +1,101 @@ +#pragma once + 
// Package manager for godel modules, exposed as a process-wide singleton
// (see instance()). It keeps the mapping between module paths and file paths,
// remembers the analysis status of each path, and owns the module path tree.
//
// NOTE(review): the container members and signatures below carry no template
// arguments in this copy of the file; confirm the key/element types against
// the upstream header before relying on them.
class godel_module {
public:
    // Analysis state recorded for a path; get_status() reports `unused`
    // for any path that has no entry yet.
    enum class module_status {
        unused,
        analysing,
        analysed
    };

private:
    // presumably set once the package directory has been scanned;
    // the code that writes it is not visible here -- TODO confirm
    bool scanned = false;
    // error reporter; scanpkg() returns a const reference of this type
    report::error err;

    // path of compiled file, this is not included in the module
    std::string compiled_file_path;

    // format path map to real file path
    std::unordered_map modules;

    // real file path to format path
    std::unordered_map convert_table;

    // mark one file is analysed before, to avoid loop-reference
    std::unordered_map analysed;

    // collect conflict path in this vector for error report
    std::vector error_path;
    std::vector invalid_path;
    std::vector conflict_path;

    // store the module path tree
    module_tree tree;

private:
    // Internal helpers; only their declarations are visible here, so the
    // descriptions below are limited to what the names and signatures show.
    std::string mod_path_gen(const std::string&) const;
    std::vector pathvec(const fs::path&) const;
    void insert(const std::vector&, const fs::path&);
    void ignored_path_report() const;

    // check if module path is sub path
    bool check_path_is_sub_path(const fs::path&, const std::string&);
    // check if module path including reserved words
    bool check_path_has_reserve_word(const std::string&) const;
    // check if module path including invalid characters
    bool check_path_has_invalid_character(const std::string&) const;

public:
    // singleton: returns the address of a function-local static instance,
    // so every caller shares the same object
    static godel_module* instance() {
        static godel_module package_manager;
        return &package_manager;
    }

    // Returns a reference to an error reporter; the result must not be
    // ignored ([[nodiscard]]).
    // presumably scans the package directory given as argument -- TODO confirm
    [[nodiscard]] const report::error& scanpkg(const std::string&);

    // dump package mapping
    void dump() const;

    // dump module structure in tree mode
    void dump_module_tree();

    // module path -> file path
    const std::string& find_file_by_module_path(const std::string&) const;

    // file path -> module path
    const std::string& find_module_by_file(const std::string&) const;

public:
    // presumably record the corresponding module_status for the given path
    // in `analysed`; definitions are not visible here -- TODO confirm
    void mark_analysing(const std::string&);
    void mark_analysed(const std::string&);
    // Stores the path of the file being compiled.
    void set_compiled_file_path(const std::string& path) {
        compiled_file_path = path;
    }
    // Returns the status recorded for `path`, or module_status::unused when
    // `path` has no entry in `analysed`.
    module_status get_status(const std::string& path) const {
        return analysed.count(path)? analysed.at(path):module_status::unused;
    }
    // Read-only access to the module path tree.
    const auto& get_tree() const { return tree; }
    // Read-only access to the whole path -> status table.
    const auto& get_all_module_status() const { return analysed; }
};
+ ); + panic_mode(panic); + return; + } + next(); +} + +void parse::panic_mode(std::unordered_set panic) { + // make sure it skips at least one token + next(); + while (!panic.count(toks[ptr].type) && toks[ptr].type!=tok::tok_eof) { + next(); + } +} + +void parse::update_location(ast_node* node) { + if (!ptr) { + return; + } + node->update_location(toks[ptr-1].location); +} + +bool parse::lookahead(tok t) const { + return toks[ptr].type==t; +} + +bool parse::lookahead_elsif() const { + return toks[ptr].type==tok::tok_else && toks[ptr+1].type==tok::tok_if; +} + +bool parse::lookahead_generic() const { + // check < + if (toks[ptr].type!=tok::tok_less) { + return false; + } + + // check type def + auto index = ptr+1; + // type def may begin with "*", check it, though it is wrong + if (toks[index].type==tok::tok_mult) { + index++; + } + + bool should_be_identifier = true; + while(toks[index].type!=tok::tok_grt && toks[index].type!=tok::tok_eof) { + if (should_be_identifier && toks[index].type!=tok::tok_id) { + return false; + } + if (!should_be_identifier && toks[index].type!=tok::tok_plink) { + return false; + } + should_be_identifier = !should_be_identifier; + index++; + } + + if (toks[index].type!=tok::tok_grt) { + return false; + } + return true; +} + +ast_null* parse::null() { + return new ast_null(toks[ptr].location); +} + +identifier* parse::id() { + identifier* node = new identifier(toks[ptr].location, toks[ptr].content); + match(tok::tok_id); + return node; +} + +number_literal* parse::num() { + number_literal* node = nullptr; + if (toks[ptr].content.find('.')!=std::string::npos) { + node = new number_literal(toks[ptr].location, util::to_float(toks[ptr].content)); + } else { + node = new number_literal(toks[ptr].location, util::to_int64(toks[ptr].content)); + } + match(tok::tok_num); + return node; +} + +string_literal* parse::str() { + string_literal* node = new string_literal(toks[ptr].location, toks[ptr].content); + match(tok::tok_str); + return node; +} + 
+type_def* parse::type() { + type_def* node = new type_def(toks[ptr].location); + node->set_dataset_flag(lookahead(tok::tok_mult)); + + // mark a set type + if (lookahead(tok::tok_mult)) { + match(tok::tok_mult); // * + } + + node->add_path(toks[ptr].content); + match(tok::tok_id, "variable type definition"); + + // mark full path symbol + while(lookahead(tok::tok_plink)) { + match(tok::tok_plink); + node->add_path(toks[ptr].content); + match(tok::tok_id, "variable type definition"); + } + update_location(node); + return node; +} + +annotation* parse::annot() { + if (!lookahead(tok::tok_anno)) { + return new annotation(toks[ptr].location, ""); + } + annotation* node = new annotation(toks[ptr].location, toks[ptr].content); + match(tok::tok_anno); + if (!lookahead(tok::tok_lcurve)) { + update_location(node); + return node; + } + + match(tok::tok_lcurve); + + if (lookahead(tok::tok_str)) { + // property string set here must has length > 2 + // because string literal is quoted by "" + node->set_property_string(toks[ptr].content); + match(tok::tok_str, "annotation property string"); + } else { + while (lookahead(tok::tok_id)) { + const auto name = toks[ptr].content; + match(tok::tok_id, "annotation properties"); + match(tok::tok_equal); + const auto value = toks[ptr].content; + match(tok::tok_str, "annotation properties"); + + node->add_property(name, value); + + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + } + + match(tok::tok_rcurve, "annotation properties"); + update_location(node); + return node; +} + +multi_use_stmt* parse::multi_use() { + multi_use_stmt* node = new multi_use_stmt(toks[ptr].location); + match(tok::tok_lbrace); + while (lookahead(tok::tok_id)) { + node->add_import_symbol(id()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + 
} + } + match(tok::tok_rbrace, "multiple use statement", { + tok::tok_use, tok::tok_schema, + tok::tok_enum, tok::tok_fn, + tok::tok_database, tok::tok_impl + }); + update_location(node); + return node; +} + +void parse::gen_enum_members(enum_decl* node) { + while (lookahead(tok::tok_id)) { + node->add_member(id()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + if (node->get_member().size()) { + update_location(node); + } + return; +} + +void parse::gen_schema_members(schema_decl* node) { + while (lookahead(tok::tok_id) || lookahead(tok::tok_anno)) { + node->add_field(gen_schema_field()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id) || lookahead(tok::tok_anno)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + if (node->get_fields().size()) { + update_location(node); + } + return; +} + +schema_field* parse::gen_schema_field() { + schema_field* node= new schema_field(toks[ptr].location); + node->set_annotation(annot()); + node->set_identifier(id()); + match(tok::tok_colon, "schema field"); + node->set_field_type(type()); + update_location(node); + return node; +} + +void parse::database_tables(database_decl* node) { + while (lookahead(tok::tok_id)) { + node->add_table(gen_database_table()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + if (node->get_tables().size()) { + update_location(node); + } + return; +} + +database_table* parse::gen_database_table() { + database_table* node = new database_table(toks[ptr].location); + node->set_name(id()); + match(tok::tok_colon); + node->set_type(type()); + if (lookahead(tok::tok_as)) { + match(tok::tok_as); + if (!lookahead(tok::tok_str)) { + err.err(toks[ptr-1].location, "operator \"as\" needs string literal."); + return 
node; + } + node->set_real_name(str()); + } + update_location(node); + return node; +} + +std::vector parse::func_list() { + std::vector res; + while (lookahead(tok::tok_fn) || + lookahead(tok::tok_anno) || + lookahead(tok::tok_pub)) { + res.push_back(function()); + if (lookahead(tok::tok_eof)) { + break; + } + } + return res; +} + +std::vector parse::parameter_list() { + std::vector res; + while (lookahead(tok::tok_id)) { + res.push_back(parameter()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + return res; +} + +var_decl* parse::parameter() { + var_decl* node = new var_decl(toks[ptr].location); + node->set_var_name(id()); + if (lookahead(tok::tok_colon)) { + match(tok::tok_colon); + node->set_type(type()); + } + update_location(node); + return node; +} + +block_stmt* parse::block() { + block_stmt* node = new block_stmt(toks[ptr].location); + while(!lookahead(tok::tok_rbrace)) { + node->add_statement(statement()); + if (lookahead(tok::tok_semi)) { + next(); + } + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +stmt* parse::statement() { + switch (toks[ptr].type) { + case tok::tok_let: return gen_let_stmt(); + case tok::tok_if: return gen_cond_stmt(); + case tok::tok_for: return gen_for_stmt(); + case tok::tok_match: return gen_match_stmt(); + case tok::tok_lbrkt: return gen_fact_stmt(); + case tok::tok_ret: return gen_ret_stmt(); + case tok::tok_yield: return gen_ret_stmt(); + default: { + const auto& loc = toks[ptr].location; + auto ex = or_expr(); + auto res = new in_block_expr(loc, ex); + update_location(res); + return res; + } + } + // unreachable + err.err(toks[ptr].location, + "invalid token " + err_info.at(toks[ptr].type) + "." 
+ ); + panic_mode({ + tok::tok_let, tok::tok_if, tok::tok_match, + tok::tok_ret, tok::tok_yield + }); + return (stmt*)null(); +} + +let_stmt* parse::gen_let_stmt() { + let_stmt* node = new let_stmt(toks[ptr].location); + + match(tok::tok_let); + match(tok::tok_lcurve); + node->add_symbol(let_def()); + while(lookahead(tok::tok_comma)) { + match(tok::tok_comma); + node->add_symbol(let_def()); + } + match(tok::tok_rcurve); + match(tok::tok_lbrace); + if (!lookahead(tok::tok_rbrace)) { + node->set_code_block(block()); + } + match(tok::tok_rbrace); + update_location(node); + return node; +} + +var_decl* parse::let_def() { + var_decl* node = def(); + if (lookahead(tok::tok_in)) { + err.err(toks[ptr].location, + "in let definition: \"in\" is not allowed here.", + "maybe use \"=\"?"); + match(tok::tok_in); + } else if (lookahead(tok::tok_equal)) { + match(tok::tok_equal); + } else { + err.err(toks[ptr].location, + "in let definition: \"" + toks[ptr].content + + "\" is not allowed here.", + "maybe use \"=\"?" 
+ ); + match(toks[ptr].type); + } + node->set_init_value(or_expr()); + update_location(node); + return node; +} + +cond_stmt* parse::gen_cond_stmt() { + cond_stmt* node = new cond_stmt(toks[ptr].location); + + node->set_if_stmt(gen_if_stmt()); + while (lookahead_elsif()) { + node->add_elsif_stmt(gen_elsif_stmt()); + if (lookahead(tok::tok_eof)) { + break; + } + } + if (lookahead(tok::tok_else)) { + node->set_else_stmt(gen_else_stmt()); + } + update_location(node); + return node; +} + +for_stmt* parse::gen_for_stmt() { + for_stmt* node = new for_stmt(toks[ptr].location); + + match(tok::tok_for); + match(tok::tok_lcurve, "for statement"); + node->add_symbol(for_def()); + while(lookahead(tok::tok_comma)) { + match(tok::tok_comma); + node->add_symbol(for_def()); + } + match(tok::tok_rcurve, "for statement"); + match(tok::tok_lbrace, "for statement"); + if (!lookahead(tok::tok_rbrace)) { + node->set_code_block(block()); + } + match(tok::tok_rbrace, "for statement"); + update_location(node); + return node; +} + +var_decl* parse::for_def() { + var_decl* node = def(); + if (lookahead(tok::tok_equal)) { + err.err(toks[ptr].location, + "in for definition: \"=\" is not allowed here.", + "maybe use \"in\"?" + ); + match(tok::tok_equal); + } else if (lookahead(tok::tok_in)) { + match(tok::tok_in); + } else { + err.err(toks[ptr].location, + "in for definition: \"" + toks[ptr].content + + "\" is not allowed here.", + "maybe use \"in\"?" 
+ ); + match(toks[ptr].type); + } + node->set_init_value(or_expr()); + update_location(node); + return node; +} + +match_stmt* parse::gen_match_stmt() { + match_stmt* node = new match_stmt(toks[ptr].location); + + match(tok::tok_match, "match statement"); + match(tok::tok_lcurve, "match statement"); + node->set_match_condition(or_expr()); + match(tok::tok_rcurve, "match statement"); + match(tok::tok_lbrace, "match statement"); + match_pairs(node); + match(tok::tok_rbrace, "match statement"); + update_location(node); + return node; +} + +fact_stmt* parse::gen_fact_stmt() { + fact_stmt* node = new fact_stmt(toks[ptr].location); + + match(tok::tok_lbrkt, "fact statement"); + while(lookahead(tok::tok_lbrace)) { + node->add_fact(gen_fact_data()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_lbrace)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + match(tok::tok_rbrkt, "fact statement"); + update_location(node); + return node; +} + +ret_stmt* parse::gen_ret_stmt() { + ret_stmt* node = new ret_stmt(toks[ptr].location); + + if (lookahead(tok::tok_ret)) { + match(tok::tok_ret); + } else { + match(tok::tok_yield); + node->set_is_yield(); + } + node->set_return_value(or_expr()); + update_location(node); + return node; +} + +var_decl* parse::def() { + var_decl* node = new var_decl(toks[ptr].location); + + node->set_var_name(id()); + if (lookahead(tok::tok_colon)) { + match(tok::tok_colon); + node->set_type(type()); + } + update_location(node); + return node; +} + +if_stmt* parse::gen_if_stmt() { + if_stmt* node = new if_stmt(toks[ptr].location, if_stmt::type::cond_if); + + match(tok::tok_if, "if statement"); + match(tok::tok_lcurve, "if statement"); + node->set_condition(or_expr()); + match(tok::tok_rcurve, "if statement"); + match(tok::tok_lbrace, "if statement"); + if (!lookahead(tok::tok_rbrace)) { + node->set_code_block(block()); + } + match(tok::tok_rbrace, "if statement"); + update_location(node); + 
return node; +} + +if_stmt* parse::gen_elsif_stmt() { + if_stmt* node = new if_stmt(toks[ptr].location, if_stmt::type::cond_elsif); + + match(tok::tok_else, "else if statement"); + match(tok::tok_if, "else if statement"); + match(tok::tok_lcurve, "else if statement"); + node->set_condition(or_expr()); + match(tok::tok_rcurve, "else if statement"); + match(tok::tok_lbrace, "else if statement"); + if (!lookahead(tok::tok_rbrace)) { + node->set_code_block(block()); + } + match(tok::tok_rbrace, "else if statement"); + update_location(node); + return node; +} + +if_stmt* parse::gen_else_stmt() { + if_stmt* node = new if_stmt(toks[ptr].location, if_stmt::type::cond_else); + + match(tok::tok_else, "else statement"); + match(tok::tok_lbrace, "else statement"); + if (!lookahead(tok::tok_rbrace)) { + node->set_code_block(block()); + } + match(tok::tok_rbrace, "else statement"); + update_location(node); + return node; +} + +void parse::match_pairs(match_stmt* node) { + while (lookahead(tok::tok_num) || lookahead(tok::tok_str)) { + node->add_match_pair(gen_match_pair()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_num) || lookahead(tok::tok_str)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + if (node->get_match_pair_list().size()) { + update_location(node); + } +} + +match_pair* parse::gen_match_pair() { + match_pair* node = new match_pair(toks[ptr].location); + + node->set_literal(literal()); + match(tok::tok_marrow, "match pair", { + tok::tok_id, tok::tok_num, + tok::tok_str, tok::tok_sub, + tok::tok_lcurve, tok::tok_not + }); + node->set_statement(statement()); + update_location(node); + return node; +} + +fact_data* parse::gen_fact_data() { + fact_data* node = new fact_data(toks[ptr].location); + + match(tok::tok_lbrace, "fact data"); + while(lookahead(tok::tok_num) || lookahead(tok::tok_str)) { + node->add_literal(lookahead(tok::tok_num)? 
(expr*)num():(expr*)str()); + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_num) || lookahead(tok::tok_str)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + match(tok::tok_rbrace, "fact data"); + update_location(node); + return node; +} + +binary_operator* parse::or_expr() { + binary_operator* node = and_expr(); + if (!lookahead(tok::tok_or)) { + return node; + } + while (lookahead(tok::tok_or)) { + binary_operator* tmp = new binary_operator(toks[ptr].location); + tmp->set_operator(binary_operator::type::logical_or); + match(tok::tok_or); + tmp->update_location(node->get_location()); + tmp->set_left(node); + tmp->set_right(and_expr()); + node = tmp; + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +binary_operator* parse::and_expr() { + binary_operator* node = nullptr; + + if (lookahead(tok::tok_not)) { + node = (binary_operator*)not_expr(); + } else { + node = cmp_expr(); + } + if (!lookahead(tok::tok_and)) { + return node; + } + while (lookahead(tok::tok_and)) { + binary_operator* tmp = new binary_operator(toks[ptr].location); + tmp->set_operator(binary_operator::type::logical_and); + tmp->update_location(node->get_location()); + tmp->set_left(node); + match(tok::tok_and); + if (lookahead(tok::tok_not)) { + tmp->set_right(not_expr()); + } else { + tmp->set_right(cmp_expr()); + } + node = tmp; + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +expr* parse::curved_expr() { + match(tok::tok_lcurve, "curve quoted expression"); + expr* node = or_expr(); + match(tok::tok_rcurve, "curve quoted expression"); + update_location(node); + return node; +} + +unary_operator* parse::neg_expr() { + unary_operator* node = new unary_operator(toks[ptr].location); + node->set_operator(unary_operator::type::arithmetic_negation); + + match(tok::tok_sub); + if (lookahead(tok::tok_id) || lookahead(tok::tok_num) || + 
lookahead(tok::tok_str) || lookahead(tok::tok_lcurve)) { + node->set_child(symcall()); + } else if (lookahead(tok::tok_sub)) { + node->set_child(neg_expr()); + } else { + err.err(toks[ptr].location, "expected expressions here."); + panic_mode({ + tok::tok_mult, tok::tok_div, + tok::tok_add, tok::tok_sub, + tok::tok_rcurve + }); + } + update_location(node); + return node; +} + +unary_operator* parse::not_expr() { + unary_operator* node = new unary_operator(toks[ptr].location); + node->set_operator(unary_operator::type::logical_negation); + + match(tok::tok_not); + node->set_child(cmp_expr()); + update_location(node); + return node; +} + +binary_operator* parse::cmp_expr() { + binary_operator* node = additive_expr(); + while (lookahead(tok::tok_equal) || + lookahead(tok::tok_neq) || + lookahead(tok::tok_less) || + lookahead(tok::tok_leq) || + lookahead(tok::tok_grt) || + lookahead(tok::tok_geq) || + lookahead(tok::tok_in) + ) { + binary_operator* tmp = new binary_operator(toks[ptr].location); + switch (toks[ptr].type) { + case tok::tok_equal: + tmp->set_operator(binary_operator::type::compare_equal); + break; + case tok::tok_neq: + tmp->set_operator(binary_operator::type::compare_not_equal); + break; + case tok::tok_less: + tmp->set_operator(binary_operator::type::compare_less); + break; + case tok::tok_leq: + tmp->set_operator(binary_operator::type::compare_less_equal); + break; + case tok::tok_grt: + tmp->set_operator(binary_operator::type::compare_great); + break; + case tok::tok_geq: + tmp->set_operator(binary_operator::type::compare_great_equal); + break; + case tok::tok_in: + tmp->set_operator(binary_operator::type::in); + break; + default: break; + } + tmp->update_location(node->get_location()); + tmp->set_left(node); + match(toks[ptr].type); + tmp->set_right(additive_expr()); + node = tmp; + } + update_location(node); + return node; +} + +binary_operator* parse::additive_expr() { + binary_operator* node = multiple_expr(); + while (lookahead(tok::tok_add) || 
lookahead(tok::tok_sub)) { + binary_operator* tmp = new binary_operator(toks[ptr].location); + if (lookahead(tok::tok_add)) { + tmp->set_operator(binary_operator::type::add); + } else { + tmp->set_operator(binary_operator::type::sub); + } + match(toks[ptr].type); + tmp->update_location(node->get_location()); + tmp->set_left(node); + tmp->set_right(multiple_expr()); + node = tmp; + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +binary_operator* parse::multiple_expr() { + binary_operator* node = new binary_operator(toks[ptr].location); + + if (lookahead(tok::tok_id) || lookahead(tok::tok_num) || + lookahead(tok::tok_true) || lookahead(tok::tok_false) || + lookahead(tok::tok_str) || lookahead(tok::tok_lcurve)) { + node = (binary_operator*)symcall(); + } else if (lookahead(tok::tok_sub)) { + node = (binary_operator*)neg_expr(); + } else { + err.err(toks[ptr].location, "expected expression here."); + panic_mode({tok::tok_id, tok::tok_num, tok::tok_str, tok::tok_sub}); + return node; + } + while (lookahead(tok::tok_mult) || lookahead(tok::tok_div)) { + binary_operator* tmp = new binary_operator(toks[ptr].location); + if (lookahead(tok::tok_mult)) { + tmp->set_operator(binary_operator::type::mult); + } else { + tmp->set_operator(binary_operator::type::div); + } + match(toks[ptr].type); + tmp->update_location(node->get_location()); + tmp->set_left(node); + if (lookahead(tok::tok_id) || lookahead(tok::tok_num) || + lookahead(tok::tok_str) || lookahead(tok::tok_lcurve)) { + tmp->set_right(symcall()); + } else if (lookahead(tok::tok_sub)) { + tmp->set_right(neg_expr()); + } + node = tmp; + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +call_root* parse::symcall() { + call_root* node = new call_root(toks[ptr].location); + + node->set_call_head(symhead()); + while (lookahead(tok::tok_dot) || lookahead(tok::tok_plink)) { + if (lookahead(tok::tok_dot)) { + 
node->add_call_chain(symfield()); + } else if (lookahead(tok::tok_plink)) { + node->add_call_chain(sympath()); + } + + if (lookahead(tok::tok_eof)) { + break; + } + } + update_location(node); + return node; +} + +call_head* parse::symhead() { + call_head* node = new call_head(toks[ptr].location); + + if (lookahead(tok::tok_num) || lookahead(tok::tok_str) || + lookahead(tok::tok_true) || lookahead(tok::tok_false)) { + node->set_first_expression(literal()); + return node; + } else if (lookahead(tok::tok_lcurve)) { + node->set_first_expression(curved_expr()); + return node; + } else { + node->set_first_expression(id()); + } + + if (lookahead(tok::tok_lcurve)) { + node->set_func_call(funcall()); + } else if (lookahead(tok::tok_lbrace)) { + // initializer must have this format: + // schema_name {identifier : value} + node->set_initializer(gen_initializer()); + } + update_location(node); + return node; +} + +call_expr* parse::symfield() { + call_expr* node = new call_expr(toks[ptr].location); + node->set_call_type(call_expr::type::get_field); + + match(tok::tok_dot); + node->set_field_name(id()); + if (lookahead_generic()) { + match(tok::tok_less); + node->set_generic_type(type()); + match(tok::tok_grt, "get field"); + } + if (lookahead(tok::tok_lcurve)) { + node->set_func_call(funcall()); + } + update_location(node); + return node; +} + +call_expr* parse::sympath() { + call_expr* node = new call_expr(toks[ptr].location); + node->set_call_type(call_expr::type::get_path); + + match(tok::tok_plink); + node->set_field_name(id()); + if (lookahead_generic()) { + match(tok::tok_less); + node->set_generic_type(type()); + match(tok::tok_grt, "get path"); + } + if (lookahead(tok::tok_lcurve)) { + node->set_func_call(funcall()); + } else if (lookahead(tok::tok_lbrace)) { + node->set_initializer(gen_initializer()); + } + update_location(node); + return node; +} + +initializer* parse::gen_initializer() { + initializer* node = new initializer(toks[ptr].location); + + 
match(tok::tok_lbrace, "initializer"); + while (lookahead(tok::tok_id) || lookahead(tok::tok_ellipsis)) { + if (lookahead(tok::tok_id)) { + node->add_field_pair(gen_initializer_pair()); + } else { + node->add_spread_expr(gen_spread_expr()); + } + if (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + } else if (lookahead(tok::tok_id) || lookahead(tok::tok_ellipsis)) { + err.err(toks[ptr-1].location, "expected \",\" here."); + } + } + match(tok::tok_rbrace, "initializer"); + update_location(node); + return node; +} + +initializer_pair* parse::gen_initializer_pair() { + initializer_pair* node = new initializer_pair(toks[ptr].location); + + node->set_field_name(id()); + match(tok::tok_colon, "initializer pair"); + node->set_field_value(or_expr()); + update_location(node); + return node; +} + +spread_expr* parse::gen_spread_expr() { + const auto& ellipsis_location = toks[ptr].location; + match(tok::tok_ellipsis); + auto node = new spread_expr(ellipsis_location, or_expr()); + update_location(node); + return node; +} + +func_call* parse::funcall() { + func_call* node = new func_call(toks[ptr].location); + match(tok::tok_lcurve, "function call"); + gen_arglist(node); + match(tok::tok_rcurve, "function call", {tok::tok_rcurve}); + update_location(node); + return node; +} + +void parse::gen_arglist(func_call* node) { + if (lookahead(tok::tok_rcurve)) { + return; + } + node->add_argument(or_expr()); + // follow set of '(' + std::unordered_set panics = { + tok::tok_id, tok::tok_num, tok::tok_str, + tok::tok_lcurve, tok::tok_sub + }; + // check if lack comma between arguments + bool skip_comma = (!lookahead(tok::tok_comma) && panics.count(toks[ptr].type)); + while (lookahead(tok::tok_comma) || skip_comma) { + if (lookahead(tok::tok_eof)) { + break; + } + if (skip_comma) { + err.err(toks[ptr-1].location, "expected \",\" here."); + panic_mode(panics); + } else { + match(tok::tok_comma, "argument list", panics); + } + node->add_argument(or_expr()); + skip_comma = 
(!lookahead(tok::tok_comma) && panics.count(toks[ptr].type)); + } + update_location(node); +} + +expr* parse::literal() { + ast_null* node = null(); + if (lookahead(tok::tok_num)) { + return num(); + } else if (lookahead(tok::tok_str)) { + return str(); + } else if (lookahead(tok::tok_true) || lookahead(tok::tok_false)) { + auto res = new boolean_literal(toks[ptr].location, toks[ptr].content=="true"); + match(toks[ptr].type); + return res; + } else if (lookahead(tok::tok_eof)) { + return node; + } + err.err(toks[ptr].location, "expected literals or calculations here."); + panic_mode({tok::tok_marrow, tok::tok_mult, tok::tok_div}); + return node; +} + +var_decl* parse::query_from_list() { + var_decl* node = new var_decl(toks[ptr].location); + node->set_var_name(id()); + if (lookahead(tok::tok_in)) { + match(tok::tok_in); + node->set_init_value(or_expr()); + } + update_location(node); + return node; +} + +query_column* parse::query_single_column() { + query_column* node = new query_column(toks[ptr].location); + node->set_column_value(or_expr()); + if (lookahead(tok::tok_as)) { + match(tok::tok_as); + node->set_column_name(id()); + } + return node; +} + +function_decl* parse::function() { + function_decl* node = new function_decl(toks[ptr].location); + + // get annotations + while(lookahead(tok::tok_anno)) { + node->add_annotation(annot()); + } + + if (lookahead(tok::tok_pub)) { + node->set_public(); + match(tok::tok_pub); + } + + match(tok::tok_fn, "function declaration"); + node->set_name(id()); + match(tok::tok_lcurve, "function declaration"); + for(auto i : parameter_list()) { + node->add_parameter(i); + } + match(tok::tok_rcurve, "function declaration"); + if (lookahead(tok::tok_arrow)) { + match(tok::tok_arrow); + node->set_return_type(type()); + } + if (lookahead(tok::tok_semi)) { + match(tok::tok_semi); + update_location(node); + return node; + } + if (!lookahead(tok::tok_lbrace)) { + update_location(node); + return node; + } + match(tok::tok_lbrace, "function 
declaration"); + node->set_code_block(block()); + match(tok::tok_rbrace, "function declaration"); + update_location(node); + return node; +} + +enum_decl* parse::enums() { + enum_decl* node = new enum_decl(toks[ptr].location); + + match(tok::tok_enum, "enum declaration"); + node->set_name(id()); + match(tok::tok_lbrace, "enum declaration"); + gen_enum_members(node); + match(tok::tok_rbrace, "enum declaration"); + update_location(node); + return node; +} + +schema_decl* parse::schema() { + schema_decl* node = new schema_decl(toks[ptr].location); + + match(tok::tok_schema, "schema declaration"); + node->set_name(id()); + if (lookahead(tok::tok_extends)) { + match(tok::tok_extends); + node->set_parent_name(type()); + } + match(tok::tok_lbrace, "schema declaration"); + gen_schema_members(node); + match(tok::tok_rbrace, "schema declaration"); + update_location(node); + return node; +} + +use_stmt* parse::use() { + use_stmt* node = new use_stmt(toks[ptr].location); + + match(tok::tok_use); + node->add_path(id()); + while (lookahead(tok::tok_plink)) { + match(tok::tok_plink); + if (lookahead(tok::tok_id)) { + node->add_path(id()); + } else if (lookahead(tok::tok_mult)) { + match(tok::tok_mult); + node->set_use_all(); + break; + } else if (lookahead(tok::tok_lbrace)) { + node->set_multi_use(multi_use()); + break; + } else { + err.err(toks[ptr].location, "expected identifier or \"*\" here"); + break; + } + } + + // if the last one is single identifier, merge it to a multi_use_stmt + if (!node->is_use_all() && !node->get_multi_use()) { + identifier* back = node->get_path().back(); + node->get_path().pop_back(); + multi_use_stmt* tmp = new multi_use_stmt(toks[ptr].location); + tmp->add_import_symbol(back); + node->set_multi_use(tmp); + } + + update_location(node); + return node; +} + +impl_block* parse::implement() { + impl_block* node = new impl_block(toks[ptr].location); + + match(tok::tok_impl, "impl block"); + node->set_impl_schema_name(id()); + match(tok::tok_lbrace, 
"impl block"); + for(auto i : func_list()) { + node->add_function(i); + } + match(tok::tok_rbrace, "impl block"); + update_location(node); + return node; +} + +database_decl* parse::database() { + database_decl* node = new database_decl(toks[ptr].location); + match(tok::tok_database, "database declaration"); + node->set_name(id()); + match(tok::tok_lbrace, "database declaration"); + database_tables(node); + match(tok::tok_rbrace, "database declaration"); + update_location(node); + return node; +} + +query_decl* parse::query() { + query_decl* node = new query_decl(toks[ptr].location); + match(tok::tok_query, "query declaration"); + node->set_name(id()); + match(tok::tok_from, "query declaration"); + node->add_var_decl(query_from_list()); + while(lookahead(tok::tok_comma)) { + match(tok::tok_comma); + node->add_var_decl(query_from_list()); + } + if (lookahead(tok::tok_where)) { + match(tok::tok_where); + node->set_condition(or_expr()); + } + match(tok::tok_select, "query declaration"); + node->add_output_column(query_single_column()); + while (lookahead(tok::tok_comma)) { + match(tok::tok_comma); + node->add_output_column(query_single_column()); + } + update_location(node); + return node; +} + +[[nodiscard]] +const error& parse::analyse(const std::vector& tokens) { + ptr = 0; + toks = tokens.data(); + root->set_start_line(toks[ptr].location.start_line); + root->set_start_column(toks[ptr].location.start_column); + root->set_end_line(toks[ptr].location.end_line); + root->set_end_column(toks[ptr].location.end_column); + root->set_file(toks[ptr].location.file); + + while(lookahead(tok::tok_use)) { + root->add_use_statement(use()); + } + + while(!lookahead(tok::tok_eof)) { + switch (toks[ptr].type) { + case tok::tok_anno: + case tok::tok_pub: + case tok::tok_fn: root->add_declaration(function()); break; + case tok::tok_enum: root->add_declaration(enums()); break; + case tok::tok_schema: root->add_declaration(schema()); break; + case tok::tok_impl: 
root->add_declaration(implement()); break; + case tok::tok_database: root->add_declaration(database()); break; + case tok::tok_query: root->add_declaration(query()); break; + case tok::tok_use: + err.err(toks[ptr].location, "cannot import module here."); + panic_mode({ + tok::tok_fn, tok::tok_enum, tok::tok_schema, + tok::tok_use, tok::tok_impl, tok::tok_database, + tok::tok_query + }); + break; + default: + err.err(toks[ptr].location, + "invalid token " + err_info.at(toks[ptr].type) + "." + ); + panic_mode({ + tok::tok_fn, tok::tok_enum, tok::tok_schema, + tok::tok_use, tok::tok_impl, + tok::tok_database, tok::tok_query + }); + break; + } + } + return err; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/parse.h b/godel-script/godel-frontend/src/parse.h new file mode 100644 index 00000000..7bdf7888 --- /dev/null +++ b/godel-script/godel-frontend/src/parse.h @@ -0,0 +1,173 @@ +#pragma once + +#include "lexer.h" +#include "godel-frontend/src/error/error.h" +#include "ast/ast_node.h" +#include "ast/ast_root.h" +#include "ast/decl.h" +#include "ast/stmt.h" +#include "ast/expr.h" + +#include +#include +#include +#include + +namespace godel { + +using report::error; + +class parse { +private: + uint32_t ptr; + error& err; + const token* toks; + ast_root* root; + + const std::unordered_map err_info = { + {tok::tok_err, "" }, + {tok::tok_use, "\"use\"" }, + {tok::tok_pub, "\"pub\"" }, + {tok::tok_schema, "\"schema\"" }, + {tok::tok_extends, "\"extends\"" }, + {tok::tok_database, "\"database\"" }, + {tok::tok_fn, "\"fn\"" }, + {tok::tok_enum, "\"enum\"" }, + {tok::tok_impl, "\"impl\"" }, + {tok::tok_for, "\"for\"" }, + {tok::tok_let, "\"let\"" }, + {tok::tok_if, "\"if\"" }, + {tok::tok_else, "\"else\"" }, + {tok::tok_true, "\"true\"" }, + {tok::tok_false, "\"false\"" }, + {tok::tok_match, "\"match\"" }, + {tok::tok_yield, "\"yield\"" }, + {tok::tok_ret, "\"return\"" }, + {tok::tok_query, "\"query\"" }, + {tok::tok_from, "\"from\"" }, + 
{tok::tok_where, "\"where\"" }, + {tok::tok_select, "\"select\"" }, + {tok::tok_id, "identifier" }, + {tok::tok_num, "\"number\"" }, + {tok::tok_str, "\"string\"" }, + {tok::tok_anno, "\"annotation\""}, + {tok::tok_in, "\"in\"" }, + {tok::tok_as, "\"as\"" }, + {tok::tok_lcurve, "\"(\"" }, + {tok::tok_rcurve, "\")\"" }, + {tok::tok_lbrkt, "\"[\"" }, + {tok::tok_rbrkt, "\"]\"" }, + {tok::tok_lbrace, "\"{\"" }, + {tok::tok_rbrace, "\"}\"" }, + {tok::tok_semi, "\";\"" }, + {tok::tok_comma, "\",\"" }, + {tok::tok_colon, "\":\"" }, + {tok::tok_plink, "\"::\"" }, + {tok::tok_and, "\"&&\"" }, + {tok::tok_or, "\"||\"" }, + {tok::tok_not, "\"!\"" }, + {tok::tok_equal, "\"=\"" }, + {tok::tok_add, "\"+\"" }, + {tok::tok_sub, "\"-\"" }, + {tok::tok_mult, "\"*\"" }, + {tok::tok_div, "\"/\"" }, + {tok::tok_grt, "\">\"" }, + {tok::tok_geq, "\">=\"" }, + {tok::tok_less, "\"<\"" }, + {tok::tok_leq, "\"<=\"" }, + {tok::tok_neq, "\"!=\"" }, + {tok::tok_dot, "\".\"" }, + {tok::tok_ellipsis, "\"..\"" }, + {tok::tok_arrow, "\"->\"" }, + {tok::tok_marrow, "\"=>\"" }, + {tok::tok_eof, "" } + }; + +private: + void next(); + void match(tok, const std::string&, std::unordered_set); + void panic_mode(std::unordered_set); + void update_location(ast_node*); + + bool lookahead(tok) const; + bool lookahead_elsif() const; + bool lookahead_generic() const; + + ast_null* null(); + identifier* id(); + number_literal* num(); + string_literal* str(); + type_def* type(); + annotation* annot(); + + multi_use_stmt* multi_use(); + void gen_enum_members(enum_decl*); + void gen_schema_members(schema_decl*); + schema_field* gen_schema_field(); + void database_tables(database_decl*); + database_table* gen_database_table(); + std::vector func_list(); + std::vector parameter_list(); + var_decl* parameter(); + block_stmt* block(); + + stmt* statement(); + let_stmt* gen_let_stmt(); + var_decl* let_def(); + cond_stmt* gen_cond_stmt(); + for_stmt* gen_for_stmt(); + var_decl* for_def(); + match_stmt* gen_match_stmt(); 
+ fact_stmt* gen_fact_stmt(); + ret_stmt* gen_ret_stmt(); + var_decl* def(); + + if_stmt* gen_if_stmt(); + if_stmt* gen_elsif_stmt(); + if_stmt* gen_else_stmt(); + void match_pairs(match_stmt*); + match_pair* gen_match_pair(); + fact_data* gen_fact_data(); + + binary_operator* or_expr(); + binary_operator* and_expr(); + expr* curved_expr(); + unary_operator* neg_expr(); + unary_operator* not_expr(); + binary_operator* cmp_expr(); + binary_operator* additive_expr(); + binary_operator* multiple_expr(); + + call_root* symcall(); + call_head* symhead(); + call_expr* symfield(); + call_expr* sympath(); + initializer* gen_initializer(); + initializer_pair* gen_initializer_pair(); + spread_expr* gen_spread_expr(); + func_call* funcall(); + void gen_arglist(func_call*); + expr* literal(); + var_decl* query_from_list(); + query_column* query_single_column(); + + function_decl* function(); + enum_decl* enums(); + schema_decl* schema(); + use_stmt* use(); + impl_block* implement(); + database_decl* database(); + query_decl* query(); + +public: + parse(error& err_module): + ptr(0), err(err_module), toks(nullptr), + root(new ast_root({0, 0, 0, 0, ""})) {} + ~parse() { delete root; } + auto result() { return root; } + +public: + [[nodiscard]] const error& analyse(const std::vector&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/annotation_checker.cpp b/godel-script/godel-frontend/src/sema/annotation_checker.cpp new file mode 100644 index 00000000..adc45bdc --- /dev/null +++ b/godel-script/godel-frontend/src/sema/annotation_checker.cpp @@ -0,0 +1,151 @@ +#include "godel-frontend/src/sema/annotation_checker.h" + +namespace godel { + +void input_annotation_checker::check(report::error& err, const annot& annotation) { + if (annotation.property_map.empty()) { + err.err(annotation.location, + "properties \"format\" and \"file\" are needed.", + "expected format: @input(format=\"\", file=\"\")" + ); + return; + } + + if 
(!annotation.property_map.count("format")) { + err.err(annotation.location, + "property \"format\" is needed.", + "expected format: @input(format=\"\", file=\"\")" + ); + return; + } + if (!annotation.property_map.count("file")) { + err.err(annotation.location, + "property \"file\" is needed.", + "expected format: @input(format=\"\", file=\"\")" + ); + return; + } + + const auto& format = annotation.property_map.at("format"); + if (!supported_formats.count(format)) { + err.err(annotation.location, + "format " + format + " is not supported.", + "support \"json\", \"sqlite\", \"csv\"." + ); + } + + for(const auto& i : annotation.property_map) { + if (i.first != "format" && i.first != "file") { + err.err(annotation.location, + "unknown property \"" + i.first + "\".", + "only support \"format\" and \"file\"." + ); + return; + } + } +} + +void output_annotation_checker::check(report::error& err, const annot& annotation) { + // correct annotation `@output` + if (annotation.property.empty() && annotation.property_map.empty()) { + return; + } + + if (annotation.property.length()) { + err.warn(annotation.location, + "property string is not needed here.", + "ignored." + ); + } + + if (annotation.property_map.empty()) { + return; + } + + if (!annotation.property_map.count("format")) { + err.err(annotation.location, + "property \"format\" is needed.", + "expected format: @output(format=\"\", [file=\"\"])" + ); + return; + } + + const auto& format = annotation.property_map.at("format"); + if (!supported_formats.count(format)) { + err.err(annotation.location, + "format " + format + " is not supported.", + "support \"stdout\", \"json\", \"sqlite\", \"csv\"." + ); + return; + } + + if (format == "\"stdout\"" && annotation.property_map.count("file")) { + err.err(annotation.location, + "property \"file\" is not needed for \"stdout\"." 
+        );
+        return;
+    }
+
+    if (format != "\"stdout\"" && !annotation.property_map.count("file")) { // every non-stdout format needs a target file
+        err.err(annotation.location,
+            "property \"file\" is needed for non-stdout format.",
+            "expected format: @output(format=\"\", file=\"\")"
+        );
+        return;
+    }
+
+    for(const auto& i : annotation.property_map) { // only "format" and "file" are accepted keys
+        if (i.first != "format" && i.first != "file") {
+            err.err(annotation.location,
+                "unknown property \"" + i.first + "\".",
+                "only support \"format\" and \"file\"."
+            );
+            return; // stops at the first unknown key
+        }
+    }
+}
+
+annotation_checker* annotation_checker::instance() { // accessor for the single shared checker
+    static annotation_checker checker; // function-local static, constructed on first call
+    return &checker;
+}
+
+bool annotation_checker::is_valid_schema_field_annotation(const std::string& annotation) const { // true when the name is a key of schema_field_annotation
+    return schema_field_annotation.count(annotation);
+}
+
+bool annotation_checker::is_valid_function_annotation(const std::string& annotation) const { // true for keys of function_annotation and for the input/output annotations
+    return function_annotation.count(annotation) ||
+           is_input_annotation(annotation) ||
+           is_output_annotation(annotation);
+}
+
+bool annotation_checker::is_deprecated_annotation(const std::string& annotation) const { // membership test on deprecated_annotation
+    return deprecated_annotation.count(annotation);
+}
+
+bool annotation_checker::is_deprecated_no_warning(const std::string& annotation) const { // membership test on deprecated_no_warning
+    return deprecated_no_warning.count(annotation);
+}
+
+bool annotation_checker::need_property_string(const std::string& annotation) const { // true when the annotation's kind is single_property
+    if (function_annotation.count(annotation)) { // function annotations are consulted first
+        return function_annotation.at(annotation)==annot_kind::single_property;
+    }
+    if (schema_field_annotation.count(annotation)) {
+        return schema_field_annotation.at(annotation)==annot_kind::single_property;
+    }
+    return false; // names found in neither table need no property string
+}
+
+bool annotation_checker::need_property_map(const std::string& annotation) const { // true when the annotation's kind is property_map
+    if (function_annotation.count(annotation)) { // same lookup order as need_property_string
+        return function_annotation.at(annotation)==annot_kind::property_map;
+    }
+    if (schema_field_annotation.count(annotation)) {
+        return schema_field_annotation.at(annotation)==annot_kind::property_map;
+    }
+    return false; // names found in neither table need no property map
+}
+ +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/annotation_checker.h b/godel-script/godel-frontend/src/sema/annotation_checker.h new file mode 100644 index 00000000..e1674eca --- /dev/null +++ b/godel-script/godel-frontend/src/sema/annotation_checker.h @@ -0,0 +1,82 @@ +#pragma once + +#include "godel-frontend/src/symbol.h" + +#include +#include +#include +#include + +namespace godel { + +enum class annot_kind { + none_property, // do not need property string + single_property, // need single property string + property_map // need map of properties +}; + +class input_annotation_checker { +private: + inline static const std::unordered_set supported_formats = { + "\"json\"", "\"sqlite\"", "\"csv\"" + }; + +public: + static void check(report::error&, const annot&); +}; + +class output_annotation_checker { +private: + inline static const std::unordered_set supported_formats = { + "\"stdout\"", "\"json\"", "\"sqlite\"", "\"csv\"" + }; + +public: + static void check(report::error&, const annot&); +}; + +class annotation_checker { +private: + // valid annotations used on schema fields + // second marks if property string is needed + const std::unordered_map schema_field_annotation = { + {"@primary", annot_kind::none_property} + }; + + // valid annotations used on functions + // second marks if property string is needed + const std::unordered_map function_annotation = { + {"@data_constraint", annot_kind::none_property}, + {"@self_typecheck_free", annot_kind::none_property}, + {"@inline", annot_kind::none_property}, + {"@cache", annot_kind::none_property} + }; + + // deprecated annotations + const std::unordered_set deprecated_annotation = {}; + + // deprecated annotation, but do not report warning or error + const std::unordered_set deprecated_no_warning = { + "@data_constraint" + }; + +public: + bool is_input_annotation(const std::string& annotation) const { + return annotation == "@input"; + } + bool is_output_annotation(const 
std::string& annotation) const { + return annotation == "@output"; + } + +public: + // singleton + static annotation_checker* instance(); + bool is_valid_schema_field_annotation(const std::string&) const; + bool is_valid_function_annotation(const std::string&) const; + bool is_deprecated_annotation(const std::string&) const; + bool is_deprecated_no_warning(const std::string&) const; + bool need_property_string(const std::string&) const; + bool need_property_map(const std::string&) const; +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/context.cpp b/godel-script/godel-frontend/src/sema/context.cpp new file mode 100644 index 00000000..4cf8ab78 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/context.cpp @@ -0,0 +1,100 @@ +#include "godel-frontend/src/sema/context.h" + +namespace godel { + +bool context::find_global(const std::string& short_name) const { + return this_name_space.count(short_name)!=0 || + global.get_native().count(short_name)!=0 || + global.get_basics().count(short_name)!=0 || + global.get_packages().count(short_name)!=0; +} + +symbol_kind context::find_global_kind(const std::string& short_name) const { + if (this_name_space.count(short_name)) { + return this_name_space.at(short_name); + } + if (global.get_basics().count(short_name)) { + return symbol_kind::basic; + } + if (global.get_native().count(short_name)) { + return symbol_kind::function; + } + if (global.get_packages().count(short_name)) { + return symbol_kind::package; + } + return symbol_kind::null; +} + +span context::find_global_location(const std::string& short_name) const { + return mapper.count(short_name) + ? 
global.get_location(mapper.at(short_name))
+        : span::null();
+}
+
+bool context::is_data_type(const std::string& short_name) const { // true for basic types and for enum/schema/database symbols of this name space
+    // check basic type
+    if (global.get_basics().count(short_name)) {
+        return true;
+    }
+
+    // check if undefined
+    if (!this_name_space.count(short_name)) {
+        return false;
+    }
+
+    // check if is enum, schema or db type
+    const auto type = this_name_space.at(short_name); // any other symbol kind is not a data type
+    return type==symbol_kind::enumerate ||
+           type==symbol_kind::schema ||
+           type==symbol_kind::database;
+}
+
+bool context::check_full_path_type_imported(report::error& err, // reports and returns false when the full-path type is unknown or not imported
+                                            const type_def* node) const {
+    const auto& full_path_name = node->get_full_name();
+    const auto index = global.get_index(full_path_name);
+    if (index==global_symbol_table::npos) { // npos: the global table has no such symbol
+        err.err(node->get_location(),
+            "undefined symbol \"" + full_path_name + "\"."
+        );
+        return false;
+    }
+    if (!imported_full_path_symbol_mapper.count(full_path_name)) { // the symbol exists globally but this file did not import it
+        err.err(node->get_location(),
+            "\"" + full_path_name + "\" is not imported."
+        );
+        return false;
+    }
+    return true;
+}
+
+void context::report_conflict_symbol(report::error& err, // reports an ambiguity error when symbol_name is recorded in `confliction`
+                                     const span& location,
+                                     const std::string& symbol_name) {
+    if (!confliction.count(symbol_name)) { // no recorded conflict: nothing to report
+        return;
+    }
+
+    // generate conflict symbols
+    const auto& vec = confliction.at(symbol_name);
+    auto info = std::string("");
+    for(const auto& sym : vec) { // builds "a, b, c, " from the candidates' full path names
+        info += sym.full_path_name() + ", ";
+    }
+    if (info.length()) { // drops the trailing ", "
+        info = info.substr(0, info.length() - 2);
+    }
+
+    err.err(location,
+        "\"" + symbol_name +
+        "\" is ambiguous, with multiple symbols \"" +
+        info + "\".",
+        "consider using explicit import."
+ ); + + for(const auto& sym : vec) { + err.warn(sym.type_loc, sym.full_path_name() + " is defined here."); + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/context.h b/godel-script/godel-frontend/src/sema/context.h new file mode 100644 index 00000000..1161e45b --- /dev/null +++ b/godel-script/godel-frontend/src/sema/context.h @@ -0,0 +1,68 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/symbol.h" +#include "godel-frontend/src/ast/ast_node.h" +#include "godel-frontend/src/ast/decl.h" + +#include +#include +#include +#include +#include +#include + +namespace godel { + +struct context { + // total global symbol table + static inline global_symbol_table global = {}; + + // store this file name + std::string this_file_name; + + // store imported symbol (full path like: coref::java::Class) and unique index + std::unordered_map imported_full_path_symbol_mapper; + + // store global symbol(including imported) and their unique index, not full path + std::unordered_map mapper; + + // store global symbol used in this scope(including imported) and their type, not full path + std::unordered_map this_name_space; + + // store all the global functions' & methods' full path + std::unordered_map output_used_functions; + + // store conflict symbol + std::unordered_map> confliction; + + // store variable names that should not be used + const std::unordered_set invalid_variable_name = { + "Self", "self", "count", "sum", "output", + "input", "max", "min", "result" + }; + +public: + // constructor + context(): + this_file_name(""), imported_full_path_symbol_mapper({}), + mapper({}), this_name_space({}), output_used_functions({}) { + global.init_basics(); + global.init_natives(); + global.init_packages(); + } + // find if global symbol exists by short name + bool find_global(const std::string&) const; + // find global symbol's type by short name, if not found, return symbol_kind::null + symbol_kind 
find_global_kind(const std::string&) const; + // find global symbol's location by short name, if not found, return null location + span find_global_location(const std::string&) const; + // find if a symbol (short name) has type except function + bool is_data_type(const std::string&) const; + // find if a full path symbol is imported + bool check_full_path_type_imported(report::error&, const type_def*) const; + // report symbol(short name) has multiple full path import symbol choices + void report_conflict_symbol(report::error&, const span&, const std::string&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/data_structure_construct.cpp b/godel-script/godel-frontend/src/sema/data_structure_construct.cpp new file mode 100644 index 00000000..a9e04573 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/data_structure_construct.cpp @@ -0,0 +1,368 @@ +#include "godel-frontend/src/sema/data_structure_construct.h" +#include "godel-frontend/src/sema/annotation_checker.h" +#include "godel-frontend/src/symbol.h" + +namespace godel { + +bool data_structure_construct::visit_enum_decl(enum_decl* node) { + const auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + + // check if this symbol exists and is an enumeration + if (ctx->find_global(name) && + ctx->find_global_kind(name)!=symbol_kind::enumerate) { + err->err(name_node->get_location(), "\"" + name + "\" exists."); + return false; + } + + // get enum structure + auto& enum_struct = ctx->global.get_enum(ctx->mapper.at(name)); + + // make enum index and check redefinition + for(auto child : node->get_member()) { + const auto& member_name = child->get_name(); + if (enum_struct.pairs.count(member_name)) { + err->err(child->get_location(), + "redefinition of enum \"" + member_name + "\"." 
+ ); + continue; + } + auto index = enum_struct.pairs.size(); + enum_struct.pairs.insert({member_name, index}); + } + return true; +} + +bool data_structure_construct::visit_schema_decl(schema_decl* node) { + const auto schema_name_node = node->get_name(); + const auto& name = schema_name_node->get_name(); + + // check if this schema exists + if (ctx->find_global(name) && + ctx->find_global_kind(name)!=symbol_kind::schema) { + err->err(schema_name_node->get_location(), "\"" + name + "\" exists."); + return false; + } + + // get schema structure + auto& schema_structure = ctx->global.get_schema(ctx->mapper.at(name)); + + // load parent schema info + if (node->has_parent()) { + schema_load_parent(node->get_parent_name(), schema_structure); + } + + // load field and do type existence check + for(auto child: node->get_fields()) { + schema_load_field(child, schema_structure); + } + + if (schema_structure.extends.empty() && schema_structure.fields.empty()) { + err->warn(node->get_location(), + "better define at least one field." 
+ ); + } + + // load schema native methods + schema_load_native_method(schema_structure); + return true; +} + +bool data_structure_construct::visit_database_decl(database_decl* node) { + const auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + + // check if database exists + if (ctx->find_global(name) && + ctx->find_global_kind(name)!=symbol_kind::database) { + err->err(name_node->get_location(), "\"" + name + "\" exists."); + return false; + } + + // get database structure + auto& database_structure = ctx->global.get_database(ctx->mapper.at(name)); + + // initialize load method + database_load_native_method(database_structure); + + // check tables + for(auto child : node->get_tables()) { + database_load_table(child, database_structure); + } + return true; +} + +void data_structure_construct::schema_load_parent( + type_def* node, schema& schema_structure) { + schema_structure.extends = node->get_full_name(); + + // check if the name conflicts with other symbols + if (!node->is_full_path()) { + ctx->report_conflict_symbol( + *err, node->get_location(), node->get_short_name() + ); + } + + if (!node->is_full_path() && + ctx->find_global_kind(node->get_short_name())!=symbol_kind::schema) { + err->err(node->get_location(), + "\"" + schema_structure.extends + + "\" does not exist or is not schema." 
+ ); + return; + } + + if (node->is_full_path()) { + if (!ctx->check_full_path_type_imported(*err, node)) { + return; + } + const auto full_path_name = node->get_full_name(); + const auto index = ctx->global.get_index(full_path_name); + schema_structure.parent = &ctx->global.get_schema(index); + } else { + schema_structure.parent = &ctx->global.get_schema( + ctx->mapper.at(schema_structure.extends) + ); + } + schema_structure.parent->child.push_back(&schema_structure); +} + +void data_structure_construct::check_field_annot(annotation* node) { + // only @primary is accepted here + const auto& annot_name = node->get_annotation(); + + // schema field must have annotation node after parsing (i forget why) + // if none annotation is given, the name is empty + if (!annot_name.length()) { + return; + } + + const auto checker = annotation_checker::instance(); + // should be valid annotation for schema field, now only @primary is supported + if (!checker->is_valid_schema_field_annotation(annot_name)) { + err->warn(node->get_location(), + "invalid annotation \"" + annot_name + "\".", + "maybe use \"@primary\" ?" + ); + return; + } + + if (!checker->need_property_string(annot_name) && + node->get_property_string().length()) { + err->warn(node->get_location(), + "property string is not needed here.", + "ignored." + ); + } + if (!checker->need_property_map(annot_name) && + node->get_ordered_properties().size()) { + err->warn(node->get_location(), + "property map is not needed here.", + "ignored." 
+ ); + } +} + +void data_structure_construct::schema_load_field(schema_field* field, + schema& schema_structure) { + auto anno_node = field->get_annotation(); + auto name_node = field->get_identifier(); + auto type_node = field->get_field_type(); + + // check annotation for schema field + check_field_annot(anno_node); + + // field with data set type is not allowed + if (type_node->is_set()) { + err->err(type_node->get_location(), + "data set type is not allowed in schema.", + "you mean \"" + type_node->get_full_name() + "\" ?" + ); + } + + symbol schema_field = { + .type_name = type_node->get_short_name(), + .type_loc = ctx->find_global_location(type_node->get_short_name()), + .location = name_node->get_location(), + .primary = (anno_node->get_annotation()=="@primary") + }; + + if (type_node->is_full_path()) { + if (!ctx->check_full_path_type_imported(*err, type_node)) { + return; + } + const auto& full_path_name = type_node->get_full_name(); + const auto index = ctx->global.get_index(full_path_name); + schema_field.type_loc = ctx->global.get_location(index); + } else { + ctx->report_conflict_symbol( + *err, schema_field.location, schema_field.type_name + ); + } + + // type existence and validation check + if (!ctx->is_data_type(schema_field.type_name)) { + err->err(type_node->get_location(), + "\"" + type_node->get_full_name() + + "\" is undefined or not a data type." + ); + } + + if (schema_field==symbol::boolean()) { + err->err(type_node->get_location(), "\"bool\" field is not allowed."); + } + + if (schema_field.primary && schema_field!=symbol::i64()) { + err->warn(type_node->get_location(), + "better use \"int\" type as primary key." + ); + } + + const auto& field_name = name_node->get_name(); + if (schema_structure.fields.count(field_name)) { + err->err(name_node->get_location(), + "redefinition of field \"" + field_name + "\"." 
+        );
+        return;
+    }
+    schema_structure.ordered_fields.push_back(field_name); // declaration order is kept alongside the field map
+    schema_structure.fields[field_name] = schema_field;
+}
+
+void data_structure_construct::schema_load_native_method(schema& schema_structure) { // registers the built-in methods of a schema in its `natives` table
+    const symbol self_type = { // the schema itself, as a single value
+        .type_name = schema_structure.name,
+        .type_loc = schema_structure.location,
+        .is_set = false
+    };
+    const symbol self_type_set = { // the same schema type, marked as a set
+        .type_name = schema_structure.name,
+        .type_loc = schema_structure.location,
+        .is_set = true
+    };
+
+    // set type convert method `T::to_set()`
+    schema_structure.natives["to_set"] = function::build_native(
+        "to_set", {{"self", self_type}}, self_type_set, false
+    );
+
+    // compare method `T::key_eq(D)` `T::key_neq(D)`
+    // only useful when T has a primary key
+    schema_structure.natives["key_eq"] = function::build_native(
+        "key_eq",
+        {{"self", self_type}, {"object", symbol::any()}}, // the compared object is typed as symbol::any()
+        symbol::boolean(),
+        false
+    );
+    schema_structure.natives["key_neq"] = function::build_native(
+        "key_neq",
+        {{"self", self_type}, {"object", symbol::any()}},
+        symbol::boolean(),
+        false
+    );
+
+    // generic method `T::to()`
+    schema_structure.natives["to"] = function::build_native(
+        "to", {{"self", self_type}}, symbol::any(), false
+    );
+    schema_structure.natives.at("to").has_generic = true; // flagged after construction
+
+    // generic method `T::is()`
+    schema_structure.natives["is"] = function::build_native(
+        "is", {{"self", self_type}}, symbol::boolean(), false
+    );
+    schema_structure.natives.at("is").has_generic = true;
+}
+
+void data_structure_construct::database_load_table(database_table* table,
+                                                   database& database_structure) {
+    const auto name_node = table->get_name();
+    const auto type_node = table->get_type();
+
+    if (!type_node->is_set()) {
+        err->err(type_node->get_location(),
+            "only data set is allowed in database.",
+            "you mean \"*" + type_node->get_full_name() + "\" ?"
+ ); + return; + } + + // load table info + symbol database_table = { + .type_name = type_node->get_short_name(), + .type_loc = ctx->find_global_location(type_node->get_short_name()), + .location = name_node->get_location(), + .is_set = true + }; + + if (type_node->is_full_path()) { + if (!ctx->check_full_path_type_imported(*err, type_node)) { + return; + } + const auto& full_path_name = type_node->get_full_name(); + const auto index = ctx->global.get_index(full_path_name); + database_table.type_loc = ctx->global.get_location(index); + } else { + ctx->report_conflict_symbol( + *err, database_table.location, database_table.type_name + ); + } + + // type existence check + if (!ctx->is_data_type(database_table.type_name)) { + err->err(type_node->get_location(), + "\"" + type_node->get_full_name() + + "\" is undefined or not a data type." + ); + return; + } + + if (ctx->find_global_kind(database_table.type_name)!=symbol_kind::schema) { + err->err(type_node->get_location(), + "\"" + database_table.full_path_name() + "\" is not schema." + ); + return; + } + + // set schema is referenced by database + auto& domain = ctx->global.get_domain(ctx->this_file_name); + if (database_table.type_loc.file==ctx->this_file_name && + domain.schemas.count(database_table.type_name)) { + auto& sc = domain.schemas.at(database_table.type_name); + sc.referenced_by_database_table = true; + sc.referenced_database_name = database_structure.name; + if (sc.has_parent()) { + err->err(type_node->get_location(), + "child schema cannot be used as DO schema.", + "consider using base schema." + ); + } + } + + const auto& table_name = name_node->get_name(); + if (database_structure.tables.count(table_name)) { + err->err(name_node->get_location(), + "redefinition of database \"" + table_name + "\"." 
+ ); + } else { + database_structure.tables[table_name] = database_table; + if (table->has_real_name()) { + const auto& real = table->get_real_name()->get_literal(); + database_structure.real_name[table_name] = real.substr(1, real.size()-2); + } + } +} + +void data_structure_construct::database_load_native_method(database& database_structure) { + symbol self_type = { + .type_name = database_structure.name, + .type_loc = database_structure.location, + .location = span::null(), + .is_set = false + }; + database_structure.load = function::build_native( + "load", {{"path", symbol::str()}}, self_type, false + ); +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/data_structure_construct.h b/godel-script/godel-frontend/src/sema/data_structure_construct.h new file mode 100644 index 00000000..150ecc56 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/data_structure_construct.h @@ -0,0 +1,39 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/sema/context.h" +#include "godel-frontend/src/ast/ast_visitor.h" + +namespace godel { + +class data_structure_construct: public ast_visitor { +private: + report::error* err; + context* ctx; + +private: + bool visit_enum_decl(enum_decl*) override; + bool visit_schema_decl(schema_decl*) override; + bool visit_database_decl(database_decl*) override; + // check schema parent + void schema_load_parent(type_def*, schema&); + // check schema field + void check_field_annot(annotation*); + void schema_load_field(schema_field*, schema&); + // load native schema method + void schema_load_native_method(schema&); + // check database table + void database_load_table(database_table*, database&); + // load native database method + void database_load_native_method(database&); + +public: + data_structure_construct(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) {} + + void check(ast_root* root) { + root->accept(this); + } +}; + +} \ No newline at 
end of file
diff --git a/godel-script/godel-frontend/src/sema/fact_statement_checker.cpp b/godel-script/godel-frontend/src/sema/fact_statement_checker.cpp
new file mode 100644
index 00000000..2d5ec784
--- /dev/null
+++ b/godel-script/godel-frontend/src/sema/fact_statement_checker.cpp
@@ -0,0 +1,65 @@
+#include "godel-frontend/src/sema/fact_statement_checker.h"
+
+namespace godel {
+
+bool fact_statement_checker::visit_fact_stmt(fact_stmt* node) { // visitor hook: records the fact statement and rejects it when nested
+    has_fact_statement = true;
+    if (!on_top_local_scope_flag) { // the flag is false while check() walks the non-fact statements
+        err->err(node->get_location(),
+            "fact statement cannot be contained in another statement."
+        );
+        return false;
+    }
+    return true;
+}
+
+void fact_statement_checker::check(const function& func, block_stmt* function_block) { // validates fact-statement usage in one function body
+    has_fact_statement = false;
+    for(auto child : function_block->get_statement()) { // only the block's direct children are scanned here
+        if (child->get_ast_class()==ast_class::ac_fact_stmt) {
+            has_fact_statement = true;
+        }
+    }
+    if (has_fact_statement && function_block->get_statement().size()>1) { // a fact statement must be the block's only statement
+        err->err(function_block->get_location(),
+            "code block should have only one fact statement."
+        );
+        return;
+    }
+
+    // check embedded statements
+    on_top_local_scope_flag = false; // a fact statement visited from here on is reported as nested
+    for(auto child : function_block->get_statement()) {
+        if (child->get_ast_class()!=ast_class::ac_fact_stmt) {
+            child->accept(this); // NOTE(review): a nested fact statement also sets has_fact_statement, so the checks below then run too - confirm intended
+        }
+    }
+    on_top_local_scope_flag = true;
+
+    // no fact statement exists, return
+    if (!has_fact_statement) {
+        return;
+    }
+
+    // check parameters
+    if (!func.parameter_list.size()) {
+        err->err(function_block->get_location(),
+            "code block has fact statement, function should have parameter."
+        );
+        return;
+    }
+
+    // a return type must be declared
+    if (func.return_type==symbol::null()) {
+        err->err(func.return_type.location, "must have bool return value.");
+        return;
+    }
+
+    // the declared return type must be bool and must not be a set
+    if (func.return_type!=symbol::boolean() || func.return_type.is_set) {
+        err->err(func.return_type.location, "return type must be bool.");
+        return;
+    }
+}
+
+}
\ No newline at end of file
diff --git a/godel-script/godel-frontend/src/sema/fact_statement_checker.h b/godel-script/godel-frontend/src/sema/fact_statement_checker.h
new file mode 100644
index 00000000..7293eee2
--- /dev/null
+++ b/godel-script/godel-frontend/src/sema/fact_statement_checker.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "godel-frontend/src/error/error.h"
+#include "godel-frontend/src/ast/ast_visitor.h"
+#include "godel-frontend/src/sema/context.h"
+
+namespace godel {
+
+class fact_statement_checker: public ast_visitor { // AST visitor that validates where fact statements appear in a function body
+private:
+    report::error* err; // non-owning: the pointer is only stored, never deleted here
+    bool on_top_local_scope_flag; // false while nested statements are being visited
+    bool has_fact_statement; // set when a fact statement is found or visited
+
+private:
+    bool visit_fact_stmt(fact_stmt*) override;
+
+public:
+    fact_statement_checker(report::error* err_ptr):
+        err(err_ptr), on_top_local_scope_flag(true),
+        has_fact_statement(false) {}
+    void check(const function&, block_stmt*); // entry point: checks one function and its code block
+};
+
+}
\ No newline at end of file
diff --git a/godel-script/godel-frontend/src/sema/function_declaration.cpp b/godel-script/godel-frontend/src/sema/function_declaration.cpp
new file mode 100644
index 00000000..455c8c55
--- /dev/null
+++ b/godel-script/godel-frontend/src/sema/function_declaration.cpp
@@ -0,0 +1,507 @@
+#include "godel-frontend/src/sema/function_declaration.h"
+#include "godel-frontend/src/sema/annotation_checker.h"
+
+namespace godel {
+
+void function_generator::check_cache_annotation_confliction(const function& func, // reports conflicting uses of "@cache" on a function
+                                                            function_decl* node) {
+    if (!func.has_annotation("@cache")) { // nothing to check without "@cache"
+        return;
+    }
+    const auto& front_loc = node->get_annotations().front()->get_location();
+    const auto& back_loc =
node->get_annotations().back()->get_location(); + const auto merged_loc = span( + front_loc.start_line, + front_loc.start_column, + back_loc.end_line, + back_loc.end_column, + front_loc.file + ); + if (func.has_annotation("@inline")) { + err->err(merged_loc, + "cannot use \"@inline\" and \"@cache\" together.", + "inlined function cannot be cached." + ); + } + if (func.name=="__all__") { + err->err(merged_loc, + "\"__all__\" cannot use \"@cache\".", + "data constraint function cannot be cached." + ); + } + if (func.is_global && func.name == "main") { + err->warn(merged_loc, + "\"@cache\" on main function will be ignored." + ); + } +} + +void function_generator::check_output_annotations(function& func, function_decl* node) { + if (!func.has_annotation_name("@output")) { + return; + } + + // check annotation confliction + if (func.has_annotation("@inline")) { + err->err(node->get_location(), + "cannot use \"@inline\" and \"@output\" together.", + "inlined function cannot be output." + ); + return; + } + + // string literal is quoted by \" + const std::unordered_set allowed_formats = { + "\"stdout\"", "\"json\"", "\"sqlite\"", "\"csv\"" + }; + for(const auto& i : func.annotations) { + if (i.annotation_name=="@output") { + output_annotation_checker::check(*err, i); + } + } +} + +void function_generator::check_input_annotations(function& func, function_decl* node) { + if (!func.has_annotation_name("@input")) { + return; + } + + if (flag_in_impl) { + err->err(node->get_location(), + "cannot use \"@input\" in implementation block.", + "only support \"@input\" on global function." + ); + return; + } + + // check annotation confliction + if (func.has_annotation("@inline")) { + err->err(node->get_location(), + "cannot use \"@inline\" and \"@input\" together.", + "inlined function cannot be input." 
+ ); + return; + } + + for(const auto& i : func.annotations) { + if (i.annotation_name=="@input") { + input_annotation_checker::check(*err, i); + } + } +} + +void function_generator::check_single_annotation(annotation* node) { + const auto checker = annotation_checker::instance(); + const auto& name = node->get_annotation(); + + // check annotation validity + if (!checker->is_valid_function_annotation(name)) { + err->warn(node->get_location(), + "invalid annotation for function.", + "ignored." + ); + return; + } else if (checker->is_deprecated_annotation(name)) { + err->warn(node->get_location(), + "deprecated annotation for function.", + "ignored." + ); + return; + } else if (checker->is_deprecated_no_warning(name)) { + return; + } + + if (checker->is_input_annotation(name) || + checker->is_output_annotation(name)) { + return; + } + + // check need property string + if (checker->need_property_string(name) && + node->get_property_string().empty()) { + err->err(node->get_location(), "need property string."); + return; + } + + if (!checker->need_property_string(name) && + node->get_property_string().length()) { + err->warn(node->get_location(), + "property string is not needed here.", + "ignored." + ); + } + if (!checker->need_property_map(name) && + node->get_ordered_properties().size()) { + err->warn(node->get_location(), + "property map is not needed here.", + "ignored." 
+ ); + } +} + +void function_generator::check_annotations(function& func, function_decl* node) { + for(auto i : node->get_annotations()) { + check_single_annotation(i); + } + + check_input_annotations(func, node); + check_output_annotations(func, node); + check_cache_annotation_confliction(func, node); +} + +void function_generator::load_annotation(function& func, function_decl* node) { + func.annotations.clear(); + for(auto i : node->get_annotations()) { + annot tmp_anno; + tmp_anno.annotation_name = i->get_annotation(); + tmp_anno.property = i->get_property_string(); + for(const auto& j : i->get_ordered_properties()) { + tmp_anno.property_map.insert({j.first, j.second}); + } + tmp_anno.location = i->get_location(); + func.annotations.push_back(tmp_anno); + } +} + +function function_generator::generate(function_decl* node, + bool in_impl, + const std::string& self_type) { + flag_in_impl = in_impl; + self_type_name = self_type; + const auto name_node = node->get_name(); + + function func; + func.name = name_node->get_name(); + func.location = name_node->get_location(); + func.public_access_authority = node->is_public(); + // check if function declaration is implemented + func.implemented = node->implemented(); + // check if function is global function + func.is_global = !in_impl; + + // load and check annotation + load_annotation(func, node); + check_annotations(func, node); + + // load and check return type + load_return_type_and_check(func, node); + + // load parameter list + load_parameters(func, node); + + return func; +} + +symbol function_generator::generate_return_type(type_def* node) { + symbol ret = { + .type_name = node->get_short_name(), + .type_loc = span::null(), + .location = node->get_location(), + .is_set = node->is_set() + }; + + // fill Self with real type + if (ret.type_name=="Self" && self_type_name.length()) { + ret.type_name = self_type_name; + } else if (ret.type_name=="Self" && !self_type_name.length()) { + err->err(node->get_location(), + 
"cannot bind \"Self\" with an exist type." + ); + } + + // check type or Self type exists + if (ret.type_name!="Self" && !ctx->find_global(ret.type_name)) { + err->err(node->get_location(), + "undefined symbol \"" + ret.full_path_name() + "\"." + ); + } + + // check return symbol + const auto ret_sym_kind = ctx->find_global_kind(ret.type_name); + if (ret_sym_kind==symbol_kind::function || ret_sym_kind==symbol_kind::query) { + err->err(node->get_location(), "cannot return function or query."); + } + + // update location with the real symbol place + ret.type_loc = ctx->find_global_location(ret.type_name); + if (node->is_full_path()) { + if (!ctx->check_full_path_type_imported(*err, node)) { + return ret; + } + const auto full_path_name = node->get_full_name(); + const auto index = ctx->global.get_index(full_path_name); + ret.type_loc = ctx->global.get_location(index); + } else { + ctx->report_conflict_symbol(*err, ret.location, ret.type_name); + } + return ret; +} + +void function_generator::load_return_type_and_check(function& func, + function_decl* node) { + const auto name_node = node->get_name(); + // only main function could have no return value + // other functions must have return value + if (!node->has_return_value() && name_node->get_name()!="main") { + err->err(name_node->get_location(), + "need return value type.", + "only \"main\" does not need return value." + ); + } + // impl functions must not have name "main" + if (flag_in_impl && name_node->get_name()=="main") { + err->err(name_node->get_location(), + "cannot use \"main\" as the method name." 
+ ); + } + + // load return value type, null must be used only for main function + if (node->has_return_value()) { + func.return_type = generate_return_type(node->get_return_type()); + } else { + func.return_type = symbol::null(); + } +} + +symbol function_generator::generate_parameter(var_decl* child, + uint32_t position, + const function& func) { + const auto var_node = child->get_var_name(); + const auto type_node = child->get_type(); + const auto& param_name = var_node->get_name(); + + if (!flag_in_impl && param_name=="self") { + err->err(var_node->get_location(), + "global function cannot use parameter \"self\"." + ); + return symbol::error(); + } + + // check if parameter "self" is the first one, if is, the position should be 0 + if (param_name=="self" && position) { + err->err(var_node->get_location(), + "\"self\" should be the first parameter.", + "move \"self\" to the first place." + ); + } + if (param_name=="self" && child->has_declared_type()) { + err->err(type_node->get_location(), + "\"self\" does not need type declaration." + ); + } + + if (func.parameter_list.count(param_name)) { + err->err(var_node->get_location(), + "redefinition of\"" + param_name + "\"." + ); + } else if (ctx->find_global(param_name)) { + err->warn(var_node->get_location(), + "\"" + param_name + "\" shadows a global symbol." + ); + } else if (invalid_parameter_name.count(param_name)) { + err->err(var_node->get_location(), + "variables cannot use this name.", + "this name is reserved to avoid confliction in generated codes." + ); + } + + // parameter type, if is "self", load type later + symbol param = { + .type_name = (child->has_declared_type()? 
type_node->get_short_name():""), + .type_loc = span::null(), + .location = var_node->get_location(), + .is_set = (child->has_declared_type() && type_node->is_set()), + }; + if (!param.type_name.length()) { + param.type_loc = var_node->get_location(); + } else { + param.type_loc = ctx->find_global_location(param.type_name); + } + + if (param==symbol::boolean()) { + err->err(type_node->get_location(), "\"bool\" parameter is not allowed."); + } + + // parameter type check + if (child->has_declared_type() && type_node->is_full_path()) { + if (!ctx->check_full_path_type_imported(*err, type_node)) { + return param; + } + const auto& full_path_name = type_node->get_full_name(); + const auto index = ctx->global.get_index(full_path_name); + param.type_loc = ctx->global.get_location(index); + } else { + ctx->report_conflict_symbol(*err, param.location, param.type_name); + } + + if (param.type_name.length() && !ctx->is_data_type(param.type_name)) { + err->err(type_node->get_location(), + "\"" + param.full_path_name() + + "\" is undefined or not a data type." + ); + } + if (!param.type_name.length() && + (!flag_in_impl || param_name!="self")) { + err->err(var_node->get_location(), + "parameter \"" + param_name + "\" requires a type." 
+ ); + } + + child->set_resolve({ + .is_global = false, + .type = param + }); + return param; +} + +void function_generator::load_parameters(function& func, function_decl* node) { + // used to check if parameter "self" occurs first + uint32_t cnt = 0; + for(auto child: node->get_parameter_list()) { + const auto var_node = child->get_var_name(); + const auto& param_name = var_node->get_name(); + + auto param = generate_parameter(child, cnt, func); + ++cnt; + // insert new parameter + func.parameter_list.insert({param_name, param}); + // record parameters order + func.ordered_parameter_list.push_back(param_name); + if (param_name!="self") { + ctx->global.add_local_variable(param_name, var_node->get_location(), param); + } + } +} + +bool function_declaration::visit_function_decl(function_decl* node) { + const auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + + // check function existence + if (ctx->find_global(name) && ctx->find_global_kind(name)!=symbol_kind::function) { + err->err(name_node->get_location(), "\"" + name + "\" exists."); + return false; + } + + // check if implemented + if (!node->implemented()) { + err->err(node->get_location(), "global function must be implemented."); + return false; + } + + // generate global function semantic info + ctx->global.get_func(ctx->mapper.at(name)) = generator.generate(node, false); + return true; +} + +bool function_declaration::visit_impl_block([[maybe_unused]] impl_block* node) { + // avoid scanning function implements in impl block + return true; +} + +void function_declaration::check(ast_root* root) { + for(auto i : root->get_declarations()) { + i->accept(this); + } +} + +bool impl_function_declaration::visit_impl_block(impl_block* node) { + check_impl_schema(node); + return true; +} + +void impl_function_declaration::check_impl_schema(impl_block* node) { + const auto name_node = node->get_impl_schema_name(); + const auto& name = name_node->get_name(); + + // check if this schema 
exists + if (!ctx->this_name_space.count(name) || + ctx->this_name_space.at(name)!=symbol_kind::schema) { + err->err(name_node->get_location(), "schema \"" + name + "\" does not exist."); + return; + } + + auto& sc = ctx->global.get_schema(ctx->mapper.at(name)); + + // should implement schema in where it is defined + if (sc.location.file!=node->get_file()) { + err->err(name_node->get_location(), "must implement in " + sc.location.file + " ."); + return; + } + + // add new function as schema's method + for(auto child: node->get_functions()) { + auto func = generator.generate(child, true, name); + // update self type + if (func.parameter_list.count("self")) { + auto& self = func.parameter_list.at("self"); + self.type_name = name; + self.type_loc = ctx->find_global_location(name); + ctx->global.add_local_variable("self", self.location, self); + } + + // check name shadowing + const auto func_name_node = child->get_name(); + const auto& func_name = func_name_node->get_name(); + + // data constraint annotation is deprecated + if (func_name!="__all__" && func.has_annotation("@data_constraint")) { + err->err(func_name_node->get_location(), + "invalid data constraint.", + "must use \"__all__\"." + ); + } + + if (sc.fields.count(func_name)) { + err->err(func_name_node->get_location(), + "method \"" + func_name + "\" repeats with field." + ); + continue; + } + if (sc.methods.count(func_name)) { + err->err(func_name_node->get_location(), + "redeclaration of \"" + func_name + "\"." + ); + continue; + } + + // check function implemention + if (!func.implemented) { + err->err(func_name_node->get_location(), + "this function is not implemented." 
+ ); + } + if (func_name=="__all__" && func.return_type.type_name!=name) { + const auto loc = ctx->find_global_location(name); + const auto this_schema = symbol({ + .type_name = name, + .type_loc = loc, + .is_set = true + }); + if (child->get_return_type()) { + err->err(child->get_return_type()->get_location(), + "return type of data constraint \"" + func_name + + "\" should be \"" + this_schema.full_path_name() + + "\", but get \"" + func.return_type.full_path_name() + "\"." + ); + } else { + err->err(child->get_location(), + "data constraint \"" + func_name + + "\" should have return value." + ); + } + } + + // this is direct implemention + // so do not need to check parameter format + sc.methods[func_name] = func; + } +} + +void impl_function_declaration::check(ast_root* root) { + for(auto i : root->get_declarations()) { + i->accept(this); + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/function_declaration.h b/godel-script/godel-frontend/src/sema/function_declaration.h new file mode 100644 index 00000000..756afbec --- /dev/null +++ b/godel-script/godel-frontend/src/sema/function_declaration.h @@ -0,0 +1,80 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/ast/ast_visitor.h" +#include "godel-frontend/src/sema/context.h" +#include "godel-frontend/src/symbol.h" + +#include +#include +#include + +namespace godel { + +class function_generator { +private: + report::error* err; + context* ctx; + +private: + // not the same as ctx.invalid_variable_name, this allows `self` + const std::unordered_set invalid_parameter_name = { + "Self", "count", "sum", "output", + "input", "max", "min", "result" + }; + bool flag_in_impl = false; + std::string self_type_name = ""; + +private: + void check_cache_annotation_confliction(const function&, function_decl*); + void check_output_annotations(function&, function_decl*); + void check_input_annotations(function&, function_decl*); + void 
check_single_annotation(annotation*); + void check_annotations(function&, function_decl*); + void load_annotation(function&, function_decl*); + +private: + symbol generate_return_type(type_def*); + void load_return_type_and_check(function&, function_decl*); + symbol generate_parameter(var_decl*, uint32_t, const function&); + void load_parameters(function&, function_decl*); + +public: + function_generator(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) {} + function generate(function_decl*, bool, const std::string& self_type = ""); +}; + +class function_declaration: public ast_visitor { +private: + report::error* err; + context* ctx; + function_generator generator; + +private: + bool visit_function_decl(function_decl*) override; + bool visit_impl_block(impl_block*) override; + +public: + function_declaration(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr), generator(err_ptr, ctx_ptr) {} + void check(ast_root*); +}; + +class impl_function_declaration: public ast_visitor { +private: + report::error* err; + context* ctx; + function_generator generator; + +private: + bool visit_impl_block(impl_block*) override; + void check_impl_schema(impl_block*); + +public: + impl_function_declaration(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr), generator(err_ptr, ctx_ptr) {} + void check(ast_root*); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/global_symbol_loader.cpp b/godel-script/godel-frontend/src/sema/global_symbol_loader.cpp new file mode 100644 index 00000000..4fc0e0e5 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/global_symbol_loader.cpp @@ -0,0 +1,170 @@ +#include "godel-frontend/src/sema/global_symbol_loader.h" + +namespace godel { + +void global_symbol_loader::register_symbol( + const span& location, const std::string& name, symbol_kind kind) { + // get namespace + auto& space = ctx->global.get_domain(ctx->this_file_name); + // generate module 
name, if length is zero, this is the main entry file + auto module_name = package::godel_module::instance()->find_module_by_file(ctx->this_file_name); + if (module_name.length()) { + module_name += "::"; + } + // load to main file's namespace + const auto full_path_name = module_name + name; + + // load this name to this file's namespace + ctx->this_name_space.insert({name, kind}); + // register this name and type to the type mapper of this namespace + space.type_mapper.insert({name, kind}); + + // regist in global symbol table + ctx->global.regist(ctx->this_file_name, full_path_name, kind); + ctx->mapper.insert({name, ctx->global.get_index(full_path_name)}); + + switch(kind) { + case symbol_kind::enumerate: + space.enums[name].name = name; + space.enums[name].location = location; + break; + case symbol_kind::database: + space.databases[name].name = name; + space.databases[name].location = location; + break; + case symbol_kind::function: + space.functions[name].name = name; + space.functions[name].location = location; + break; + case symbol_kind::schema: + space.schemas[name].name = name; + space.schemas[name].location = location; + break; + case symbol_kind::query: + space.queries[name].name = name; + space.queries[name].location = location; + break; + default: break; + } +} + +bool global_symbol_loader::check_name_valid(identifier* name_node) const { + const auto& name = name_node->get_name(); + // check if this symbol is reserved + if (name=="Self" || name=="self") { + err->err(name_node->get_location(), "cannot use reserved name."); + return false; + } + + // check if this symbol is in natives + if (ctx->global.get_native().count(name)) { + std::stringstream ss; + ss << ctx->global.get_native().at(name); + err->err(name_node->get_location(), + "\"" + name + "\" conflicts with native function \"" + + ss.str() + "\"", + "consider another name." 
+ ); + return false; + } + + // check if this symbol is reserved + if (ctx->global.get_reserved_id().count(name)) { + err->err(name_node->get_location(), + "this symbol name is reserved.", + "consider another name." + ); + return false; + } + + // check redefinition + if (ctx->find_global(name)) { + const auto type = ctx->find_global_kind(name); + const auto location = ctx->find_global_location(name); + auto info = (type!=symbol_kind::basic && + type!=symbol_kind::package && + type!=symbol_kind::null)? + location.file + + ":" + std::to_string(location.start_line) + + ":" + std::to_string(location.start_column + 1) : ""; + err->err(name_node->get_location(), + "redefinition of \"" + name + "\".", + (info.length()? + "first defined at " + info + ".": + (type==symbol_kind::basic? + "this is a basic type.":"this is a package." + ) + ) + ); + return false; + } + return true; +} + +bool global_symbol_loader::visit_database_decl(database_decl* node) { + auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + if (check_name_valid(name_node)) { + register_symbol(name_node->get_location(), name, symbol_kind::database); + } + return true; +} + +bool global_symbol_loader::visit_schema_decl(schema_decl* node) { + auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + if (check_name_valid(name_node)) { + register_symbol(name_node->get_location(), name, symbol_kind::schema); + } + return true; +} + +bool global_symbol_loader::visit_function_decl(function_decl* node) { + auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + if (check_name_valid(name_node)) { + register_symbol(name_node->get_location(), name, symbol_kind::function); + } + return true; +} + +bool global_symbol_loader::visit_enum_decl(enum_decl* node) { + auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + if (check_name_valid(name_node)) { + register_symbol(name_node->get_location(), name, 
symbol_kind::enumerate); + } + return true; +} + +bool global_symbol_loader::visit_impl_block([[maybe_unused]] impl_block* node) { + // avoid scanning function declarations in impl block + return true; +} + +bool global_symbol_loader::visit_query_decl(query_decl* node) { + auto name_node = node->get_name(); + const auto& name = name_node->get_name(); + if (check_name_valid(name_node)) { + register_symbol(name_node->get_location(), name, symbol_kind::query); + } + return true; +} + +void global_symbol_loader::scan(ast_root* root) { + // create new namespace, if created, return false, so do not load symbol again + if (!ctx->global.create_new_domain(ctx->this_file_name)) { + return; + } + // scan all declarations + for(auto i : root->get_declarations()) { + i->accept(this); + } + // check main function, global symbol named "main" must be function + if (ctx->find_global("main") && + ctx->find_global_kind("main")!=symbol_kind::function) { + err->err(ctx->find_global_location("main"), "main must be a function."); + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/global_symbol_loader.h b/godel-script/godel-frontend/src/sema/global_symbol_loader.h new file mode 100644 index 00000000..6ed1dd84 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/global_symbol_loader.h @@ -0,0 +1,35 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/ast/ast_visitor.h" +#include "godel-frontend/src/sema/context.h" +#include "godel-frontend/src/package/package.h" + +#include +#include + +namespace godel { + +// visit all global declarations and register symbols in the semantic context +class global_symbol_loader: public ast_visitor { +private: + report::error* err; + context* ctx; + +private: + void register_symbol(const span&, const std::string&, symbol_kind); + bool check_name_valid(identifier*) const; + bool visit_database_decl(database_decl*) override; + bool visit_schema_decl(schema_decl*) override; + 
bool visit_function_decl(function_decl*) override; + bool visit_enum_decl(enum_decl*) override; + bool visit_impl_block(impl_block*) override; + bool visit_query_decl(query_decl*) override; + +public: + global_symbol_loader(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) {} + void scan(ast_root*); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/inherit_schema.cpp b/godel-script/godel-frontend/src/sema/inherit_schema.cpp new file mode 100644 index 00000000..be8a39a1 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/inherit_schema.cpp @@ -0,0 +1,157 @@ +#include "godel-frontend/src/sema/inherit_schema.h" + +#include + +namespace godel { + +void schema_field_inherit_worker::check_multiple_primary_keys() { + for(const auto& sc : ctx->global.get_domain(ctx->this_file_name).schemas) { + auto primary_field = std::string(""); + auto cnt = 0; + for(const auto& field : sc.second.fields) { + cnt += field.second.primary? 1:0; + primary_field += field.second.primary? field.first + ", ":""; + } + if (cnt>1) { + primary_field = primary_field.substr(0, primary_field.length() - 2); + err->err(sc.second.location, + "schema has multiple primary keys: \"" + + primary_field + "\"." + ); + } + } +} + +void schema_field_inherit_worker::inherit_single_schema_field(schema* child) { + for(const auto& field : child->parent->fields) { + const auto& field_name = field.first; + if (child->fields.count(field_name)) { + err->err(child->fields.at(field_name).location, + "parent field \"" + field_name + "\" overrides." 
+ ); + } + child->fields.insert(field); + } + + // insert parent's fields in front of child's fields + auto ordered_field_copy = child->ordered_fields; + child->ordered_fields = child->parent->ordered_fields; + for(const auto& field : ordered_field_copy) { + child->ordered_fields.push_back(field); + } +} + +void schema_field_inherit_worker::inherit_field() { + auto& schemas = ctx->global.get_domain(ctx->this_file_name).schemas; + std::queue bfs; + + // first push root set into the queue + for(auto& sc : schemas) { + // add schemas whose parent has no parents + if (sc.second.has_parent() && !sc.second.parent->has_parent()) { + bfs.push(&sc.second); + } + // add schemas whose parent is not in the same file + else if (sc.second.has_parent() && sc.second.parent->location.file!=ctx->this_file_name) { + bfs.push(&sc.second); + } + } + + while(!bfs.empty()) { + auto child = bfs.front(); + bfs.pop(); + + // inherit field from parent + inherit_single_schema_field(child); + + // inherit field to childs + for(auto ptr : child->child) { + bfs.push(ptr); + } + } + + // check if schema has primary keys + check_multiple_primary_keys(); +} + +void schema_method_inherit_worker::check_schema_without_data_constraint() { + for(const auto& sc : ctx->global.get_domain(ctx->this_file_name).schemas) { + // do not need to check database table schema + if (sc.second.referenced_by_database_table) { + continue; + } + + // check schema has data constraint method or not + if (!sc.second.methods.count("__all__")) { + err->warn(sc.second.location, + "data constraint not found in this schema.", + "use in database, or implement \"__all__\" method." + ); + } + } +} + +void schema_method_inherit_worker::inherit_single_schema_method(schema* child) { + for(const auto& method : child->parent->methods) { + // child has this method, means the method is override, skip + if (child->methods.count(method.first)) { + continue; + } + + // __all__(...) 
must be written by yourself + // so do not inherit it + if (method.first == "__all__") { + continue; + } + + // copy method into child's methods + child->methods.insert(method); + + // set method inherit mark to true + auto& func = child->methods.at(method.first); + func.inherit = true; + + // update method parameter 'self' type info + if (func.parameter_list.count("self")) { + auto& self = func.parameter_list.at("self"); + self.type_name = child->name; + self.type_loc = ctx->find_global_location(child->name); + } + } +} + +void schema_method_inherit_worker::inherit_method() { + auto& schemas = ctx->global.get_domain(ctx->this_file_name).schemas; + std::queue bfs; + + // first push root set into the queue + for(auto& sc : schemas) { + // add schemas whose parent has no parents + if (sc.second.has_parent() && !sc.second.parent->has_parent()) { + bfs.push(&sc.second); + } + // add schemas whose parent is not in the same file + else if (sc.second.has_parent() && + sc.second.parent->location.file!=ctx->this_file_name) { + bfs.push(&sc.second); + } + } + + while(!bfs.empty()) { + auto child = bfs.front(); + bfs.pop(); + + // inherit method from parent + inherit_single_schema_method(child); + + // inherit childs + for(auto ptr : child->child) { + bfs.push(ptr); + } + } + + // check schema with no data_constraint + check_schema_without_data_constraint(); +} + +} diff --git a/godel-script/godel-frontend/src/sema/inherit_schema.h b/godel-script/godel-frontend/src/sema/inherit_schema.h new file mode 100644 index 00000000..7476d37a --- /dev/null +++ b/godel-script/godel-frontend/src/sema/inherit_schema.h @@ -0,0 +1,38 @@ +#pragma once + +#include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/sema/context.h" + +namespace godel { + +class schema_field_inherit_worker { +private: + report::error* err; + context* ctx; + +private: + void check_multiple_primary_keys(); + void inherit_single_schema_field(schema*); + +public: + 
schema_field_inherit_worker(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) {} + void inherit_field(); +}; + +class schema_method_inherit_worker { +private: + report::error* err; + context* ctx; + +private: + void check_schema_without_data_constraint(); + void inherit_single_schema_method(schema*); + +public: + schema_method_inherit_worker(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) {} + void inherit_method(); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/self_reference_check.cpp b/godel-script/godel-frontend/src/sema/self_reference_check.cpp new file mode 100644 index 00000000..292ea85b --- /dev/null +++ b/godel-script/godel-frontend/src/sema/self_reference_check.cpp @@ -0,0 +1,125 @@ +#include "godel-frontend/src/sema/self_reference_check.h" + +namespace godel { + +void self_extend_checker::clear_all_mark() { + for (auto& sc : *schema_table) { + sc.second.self_ref_checked = false; + } +} + +void self_extend_checker::self_extend_check_core( + const std::string& name, + const span& extend_id_location, + std::queue>& bfs) { + + auto [self, path] = bfs.front(); + bfs.pop(); + const auto& ext = schema_table->at(self); + + // no need to check schema without parent + if (!ext.has_parent()) { + return; + } + + const auto ref_path = path + " -> " + ext.extends; + if (ext.extends == name) { + err->err(extend_id_location, "\"" + name + "\" self extends.", ref_path); + return; + } + + // no need to check self extend from import schema + if (!schema_table->count(ext.extends)) { + return; + } + + if (!schema_table->at(ext.extends).self_ref_checked) { + bfs.push({ext.extends, ref_path}); + schema_table->at(ext.extends).self_ref_checked = true; + } +} + +void self_extend_checker::check() { + // do not need to check extend schema from other file namespace + // because godel does not allow a file to reference self + // and imported schema must not extend a schema from the file which 
imports itself + + // check if schema extends itself, using bfs + std::queue> bfs; + + for(auto& sc : *schema_table) { + clear_all_mark(); + // mark root + const auto& name = sc.first; + sc.second.self_ref_checked = true; + + // if does not extend anything, skip check + if (!sc.second.has_parent()) { + continue; + } + bfs.push({name, name}); + + // bfs search + while(!bfs.empty()) { + self_extend_check_core(name, sc.second.location, bfs); + } + } +} + +void self_reference_checker::clear_all_mark() { + for (auto& sc : *schema_table) { + sc.second.self_ref_checked = false; + } +} + +void self_reference_checker::self_reference_check_core( + const std::string& name, + std::queue>& bfs) { + + auto [self, path] = bfs.front(); + bfs.pop(); + + for(const auto& i : schema_table->at(self).fields) { + const auto& field_name = i.first; + const auto& type_name = i.second.full_path_name(); + const auto ref_path = path + "." + field_name + " -> " + type_name; + // check self-referenced + if (type_name == name) { + err->err(i.second.location, + "\"" + field_name + "\" self references \"" + type_name + "\".", + ref_path + ); + } + + // no need to check imported schema, file could not be self-referenced + if (!schema_table->count(type_name)) { + continue; + } + + // mark self_ref_checked flag, avoid recursive check + if (!schema_table->at(type_name).self_ref_checked) { + bfs.push({type_name, ref_path}); + schema_table->at(type_name).self_ref_checked = true; + } + } +} + +void self_reference_checker::check() { + // check if schema has references of itself, using bfs + std::queue> bfs; + + for(auto& sc : *schema_table) { + clear_all_mark(); + // mark root + const auto& name = sc.first; + sc.second.self_ref_checked = true; + bfs.push({name, name}); + + // bfs search + while(!bfs.empty()) { + self_reference_check_core(name, bfs); + } + } +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/sema/self_reference_check.h 
b/godel-script/godel-frontend/src/sema/self_reference_check.h new file mode 100644 index 00000000..08205e60 --- /dev/null +++ b/godel-script/godel-frontend/src/sema/self_reference_check.h @@ -0,0 +1,83 @@ +#pragma once + +#include "godel-frontend/src/symbol.h" +#include "godel-frontend/src/sema/context.h" +#include "godel-frontend/src/error/error.h" + +#include +#include +#include +#include + +namespace godel { + +// check schema self extend: reports a schema whose extends chain leads back to itself +// example: +// ```rust +// // A -> C -> B -> A which is a self extend +// schema A extends C {} +// schema B extends A {} +// schema C extends B {} +// ``` +class self_extend_checker { +private: + report::error* err; + context* ctx; + std::unordered_map* schema_table; + +private: + // set all the schemas' mark to "not checked" + void clear_all_mark(); + + // check self extend and generate extend chain; takes the root schema name, the location used for the error report and the bfs queue + void self_extend_check_core( + const std::string&, + const span&, + std::queue>& + ); + +public: + self_extend_checker(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) { + schema_table = &ctx->global.get_domain(ctx->this_file_name).schemas; // schemas of the domain named by ctx->this_file_name + } + + // do self extend check on every schema in schema_table, errors are reported through err + void check(); +}; + +// check schema self-reference: reports a field whose type leads back to the schema itself +// example: +// ```rust +// // A.member -> B.member -> C.member -> A which is self-referenced +// schema A {member: B} +// schema B {member: C} +// schema C {member: A} +// ``` +class self_reference_checker { +private: + report::error* err; + context* ctx; + std::unordered_map* schema_table; + +private: + // set all the schemas' mark to "not checked" + void clear_all_mark(); + + // check self reference and generate reference chain; takes the root schema name and the bfs queue + void self_reference_check_core( + const std::string&, + std::queue>& + ); + +public: + self_reference_checker(report::error* err_ptr, context* ctx_ptr): + err(err_ptr), ctx(ctx_ptr) { + schema_table = &ctx->global.get_domain(ctx->this_file_name).schemas; // schemas of the domain named by ctx->this_file_name + } + + // do self reference check on every schema in schema_table, errors are reported through err + void check(); +}; + +} \ No newline at end of file diff --git 
a/godel-script/godel-frontend/src/sema/symbol_import.cpp b/godel-script/godel-frontend/src/sema/symbol_import.cpp new file mode 100644 index 00000000..7050b66f --- /dev/null +++ b/godel-script/godel-frontend/src/sema/symbol_import.cpp @@ -0,0 +1,243 @@ +#include "godel-frontend/src/sema/symbol_import.h" +#include "godel-frontend/src/package/package.h" +#include "godel-frontend/src/engine.h" + +namespace godel { + +using package::godel_module; + +void symbol_import::add_conflict_symbol(const std::string& module_path, + const std::string& name) { + // get location of the type which already exists under this name + const auto& exist_type_loc = ctx->find_global_location(name); + // get location of the type "module_path::name" which will be conflicted + const auto index = ctx->global.get_index(module_path + "::" + name); + const auto& conflict_type_loc = ctx->global.get_location(index); + + // if both locations are in the same file, do not record a conflict for this symbol + if (exist_type_loc.file == conflict_type_loc.file) { + return; + } + + if (!ctx->confliction.count(name)) { // first conflict on this name, create its empty entry + ctx->confliction.insert({name, {}}); + } + + bool has_exist_type = false, has_conflict_type = false; // whether each location's file is already recorded for this name + auto& vec = ctx->confliction.at(name); + for(const auto& exist_symbol : vec) { + if (exist_symbol.type_loc.file == exist_type_loc.file) { + has_exist_type = true; + } + if (exist_symbol.type_loc.file == conflict_type_loc.file) { + has_conflict_type = true; + } + } + + if (!has_exist_type) { // append only locations whose file is not recorded yet + vec.push_back({.type_name = name, .type_loc = exist_type_loc}); + } + if (!has_conflict_type) { + vec.push_back({.type_name = name, .type_loc = conflict_type_loc}); + } +} + +configure symbol_import::inherit_config(const std::string& filename) { + configure new_config = { + {option::cli_input_path, filename}, + {option::cli_semantic_only, ""} // only needs semantic analysis + }; + + // load the module's config + static const std::vector