diff --git a/.gitignore b/.gitignore index d70f8773d..998dc65c7 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,6 @@ core.* tags cscope.out .gdb_history +site/ +__pycache__/ diff --git a/README.md b/README.md index 7d54c1c77..39a771051 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Ceramic +![ceramic](doc/assets/logo.svg) Ceramic is a programming language based on Clay designed for Generic Programming. diff --git a/doc/language-reference.md b/doc/_archive/language-reference.md similarity index 100% rename from doc/language-reference.md rename to doc/_archive/language-reference.md diff --git a/doc/primitives-reference.md b/doc/_archive/primitives-reference.md similarity index 100% rename from doc/primitives-reference.md rename to doc/_archive/primitives-reference.md diff --git a/doc/assets/ceramic-light.json b/doc/assets/ceramic-light.json new file mode 100644 index 000000000..cb8d9b06c --- /dev/null +++ b/doc/assets/ceramic-light.json @@ -0,0 +1,44 @@ +{ + "name": "ceramic-light", + "type": "light", + "colors": { + "editor.foreground": "#2d2520" + }, + "tokenColors": [ + { + "scope": [ + "comment", + "comment.block.ceramic", + "comment.line.double-slash.ceramic", + "punctuation.definition.comment.ceramic" + ], + "settings": { "foreground": "#a09080", "fontStyle": "italic" } + }, + { + "scope": [ + "string", + "string.quoted.double.ceramic", + "string.quoted.single.ceramic", + "string.quoted.triple.ceramic", + "punctuation.definition.string.begin.ceramic", + "punctuation.definition.string.end.ceramic", + "constant.character.escape.ceramic" + ], + "settings": { "foreground": "#2a6e2a" } + }, + { + "scope": [ + "keyword", + "keyword.control.ceramic", + "keyword.control.import.ceramic", + "keyword.other.ceramic", + "keyword.other.debug.ceramic", + "keyword.operator.word.ceramic", + "storage.type.ceramic", + "storage.modifier.ceramic", + "storage.modifier.attribute.ceramic" + ], + "settings": { "foreground": "#1a5fb4", "fontStyle": "bold" } + } + ] +} 
diff --git a/doc/assets/ceramic.tmLanguage.json b/doc/assets/ceramic.tmLanguage.json new file mode 100644 index 000000000..7bc234934 --- /dev/null +++ b/doc/assets/ceramic.tmLanguage.json @@ -0,0 +1,311 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "Ceramic", + "scopeName": "source.ceramic", + "patterns": [ + { "include": "#comments" }, + { "include": "#strings" }, + { "include": "#character-literal" }, + { "include": "#static-identifier" }, + { "include": "#attribute-list" }, + { "include": "#numbers" }, + { "include": "#import-declaration" }, + { "include": "#function-definition-name" }, + { "include": "#type-definition-name" }, + { "include": "#overload-name" }, + { "include": "#external-name" }, + { "include": "#variable-binding" }, + { "include": "#keywords" }, + { "include": "#operator-keywords" }, + { "include": "#storage-keywords" }, + { "include": "#builtin-types" }, + { "include": "#boolean-literals" }, + { "include": "#debug-keywords" }, + { "include": "#special-identifiers" }, + { "include": "#goto-label" }, + { "include": "#type-annotation" }, + { "include": "#lambda-operators" }, + { "include": "#multi-value" }, + { "include": "#operators" } + ], + "repository": { + + "comments": { + "patterns": [ + { + "name": "comment.block.ceramic", + "begin": "/\\*", + "end": "\\*/", + "beginCaptures": { "0": { "name": "punctuation.definition.comment.ceramic" } }, + "endCaptures": { "0": { "name": "punctuation.definition.comment.ceramic" } } + }, + { + "name": "comment.line.double-slash.ceramic", + "begin": "//", + "end": "$", + "beginCaptures": { "0": { "name": "punctuation.definition.comment.ceramic" } } + } + ] + }, + + "strings": { + "patterns": [ + { + "comment": "Triple-quoted string must come before single-quoted", + "name": "string.quoted.triple.ceramic", + "begin": "\"\"\"", + "end": "\"\"\"(?!\")", + "beginCaptures": { "0": { "name": "punctuation.definition.string.begin.ceramic" } }, + 
"endCaptures": { "0": { "name": "punctuation.definition.string.end.ceramic" } }, + "patterns": [{ "include": "#string-escape" }] + }, + { + "name": "string.quoted.double.ceramic", + "begin": "\"", + "end": "\"", + "beginCaptures": { "0": { "name": "punctuation.definition.string.begin.ceramic" } }, + "endCaptures": { "0": { "name": "punctuation.definition.string.end.ceramic" } }, + "patterns": [{ "include": "#string-escape" }] + } + ] + }, + + "string-escape": { + "name": "constant.character.escape.ceramic", + "match": "\\\\([nrtf\\\\\"'0$]|x[0-9A-Fa-f]{2})" + }, + + "character-literal": { + "name": "string.quoted.single.ceramic", + "match": "'([^'\\\\]|\\\\([nrtf\\\\\"'0$]|x[0-9A-Fa-f]{2}))'", + "captures": { + "2": { "name": "constant.character.escape.ceramic" } + } + }, + + "static-identifier": { + "patterns": [ + { + "comment": "Triple-quoted static identifier: #\"\"\"...\"\"\"", + "name": "constant.other.ceramic", + "begin": "#\"\"\"", + "end": "\"\"\"(?!\")", + "beginCaptures": { "0": { "name": "punctuation.definition.constant.ceramic" } }, + "endCaptures": { "0": { "name": "punctuation.definition.constant.ceramic" } }, + "patterns": [{ "include": "#string-escape" }] + }, + { + "comment": "Quoted static identifier: #\"...\"", + "name": "constant.other.ceramic", + "begin": "#\"", + "end": "\"", + "beginCaptures": { "0": { "name": "punctuation.definition.constant.ceramic" } }, + "endCaptures": { "0": { "name": "punctuation.definition.constant.ceramic" } }, + "patterns": [{ "include": "#string-escape" }] + }, + { + "comment": "Plain static identifier: #ident", + "name": "constant.other.ceramic", + "match": "#[A-Za-z_?][A-Za-z0-9_?]*" + } + ] + }, + + "attribute-list": { + "comment": "[[transparent]] and other [[...]] attribute lists on function/overload definitions", + "name": "meta.attribute.ceramic", + "begin": "\\[\\[", + "end": "\\]\\]", + "beginCaptures": { "0": { "name": "punctuation.definition.attribute.ceramic" } }, + "endCaptures": { "0": { "name": 
"punctuation.definition.attribute.ceramic" } }, + "patterns": [ + { + "name": "storage.modifier.attribute.ceramic", + "match": "\\b(transparent)\\b" + }, + { + "name": "entity.other.attribute-name.ceramic", + "match": "[A-Za-z_?][A-Za-z0-9_?]*" + } + ] + }, + + "numbers": { + "patterns": [ + { + "comment": "Hex float: 0x1.FFp-10", + "name": "constant.numeric.float.hex.ceramic", + "match": "[+-]?\\b0[xX][0-9A-Fa-f][0-9A-Fa-f_]*(\\.[0-9A-Fa-f_]*)?[pP][+-]?[0-9][0-9_]*(ss|uss|s|us|i|u|l|ul|ll|ull|ff?|fl|fll|f128|j|fj|ffj|lj|flj|fllj)?\\b" + }, + { + "comment": "Hex integer: 0x1A3F", + "name": "constant.numeric.integer.hex.ceramic", + "match": "[+-]?\\b0[xX][0-9A-Fa-f][0-9A-Fa-f_]*(ss|uss|s|us|i128|u128|i|u|l|ul|ll|ull)?\\b" + }, + { + "comment": "Decimal float: 1.0, 3e+10, 1.5e-3f", + "name": "constant.numeric.float.decimal.ceramic", + "match": "[+-]?\\b[0-9][0-9_]*((\\.([0-9][0-9_]*)?)([eE][+-]?[0-9][0-9_]*)?|[eE][+-]?[0-9][0-9_]*)(ff?|f128|l|j|fj|ffj|lj|f128j)?\\b" + }, + { + "comment": "Decimal integer: 123, 1_000u", + "name": "constant.numeric.integer.decimal.ceramic", + "match": "[+-]?\\b[0-9][0-9_]*(ss|uss|s|us|i128|u128|i|u|l|ul|ll|ull|ff?|f128)?\\b" + } + ] + }, + + "import-declaration": { + "comment": "[public|private] import some.module.path", + "match": "\\b((?:public|private)\\s+)?(import)\\b", + "captures": { + "1": { "name": "keyword.control.ceramic" }, + "2": { "name": "keyword.control.import.ceramic" } + } + }, + + "keywords": { + "patterns": [ + { + "name": "keyword.control.ceramic", + "match": "\\b(if|else|switch|case|for|while|goto|return|break|continue|try|catch|throw|onerror|finally|eval)\\b" + }, + { + "name": "keyword.other.ceramic", + "match": "\\b(define|overload|record|variant|instance|enum|external|inline|public|private|in)\\b" + } + ] + }, + + "operator-keywords": { + "name": "keyword.operator.word.ceramic", + "match": "\\b(and|or|not|as|rvalue|forward|ref|static)\\b" + }, + + "storage-keywords": { + "name": "storage.type.ceramic", + 
"match": "\\b(var|alias)\\b" + }, + + "builtin-types": { + "name": "support.type.ceramic", + "match": "\\b(Bool|Int8|Int16|Int32|Int64|Int128|UInt8|UInt16|UInt32|UInt64|UInt128|Float32|Float64|Float80|Float128|Int|UInt|Long|ULong|Short|UShort|Byte|UByte|Char|Float|Double|PtrInt|UPtrInt|SizeT|StringConstant|RawPointer|OpaquePointer|Pointer|CodePointer|RefCodePointer|CCodePointer|StdCallCodePointer|FastCallCodePointer|VarArgsCCodePointer|Array|Tuple|Union|Vec|Static|Void)\\b" + }, + + "boolean-literals": { + "name": "constant.language.boolean.ceramic", + "match": "\\b(true|false)\\b" + }, + + "debug-keywords": { + "name": "keyword.other.debug.ceramic", + "match": "\\b(observe|observeTo|observeCall|observeCallTo)\\b" + }, + + "special-identifiers": { + "name": "support.constant.ceramic", + "match": "\\b(__ARG__|__COLUMN__|__FILE__|__LINE__|__llvm__)\\b" + }, + + "goto-label": { + "comment": "labelName: at end of line", + "match": "^\\s*([A-Za-z_?][A-Za-z0-9_?]*)\\s*(?=:\\s*$)", + "captures": { + "1": { "name": "entity.name.label.ceramic" } + } + }, + + "lambda-operators": { + "name": "keyword.operator.lambda.ceramic", + "match": "=>|->" + }, + + "multi-value": { + "name": "keyword.operator.spread.ceramic", + "match": "\\.\\.\\.?" 
+ }, + + "type-definition-name": { + "comment": "Name after record / variant / instance / define", + "match": "\\b(record|variant|instance|define)\\s+([A-Za-z_?][A-Za-z0-9_?]*)", + "captures": { + "1": { "name": "keyword.other.ceramic" }, + "2": { "name": "entity.name.type.ceramic" } + } + }, + + "overload-name": { + "comment": "Function name after overload keyword", + "match": "\\b(overload)\\s+([A-Za-z_?][A-Za-z0-9_?]*)", + "captures": { + "1": { "name": "keyword.other.ceramic" }, + "2": { "name": "entity.name.function.ceramic" } + } + }, + + "external-name": { + "comment": "Function/variable name after external (skipping optional calling-convention parens)", + "match": "\\b(external)\\b(?:\\s*\\([^)]*\\))?\\s+([A-Za-z_?][A-Za-z0-9_?]*)", + "captures": { + "1": { "name": "keyword.other.ceramic" }, + "2": { "name": "entity.name.function.ceramic" } + } + }, + + "variable-binding": { + "comment": "Name introduced by var / ref / alias binding", + "match": "\\b(var|ref|alias)\\s+([A-Za-z_?][A-Za-z0-9_?]*)", + "captures": { + "1": { "name": "storage.type.ceramic" }, + "2": { "name": "variable.other.ceramic" } + } + }, + + "function-definition-name": { + "comment": "Top-level function definition name at start of line with optional visibility keyword", + "match": "^(?:(private|public)\\s+)?([A-Za-z_?][A-Za-z0-9_?]*)\\s*(?=\\()", + "captures": { + "1": { "name": "keyword.other.ceramic" }, + "2": { "name": "entity.name.function.ceramic" } + } + }, + + "type-annotation": { + "comment": ": after a parameter or binding name", + "match": "(?<=[A-Za-z0-9_?])\\s*(:)\\s*", + "captures": { + "1": { "name": "punctuation.separator.type-annotation.ceramic" } + } + }, + + "operators": { + "patterns": [ + { + "name": "keyword.operator.assignment.compound.ceramic", + "match": "[+\\-*/%&|^~]:" + }, + { + "name": "keyword.operator.comparison.ceramic", + "match": "==|!=|<=|>=|<|>" + }, + { + "name": "keyword.operator.assignment.ceramic", + "match": "(?<![=!<>])=(?![=>])" + }, + { + "name": 
"keyword.operator.arithmetic.ceramic", + "match": "[+\\-*/%]" + }, + { + "name": "keyword.operator.bitwise.ceramic", + "match": "[&|^~]" + }, + { + "name": "keyword.operator.dereference.ceramic", + "comment": "Postfix dereference ^ (not inside attribute lists)", + "match": "\\^" + } + ] + } + } +} diff --git a/doc/assets/favicon.svg b/doc/assets/favicon.svg new file mode 100644 index 000000000..b086f71c1 --- /dev/null +++ b/doc/assets/favicon.svg @@ -0,0 +1,6 @@ + + + + diff --git a/doc/index.md b/doc/index.md index a48815714..a7d37d2a8 100644 --- a/doc/index.md +++ b/doc/index.md @@ -4,8 +4,8 @@ Ceramic is a programming language based on Clay designed for Generic Programming ## Documentation -- [Language Reference](language-reference.md): syntax, types, statements, expressions -- [Primitives Reference](primitives-reference.md): the `__primitives__` module +- [Language Reference](language-reference/index.md): syntax, types, statements, expressions +- [Primitives Reference](primitives-reference/index.md): the `__primitives__` module ## License diff --git a/doc/js/ceramic-highlight.js b/doc/js/ceramic-highlight.js new file mode 100644 index 000000000..a2e56d59c --- /dev/null +++ b/doc/js/ceramic-highlight.js @@ -0,0 +1,61 @@ +// Highlights Ceramic code blocks on the page with Shiki, using the grammar and light theme fetched from ../assets/. +const base = new URL("../assets/", import.meta.url).href; + +const [grammar, themeLight] = await Promise.all([ + fetch(new URL("ceramic.tmLanguage.json", base)).then((r) => { + if (!r.ok) throw new Error(`grammar HTTP ${r.status}`); + return r.json(); + }), + fetch(new URL("ceramic-light.json", base)).then((r) => { + if (!r.ok) throw new Error(`theme-light HTTP ${r.status}`); + return r.json(); + }), +]); + +grammar.name = "ceramic"; + +const shiki = await import("https://esm.sh/shiki@1.22.0"); + +const highlighter = await shiki.createHighlighter({ + themes: [themeLight], + langs: [grammar], +}); + +// Coalesce repeated apply() calls into a single animation-frame pass. +let pending = false; + +const apply = () => { + if (pending) return; + pending = true; + requestAnimationFrame(() => { + pending = false; + + 
const candidates = [ + ...document.querySelectorAll("pre > code.language-ceramic"), + ...document.querySelectorAll("pre[data-shiki-raw] > code"), + ]; + + for (const code of candidates) { + const pre = code.parentElement; + // Skip blocks already highlighted with the current theme. + if (pre.dataset.shikiTheme === themeLight.name) continue; + const raw = pre.dataset.shikiRaw || code.textContent; + const html = highlighter.codeToHtml(raw, { + lang: "ceramic", + theme: themeLight.name, + }); + const temp = document.createElement("div"); + temp.innerHTML = html; + const newPre = temp.querySelector("pre"); + newPre.dataset.shikiTheme = themeLight.name; + newPre.dataset.shikiRaw = raw; + pre.replaceWith(newPre); + } + }); +}; + +if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", apply); +} else { + apply(); +} diff --git a/doc/language-reference/compilation.md b/doc/language-reference/compilation.md new file mode 100644 index 000000000..1ee06c28d --- /dev/null +++ b/doc/language-reference/compilation.md @@ -0,0 +1,51 @@ +# Compilation Strategy + +Ceramic uses **whole-program compilation**. Starting from an entry-point source file, the compiler: + +1. Loads all imported modules recursively. +2. Populates each module's namespace, enabling free forward and circular references. +3. Establishes entry points: a public `main` symbol and any `external` function definitions. +4. Compiles only definitions reachable from those entry points. Unreachable definitions are never visited after parsing. + +### Entry Points + +If the entry-point module contains a public symbol named `main`, it is passed to the `callMain` operator function. `callMain` is responsible for calling `main` with its command-line arguments. The instantiated `callMain(static main)` becomes the program's entry point and corresponds to the C ABI `main` symbol. + +For a `main` entry point, the `setArgcArgv(argc:Int32, argv:Pointer[Pointer[Int8]])` operator function is also instantiated. 
It is called with the `argc` and `argv` parameters from the C `main` function before `callMain` runs. + +`external` function definitions also become entry points and are emitted with C linkage. + +### Compile-Time Evaluation + +The compiler includes an evaluator that runs certain things at compile time: + +- Pattern guard predicates +- Parameters of parameterized symbols +- Operands of `static`, `eval` statements, and `eval` expressions +- Declared return types in function definitions +- Declared instance types in variant definitions +- Computed record layouts + +The evaluator matches runtime semantics with these restrictions. It cannot call `external` functions or `__llvm__` functions. It does not support exception handling and always behaves as if exceptions are disabled. It does not call `destroy`. It does not initialize global variables. + +### Pattern Matching + +Ceramic uses unification-based pattern matching to select overloads and bind variant instances. A pattern may be a literal, a named symbol, a free pattern variable, or a symbol with a parameter suffix. The match fails if structure doesn't match, or if a pattern variable is bound to two different values. + +```ceramic +define foo; +define bar; + +define pattern; +[T] +overload pattern(static T) { println("a"); } +overload pattern(static bar) { println("b"); } + +testPattern() { + pattern(foo); // prints a + pattern(bar); // prints b +} +``` +Multiple-value patterns may end with `..name`: a variadic variable that greedily matches all remaining values. 
+ +--- diff --git a/doc/language-reference/expressions.md b/doc/language-reference/expressions.md new file mode 100644 index 000000000..734c0101e --- /dev/null +++ b/doc/language-reference/expressions.md @@ -0,0 +1,298 @@ +# Expressions + +Ceramic's expression hierarchy, from highest to lowest precedence: + +| Level | Forms | Operator functions | +|-------|-------|--------------------| +| Atomic | names, literals, `()`, `[]`, `__FILE__` etc., `eval` | (none) | +| Suffix | `a(b)` `a[b]` `a.0` `a.field` `a^` | `call` `index` `staticIndex` `fieldRef` `dereference` | +| Prefix | `+a` `-a` `&a` `*a` | `plus` `minus`, address and dispatch are primitive | +| Multiplicative | `a*b` `a/b` `a%b` | `multiply` `divide` `remainder` | +| Additive | `a+b` `a-b` | `add` `subtract` | +| Ordered comparison | `<=` `<` `>` `>=` | `lesserEquals?` `lesser?` `greater?` `greaterEquals?` | +| Equality | `==` `!=` | `equals?` `notEquals?` | +| Boolean | `not a` `a and b` `a or b` | primitive, not overloadable | +| Low-precedence prefix | `if (a) b else c`, `name: a`, `static a`, `..a`, `a -> b` | (none) | +| Multiple value | `a, b, c` | (none) | + +### Atomic Expressions + +#### Name References + +A bare identifier evaluates to the named local or global entity in the current scope. An error is raised if no match is found. 
+ +```ceramic +import a; +import a.(b); +var c = 0; + +foo(d) { + var e = 0; + println(a, b, c, d, e); +} +``` +Names bound to multiple values (variadic variables, variadic arguments) must be referenced with the `..` unpack operator: + +```ceramic +[..TT] +foo(..xs:TT) { + println(..xs, " have the types ", ..TT); +} +``` + +#### Literal Expressions + +| Literal | Default type | Type suffix examples | +|---------|-------------|----------------------| +| `true` / `false` | `Bool` | (none) | +| `1`, `0xFF` | `Int32` (or module default) | `ss` `s` `i` `l` `ll` `uss` `us` `u` `ul` `ull` | +| `1.0`, `1e2` | `Float64` (or module default) | `f` `ff` `fl` `fj` `j` `ffj` `flj` | +| `'x'` | via `Char` operator | (none) | +| `"hello"` | via `StringConstant` operator | (none) | +| `#foo`, `#"foo"` | `Static[#foo]` | (none) | + +```ceramic +println(Type(1)); // Int32 +println(Type(-1ss)); // Int8 +println(Type(+1ul)); // UInt64 +println(Type(1.0f)); // Float32 +println(Type(1.j)); // Imag64 +``` +Floating-point type suffixes may be applied to integer literal tokens to produce a float of that type. Integer suffixes may not be applied to floating-point literal tokens. + +#### Parentheses + +`(expr)` overrides precedence. It has no other effect. + +#### Tuple Expressions + +`[a, b, c]` constructs a tuple by calling the `tupleLiteral` operator function. + +#### Compilation Context Operators + +These are only valid inside **alias functions**: + +| Operator | Evaluates to | +|----------|-------------| +| `__FILE__` | Static string: the source file of the call site | +| `__LINE__` | `Int32`: the source line | +| `__COLUMN__` | `Int32`: the source column | +| `__ARG__ name` | Static string. 
Textual representation of argument `name`, not evaluated | + +```ceramic +alias assert(cond:Bool) { + if (not cond) { + println(stderr, "Assertion \"", __ARG__ cond, "\" failed at ", + __FILE__, ":", __LINE__, ":", __COLUMN__); + flush(stderr); + abort(); + } +} +``` + +#### Eval Expressions + +`eval expr` evaluates a compile-time expression to a static string, parses it as an expression, and substitutes the result in place. The generated string must be a complete, parsable expression. + +```ceramic +println(eval #""" "hello world" """); +``` + +### Suffix Operators + +#### Call (`a(b, c)`) + +If `a` is a symbol, argument types are matched to its overloads and the matching one is called. If `a` is a `CodePointer`, the pointed-to function is invoked. Otherwise, the call desugars to `call(a, b, c)`. + +Lambda expressions can be passed as trailing arguments with `:` / `::` separators: + +```ceramic +ifZero(rand()): () -> { + println("Reply hazy; try again") +} :: x -> { + println("Lucky number: ", x); +} +``` +If any argument is prefixed with `*`, the call becomes a [dynamic dispatch](#dispatch-a) on a variant type. + +#### Index (`a[b, c]`) + +Desugars to `index(a, b, c)`. If `a` is a parameterized symbol, the operation is primitive: the symbol is instantiated for compile-time parameters. + +```ceramic +var xs = Array[Int, 3](0, 111, 222); +println(xs[2]); // → index(xs, 2) +``` + +#### Static Index (`a.0`) + +Desugars to `staticIndex(a, static 0)`. Used for positional tuple field access. + +```ceramic +var x = ["hello", "cruel", "world"]; +println(x.0, ' ', x.2); +``` + +#### Field Reference (`a.field`) + +Desugars to `fieldRef(a, #"field")`. Used for named field access on records. Overloading `fieldRef` lets you add custom accessors. + +If `a` is an imported module name, the operation is primitive and looks up the name directly in that module's namespace. 
+ +```ceramic +import foo; +foo.bar(); // module field reference; primitive + +var p = Point(array(1.0, 2.0)); +println(p.x, p.y); // fieldRef(p, #"x"), fieldRef(p, #"y") + +// Custom swizzle accessors: +overload fieldRef(p:Point, static #"xy") = ref p.coords[0], p.coords[1]; +``` + +#### Dereference (`a^`) + +Desugars to `dereference(a)`. Used to get a reference to the value behind a pointer. + +### Prefix Operators + +| Operator | Behavior | +|----------|----------| +| `+a` | desugars to `plus(a)` | +| `-a` | desugars to `minus(a)` | +| `&a` | primitive: returns `Pointer[T]` to `a`, which must be an lvalue. Not overloadable | +| `*a` | dispatch operator. Only valid as an argument to a call expression | + +#### Dispatch (`*a`) + +Transforms a call into dynamic dispatch on a variant type. Each instance type of the dispatched argument has an overload looked up and compiled into a dispatch table. All overloads must have matching return types and `ref`-ness. + +```ceramic +variant Shape (Circle, Square); + +overload draw(s:Circle) { println("()"); } +overload draw(s:Square) { println("[]"); } + +drawShapes(ss:Vector[Shape]) { + for (s in ss) + draw(*s); // dispatches over Circle and Square +} +``` + +### Arithmetic Operators + +| Operator | Desugars to | +|----------|-------------| +| `a * b` | `multiply(a, b)` | +| `a / b` | `divide(a, b)` | +| `a % b` | `remainder(a, b)` | +| `a + b` | `add(a, b)` | +| `a - b` | `subtract(a, b)` | + +All arithmetic operators are left-associative within their precedence group. + +### Comparison Operators + +| Operator | Desugars to | +|----------|-------------| +| `a <= b` | `lesserEquals?(a, b)` | +| `a < b` | `lesser?(a, b)` | +| `a > b` | `greater?(a, b)` | +| `a >= b` | `greaterEquals?(a, b)` | +| `a == b` | `equals?(a, b)` | +| `a != b` | `notEquals?(a, b)` | + +All comparison operators are left-associative within their precedence group. 
+ +### Boolean Operators + +| Operator | Behavior | +|----------|----------| +| `not a` | Complement. `a` must be `Bool`. Not overloadable | +| `a and b` | Short-circuit conjunction. Right-associative | +| `a or b` | Short-circuit disjunction. Right-associative | + +Both `and` and `or` require `Bool` operands and are not overloadable. + +### Low-Precedence Prefix Operators + +#### If Expressions + +```ceramic +if (condition) thenExpr else elseExpr +``` +Both branches must have the same type. Unlike `if` statements, the `else` clause is required. + +#### Keyword Pair Expressions + +`name: expr` is sugar for `[#"name", expr]`: a tuple with a static string key. Useful for named-parameter style arguments to higher-order functions. + +#### Static Expressions + +`static expr` evaluates `expr` at compile time and wraps the result in `Static[result]`. Used to pass compile-time values to [static arguments](functions.md#static-arguments). Applied to a symbol or static string, it is a no-op. + +```ceramic +log(static LOG, "starting program"); +``` + +#### Unpack (`..a`) + +Evaluates `a` in multiple-value context and interpolates its values into the surrounding expression list. + +```ceramic +twoThroughFour() = 2, 3, 4; +oneThroughFive() = 1, ..twoThroughFour(), 5; +``` + +#### Lambda Expressions + +An anonymous function: argument list, arrow, body. + +```ceramic +var squares = mapped(x -> x*x, range(10)); +``` +Two capture modes: + +- **`->`**: captures by reference. Mutations are visible outside the lambda. The lambda must not outlive its enclosing scope. +- **`=>`**: captures by copying. The lambda is independent of its origin scope. + +```ceramic +// by reference; sum accumulates outside +var sum = 0; +var squares = mapped(x -> { var sq = x*x; sum += sq; return sq; }, range(10)); + +// by value; closure is self-contained +curriedAdd(x) = y => x + y; +var plus3 = curriedAdd(3); +``` +A lambda with a single untyped argument may omit parentheses: `x -> x*x`. 
A lambda that does not capture is equivalent to an anonymous named function. + +Lambda has higher precedence than the multi-value comma. `a -> b, c` parses as `(a -> b), c`. To return multiple values from a lambda, use a block body or explicit parentheses: `a -> (b, c)`. + +### Multiple Value Expressions + +Most Ceramic functions can return multiple values. The comma operator builds a multiple-value list: + +```ceramic +twoThroughFour() = 2, 3, 4; +``` +Expressions are normally constrained to a **single value**. To use a multiple-value expression inside another expression, unpack it with `..`: + +```ceramic +oneThroughFive() = 1, ..twoThroughFour(), 5; // ok +oneThroughFive() = 1, twoThroughFour(), 5; // ERROR +``` +The following contexts provide **implicit** multiple-value context at the outermost level. No `..` is needed there: + +- Expression statements +- Local variable bindings with multiple variables +- Assignment with multiple left-hand values +- `..for` value lists + +Within a concatenating expression, sub-expressions still require explicit `..`: + +```ceramic +var a, b, c, d = 0, ..oneTwoThree(); // .. required inside concat +..for (i in ..oneTwoThree(), ..fourFiveSix()) + println(i); +``` diff --git a/doc/language-reference/functions.md b/doc/language-reference/functions.md new file mode 100644 index 000000000..27ed27e20 --- /dev/null +++ b/doc/language-reference/functions.md @@ -0,0 +1,493 @@ +# Function Definitions + +Ceramic functions are inherently generic. They can be parameterized over types or compile-time values, and overloaded to provide multiple implementations of a common interface. A runtime function is instantiated for every distinct set of input types with which it is called. + +### Simple Function Definitions + +A name, argument list, optional return types, and a body. Return types are inferred from the body if not declared. 
+ +```ceramic +hello() { println(helloString()); } +private helloString() = "Hello World"; + +squareInt(x:Int) : Int = x*x; + +[T] +square(x:T) : T = x*x; + +[T | Float?(T)] +quadraticRoots(a:T, b:T, c:T) : T, T { + var q = -0.5*(b + signum(b)*sqrt(b*b - 4.*a*c)); + return q/a, c/q; +} +``` +Simple definitions always **create a new symbol**. Defining the same name twice is an error. Use `overload` instead. + +```ceramic +abs(x:Int) = if (x < 0) -x else x; +abs(x:Float) = ...; // ERROR: abs is already defined +``` + +### Overloaded Function Definitions + +`define` creates a symbol with no initial implementation. `overload` adds implementations to an existing symbol. + +```ceramic +define abs; +overload abs(x:Int) = if (x < 0) -x else x; +overload abs(x:Float) = if (x < 0.) -x else if (x == 0.) 0. else x; +``` +`define` may also declare an **interface constraint**: all overloads must conform to it: + +```ceramic +[T | Numeric?(T)] +define abs(x:T) : T; + +[T | Integer?(T)] +overload abs(x:T) = if (x < 0) -x else x; + +overload abs(x:String) { ... 
} // ERROR: Numeric?(String) is false +``` +Overloading a **type name** is the idiomatic way to define constructors: + +```ceramic +record LatLong (latitude:Float64, longitude:Float64); +record Address (street:String, city:String, state:String, zip:String); + +overload Address(coords:LatLong) = geocode(coords); +``` +Overloads bind by pattern matching and can target parameterized types selectively: + +```ceramic +record Point[T] (x:T, y:T); + +[T | Float?(T)] +overload Point[T]() = Point[T](nan(T), nan(T)); // float default: NaN sentinel +overload Point[Int]() = Point[Int](-0x8000_0000, 0x7FFF_FFFF); + +overload Point() = Point[Float](); // base name: give them a Float point +``` +A simple function definition is shorthand for `define` + `overload`: + +```ceramic +double(x) = x+x; +// is exactly: +define double; +overload double(x) = x+x; +``` + +#### Overload Ordering + +Within a module, overloads are matched in **reverse definition order**. The last definition wins. Across modules, importing modules' overloads are visited before imported modules' (depth-first). Circular-dependency order is undefined. + +#### Universal Overloads + +The overloaded name may itself be a pattern variable, matching any call site not already handled by a more specific overload: + +```ceramic +// Delegate any call with a MyInt argument to its underlying Int value +[F] +overload F(x:MyInt) = ..F(x.value); + +// Default zero-constructor for any Numeric? type +[T | Numeric?(T)] +overload T() = T(0); +``` +When the function position of a call is not a symbol, the call desugars to the `call` operator: + +```ceramic +record MyCallable (); +overload call(f:MyCallable, x:Int, y:Int) : Int = x + y; + +main() { + var f = MyCallable(); + println(f(1, 2)); // really: call(f, 1, 2) +} +``` + +### Arguments + +Arguments are a parenthesized list of names with optional type patterns. An argument without a type matches any type. 
+ +```ceramic +[T] +double(x:T) = x+x; // explicit pattern variable + +double(x) = x+x; // same, with implicit unbounded variable +``` +Arguments are passed **by reference**: mutations inside the function are visible to the caller: + +```ceramic +inc(x:Int) { x += 1; } + +main() { + var x = 2; + inc(x); + println(x); // 3 +} +``` + +#### Variadic Arguments + +A final argument prefixed with `..` matches all remaining arguments at the call site: + +```ceramic +printlnTimes(n:Int, ..stuff) { + for (i in range(n)) + println(..stuff); +} + +main(args) { + printlnTimes(3, "She loves you ", "yeah yeah yeah"); +} +``` +A type pattern on the variadic argument binds the types of all matched values to a variadic pattern variable: + +```ceramic +[..TT | allValues?(String?, ..TT)] +printlnTimes(n:Int, ..stuff:TT) { ... } + +[..In, ..Out] +overload call(f:CodePointer[[..In], [..Out]], ..in:In) : ..Out { + return ..f(..in); +} +``` + +#### Reference Qualifiers + +Ceramic distinguishes **lvalues** (values with a referenceable identity: variables, pointer dereferences, reference returns) from **rvalues** (unnamed temporaries that exist only for a single expression). 
+ +An argument may be qualified to accept only one kind: + +| Qualifier | Accepts | Typical use | +|-----------|---------|-------------| +| (none) | lvalue or rvalue, bound as lvalue inside the function | general | +| `ref` | lvalue only | returning a reference into the argument | +| `rvalue` | rvalue only | move optimization, steal resources from a temporary | +| `forward` | either, preserves the caller's lvalue/rvalue-ness | perfect forwarding | + +```ceramic +// rvalue: steal the string's buffer instead of copying +foo(rvalue x:String) { + return move(x) + " world"; +} + +// ref: return a reference into x; dangerous if x is a temporary +bar(ref x:String) { + return sliced(x, 0, 5); +} + +// forward: pass rvalue-ness through to the next call +baz(forward x:Int) { + foo(x); // ok if x was originally an rvalue at the call site +} +``` +Inside a function body, an argument has a name and is therefore an lvalue, even if the caller passed an rvalue. To carry rvalue-ness through to another call, use `forward` qualification. + +`ref`, `rvalue`, and `forward` can all be applied to variadic argument names: + +```ceramic +trace(f, forward ..args) { + println("enter ", f); + finally println("exit ", f); + return forward ..f(..args); +} +``` + +#### Static Arguments + +`static` arguments match values computed at compile time. The `static` keyword at the call site evaluates an expression at compile time and passes the result as the argument. + +```ceramic +define beetlejuice; + +[n] +overload beetlejuice(static n) { + for (i in range(n)) + println("Beetlejuice!"); +} + +// Unrolled specialization for the common case +overload beetlejuice(static 3) { + println("Beetlejuice!"); + println("Beetlejuice!"); + println("Beetlejuice!"); +} + +main() { + beetlejuice(static 3); +} +``` +Symbols and static strings are inherently static and match `static` arguments without an explicit `static` at the call site. 
+ +`static T` is syntactic sugar for an unnamed argument of primitive type `Static[T]`. + +### Return Types + +Declare return types with `:` after the argument list. The expression may reference pattern variables from the arguments. + +```ceramic +double(x:Int) : Int = x + x; + +[T] +diagonal(x:T) : Point[T] = Point[T](x, x); + +[T | Integer?(T)] +safeDouble(x:T) : NextLargerInt(T) { + alias NextT = NextLargerInt(T); + return NextT(x) + NextT(x); +} +``` +Without a declared return type, types are inferred from the body. An empty declaration means "returns nothing": + +```ceramic +foo() { } // inferred: no return values +foo() : { } // explicit: no return values +foo() : () { } // also explicit +``` + +#### Named Return Values + +For cases where constructing a return value all at once is inefficient or impossible, bind names directly to the uninitialized return storage and fill them in piecemeal using `<--`. + +```ceramic +record SOAPoint (xs:Vector[Float], ys:Vector[Float]); + +overload SOAPoint(size:SizeT) --> returned:SOAPoint +{ + returned.xs <-- Vector[Float](); + onerror destroy(returned.xs); + resize(returned.xs, size); + + returned.ys <-- Vector[Float](); + onerror destroy(returned.ys); + resize(returned.ys, size); +} +``` +Named return values are inherently unsafe. They start uninitialized. Any operation other than `<--` before initialization is undefined behavior. They are **not** automatically destroyed during exception unwinding. Use `onerror` to handle cleanup explicitly. + +A variadic named return may be declared with `..`, in which case its type expression evaluates as a multiple value expression. 
+ +### Function Body + +A function body is one of three forms: + +```ceramic +// Block: the general form +demBones(a, b) { + println(a, " bone's connected to the ", b, " bone"); +} + +// Expression shorthand; exactly equivalent to a block with a single return +square(x) = x*x; + +// Inline LLVM assembly +overload add(x:Int32, y:Int32) --> sum:Int32 __llvm__ { + ... +} +``` + +#### Inline LLVM Functions + +A function may be implemented directly in LLVM IR with an `__llvm__` block. Arguments and named return values are available as LLVM pointers (e.g., `x:Int32` → `i32* %x`). All exit paths must end with `ret i8* null`. + +```ceramic +overload add(x:Int32, y:Int32) --> sum:Int32 __llvm__ { + %xv = load i32* %x + %yv = load i32* %y + %sumv = add i32 %xv, %yv + store i32 %sumv, i32* %sum + ret i8* null +} +``` +Ceramic static values can be interpolated with `$Name` or `${Expression}`: + +- Symbols → their LLVM type +- Static strings → literal text +- Static integer/float/bool → LLVM numeric literal + +```ceramic +[T | Integer?(T)] +overload add(x:T, y:T) --> sum:T __llvm__ { + %xv = load $T* %x + %yv = load $T* %y + %sumv = add $T %xv, %yv + store $T %sumv, $T* %sum + ret i8* null +} +``` +Any LLVM intrinsics or globals referenced must be declared in a [top-level LLVM block](modules.md#top-level-llvm). Inline LLVM functions cannot be evaluated at compile time. + +### Inline and Alias Qualifiers + +Any function or overload definition may be preceded by `inline` or `alias`. + +- **`inline`**: the function is always compiled directly into its call site. If inlining is impossible (e.g., a recursive function), it is a compile-time error. This is a hard guarantee, not a hint like C's `inline`. + +- **`alias`**: arguments are received unevaluated and re-evaluated in the caller's scope each time they are used inside the function. Equivalent to a hygienic, precedence-safe C preprocessor macro. 
Alias functions can query their call site's source location via `__FILE__`, `__LINE__`, `__COLUMN__`, and `__ARG__`. + +```ceramic +Debug?() = false; + +define assert(x:Bool); + +[| not Debug?()] +alias overload assert(x:Bool) { } + +[| Debug?()] +alias overload assert(x:Bool) { + if (not x) { + printlnTo(stderr, __FILE__, ":", __LINE__, ": assertion failed!"); + abort(); + } +} +``` + +### Diagnostic Attributes + +An attribute list `[[...]]` may appear between the pattern guard and any `inline`/`alias` qualifier. Unknown attributes produce a warning, not an error. + +Currently recognized attribute: + +- **`transparent`**: marks the function as a pure forwarder. When the compiler locates the source of an error, it skips transparent stack frames and attributes the error to the first non-transparent caller. Only apply this to functions whose body is a single forwarding expression. + +### External Functions + +External functions bridge Ceramic with code outside the compilation unit. + +A declaration **without** a body declares a C symbol for Ceramic to call: + +```ceramic +external puts(s:Pointer[Int8]) : Int; +external printf(fmt:Pointer[Int8], ..) : Int; // variadic C + +main() { + puts(cstring("Hello world!")); + printf(cstring("1 + 1 = %d"), 1 + 1); +} +``` +A declaration **with** a body gives a Ceramic function C linkage, making it callable from C: + +```ceramic +// square.crm +external square(x:Float64) : Float64 = x*x; +``` +```c +// square.c +double square(double x); +int main() { printf("%g\n", square(2.0)); } +``` +Limitations: + +- Cannot be generic. Types must be fully specified. No pattern guards. No overloading. +- May return zero or one value only. +- Ceramic exceptions cannot propagate across an external boundary. Unhandled exceptions call `unhandledExceptionInExternal`. +- Types with nontrivial `copy` or `destroy` overloads must be passed by pointer. +- Cannot be called at compile time. + +External function names are not true symbols. 
They evaluate directly to a `CCodePointer` value. + +#### External Attributes + +An optional parenthesized attribute list after the `external` keyword sets properties on the function. A string value overrides the linkage name: + +```ceramic +external ("_start") start() { + var greeting = "hello world"; + write(STDOUT_FILENO, cstring(greeting), size(greeting)); +} +``` +Calling convention attributes (from `__primitives__`): + +| Attribute | Convention | +|-----------|-----------| +| `AttributeCCall` | Default C | +| `AttributeLLVMCall` | Native LLVM (for intrinsics and other LLVM-based languages) | +| `AttributeStdCall` | x86 stdcall (Windows) | +| `AttributeFastCall` | x86 fastcall (Windows) | +| `AttributeThisCall` | x86 thiscall (Windows) | +--- + +## Global Value Definitions + +Ceramic supports global mutable state initialized before `main()` runs. + +### Global Aliases + +Global aliases define a name that expands to an expression **on demand**, without allocating any storage. + +```ceramic +alias PI = 3.14159265358979323846264338327950288; + +degreesToRadians(deg:Double) : Double = (PI/180.) * deg; +``` +Aliases may be parameterized (pattern guard optional when no predicate is needed): + +```ceramic +[T | Float?(T)] +alias PI[T] = T(3.14159265358979323846264338327950288); + +alias ZERO[T] = T(0); // [T] implied +``` +Global alias names are not true symbols. They evaluate directly to the bound expression. + +### Global Variables + +Global variables are initialized at runtime before `main()`, in dependency order. 
+ +```ceramic +var msg = noisyString(); + +noisyString() { + println("Initializing..."); + return String(); +} + +a() { push(msg, "Hello "); } +b() { push(msg, "world!"); } + +main() { a(); b(); println(msg); } +``` +Initialization order is determined by dependencies: + +```ceramic +var a = c + 1; // runs second +var b = a + c; // runs third +var c = 0; // runs first +var d = abc(); // runs fourth +``` +Circular initialization dependencies are compile-time errors. A global variable that is never referenced by runtime-executed code is never instantiated. Do not rely on its side effects. + +Global variables are destroyed in **reverse initialization order** after `main()` returns. If destruction throws, `exceptionInFinalizer` is called. + +Parameterized global variables are supported: + +```ceramic +private var TAG_COUNTER = 0; + +[T] +private var ANY_TAG[T] = nextTagCounter(); +``` +Global variable names are not true symbols. They evaluate to a reference to the variable's storage. + +Runtime access is subject to the C11/C++11 memory model. See the *Primitives Reference* for atomic operations. + +### External Variables + +C `extern` variables can be linked with external variable definitions. Like external functions, they cannot be parameterized. + +```ceramic +external errno : Int; + +main() { + if (null?(fopen(cstring("hello.txt"), cstring("r")))) + println("error code ", errno); +} +``` +A string attribute overrides the linkage name: + +```ceramic +external ("____errno$OBSCURECOMPATIBILITYTAG") errno : Int; +``` +Ceramic-defined variables with external linkage are currently unsupported. External variables cannot be evaluated at compile time. +*** diff --git a/doc/language-reference/grammar.md b/doc/language-reference/grammar.md new file mode 100644 index 000000000..419061271 --- /dev/null +++ b/doc/language-reference/grammar.md @@ -0,0 +1,394 @@ +# Grammar Reference + +The complete BNF grammar for Ceramic, organized by chapter. 
+ +- Regular expressions use `/slashes/` with Perl `/x` syntax (whitespace inside is insignificant). +- Literal strings are written in `"quotation marks"`. + +## Tokenization + +### Whitespace + +[→ context in tokenization.md](tokenization.md#whitespace) + +```text +ws -> /[ \t\r\n\f]+/ +``` + +### Comments + +[→ context in tokenization.md](tokenization.md#comments) + +```text +Comment -> "/*" /.*?/ "*/" + | "//" /.*$/ +``` + +### Identifiers + +[→ context in tokenization.md](tokenization.md#identifiers) + +```text +Identifier -> !Keyword, /[A-Za-z_?][A-Za-z_0-9?]*/ +``` + +### Integer Literals + +[→ context in tokenization.md](tokenization.md#integer-literals) + +```text +IntToken -> "0x" HexDigits | DecimalDigits +HexDigits -> /([0-9A-Fa-f]_*)+/ +DecimalDigits -> /([0-9]_*)+/ +``` + +### Floating-Point Literals + +[→ context in tokenization.md](tokenization.md#floating-point-literals) + +```text +FloatToken -> "0x" HexDigits ("." HexDigits?)? /[pP] [+-]?/ DecimalDigits + | DecimalDigits ("." DecimalDigits?)? (/[eE] [+-]?/ DecimalDigits)? +``` + +### Character Literals + +[→ context in tokenization.md](tokenization.md#character-literals) + +```text +CharToken -> "'" CharChar "'" +CharChar -> /[^\\']/ + | EscapeCode +EscapeCode -> /\\ ([nrtf\\'"0] | x [0-9A-Fa-f]{2})/ +``` + +### String Literals + +[→ context in tokenization.md](tokenization.md#string-literals) + +```text +StringToken -> "\"" StringChar* "\"" + | "\"\"\"" TripleStringChar* "\"\"\"" +StringChar -> /[^\\"]/ | EscapeCode +TripleStringChar -> /(?!""" ([^"]|$)) [^\\]/ | EscapeCode +``` + +## Compilation Strategy + +### Pattern Matching + +[→ context in compilation.md](compilation.md#pattern-matching) + +```text +Pattern -> AtomicPattern PatternSuffix? 
+AtomicPattern -> Literal | PatternNameRef +PatternNameRef -> DottedName +PatternSuffix -> "[" comma_list(Pattern) "]" +``` + +## Modules & Source Layout + +### Source File Layout + +[→ context in modules.md](modules.md#source-file-layout) + +```text +Module -> Import* ModuleDeclaration? TopLevelLLVM? TopLevelItem* +``` + +### List Syntactic Forms + +[→ context in modules.md](modules.md#list-syntactic-forms) + +```text +comma_list(Rule) -> (Rule ("," Rule)* ","?)? + +variadic_list(Rule, LastRule) -> Rule ("," Rule)* ("," (LastRule)?)? + | LastRule + | nil +``` + +### Conflict Resolution + +[→ context in modules.md](modules.md#conflict-resolution) + +```text +Import -> Visibility? "import" DottedName ImportSpec? ";" +ImportSpec -> "as" Identifier + | "." "(" comma_list(ImportedItem) ")" + | "." "*" +DottedName -> Identifier ("." Identifier)* +ImportedItem -> Visibility? Identifier ("as" Identifier)? +``` + +### Module Declaration + +[→ context in modules.md](modules.md#module-declaration) + +```text +ModuleDeclaration -> "in" DottedName AttributeList? ";" +AttributeList -> "(" ExprList ")" +``` + +### Top-Level LLVM + +[→ context in modules.md](modules.md#top-level-llvm) + +```text +TopLevelLLVM -> LLVMBlock +LLVMBlock -> "__llvm__" "{" /.*/ "}" +``` + +### Pattern Guards + +[→ context in modules.md](modules.md#pattern-guards) + +```text +PatternGuard -> "[" comma_list(PatternVar) ("|" Expression)? "]" +PatternVar -> Identifier | ".." Identifier +``` + +### Visibility Modifiers + +[→ context in modules.md](modules.md#visibility-modifiers) + +```text +Visibility -> "public" | "private" +``` + +## Type Definitions + +### Computed Layouts + +[→ context in types.md](types.md#computed-layouts) + +```text +Record -> PatternGuard? Visibility? "record" TypeDefinitionName RecordBody +TypeDefinitionName -> Identifier PatternVars? 
+PatternVars -> "[" comma_list(PatternVar) "]" +NormalRecordBody -> "(" comma_list(RecordField) ")" ";" +ComputedRecordBody -> "=" comma_list(Expression) ";" +RecordField -> Identifier TypeSpec +TypeSpec -> ":" Pattern +``` + +### Extending Variants + +[→ context in types.md](types.md#extending-variants) + +```text +Variant -> PatternGuard? Visibility? "variant" TypeDefinitionName ("(" ExprList ")")? ";" +Instance -> PatternGuard? "instance" Pattern "(" ExprList ")" ";" +``` + +### Enumerations + +[→ context in types.md](types.md#enumerations) + +```text +Enumeration -> Visibility? "enum" Identifier "(" comma_list(Identifier) ")" ";" +``` + +## Function Definitions + +### Simple Function Definitions + +[→ context in functions.md](functions.md#simple-function-definitions) + +```text +Function -> PatternGuard? Visibility? CodegenAttribute? + Identifier Arguments ReturnSpec? FunctionBody +``` + +### Universal Overloads + +[→ context in functions.md](functions.md#universal-overloads) + +```text +Define -> PatternGuard? "define" Identifier (Arguments ReturnSpec?)? ";" +Overload -> PatternGuard? CodegenAttribute? "overload" + Pattern Arguments ReturnSpec? FunctionBody +``` + +### Static Arguments + +[→ context in functions.md](functions.md#static-arguments) + +```text +Arguments -> "(" ArgumentList ")" +ArgumentList -> variadic_list(Argument, VarArgument) +Argument -> NamedArgument | StaticArgument +NamedArgument -> ReferenceQualifier? Identifier TypeSpec? +VarArgument -> ReferenceQualifier? ".." Identifier TypeSpec? +StaticArgument -> "static" Pattern +ReferenceQualifier -> "ref" | "rvalue" | "forward" +``` + +### Named Return Values + +[→ context in functions.md](functions.md#named-return-values) + +```text +ReturnSpec -> ReturnTypeSpec | NamedReturnSpec +ReturnTypeSpec -> ":" ExprList +NamedReturnSpec -> "-->" comma_list(NamedReturn) +NamedReturn -> ".."? 
Identifier ":" Expression +``` + +### Inline LLVM Functions + +[→ context in functions.md](functions.md#inline-llvm-functions) + +```text +FunctionBody -> Block | "=" ReturnExpression ";" | LLVMBlock +LLVMBlock -> "__llvm__" "{" /.*/ "}" +``` + +### Diagnostic Attributes + +[→ context in functions.md](functions.md#diagnostic-attributes) + +```text +Attributes -> "[[" Identifier ("," Identifier)* "]]" +``` + +### External Attributes + +[→ context in functions.md](functions.md#external-attributes) + +```text +ExternalFunction -> Visibility? "external" AttributeList? + Identifier "(" ExternalArgs ")" + (":" Type)? (FunctionBody | ";") +ExternalArgs -> variadic_list(ExternalArg, "..") +ExternalArg -> Identifier TypeSpec +``` + +### Global Aliases + +[→ context in functions.md](functions.md#global-aliases) + +```text +GlobalAlias -> PatternGuard? Visibility? + "alias" Identifier PatternVars? "=" Expression ";" +``` + +### Global Variables + +[→ context in functions.md](functions.md#global-variables) + +```text +GlobalVariable -> PatternGuard? Visibility? + "var" Identifier PatternVars? "=" Expression ";" +``` + +### External Variables + +[→ context in functions.md](functions.md#external-variables) + +```text +ExternalVariable -> Visibility? "external" AttributeList? Identifier TypeSpec ";" +``` + +## Statements + +### Blocks + +[→ context in statements.md](statements.md#blocks) + +```text +Block -> "{" (Statement | Binding | LabelDef)* "}" +LabelDef -> Identifier ":" +``` + +### Return by Reference + +[→ context in statements.md](statements.md#return-by-reference) + +```text +ReturnStatement -> "return" ReturnExpression? ";" +ReturnExpression -> ReturnKind? 
ExprList +ReturnKind -> "ref" | "forward" +``` + +### `alias`: call-by-name binding + +[→ context in statements.md](statements.md#alias-call-by-name-binding) + +```text +Binding -> BindingKind comma_list(Identifier) "=" ExprList ";" +BindingKind -> "var" | "ref" | "forward" | "alias" +``` + +### Initialization Statements + +[→ context in statements.md](statements.md#initialization-statements) + +```text +Assignment -> ExprList AssignmentOp ExprList ";" +AssignmentOp -> "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "<--" +``` + +### `switch` + +[→ context in statements.md](statements.md#switch) + +```text +IfStatement -> "if" "(" Expression ")" Statement ("else" Statement)? +SwitchStatement -> "switch" "(" Expression ")" + ("case" "(" ExprList ")" Statement)* + ("else" Statement)? +``` + +### `..for`: Multiple-Value For + +[→ context in statements.md](statements.md#for-multiple-value-for) + +```text +WhileStatement -> "while" "(" Expression ")" Statement +ForStatement -> "for" "(" comma_list(Identifier) "in" Expression ")" Statement +MultiValueForStatement -> ".." "for" "(" Identifier "in" ExprList ")" Statement +``` + +### `goto` + +[→ context in statements.md](statements.md#goto) + +```text +BreakStatement -> "break" ";" +ContinueStatement -> "continue" ";" +GotoStatement -> "goto" Identifier ";" +``` + +### `finally` and `onerror` + +[→ context in statements.md](statements.md#finally-and-onerror) + +```text +ThrowStatement -> "throw" Expression ";" +TryStatement -> "try" Block ("catch" "(" (Identifier (":" Type)?) ")" Block)+ +ScopeGuardStatement -> ScopeGuardKind Statement +ScopeGuardKind -> "finally" | "onerror" +``` + +### Eval Statements + +[→ context in statements.md](statements.md#eval-statements) + +```text +EvalStatement -> "eval" ExprList ";" +``` + +## Expressions + +### Lambda Expressions + +[→ context in expressions.md](expressions.md#lambda-expressions) + +```text +Lambda -> LambdaArguments LambdaArrow LambdaBody +LambdaArguments -> ".."? 
Identifier | Arguments +LambdaArrow -> "=>" | "->" +LambdaBody -> Block | ReturnExpression +``` + diff --git a/doc/language-reference/index.md b/doc/language-reference/index.md new file mode 100644 index 000000000..0c589b439 --- /dev/null +++ b/doc/language-reference/index.md @@ -0,0 +1,27 @@ +# The Ceramic Programming Language Reference + +**Version 0.1** + +--- + +## Conventions + +This reference uses two kinds of code blocks: + +- **Grammar blocks**: formal BNF syntax. Regular expressions use `/slashes/` with Perl `/x` syntax (whitespace inside is insignificant). Literal strings use `"quotation marks"`. +- **Code blocks**: examples of Ceramic source code. + +--- + +## Sections + +| Section | Contents | +|---------|----------| +| [Tokenization](tokenization.md) | Source encoding, whitespace, comments, literals | +| [Compilation Strategy](compilation.md) | Whole-program compilation, compile-time evaluation, pattern matching | +| [Modules & Source Layout](modules.md) | Modules, imports, symbols, static strings | +| [Type Definitions](types.md) | Records, variants, enumerations, lambda types | +| [Function Definitions](functions.md) | Simple and overloaded functions, arguments, external functions, global values | +| [Statements](statements.md) | Blocks, assignment, control flow, loops, exceptions | +| [Expressions](expressions.md) | Operators, precedence, lambdas, multiple values | +| [Grammar Reference](grammar.md) | Full BNF grammar, organized by chapter | diff --git a/doc/language-reference/modules.md b/doc/language-reference/modules.md new file mode 100644 index 000000000..bc8efa093 --- /dev/null +++ b/doc/language-reference/modules.md @@ -0,0 +1,284 @@ +# Modules + +A **module** is a single Ceramic source file. Module names are hierarchical dotted identifiers that map to filesystem paths: + +- `foo.bar` resolves to `foo/bar.crm` or `foo/bar/bar.crm` under a compiler search path. + +Modules are the basis of Ceramic's namespacing and encapsulation. 
Each module has its own namespace and can mark symbols `public` or `private`. + +### Special Modules + +| Module | Description | +|--------|-------------| +| `__primitives__` | Synthesized by the compiler. Contains fundamental types (`Int`, `Pointer[T]`, `Bool`), basic operations, and compile-time introspection. See the *Primitives Reference*. | +| `prelude` | Loaded automatically and implicitly imported by every module. The location searched for [operator functions](#operator-functions). | +| `__main__` | Default name of the entry-point module if it declares no name of its own. | + +### Operator Functions + +Operator functions are symbols in library code that the language uses internally to implement syntactic forms. They must be publicly reachable through the `prelude` module. + +**Overloadable operators:** +`add` `call` `dereference` `divide` `equals?` `fieldRef` `greater?` `greaterEquals?` `index` `lesser?` `lesserEquals?` `minus` `multiply` `notEquals?` `plus` `remainder` `staticIndex` `subtract` `tupleLiteral` + +**Literals:** +`Char` `StringConstant` + +**Value lifecycle:** +`copy` `destroy` `move` + +**Switch:** `case?` + +**Assignment:** +`assign` `fieldRefAssign` `fieldRefUpdateAssign` `indexAssign` `indexUpdateAssign` `staticIndexAssign` `staticIndexUpdateAssign` `updateAssign` + +**For loops:** `hasNext?` `iterator` `next` + +**Entry point:** `callMain` `setArgcArgv` + +**Exceptions:** +`continueException` `exceptionIs?` `exceptionAs` `exceptionAsAny` `throwValue` + +**Finalizer/external handlers:** +`exceptionInFinalizer` `exceptionInInitializer` `unhandledExceptionInExternal` + +--- + +## Source File Layout + +A Ceramic source file must be laid out in this order: + +1. Zero or more [import declarations](#import-declarations) +2. An optional [module declaration](#module-declaration) +3. An optional [top-level LLVM block](#top-level-llvm) +4. 
Zero or more [top-level definitions](#top-level-definitions) + +### List Syntactic Forms + +Comma-delimited lists appear throughout Ceramic's grammar and may always end with an optional trailing comma. + +```ceramic +record US_Address ( + name:String, + street:String, + city:String, + state:String, + zip:String, // trailing comma ok +); +``` +In pattern-matching contexts, a variadic tail item may also appear at the end of the list. + +### Import Declarations + +Import declarations bring other modules' definitions into the current namespace. There are four forms: + +```ceramic +import foo.bar; // import as foo.bar; access via foo.bar.thing() +import foo.bar as bar; // alias; access via bar.thing() +import foo.bar.(apple, mandarin as tangerine); // import specific members +import foo.bar.*; // import all public members +``` +Imports are `private` by default. Use `public import` to re-export through the current module: + +```ceramic +public import foo.bar; +``` +Private members of another module can be force-imported explicitly: + +```ceramic +import foo.bar.(private banana); // use sparingly +``` + +#### Conflict Resolution + +Importing two things under the same name is an error: + +```ceramic +import malkevich; +import bar as malkevich; // ERROR +``` +`.*` imports from multiple modules may overlap without error, as long as ambiguous names are never actually used: + +```ceramic +import foo.*; // exports: a, b +import bar.*; // exports: b, c + +main() { + a(); // ok: only in foo + c(); // ok: only in bar + b(); // ERROR; ambiguous +} +``` +Resolve ambiguities by explicitly importing the desired name, or by defining a local override (which shadows wildcard imports): + +```ceramic +import foo.*; +import bar.*; +import bar.(b); // use bar.b specifically +``` + +### Module Declaration + +A module may optionally declare its own name using the `in` keyword. This must appear after imports and before any top-level definitions. 
+ +```ceramic +in foo.bas; +``` +The declaration may include **module attributes** in parentheses. Currently supported: + +- A primitive floating-point type. Sets the default type of untyped float literals in this module. +- A primitive integer type. Sets the default type of untyped integer literals. + +```ceramic +in mymodule (Float32, Int64); + +main() { + println(Type(1.0)); // Float32 + println(Type(3)); // Int64 +} +``` +The attribute list may reference any imported symbols: + +```ceramic +// foo.crm +GraphicsModuleAttributes() = Float32, Int32; + +// bar.crm +import foo; +in bar (..foo.GraphicsModuleAttributes()); +``` + +### Top-Level LLVM + +A module may include a block of raw LLVM assembly emitted directly into the generated LLVM module. This is used to declare intrinsics or global symbols needed by [`__llvm__` function bodies](functions.md#inline-llvm-functions). It must appear after the module declaration and before any top-level definitions. + +```ceramic +in traps; + +__llvm__ { +declare void @llvm.trap() +} + +trap() __llvm__ { + call void @llvm.trap() + ret i8* null +} +``` +Ceramic static values can be [interpolated](functions.md#inline-llvm-functions) into LLVM blocks. + +### Top-Level Definitions + +Top-level definitions fall into three categories: + +- **[Type definitions](types.md#type-definitions)**: `record`, `variant`, `instance`, `enum` +- **[Function definitions](functions.md#function-definitions)**: `define`, `overload`, function bodies, `external` +- **[Global value definitions](functions.md#global-value-definitions)**: `var`, `alias`, external variables + +Ceramic uses two-pass loading: all module namespaces are fully populated before any definition is evaluated. **Forward and circular references are freely allowed**: no forward declarations needed. 
+ +```ceramic +// Mutually recursive functions: no forward declarations needed +hurd() { hird(); } +hird() { hurd(); } + +// Mutually recursive record types +record Ping (pong:Pointer[Pong]); +record Pong (ping:Pointer[Ping]); +``` + +#### Pattern Guards + +Most definitions can be made **generic** using a pattern guard: a bracketed list of pattern variables before the definition: + +```ceramic +[T] +printTwice(file:File, x:T) { + printTo(file, x); + printTo(file, x); +} + +[T] +record Point[T] (x:T, y:T); + +[Stream, T] +printPoint(s:Stream, p:Point[T]) { + printTo(s, "(", p.x, ", ", p.y, ")"); +} +``` +Variadic pattern variables are prefixed with `..`: + +```ceramic +[..TT] +printlnTwice(file:File, ..x:TT) { + printlnTo(file, ..x); + printlnTo(file, ..x); +} +``` +A `|` after the variables adds a **predicate**, constraining which values are valid: + +```ceramic +[T | Numeric?(T)] +record Point[T] (x:T, y:T); + +// No variables: just a platform condition +[| TypeSize(Pointer[Int]) < 4] +overload platformCheck() { error("Time for a new computer"); } +``` + +#### Visibility Modifiers + +Every definition that creates a new symbol may be marked `public` or `private`. The default is `public`. + +- `public`: available to importing modules. +- `private`: not importable by default (but can be force-imported). + +Visibility modifiers are not valid on `overload` or `instance` forms, which modify existing symbols rather than creating new ones. +*** + +## Symbols + +Symbols are module-level global names representing types or functions. A symbol is the only value of the stateless primitive type `Static[symbol]`. It exists entirely at compile time. + +```ceramic +define foo; + +main() { + println(Type(foo)); // Static[foo] +} +``` +Record and variant type symbols may be **parameterized**. 
Applying the index operator to the base symbol instantiates a parameterized version: + +```ceramic +record Foo[T] (); + +main() { + println(Foo); // the base symbol + println(Foo[Int32]); // a parameterized instance + println(Foo[Float64]); +} +``` + +### Static Strings + +Static strings are compile-time identifiers with no module affiliation. The same static string is identical everywhere it appears. They are written as a string literal (or a valid identifier) prefixed with `#`. + +```ceramic +// a.crm +foo() = #"foo"; + +// b.crm +foo() = #foo; + +// main.crm +import a; +import b; + +main() { + println(Type(#"foo")); // Static[#foo] + println(a.foo() == b.foo()); // true; same static string +} +``` + +Static strings are the operands to `fieldRef`, which implements the `.` field access operator. The `__primitives__` module provides operations for indexing, composing, and slicing them. + +--- diff --git a/doc/language-reference/statements.md b/doc/language-reference/statements.md new file mode 100644 index 000000000..e547e8233 --- /dev/null +++ b/doc/language-reference/statements.md @@ -0,0 +1,332 @@ +# Statements + +Statements form the basic unit of control flow within function bodies. + +### Blocks + +A **block** groups statements and introduces a new scope for local variables. Statements execute sequentially unless modified by control flow. + +```ceramic +main() { + println("VENI"); + println("VIDI"); + println("VICI"); +} +``` +Blocks may also contain labels for [`goto`](#goto) targets: a label is an identifier followed by `:`. + +### Expression Statements + +An expression followed by `;`. Return values are discarded via the `destroy` operator. Reference returns are simply dropped: the referenced value is not destroyed. 
+ +```ceramic +main() { + 1 + 2; // computed, then discarded + println("Hi"); +} +``` +If a call's final argument is a block lambda, the trailing `;` may be omitted: + +```ceramic +maybe(maybeMode): mode -> { + println(mode.name, " mode selected"); +} :: () -> { + println("Please select a mode"); +} +``` + +### Return Statements + +Ends the current function and provides its return values. + +```ceramic +foo(x, y) { + return x + y; +} + +// Shorthand: a single return as the whole body: +foo(x, y) = x + y; +``` +Multiple `return` statements are allowed. All must return the same types. Code after all return paths are covered is a compile-time error. + +#### Return by Reference + +`return ref` returns lvalue references. All returned values must be lvalues, and all `return` statements in the function must agree on `ref`-ness. + +```ceramic +[T] +overload index(pv:PitchedVector[T], n) { + return ref pv.vec[n*pv.pitch]; +} +``` +`return forward` generalizes this: each value is returned by reference if it is an lvalue, by value otherwise. + +### Local Variable Bindings + +Local variables are introduced with a binding statement. There are four kinds: + +#### `var`: new independent value + +```ceramic +var x = 1; +var y = 2; +``` +`var`s are destroyed at the end of their enclosing block on any exit path: normal flow, `return`, `break`, `continue`, `goto`, or exception. + +#### `ref`: reference to an existing lvalue + +```ceramic +var x = 1; +ref y = x; +y = 3; +println(x); // 3; same underlying value +``` +A `ref` does not affect the lifetime of the bound value. Accessing a `ref` after the underlying value is freed is undefined behavior. + +#### `forward`: generic over lvalue/rvalue + +Behaves like `ref` if the bound value is an lvalue, and like `var` if it is an rvalue. 
+ +```ceramic +forward x2, y2 = xs[2], ys[2]; +// xs[2] is an rvalue → x2 is a var +// ys[2] is an lvalue → y2 is a ref +``` + +#### `alias`: call-by-name binding + +Like alias functions: the name expands to the bound expression, re-evaluated in the original lexical context each time it is referenced. + +--- + +Multiple variables can be bound from a multiple-value expression: + +```ceramic +var x, y = 1, 2; +var a, b, c, d = 1, ..twoAndThree(), 4; + +// A lone multi-value expression on the right is automatically unpacked: +var x, y = oneAndTwo(); +``` +Variable names come into scope after the right-hand side is evaluated, so a name may be shadowed by its own value: + +```ceramic +var x = 1; +var x = x + 1; // right side sees the outer x; result: x = 2 +``` +Binding statements must appear inside a block. They cannot be the single-statement body of `if`, `while`, or other compound statements. + +### Assignment Statements + +Assignment is a **statement** in Ceramic, not an expression. Using `=` in an expression context is a syntax error. + +```ceramic +var x = 1; +x = 2; // desugars to: assign(x, 2) +``` +Multiple-value assignment evaluates the entire right-hand side into temporaries first, then assigns each. 
This makes shuffles safe: + +```ceramic +x, y = y, x; // safe swap: no aliasing issues +``` +Special property assignment desugars differently when the left-hand side is an index, static-index, or field reference: + +```ceramic +a[..b] = c; // → indexAssign(a, ..b, c) +a.0 = c; // → staticIndexAssign(a, static 0, c) +a.field = c; // → fieldRefAssign(a, #"field", c) +``` + +#### Update Assignment + +`+=`, `-=`, `*=`, `/=`, `%=` desugar to calls to `updateAssign`: + +```ceramic +x += 1; // updateAssign(add, x, 1) +x -= 2; // updateAssign(subtract, x, 2) +x *= 4; // updateAssign(multiply, x, 4) +``` +Property update forms also exist: + +```ceramic +a[..b] += c; // → indexUpdateAssign(add, a, ..b, c) +a.field += c; // → fieldRefUpdateAssign(add, a, #"field", c) +``` + +#### Initialization Statements + +Use `<--` to initialize **uninitialized** storage (from a raw allocator, a named return value, etc.). Unlike `=`, it assumes the destination has no prior state. + +```ceramic +var p = allocateRawMemory(Foo); +finally freeRawMemory(p); + +p^ <-- Foo(); +``` +If the right side is a function call that returns by value, the return value is written directly into the destination. If the right side is an lvalue, `copy` is called. If it is a `forward`-bound rvalue, `move` is called. + +Initializing an already-initialized value with `<--`, or assigning an uninitialized value with `=`, is undefined behavior. + +### Conditional Statements + +#### `if` + +Executes a branch based on a `Bool` expression. + +```ceramic +if (asFoghornLeghorn?) + print(", I say, that"); + +if (condition) + thenBranch(); +else + elseBranch(); +``` + +#### `switch` + +Dispatches to the first matching `case` clause using the `case?` operator. If no case matches, the `else` clause runs if present. Unlike C, there is no fall-through between cases. 
+ +```ceramic +switch (card.rank) +case (1) printTo(stream, "Ace"); +case (2) printTo(stream, "Deuce"); +case (3, 4, 5, 6, 7, 8, 9, 10) printTo(stream, card.rank); +case (11) printTo(stream, "Jack"); +case (12) printTo(stream, "Queen"); +case (13) printTo(stream, "King"); +else assert(false); +``` + +### Loop Statements + +#### `while` + +Loops while a `Bool` expression is true. + +```ceramic +var x = 0; +while (x < 10) { + println(x); + x += 1; +} +``` + +#### `for` + +Iterates over a sequence using the `iterator`, `hasNext?`, and `next` operator functions. + +```ceramic +for (x in range(10)) + println(x); + +// desugars to: +{ + forward _iter = iterator(range(10)); + while (hasNext?(_iter)) { + forward x = next(_iter); + println(x); + } +} +``` + +#### `..for`: Multiple-Value For + +Unrolls over each value of a multiple-value expression at **compile time**. The loop variable's type may differ between iterations. + +```ceramic +[..TT | countValues(..TT) != 1] +overload printTo(stream, ..xs:TT) { + ..for (x in xs) + printTo(stream, x); +} +``` +`..for` is not a runtime loop. The body is instantiated once per value at compile time, like template unrolling. + +### Branch Statements + +#### `break` and `continue` + +`break` exits the innermost loop. `continue` skips to the next iteration. Both are invalid outside a loop. + +#### `goto` + +Jumps to a label within the current function. + +```ceramic +main() { +second_verse: + println("I'm Henry VIII I am"); + goto second_verse; +} +``` +There are two restrictions. A `goto` cannot jump into a `var` binding's scope from outside it, since that would skip initialization. Jumping from an outer block into a label inside an inner block is also unsupported. + +### Exception Handling Statements + +Exception handling is optional. The `ExceptionsEnabled?` alias in `__primitives__` reports whether it is active for the current compilation. The compile-time evaluator always behaves as if exceptions are disabled. 
+ +#### `throw` + +Throws an exception by calling `throwValue` with the given value. Unwinds the call stack to the nearest matching `catch` clause. + +```ceramic +safeDivide(x:Int, y:Int) { + if (y == 0) + throw DivisionByZero(); + return x/y; +} +``` + +#### `try` / `catch` + +Executes the `try` block and, if an exception is thrown, tests it against each `catch` clause in order. A clause without a type is a catch-all. If no clause matches, the exception is rethrown to the next enclosing scope. A caught exception may be rethrown by re-throwing the bound exception object. + +```ceramic +try { + var file = File("hello.txt", CREATE); + printlnTo(file, "hello world"); +} +catch (ex:IOError) { + printlnTo(stderr, "Unable to open hello.txt: ", ex); +} +catch (ex) { + printlnTo(stderr, "Unexpected exception!"); + abort(); +} +``` +When exceptions are disabled, the `try` body runs as a plain block and all `catch` clauses are ignored. + +#### `finally` and `onerror` + +**`finally`**: runs when the enclosing block exits for *any* reason: normal flow, `return`, `break`, `continue`, `goto`, or exception. + +```ceramic +var p = malloc(SizeT(128)); +finally free(p); +``` +**`onerror`**: runs only when the enclosing block exits due to an exception. Normal exits do not trigger it. + +```ceramic +overload SomeType(size:SizeT) { + var p = malloc(size); + onerror free(p); // only if potentiallyFail() throws + + potentiallyFail(); + return SomeType(p); +} +``` +When exceptions are disabled, `onerror` guards are silently ignored. `finally` guards continue to work normally. + +### Eval Statements + +`eval` parses and expands a compile-time string as Ceramic source, then executes it in place of the `eval` statement. The string must be a complete, parsable statement or sequence of statements. Partial constructs are not allowed. 
+ +```ceramic +main() { + eval #"""var x = "hello world";"""; + eval #"""println(x);"""; +} +``` +[Eval expressions](expressions.md#eval-expressions) work the same way in expression context. +--- diff --git a/doc/language-reference/tokenization.md b/doc/language-reference/tokenization.md new file mode 100644 index 000000000..ecc6eff3d --- /dev/null +++ b/doc/language-reference/tokenization.md @@ -0,0 +1,93 @@ +# Tokenization + +### Source Encoding + +Ceramic source files are ASCII text. Non-ASCII bytes in string and character literals are passed through as opaque bytes. + +### Whitespace + +ASCII space, tab, carriage return, newline, and form feed are whitespace. Whitespace separates tokens and is otherwise ignored. + +### Comments + +Ceramic has two comment styles: **block comments** (`/* … */`) and **line comments** (`// …`). Both are treated as a single whitespace character by the lexer. + +```ceramic +/* This is a block comment. + It can span multiple lines. */ + +// This is a line comment. +``` +Block comments are not nestable. + +### Identifiers + +Identifiers start with a letter, underscore (`_`), or question mark (`?`), followed by zero or more letters, digits, underscores, or question marks. + +```ceramic +a a1 a_1 abc123 a? ?a ? +``` +The following are **reserved keywords** and may not be used as identifiers: + +`__ARG__` `__COLUMN__` `__FILE__` `__LINE__` `__llvm__` `alias` `and` `as` `break` `case` `catch` `continue` `define` `else` `enum` `eval` `external` `false` `finally` `for` `forward` `goto` `if` `import` `in` `inline` `instance` `not` `onerror` `or` `overload` `private` `public` `record` `ref` `return` `rvalue` `static` `switch` `throw` `true` `try` `var` `variant` `while` + +### Integer Literals + +Integer literals can be decimal or hexadecimal (prefixed with `0x`). Underscores may appear after any digit for readability and have no effect on the value. 
+ +```ceramic +0 1 23 0x45abc 1_000_000 0xFFFF_FFFF +``` + +### Floating-Point Literals + +A decimal float literal is distinguished from an integer by including a `.` or an exponent (`e`/`E`). Hexadecimal float literals require a binary exponent (`p`/`P`). Underscores are allowed after any digit. + +```ceramic +// Decimal +1. 1.0 1e0 1e-2 0.000_001 + +// Hexadecimal +0x1p0 0x1.0p0 0x1.0000_0000_0000_1p1_023 +``` + +### Character Literals + +A character literal represents a single ASCII character, written between single quotes. + +```ceramic +'x' ' ' '\n' '\'' '\x7F' +``` +Supported escape codes: + +| Escape | Meaning | +|--------|---------| +| `\0` | Null | +| `\t` | Tab | +| `\n` | Newline | +| `\f` | Form feed | +| `\r` | Carriage return | +| `\"` | Double quote | +| `\'` | Single quote | +| `\\` | Backslash | +| `\xNN` | Arbitrary byte (two hex digits) | + +### String Literals + +String literals hold a sequence of ASCII text. Ceramic has two forms: + +- **`"`-delimited**: `"` and `\` must be escaped. +- **`"""`-delimited**: only `\` must be escaped. Useful for strings that contain quotes. + +```ceramic +"hello world" +"\"hello world\"" +"""the string "hello world"""" + +""" +"But not with you, Derek, this star nonsense." +"Yes, yes." +""" +``` + +--- diff --git a/doc/language-reference/types.md b/doc/language-reference/types.md new file mode 100644 index 000000000..3368f1b0d --- /dev/null +++ b/doc/language-reference/types.md @@ -0,0 +1,122 @@ +# Type Definitions + +Ceramic has four kinds of user-defined types: + +- [Records](#records): general-purpose aggregates +- [Variants](#variants): discriminated unions +- [Enumerations](#enumerations): named symbolic constants +- [Lambda types](#lambda-types): implicit capture types, created by lambda expressions + +### Records + +A **record** is a general-purpose aggregate laid out in memory like a C `struct`. + +```ceramic +record Point (x:Int, y:Int); +``` +Records may be parameterized. 
When no predicate is needed, the pattern guard is optional: unrecognized names in brackets are taken as unbounded pattern variables: + +```ceramic +record Point[T] (x:T, y:T); // [T] guard is implied + +[T | Float?(T)] +record FloatPoint[T] (x:T, y:T); // explicit predicate +``` + +#### Computed Layouts + +A record's layout can be computed from an expression that evaluates to a list of `[fieldName, fieldType]` pairs: + +```ceramic +// Equivalent to: record Point[T] (x:T, y:T) +record Point[T] = [[#"x", T], [#"y", T]]; + +// Custom coordinate names +record PointWithCoordinates[T, xy] = [[xy.0, T], [xy.1, T]]; +``` +This pattern also enables template specialization via an overloaded helper function: + +```ceramic +record Vec3D[T] = Vec3DBody(T); + +private define Vec3DBody; +[T | T != Double] +overload Vec3DBody(static T) = [[#"coords", Array[T, 3]]]; +overload Vec3DBody(static Float) = [[#"coords", Vec[Float, 4]]]; // SIMD path +``` + +### Variants + +A **variant** is a discriminated union. A variant value holds exactly one of its instance types and knows which one at runtime, enabling type-safe dynamic dispatch. + +```ceramic +variant Fruit (Apple, Orange, Banana); +``` +Variants may be parameterized (pattern guard optional when no predicate is needed): + +```ceramic +variant Maybe[T] (Nothing, T); // [T] implied +variant Either[T, U] (T, U); // [T, U] implied + +[C | Color?(C)] +variant Fruit[C] (Apple[C], Orange[C], Banana[C]); +``` +The instance list may be any expression evaluated at compile time: + +```ceramic +private RainbowTypes(Base) = + Base[Red], Base[Orange], Base[Yellow], Base[Green], + Base[Blue], Base[Indigo], Base[Violet]; + +variant Fruit (..RainbowTypes(Apple), ..RainbowTypes(Banana)); +``` + +#### Extending Variants + +Variants are **open**. 
New instance types are added with `instance`: + +```ceramic +variant Exception (); + +record RangeError (lowerBound:Int, upperBound:Int, attemptedValue:Int); +record TypeError (expectedTypeName:String, attemptedTypeName:String); +instance Exception (RangeError, TypeError); +``` +`instance` binds to variants by pattern matching, so parameterized variants can be extended selectively: + +```ceramic +[C | Color?(C)] +variant Fruit[C] (); + +instance Fruit[Yellow] (Banana); // only Yellow + +[C | C == Red or C == Green] +instance Fruit[C] (Apple); // Red and Green only + +[C] +instance Fruit[C] (Berry[C]); // all Fruit[C] +``` +The pattern guard on `instance` is **not optional** for generic extension. Without it, `instance Variant[T]` attempts to match only the concrete type `Variant[T]` (where `T` must already be defined), not all parameterized instances. + +### Enumerations + +An **enumeration** defines a type whose values are one of a fixed set of named constants. The constant names are defined in the current module with the same visibility as the type. + +```ceramic +enum ThreatLevel (Green, Blue, Yellow, Orange, Red, Midnight); + +private enum SecurityLevel ( + Angel_0A, Archangel_1B, + Principal_2C, Power_3D, + Virtue_4E, Domination_5F, + Throne_6G, Cherubic_7H, + Seraphic_8X, +); +``` +Enumerations cannot currently be parameterized and do not allow pattern guards. + +### Lambda Types + +Lambda types are record-like types that capture values from their enclosing scope. They are created implicitly by the compiler when [lambda expressions](expressions.md#lambda-expressions) capture variables: there is no explicit syntax for defining them. + +--- diff --git a/doc/primitives-reference/atomic.md b/doc/primitives-reference/atomic.md new file mode 100644 index 000000000..966c323b7 --- /dev/null +++ b/doc/primitives-reference/atomic.md @@ -0,0 +1,90 @@ +# Atomic Memory Operations + +Uninterruptible, lock-free memory access and synchronization. None may be overloaded. 
+ +The compile-time evaluator does not support atomic operations. Evaluating one raises an error. + +## Memory Order Symbols + +```ceramic +define OrderUnordered; +define OrderMonotonic; +define OrderAcquire; +define OrderRelease; +define OrderAcqRel; +define OrderSeqCst; +``` + +Every atomic operation takes one of these as a `static` parameter. They correspond to LLVM orderings, which are a superset of the C11/C++11 memory model. See the [LLVM Atomic Instructions and Concurrency Guide](http://llvm.org/docs/Atomics.html). + +| Ceramic | LLVM | C++11 | +|---------|------|-------| +| `OrderUnordered` | `unordered` | (none) | +| `OrderMonotonic` | `monotonic` | `memory_order_relaxed` | +| `OrderAcquire` | `acquire` | `memory_order_acquire` | +| `OrderRelease` | `release` | `memory_order_release` | +| `OrderAcqRel` | `acq_rel` | `memory_order_acq_rel` | +| `OrderSeqCst` | `seq_cst` | `memory_order_seq_cst` | + +## `atomicLoad` + +```ceramic +[Order, T | Order?(Order)] +atomicLoad(static Order, p:Pointer[T]) : T; +``` + +Atomically loads the value at `p`. Bitwise-copied. Errors if the target does not support atomic loads of `T`. Lowers to LLVM `load atomic`. + +## `atomicStore` + +```ceramic +[Order, T | Order?(Order)] +atomicStore(static Order, p:Pointer[T], value:T) :; +``` + +Atomically stores `value` at `p`. Bitwise-copied. Errors if the target does not support atomic stores of `T`. Lowers to LLVM `store atomic`. + +## `atomicRMW` + +```ceramic +define RMWXchg; +define RMWAdd; define RMWSubtract; +define RMWAnd; define RMWNAnd; +define RMWOr; define RMWXor; +define RMWMin; define RMWMax; +define RMWUMin; define RMWUMax; + +[Order, Op, T | Order?(Order) and RMWOp?(Op)] +atomicRMW(static Order, static Op, p:Pointer[T], operand:T) : T; +``` + +Atomic read-modify-write. Returns the value at `p` **before** the update. Errors if the target does not atomically support `Op` for `T`. Lowers to LLVM `atomicrmw`. 
+ +The update semantics for each `Op`: + +| `Op` | Effect on `p^` | Constraints | +|------|----------------|-------------| +| `RMWXchg` | overwritten with `operand` (bitwise copy) | any `T` | +| `RMWAdd` / `RMWSubtract` | arithmetic add/subtract | integer `T` | +| `RMWMin` / `RMWMax` | signed min/max | integer `T` | +| `RMWUMin` / `RMWUMax` | unsigned min/max | integer `T` | +| `RMWAnd` / `RMWOr` / `RMWXor` | bitwise and/or/xor | any `T` | +| `RMWNAnd` | bitwise NAND (`~(p^ & operand)`) | any `T` | + +## `atomicCompareExchange` + +```ceramic +[Order, T | Order?(Order)] +atomicCompareExchange(static Order, p:Pointer[T], old:T, new:T) : T; +``` + +Atomic compare-and-swap. If `p^` is bitwise equal to `old`, `new` is written to `p^` and `old` is returned. Otherwise, `p^` is unchanged and its current value is returned. Errors if the target does not support CAS for `T`. Lowers to LLVM `cmpxchg`. + +## `atomicFence` + +```ceramic +[Order | Order?(Order)] +atomicFence(static Order); +``` + +Introduces a happens-before edge without an associated memory operation. `Order` must be `OrderAcquire`, `OrderRelease`, `OrderAcqRel`, or `OrderSeqCst`. Lowers to LLVM `fence`. diff --git a/doc/primitives-reference/compiler.md b/doc/primitives-reference/compiler.md new file mode 100644 index 000000000..4dad45ede --- /dev/null +++ b/doc/primitives-reference/compiler.md @@ -0,0 +1,66 @@ +# Compiler Interface + +Compilation-unit settings, external-function attributes, and assorted utilities. None of these may be overloaded. + +## Compiler Flags + +### `ExceptionsEnabled?` + +```ceramic +// If exceptions are enabled in this compilation unit: +alias ExceptionsEnabled? = true; +// Otherwise: +alias ExceptionsEnabled? = false; +``` + +A global alias set to `true` when exceptions are enabled for the current compilation, `false` otherwise.
+ +### `Flag?` + +```ceramic +[name | Identifier?(name)] +Flag?(static name) : Bool; +``` + +`true` if the compiler was invoked with a `-D<name>` or `-D<name>=<value>` option whose `<name>` matches `name`. + +### `Flag` + +```ceramic +[name | Identifier?(name)] +Flag(static name); +``` + +Returns the `<value>` of the compiler option `-D<name>=<value>` whose `<name>` matches `name`, as a static string. If no such flag was given, or the flag was given without a value, returns the empty static string `#""`. + +## External Function Attributes + +These symbols may be used as attributes on external function declarations. + +### Calling Convention + +| Attribute | Effect | +|-----------|--------| +| `AttributeCCall` | C calling convention | +| `AttributeStdCall` | `__stdcall` (Windows x86) | +| `AttributeFastCall` | `__fastcall` (Windows x86) | +| `AttributeThisCall` | `__thiscall` (Windows x86) | +| `AttributeLLVMCall` | LLVM `ccc` calling convention | + +### Linkage + +| Attribute | Effect | +|-----------|--------| +| `AttributeDLLImport` | `__dllimport` linkage on Windows targets | +| `AttributeDLLExport` | `__dllexport` linkage on Windows targets | + +## Miscellaneous + +### `staticIntegers` + +```ceramic +[n | n >= 0] +staticIntegers(static n); +``` + +Returns a multiple-value list of `static` integers from `static 0` up to `static n - 1`. `staticIntegers(static 0)` returns no values. diff --git a/doc/primitives-reference/data-access.md b/doc/primitives-reference/data-access.md new file mode 100644 index 000000000..266ae82f6 --- /dev/null +++ b/doc/primitives-reference/data-access.md @@ -0,0 +1,102 @@ +# Data Access + +Fundamental operations on aggregates and enums. None may be overloaded. + +## `primitiveCopy` + +```ceramic +[T] +primitiveCopy(dest:T, src:T) :; +``` + +Bitwise copies `TypeSize(T)` bytes from `src` into `dest`. Lowers to an LLVM `load` followed by `store`. + +## `arrayRef` + +```ceramic +[T, n, I | Integer?(I)] +arrayRef(array:Array[T, n], i:I) : ref T; +``` + +Returns a reference to element `i` of `array`. + +- Zero-based.
Not bounds-checked. +- Lowers to LLVM `getelementptr`. + +## `arrayElements` + +```ceramic +[T, n] +arrayElements(array:Array[T, n]) : ref ..repeatValue(static n, T); +``` + +Returns a multiple-value list of references to every element of `array` in order. + +## `tupleRef` + +```ceramic +[..T, n | n >= 0 and n < countValues(..T)] +tupleRef(tuple:Tuple[..T], static n) : ref nthValue(static n, ..T); +``` + +Returns a reference to the `n`th element of `tuple`. + +- Zero-based. `n` is checked at compile time. +- Lowers to LLVM `getelementptr`. + +## `tupleElements` + +```ceramic +[..T] +tupleElements(tuple:Tuple[..T]) : ref ..T; +``` + +Returns a multiple-value list of references to every tuple element in order. + +## `recordFieldRef` + +```ceramic +[R, n | Record?(R) and n >= 0 and n < RecordFieldCount(R)] +recordFieldRef(rec:R, static n) : ref RecordFieldType(R, static n); +``` + +Returns a reference to the `n`th field of a record value. + +- Zero-based. `n` is checked at compile time. +- Lowers to LLVM `getelementptr`. + +## `recordFieldRefByName` + +```ceramic +[R, name | Record?(R) and Identifier?(name) and RecordWithField?(R, name)] +recordFieldRefByName(rec:R, static name) : ref RecordFieldTypeByName(R, name); +``` + +Returns a reference to the field named `name` (a static string) in `rec`. + +## `recordFields` + +```ceramic +[R | Record?(R)] +recordFields(rec:R) : ref ..RecordFieldTypes(R); +``` + +Returns a multiple-value list of references to all fields of `rec` in declaration order. + +## `enumToInt` + +```ceramic +[E | Enum?(E)] +enumToInt(en:E) : Int32; +``` + +Returns the ordinal of `en` as an `Int32`. + +## `intToEnum` + +```ceramic +[E | Enum?(E)] +intToEnum(static E, n:Int32) : E; +``` + +Returns the value of enum type `E` with ordinal `n`. Not bounds-checked against the values defined for `E`. 
diff --git a/doc/primitives-reference/exceptions.md b/doc/primitives-reference/exceptions.md new file mode 100644 index 000000000..e12eca9d7 --- /dev/null +++ b/doc/primitives-reference/exceptions.md @@ -0,0 +1,11 @@ +# Exception Handling + +Implementation hooks for the exception-handling runtime. Not overloadable. + +## `activeException` + +```ceramic +activeException() : Pointer[Int8]; +``` + +Returns a pointer to the exception currently driving unwinding. Valid only during unwinding itself, not inside `catch` clauses. Implementation detail, not for user code. diff --git a/doc/primitives-reference/index.md b/doc/primitives-reference/index.md new file mode 100644 index 000000000..dc4462fce --- /dev/null +++ b/doc/primitives-reference/index.md @@ -0,0 +1,27 @@ +# Primitives Reference + +**Version 0.1** + +The `__primitives__` module is synthesized by the compiler and implicitly available to every Ceramic program. It provides primitive types, fundamental operations, and compile-time introspection. + +For the language itself, see the [Language Reference](../language-reference/index.md). + +## Conventions + +- Each entry is introduced by its signature in a fenced `ceramic` block. +- Pattern guards (`[T | ...]`) and the trailing semicolon mark forward declarations. +- Functions documented here may not be overloaded unless otherwise noted. +- `SizeT` refers to the compiler-internal unsigned integer whose size matches a pointer. It is not actually exported by `__primitives__`. 
+ +## Sections + +| Section | Contents | +|---------|----------| +| [Primitive Types](types.md) | `Bool`, integers, floats, pointers, `Array`, `Vec`, `Tuple`, `Union`, `Static`, … | +| [Data Access](data-access.md) | `primitiveCopy`, `arrayRef`, `tupleRef`, `recordFieldRef`, enum conversions | +| [Numeric Operations](numeric.md) | Arithmetic, comparison, bitwise, conversion, checked integer ops | +| [Pointer Operations](pointers.md) | Pointer arithmetic, casts, function pointers | +| [Atomic Operations](atomic.md) | Memory orders, loads/stores, RMW, compare-exchange, fences | +| [Exceptions](exceptions.md) | `activeException` | +| [Introspection](introspection.md) | Symbols, types, records, variants, enums, static strings | +| [Compiler Interface](compiler.md) | Flags, external function attributes, miscellaneous | diff --git a/doc/primitives-reference/introspection.md b/doc/primitives-reference/introspection.md new file mode 100644 index 000000000..39c6f5afc --- /dev/null +++ b/doc/primitives-reference/introspection.md @@ -0,0 +1,248 @@ +# Introspection + +Compile-time queries over symbols, types, records, variants, enums, and static strings. None of these may be overloaded. + +## Symbol and Function Introspection + +### `Type?` + +```ceramic +[T] +Type?(static T) : Bool; +``` + +`true` if `T` is a symbol that names a type. + +```ceramic +define foo; +record bar (); + +main() { + println(Type?(Type?)); // false + println(Type?(Int32)); // true + println(Type?(foo)); // false + println(Type?(bar)); // true + println(Type?(static 3)); // false +} +``` + +### `CallDefined?` + +```ceramic +[F, ..T] +CallDefined?(static F, static ..T) : Bool; +``` + +`true` if `F` has an overload matching input types `..T`. + +To probe a non-symbol callable type, use `CallDefined?(call, FunctionType, ..T)`. 
+ +### `ModuleName` + +```ceramic +[S] +ModuleName(static S) : StringConstant; +``` + +Generates a string literal holding the fully-qualified name of the module that contains the symbol `S`. Evaluated via the `StringConstant` operator function. If `S` is itself a module, returns the module's own name. Errors if `S` is not a symbol. + +```ceramic +import foo; +import foo.bar as bar; + +in baz; + +main() { + println(ModuleName(main)); // "baz" + println(ModuleName(foo.a)); // "foo" + println(ModuleName(bar.a)); // "foo.bar" + println(ModuleName(bar)); // "foo.bar" +} +``` + +### `IdentifierModuleName` + +```ceramic +[S] +IdentifierModuleName(static S); +``` + +Like `ModuleName`, but returns a static string instead of a string literal. + +### `StaticName` + +```ceramic +[x] +StaticName(static x) : StringConstant; +``` + +Generates a string literal naming the static value `x`: + +- Symbol: its name (without module, with parameters). +- Static string: its string value. +- Numeric value: its decimal representation. +- Tuple: the names of its elements, comma-delimited inside square brackets (`[a, b, c]`). + +Evaluated via `StringConstant`. + +### `IdentifierStaticName` + +```ceramic +[x] +IdentifierStaticName(static x); +``` + +Like `StaticName`, but returns a static string. + +### `staticFieldRef` + +```ceramic +[M, name | Identifier?(name)] +staticFieldRef(static M, static name); +``` + +Looks up a public global value named `name` in module `M` and evaluates to it as if it were referenced by name directly. Errors if `name` is not a public member of `M`. + +## Static String Manipulation + +### `Identifier?` + +```ceramic +[S] +Identifier?(static S) : Bool; +``` + +`true` if `S` is a static string. + +### `IdentifierSize` + +```ceramic +[S | Identifier?(S)] +IdentifierSize(static S) : SizeT; +``` + +Number of characters in static string `S`. + +### `IdentifierConcat` + +```ceramic +[..SS | allValues?(Identifier?, ..SS)] +IdentifierConcat(static ..SS); +``` + +Concatenation of all argument static strings.
+ +### `IdentifierSlice` + +```ceramic +[S, n, m | + Identifier?(S) + and n >= 0 and n < IdentifierSize(S) + and m >= 0 and m < IdentifierSize(S) +] +IdentifierSlice(static S, static n, static m); +``` + +Substring of `S` from index `n` up to (but not including) `m`. + +## Type Introspection + +### `TypeSize` + +```ceramic +[T | Type?(T)] +TypeSize(static T) : SizeT; +``` + +Size in bytes of a value of type `T`. + +### `TypeAlignment` + +```ceramic +[T | Type?(T)] +TypeAlignment(static T) : SizeT; +``` + +Natural alignment in bytes of a value of type `T`. + +### `CCodePointer?` + +```ceramic +[T] +CCodePointer?(static T) : Bool; +``` + +`true` if `T` is a symbol and an instance of one of the [external code pointer types](types.md#external-code-pointer-types) (`CCodePointer`, `LLVMCodePointer`, …). + +### `TupleElementCount` + +```ceramic +[..T] +TupleElementCount(static Tuple[..T]) : SizeT; +``` + +Number of elements in the tuple type. + +### `UnionMemberCount` + +```ceramic +[..T] +UnionMemberCount(static Union[..T]) : SizeT; +``` + +Number of member types in the union type. + +### Record Introspection + +```ceramic +[R] +Record?(static R) : Bool; + +[R | Record?(R)] +RecordFieldCount(static R) : SizeT; + +[R, n | Record?(R) and n >= 0 and n < RecordFieldCount(R)] +RecordFieldName(static R, static n); // static string + +[R, name | Record?(R) and Identifier?(name)] +RecordWithField?(static R, static name) : Bool; +``` + +- `Record?`: `true` if `R` names a record type. +- `RecordFieldCount`: field count for record type `R`. +- `RecordFieldName`: name of the `n`th field as a static string. +- `RecordWithField?`: `true` if `R` has a field named `name`. + +### Variant Introspection + +```ceramic +[V] +Variant?(static V) : Bool; + +[V | Variant?(V)] +VariantMemberCount(static V) : SizeT; + +[V, n | Variant?(V) and n >= 0 and n < VariantMemberCount(V)] +VariantMemberIndex(static V, static n); +``` + +- `Variant?`: `true` if `V` names a variant type. 
+- `VariantMemberCount`: number of instance types. +- `VariantMemberIndex`: the `n`th instance type. The mapping from index to instance is unspecified, but iterating `0 .. VariantMemberCount(V)` visits each instance exactly once. + +### Enum Introspection + +```ceramic +[E] +Enum?(static E) : Bool; + +[E | Enum?(E)] +EnumMemberCount(static E) : SizeT; + +[E, n | Enum?(E) and n >= 0 and n < EnumMemberCount(E)] +EnumMemberName(static E, static n) : StringConstant; +``` + +- `Enum?`: `true` if `E` names an enum type. +- `EnumMemberCount`: number of values. +- `EnumMemberName`: string literal naming the `n`th value, evaluated via `StringConstant`. diff --git a/doc/primitives-reference/numeric.md b/doc/primitives-reference/numeric.md new file mode 100644 index 000000000..465fdd609 --- /dev/null +++ b/doc/primitives-reference/numeric.md @@ -0,0 +1,169 @@ +# Numeric Operations + +Arithmetic, comparison, bitwise, and conversion primitives for [`Bool`](types.md#bool), [integer](types.md#integer-types), [floating-point](types.md#floating-point-types), and [imaginary](types.md#imaginary-and-complex-types) types. + +Binary numeric primitives require operands of matching types. Heterogeneous-type conversion is left to the library. Complex math is also library-provided. None of these primitives may be overloaded. + +## `boolNot` + +```ceramic +boolNot(x:Bool) : Bool; +``` + +Returns the complement of `x`. Equivalent to the `not` operator. + +## `numericEquals?` + +```ceramic +[T | Numeric?(T)] +numericEquals?(a:T, b:T) : Bool; +``` + +Numeric equality. + +- Integer: LLVM `icmp eq`. +- Floating-point: LLVM `fcmp ueq` (IEEE 754 unordered). `+0.0 == -0.0`. Any comparison with NaN is false. + +## `numericLesser?` + +```ceramic +[T | Numeric?(T)] +numericLesser?(a:T, b:T) : Bool; +``` + +`true` if `a < b`. + +- Signed integer: LLVM `icmp slt`. +- Unsigned integer: LLVM `icmp ult`. +- Floating-point: LLVM `fcmp ult` (IEEE 754 unordered). `-0.0 < +0.0` is false. 
NaN comparisons are false. + +`__primitives__` does not expose the full set of FP comparisons. The library implements ordered and unordered FP comparison via inline LLVM. These primitives are only used during compile-time evaluation, which cannot run inline LLVM. + +## `numericAdd` / `numericSubtract` / `numericMultiply` + +```ceramic +[T | Numeric?(T)] +numericAdd(a:T, b:T) : T; +numericSubtract(a:T, b:T) : T; +numericMultiply(a:T, b:T) : T; +``` + +Standard arithmetic. Integer overflow wraps (two's-complement). Integer ops lower to `add`, `sub`, `mul`. Floating-point ops lower to `fadd`, `fsub`, `fmul`. + +## `numericDivide` + +```ceramic +[T | Numeric?(T)] +numericDivide(a:T, b:T) : T; +``` + +Integer division truncates toward zero. Integer division by zero is **undefined**, as is signed overflow (e.g. `-0x8000_0000 / -1`). Floating-point division follows IEEE 754. + +- Signed: `sdiv`. Unsigned: `udiv`. Floating-point: `fdiv`. + +## `numericNegate` + +```ceramic +[T | Numeric?(T)] +numericNegate(a:T) : T; +``` + +Negation. + +- Integer: behaves as two's-complement subtraction from zero (LLVM `sub 0, %a`). Unsigned negation gives the two's complement. Signed overflow (negating `-0x8000_0000`) gives the original value. +- Floating-point: LLVM `fsub -0.0, %a`. Negating a zero yields the other zero. Negating a NaN yields an unspecified other NaN. + +## `integerRemainder` + +```ceramic +[T | Integer?(T)] +integerRemainder(a:T, b:T) : T; +``` + +Remainder of `a / b`. For signed types, a nonzero remainder takes the sign of `a`. Division by zero and signed overflow are undefined (LLVM defines the remainder of overflowing division as undefined as well). + +- Signed: `srem`. Unsigned: `urem`. + +## `integerShiftLeft` / `integerShiftRight` + +```ceramic +[T | Integer?(T)] +integerShiftLeft(a:T, b:T) : T; +integerShiftRight(a:T, b:T) : T; +``` + +Shift `a` by `b` bits. Undefined if `b` is negative or `>= bitwidth(T)`. + +- `integerShiftLeft` → LLVM `shl`. 
Overflowed bits discarded. +- `integerShiftRight` → arithmetic shift (`ashr`) for signed types, logical (`lshr`) for unsigned. + +## `integerBitwiseAnd` / `Or` / `Xor` + +```ceramic +[T | Integer?(T)] +integerBitwiseAnd(a:T, b:T) : T; +integerBitwiseOr(a:T, b:T) : T; +integerBitwiseXor(a:T, b:T) : T; +``` + +Bitwise AND, OR, XOR. Lower to LLVM `and`, `or`, `xor`. + +## `integerBitwiseNot` + +```ceramic +[T | Integer?(T)] +integerBitwiseNot(a:T) : T; +``` + +Bitwise complement. Lowers to LLVM `xor %T %a, -1`. + +## `numericConvert` + +```ceramic +[T, U | Numeric?(T) and Numeric?(U)] +numericConvert(static T, a:U) : T; +``` + +Converts `a` to type `T` while preserving its numeric value. If `T == U`, the value is copied. Otherwise, the conversion depends on the kinds of `T` and `U`: + +### Integer → Integer + +| Direction | LLVM | +|-----------|------| +| Narrowing | `trunc` (bitwise truncation) | +| Widening to signed | `sext` (sign-extend) | +| Widening to unsigned | `zext` (zero-extend) | +| Same width, sign change | `bitcast` | + +### Float → Float + +- Narrowing: `fptrunc`. Overflowing truncation is undefined. +- Widening: `fpext`. + +### Integer → Float + +- Signed: `sitofp`. Unsigned: `uitofp`. +- Overflowing conversion is undefined. + +### Float → Integer + +- Signed: `fptosi`. Unsigned: `fptoui`. +- Overflowing conversion is undefined. + +## Checked Integer Operations + +Variants of the integer primitives that also return a `Bool` overflow flag. On overflow, the numeric result is **undefined** and the flag is `true`. Otherwise the result matches the unchecked version and the flag is `false`. None may be overloaded. 
+ +```ceramic +[T | Integer?(T)] +integerAddChecked(a:T, b:T) : T, Bool; +integerSubtractChecked(a:T, b:T) : T, Bool; +integerMultiplyChecked(a:T, b:T) : T, Bool; +integerDivideChecked(a:T, b:T) : T, Bool; +integerNegateChecked(a:T) : T, Bool; +integerRemainderChecked(a:T, b:T): T, Bool; +integerShiftLeftChecked(a:T, b:T): T, Bool; + +[T, U | Integer?(T) and Integer?(U)] +integerConvertChecked(static T, a:U) : T, Bool; +``` diff --git a/doc/primitives-reference/pointers.md b/doc/primitives-reference/pointers.md new file mode 100644 index 000000000..766f370d5 --- /dev/null +++ b/doc/primitives-reference/pointers.md @@ -0,0 +1,117 @@ +# Pointer Operations + +Create, dereference, compare, and convert pointers, plus function-pointer construction and invocation. None of these may be overloaded. + +## `addressOf` + +```ceramic +[T] +addressOf(ref x:T) : Pointer[T]; +``` + +Returns the address of `x`. `x` must be an lvalue. Equivalent to the prefix `&` operator. + +## `pointerDereference` + +```ceramic +[T] +pointerDereference(p:Pointer[T]) : ref T; +``` + +Returns a reference to the object pointed to by `p`. Effectively a no-op at the LLVM level (references are pointers). + +## `pointerEquals?` / `pointerLesser?` + +```ceramic +[T, U] +pointerEquals?(p:Pointer[T], q:Pointer[U]) : Bool; +pointerLesser?(p:Pointer[T], q:Pointer[U]) : Bool; +``` + +- `pointerEquals?`: `true` if `p` and `q` hold the same address. Lowers to LLVM `icmp eq`. +- `pointerLesser?`: `true` if `p`'s address is numerically less than `q`'s. Lowers to LLVM `icmp ult`. + +## `pointerOffset` + +```ceramic +[T, I | Integer?(I)] +pointerOffset(p:Pointer[T], i:I) : Pointer[T]; +``` + +Returns a pointer offset from `p` by `i * TypeSize(T)` bytes. Lowers to LLVM `getelementptr`. + +## `pointerToInt` + +```ceramic +[T, I | Integer?(I)] +pointerToInt(static I, p:Pointer[T]) : I; +``` + +Converts the address of `p` to integer type `I`. Zero-extends if `I` is wider than a pointer, truncates if narrower. 
Lowers to LLVM `ptrtoint`. + +## `intToPointer` + +```ceramic +[T, I | Integer?(I)] +intToPointer(static T, address:I) : Pointer[T]; +``` + +Converts `address` to a `Pointer[T]`. Truncates if `I` is wider than a pointer, zero-extends if narrower. Lowers to LLVM `inttoptr`. + +## `pointerCast` + +```ceramic +[P1, P2 | Pointer?(P1) and Pointer?(P2)] +pointerCast(static P1, p:P2) : P1; +``` + +Converts `p` to another pointer type sharing the same address. Lowers to LLVM `bitcast`. + +Works between data pointers (`Pointer[T]` ↔ `Pointer[U]`), between code-pointer types (`CodePointer`, `CCodePointer`, …), and between data and code pointers. + +## Function Pointer Operations + +### `makeCodePointer` + +```ceramic +[F, ..T] +makeCodePointer(static F, static ..T) : CodePointer[[..T], [..CallType(F, ..T)]]; +``` + +Resolves an overload of `F` matching input types `..T`, instantiates it, and returns a [`CodePointer`](types.md#codepointer) to that instance. + +- `F` must be a symbol or a non-capturing lambda (equivalent to a symbol). +- Errors if `F` is not a symbol, or if no overload matches. +- Always matches as if inputs are lvalues. Taking `CodePointer`s to rvalue functions is unsupported. + +### `makeCCodePointer` + +```ceramic +[F, ..T] +makeCCodePointer(static F, static ..T) : CCodePointer[[..T], [..CallType(F, ..T)]]; +``` + +Like `makeCodePointer`, but additionally generates a thunk that adapts the matched overload to the C calling convention, and returns a [`CCodePointer`](types.md#external-code-pointer-types). + +The matched overload must be **C-compatible**: + +- Returns zero or one values. +- No arguments with nontrivial `copy`, `move`, or `destroy` operations. + +If a Ceramic exception escapes the pointed-to overload, the `unhandledExceptionInExternal` operator function is called (same as for external functions). 
+ +### `callCCodePointer` + +```ceramic +define callCCodePointer; + +[..In, ..Out] +overload callCCodePointer(f:CCodePointer[[..In], [..Out]], ..args:In) : ..Out; +[..In, ..Out] +overload callCCodePointer(f:VarArgsCCodePointer[[..In], [..Out]], ..args:In, ..varArgs) : ..Out; +[..In, ..Out] +overload callCCodePointer(f:LLVMCodePointer[[..In], [..Out]], ..args:In) : ..Out; +// and so on for StdCallCodePointer, FastCallCodePointer, ThisCallCodePointer. +``` + +Invokes an external function pointer using the appropriate calling convention. diff --git a/doc/primitives-reference/types.md b/doc/primitives-reference/types.md new file mode 100644 index 000000000..351ae9735 --- /dev/null +++ b/doc/primitives-reference/types.md @@ -0,0 +1,114 @@ +# Primitive Types + +## `Bool` + +A boolean value: `true` or `false`. Corresponds to LLVM `i1`, C99 `_Bool`, C++ `bool`. + +## Integer Types + +Signed and unsigned integer types are provided at 8, 16, 32, 64, and 128 bits: + +| Signed | Unsigned | LLVM | C99 (`<stdint.h>`) | +|--------|----------|------|--------------------| +| `Int8` | `UInt8` | `i8` | `int8_t`, `uint8_t` | +| `Int16` | `UInt16` | `i16` | `int16_t`, `uint16_t` | +| `Int32` | `UInt32` | `i32` | `int32_t`, `uint32_t` | +| `Int64` | `UInt64` | `i64` | `int64_t`, `uint64_t` | +| `Int128` | `UInt128` | `i128` | (extension) | + +LLVM itself does not distinguish signed and unsigned integer types. Ceramic enforces the distinction at the type level. + +The unsigned integer type whose width matches a pointer is internally referred to as `SizeT` and is used as the return type of indexing primitives. `SizeT` is **not** exported from `__primitives__`. 
+ +## Floating-Point Types + +| Type | LLVM | C | +|------|------|---| +| `Float32` | `float` | `float` | +| `Float64` | `double` | `double` | +| `Float80` | `x86_fp80` | `long double` (Unix x86 only) | + +## Imaginary and Complex Types + +For each floating-point width: + +- `Imag32`, `Imag64`, `Imag80`: share LLVM/C representation with their floating type but are semantically imaginary. +- `Complex32`, `Complex64`, `Complex80`: LLVM `{float, float}` etc., C99 `_Complex float` etc. + +## `Pointer` + +```ceramic +Pointer[T] +``` + +A pointer to a value of type `T`. Corresponds to LLVM `%T*` or C `T*`. Created with prefix `&` or [`addressOf`](pointers.md#addressof). + +## `CodePointer` + +```ceramic +CodePointer[[..In], [..Out]] +``` + +A pointer to a Ceramic function instance. Created with [`makeCodePointer`](pointers.md#makecodepointer). The Ceramic calling convention is unspecified, so it has no fixed LLVM/C equivalent. + +## External Code Pointer Types + +```ceramic +CCodePointer[[..In], [..Out]] +``` + +A pointer to a C function. `CCodePointer[[A,B,C],[]]` corresponds to `void (*)(A,B,C)`. `CCodePointer[[A,B,C],[D]]` corresponds to `D (*)(A,B,C)`. + +Variants for other conventions: + +- `LLVMCodePointer[[..In],[..Out]]`: LLVM `ccc` convention. +- `VarArgsCCodePointer[[..In],[..Out]]`: variadic C: `D (*)(A,B,C,...)`. +- `StdCallCodePointer`, `FastCallCodePointer`, `ThisCallCodePointer`: legacy Windows x86 conventions. + +These pointers are obtained by evaluating external function names, returning them from C functions, or via [`makeCCodePointer`](pointers.md#makeccodepointer). They are invoked through [`callCCodePointer`](pointers.md#callccodepointer). + +## `Array` + +```ceramic +Array[T, n] +``` + +Fixed-size, locally-allocated array of `n` elements of type `T`. `n` must be `Int32`. Corresponds to LLVM `[n x %T]` or C `T[n]`. + +Unlike C arrays, Ceramic arrays do **not** decay to pointers. 
Use [`arrayRef`](data-access.md#arrayref) and [`arrayElements`](data-access.md#arrayelements) for access. + +## `Vec` + +```ceramic +Vec[T, n] +``` + +SIMD vector of `n` elements of type `T`. `n` must be `Int32`. Corresponds to LLVM `<n x %T>` or the GCC extension `T __attribute__((vector_size(...)))`. + +No high-level primitives are provided. Use `Vec` with LLVM vector intrinsics. + +## `Tuple` + +```ceramic +Tuple[..T] +``` + +Anonymous, ordered aggregate. Laid out like a naturally-aligned C `struct`. `Tuple[A,B,C]` corresponds to the LLVM struct `{%A, %B, %C}`. + +## `Union` + +```ceramic +Union[..T] +``` + +Anonymous, non-discriminated union. Laid out like a naturally-aligned C `union`. LLVM has no union type. The compiler picks an LLVM type with the correct size and alignment. + +## `Static` + +```ceramic +Static[x] +``` + +A stateless type representing a compile-time value. Ceramic symbols, static strings, and `static` expressions evaluate to instances of `Static[…]`. + +`Static` values emit as LLVM `i8 undef` and still take space inside tuples and records. 
diff --git a/doc/stylesheets/extra.css b/doc/stylesheets/extra.css index 83c9b0924..ad632c121 100644 --- a/doc/stylesheets/extra.css +++ b/doc/stylesheets/extra.css @@ -1,6 +1,33 @@ [data-md-color-scheme="default"] { - --md-primary-fg-color: #b5562a; - --md-primary-fg-color--light: #c97a50; - --md-primary-fg-color--dark: #8c3e1a; - --md-accent-fg-color: #d4845a; + --md-primary-fg-color: #c2571a; + --md-primary-fg-color--light: #d4663a; + --md-primary-fg-color--dark: #a8471a; + --md-accent-fg-color: #c2571a; + --md-typeset-a-color: #c2571a; +} + +[data-md-color-scheme="default"] .md-typeset a, +[data-md-color-scheme="default"] .md-nav__link--active, +[data-md-color-scheme="default"] .md-nav__link--active code { + color: #c2571a; +} + +.md-typeset pre.shiki { + padding: 0.8em 1em; + border-radius: 4px; + overflow-x: auto; + line-height: 1.55; +} + +[data-md-color-scheme="default"] .md-typeset pre.shiki { + background: var(--md-code-bg-color) !important; +} + + +.md-typeset pre.shiki>code { + background: transparent !important; + color: inherit !important; + padding: 0; + font-family: var(--md-code-font-family, "JetBrains Mono", monospace); + font-size: 0.85em; } diff --git a/mkdocs.yml b/mkdocs.yml index 71b5811a6..18d487417 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,6 +5,10 @@ repo_name: subrange/ceramic theme: name: material + favicon: assets/favicon.svg + font: + text: Inter + code: JetBrains Mono features: - navigation.sections - navigation.top @@ -15,9 +19,39 @@ theme: extra_css: - stylesheets/extra.css +extra_javascript: + - path: js/ceramic-highlight.js + type: module + +markdown_extensions: + - pymdownx.highlight: + use_pygments: false + - pymdownx.superfences + nav: - Home: index.md - - Language Reference: language-reference.md - - Primitives Reference: primitives-reference.md + - Language Reference: + - Overview: language-reference/index.md + - Tokenization: language-reference/tokenization.md + - Compilation Strategy: language-reference/compilation.md + 
- Modules & Source Layout: language-reference/modules.md + - Type Definitions: language-reference/types.md + - Function Definitions: language-reference/functions.md + - Statements: language-reference/statements.md + - Expressions: language-reference/expressions.md + - Grammar Reference: language-reference/grammar.md + - Primitives Reference: + - Overview: primitives-reference/index.md + - Primitive Types: primitives-reference/types.md + - Data Access: primitives-reference/data-access.md + - Numeric Operations: primitives-reference/numeric.md + - Pointer Operations: primitives-reference/pointers.md + - Atomic Operations: primitives-reference/atomic.md + - Exceptions: primitives-reference/exceptions.md + - Introspection: primitives-reference/introspection.md + - Compiler Interface: primitives-reference/compiler.md docs_dir: doc + +exclude_docs: | + _archive/