Skip to content

Commit b13017a

Browse files
committed
Make non-overlapping CIR tags
1 parent 746aaec commit b13017a

File tree

321 files changed

+6280
-1181
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

321 files changed

+6280
-1181
lines changed

src/canonicalize/CIR2.zig

Lines changed: 81 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,12 @@ diagnostics: std.ArrayListUnmanaged(CanDiagnostic),
3838
// Scope state for tracking variable definitions and nested scopes
3939
scope_state: ScopeState,
4040

41-
/// CIR Statement tags - start at 0
41+
/// Starting offset for statement tags to avoid collision with AST2.Node.Tag
42+
/// Calculated at compile time based on actual AST2.Node.Tag values
43+
const STMT_TAG_START = @typeInfo(AST2.Node.Tag).@"enum".fields.len;
44+
4245
pub const StmtTag = enum(u8) {
43-
assign, // immutable assignment
46+
assign = STMT_TAG_START, // immutable assignment
4447
init_var, // mutable variable initialization
4548
reassign, // reassignment to existing var
4649
type_alias, // type alias definition
@@ -58,14 +61,15 @@ pub const StmtTag = enum(u8) {
5861
};
5962

6063
/// Calculate the starting offset for expression tags
61-
/// We start at 100 to avoid collision with AST2.Node.Tag values (which go up to ~68)
62-
const EXPR_TAG_START = 100;
64+
/// Starts after all statement tags
65+
const EXPR_TAG_START = STMT_TAG_START + @typeInfo(StmtTag).@"enum".fields.len;
6366

6467
/// CIR Expression tags - start after statement tags
6568
pub const ExprTag = enum(u8) {
6669
lookup = EXPR_TAG_START, // First expr tag starts at calculated offset
6770
neg_lookup,
6871
not_lookup,
72+
module_access, // Module access like Bool.True (uses binop payload)
6973
num_literal_i32,
7074
int_literal_i32,
7175
num_literal_big,
@@ -110,12 +114,13 @@ pub const ExprTag = enum(u8) {
110114
unary_neg, // Unary negation (e.g., -expr)
111115
unary_not, // Unary not (e.g., !expr)
112116
unary_double_dot, // Unary double dot (e.g., ..(expr))
117+
crash, // Crash expression (e.g., crash "message")
113118
malformed,
114119
};
115120

116121
/// Calculate the starting offset for pattern tags
117-
/// We start at 150 (EXPR_TAG_START=100 + ~48 ExprTag values + buffer)
118-
const PATT_TAG_START = 150;
122+
/// Starts after all expression tags
123+
const PATT_TAG_START = EXPR_TAG_START + @typeInfo(ExprTag).@"enum".fields.len;
119124

120125
/// CIR Pattern tags - start after expression tags
121126
pub const PattTag = enum(u8) {
@@ -254,8 +259,8 @@ pub const Stmts = struct {
254259
// Skip index 0 (sentinel node)
255260
for (tags[1..]) |tag| {
256261
const tag_value = @as(u8, @intFromEnum(tag));
257-
// Statement tags are in the range [0, EXPR_TAG_START)
258-
if (tag_value < EXPR_TAG_START) {
262+
// Statement tags are in the range [STMT_TAG_START, EXPR_TAG_START)
263+
if (tag_value >= STMT_TAG_START and tag_value < EXPR_TAG_START) {
259264
count += 1;
260265
}
261266
}
@@ -404,9 +409,18 @@ pub fn getStmt(self: *const CIR, idx: Stmt.Idx) struct {
404409
.assign => .assign,
405410
.init_var => .init_var,
406411
.reassign => .reassign,
412+
.type_alias => .type_alias,
413+
.type_anno => .type_anno,
414+
.nominal_type => .nominal_type,
415+
.import => .import,
416+
.match => .match,
417+
.if_without_else => .if_without_else,
418+
.ret => .ret,
419+
.for_loop => .for_loop,
420+
.while_loop => .while_loop,
421+
.crash => .crash,
407422
.expr => .expr,
408423
.malformed => .malformed,
409-
else => .malformed, // Fallback for unexpected tags
410424
};
411425

412426
return .{
@@ -445,6 +459,7 @@ pub fn getExpr(self: *const CIR, idx: Expr.Idx) struct {
445459
.lookup => .lookup,
446460
.neg_lookup => .neg_lookup,
447461
.not_lookup => .not_lookup,
462+
.module_access => .module_access,
448463
.num_literal_i32 => .num_literal_i32,
449464
.int_literal_i32 => .int_literal_i32,
450465
.num_literal_big => .num_literal_big,
@@ -489,6 +504,7 @@ pub fn getExpr(self: *const CIR, idx: Expr.Idx) struct {
489504
.unary_neg => .unary_neg,
490505
.unary_not => .unary_not,
491506
.unary_double_dot => .unary_double_dot,
507+
.crash => .crash,
492508
.malformed => .malformed,
493509
};
494510

@@ -736,6 +752,7 @@ pub const Expr = struct {
736752
lookup, // .var_lc (e.g. `foo`)
737753
neg_lookup, // .neg_lc (e.g. `-foo`)
738754
not_lookup, // .not_lc (e.g. `!foo`)
755+
module_access, // .binop_dot with UC.UC (e.g. `Bool.True`)
739756
record_accessor, // .dot_lc (e.g. `.foo`)
740757
double_dot_ident, // .double_dot_lc (e.g. `..others`)
741758
dot_num, // dot followed by number (e.g. `.0`) - this is a tuple accessor
@@ -789,6 +806,7 @@ pub const Expr = struct {
789806
unary_not, // e.g. `!(foo())` - note that `!foo` is special-cased to .not_lc instead
790807
unary_neg, // e.g. `-(foo())` - note that `-foo` is special-cased to .neg_lc instead
791808
unary_double_dot, // e.g. `..(foo())` - note that `..foo` is special-cased to .double_dot_lc instead)
809+
crash, // e.g. `crash "not implemented"` - crash expression with message
792810
malformed, // e.g. tokenization or parsing failed (stores a Diagnostic.Tag)
793811
};
794812

@@ -894,6 +912,7 @@ pub const Type = struct {
894912
unary_not, // e.g. `!(foo())` - note that `!foo` is special-cased to .not_lc instead
895913
unary_neg, // e.g. `-(foo())` - note that `-foo` is special-cased to .neg_lc instead
896914
unary_double_dot, // e.g. `..(foo())` - note that `..foo` is special-cased to .double_dot_lc instead)
915+
crash, // e.g. `crash "not implemented"` - crash expression with message
897916
malformed, // e.g. tokenization or parsing failed (stores a Diagnostic.Tag)
898917
};
899918

@@ -1106,7 +1125,7 @@ pub fn canonicalizeExpr(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
11061125
},
11071126

11081127
// Binary operators
1109-
.binop_plus, .binop_minus, .binop_star, .binop_slash, .binop_colon, .binop_equals => {
1128+
.binop_plus, .binop_minus, .binop_star, .binop_slash, .binop_double_slash, .binop_colon, .binop_equals => {
11101129
// Get the binop data from AST's node slices
11111130
const ast_binop = self.ast.*.node_slices.binOp(node.payload.binop);
11121131

@@ -1120,6 +1139,7 @@ pub fn canonicalizeExpr(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
11201139
.binop_minus => .binop_minus,
11211140
.binop_star => .binop_star,
11221141
.binop_slash => .binop_slash,
1142+
.binop_double_slash => .binop_double_slash,
11231143
.binop_colon => .binop_colon,
11241144
.binop_equals => .binop_equals,
11251145
else => unreachable,
@@ -1561,8 +1581,7 @@ pub fn canonicalizeExpr(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
15611581

15621582
if (lhs_tag == .uc and rhs_tag == .uc) {
15631583
// This is module access like Bool.True
1564-
// Treat it as a lookup for now
1565-
self.mutateToExpr(node_idx, .lookup);
1584+
self.mutateToExpr(node_idx, .module_access);
15661585
try self.ensureTypeVarExists(node_idx);
15671586
return asExprIdx(node_idx);
15681587
} else {
@@ -1603,22 +1622,26 @@ pub fn canonicalizeExpr(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
16031622

16041623
// Record spread operator (e.g., ..person in { ..person, age: 31 })
16051624
.double_dot_lc => {
1606-
// This represents a record being spread - treat as a special lookup for now
1607-
self.mutateToExpr(node_idx, .lookup);
1625+
// This represents a record being spread
1626+
self.mutateToExpr(node_idx, .unary_double_dot);
16081627
try self.ensureTypeVarExists(node_idx);
16091628
return asExprIdx(node_idx);
16101629
},
16111630

16121631
// Module/package qualified identifiers (e.g., Module.Type, pkg.Module.Type)
16131632
.uc_dot_ucs => {
16141633
// Module-qualified type or tag (e.g., Module.Type or Module.Tag)
1615-
self.mutateToExpr(node_idx, .lookup);
1634+
// These have nodes payload with multiple identifiers
1635+
// Keep as apply_tag since they're tag applications
1636+
self.mutateToExpr(node_idx, .apply_tag);
16161637
try self.ensureTypeVarExists(node_idx);
16171638
return asExprIdx(node_idx);
16181639
},
16191640
.lc_dot_ucs => {
16201641
// Package-qualified module access (e.g., pkg.Module.Type)
1621-
self.mutateToExpr(node_idx, .lookup);
1642+
// These have nodes payload with multiple identifiers
1643+
// Keep as apply_tag since they're tag applications
1644+
self.mutateToExpr(node_idx, .apply_tag);
16221645
try self.ensureTypeVarExists(node_idx);
16231646
return asExprIdx(node_idx);
16241647
},
@@ -1838,6 +1861,35 @@ pub fn canonicalizeExpr(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
18381861
return asExprIdx(node_idx);
18391862
},
18401863

1864+
// Crash expression
1865+
.crash => {
1866+
// Crash takes an expression to print before crashing
1867+
const nodes_iter = self.ast.*.node_slices.nodes(&node.payload.nodes);
1868+
var iter = nodes_iter;
1869+
1870+
if (iter.next()) |expr_node_idx| {
1871+
// Canonicalize the crash message expression
1872+
_ = try self.canonicalizeExpr(allocator, expr_node_idx, raw_src, idents);
1873+
}
1874+
1875+
// Mutate to crash expression
1876+
self.mutateToExpr(node_idx, .crash);
1877+
try self.ensureTypeVarExists(node_idx);
1878+
return asExprIdx(node_idx);
1879+
},
1880+
1881+
// Return statement in expression context - shouldn't happen but handle it
1882+
.ret => {
1883+
// Return is a statement, but if we encounter it in expression context,
1884+
// we'll canonicalize it as a statement first
1885+
const stmt_idx = try self.canonicalizeStmt(allocator, node_idx, raw_src, idents);
1886+
// Then treat it as a malformed expression
1887+
self.mutateToExpr(node_idx, .malformed);
1888+
try self.ensureTypeVarExists(node_idx);
1889+
_ = stmt_idx;
1890+
return asExprIdx(node_idx);
1891+
},
1892+
18411893
else => {
18421894
// This should never happen if we've handled all cases
18431895
std.log.err("Unhandled AST node tag in canonicalizeExpr: {}", .{node.tag});
@@ -2010,7 +2062,7 @@ pub fn canonicalizeStmt(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
20102062

20112063
// Check if this node has already been canonicalized (mutated)
20122064
const tag_value = @as(u8, @intFromEnum(node.tag));
2013-
if (tag_value < EXPR_TAG_START) {
2065+
if (tag_value >= STMT_TAG_START and tag_value < EXPR_TAG_START) {
20142066
// This node has already been canonicalized as a statement
20152067
return asStmtIdx(node_idx);
20162068
}
@@ -2117,37 +2169,36 @@ pub fn canonicalizeStmt(self: *CIR, allocator: Allocator, node_idx: AST2.Node.Id
21172169
}
21182170
},
21192171

2120-
// Return statement
2121-
.ret => {
2122-
// Parse return statement with expression
2123-
// Get the expression from nodes payload
2172+
// Crash statement
2173+
.crash => {
2174+
// Crash takes an expression to print before crashing
21242175
const nodes_iter = self.ast.*.node_slices.nodes(&node.payload.nodes);
21252176
var iter = nodes_iter;
21262177

21272178
if (iter.next()) |expr_node_idx| {
2128-
// Canonicalize the return expression
2179+
// Canonicalize the crash message expression
21292180
_ = try self.canonicalizeExpr(allocator, expr_node_idx, raw_src, idents);
21302181
}
21312182

2132-
// Mutate to return statement
2133-
self.mutateToStmt(node_idx, .ret);
2183+
// Mutate to crash statement
2184+
self.mutateToStmt(node_idx, .crash);
21342185
return asStmtIdx(node_idx);
21352186
},
21362187

2137-
// Crash statement
2138-
.crash => {
2139-
// Parse crash statement with expression
2188+
// Return statement
2189+
.ret => {
2190+
// Parse return statement with expression
21402191
// Get the expression from nodes payload
21412192
const nodes_iter = self.ast.*.node_slices.nodes(&node.payload.nodes);
21422193
var iter = nodes_iter;
21432194

21442195
if (iter.next()) |expr_node_idx| {
2145-
// Canonicalize the crash expression
2196+
// Canonicalize the return expression
21462197
_ = try self.canonicalizeExpr(allocator, expr_node_idx, raw_src, idents);
21472198
}
21482199

2149-
// Mutate to crash statement
2150-
self.mutateToStmt(node_idx, .crash);
2200+
// Mutate to return statement
2201+
self.mutateToStmt(node_idx, .ret);
21512202
return asStmtIdx(node_idx);
21522203
},
21532204

src/fmt/fmt2.zig

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -463,8 +463,8 @@ const Formatter = struct {
463463
.binop_platform => try self.formatBinOp(node_idx, " platform "),
464464
.binop_pipe => {
465465
// Check if this is a module field access pattern
466-
const node = self.getNode(node_idx);
467-
const binop = self.ast.node_slices.binOp(node.payload.binop);
466+
const pipe_node = self.getNode(node_idx);
467+
const binop = self.ast.node_slices.binOp(pipe_node.payload.binop);
468468
const lhs_node = self.getNode(binop.lhs);
469469
const rhs_node = self.getNode(binop.rhs);
470470

src/snapshot_tool/main.zig

Lines changed: 37 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2754,14 +2754,8 @@ fn outputCIR2ExprAsSExpr(writer: anytype, cir: *const CIR, env: *const base.Comm
27542754
try writer.print(" {}", .{expr.payload.num_literal_i32});
27552755
},
27562756
.int_literal_i32 => {
2757-
// Get the original AST node to access the correct payload
2758-
const inner_node_idx = @as(AST.Node.Idx, @enumFromInt(@intFromEnum(expr_idx)));
2759-
const ast_node = cir.ast.*.nodes.get(@enumFromInt(@intFromEnum(inner_node_idx)));
2760-
2761-
// Check if the original node has int_literal_i32 payload
2762-
if (ast_node.tag == .int_literal_i32) {
2763-
try writer.print(" 0x{x}", .{ast_node.payload.int_literal_i32});
2764-
}
2757+
// Use the expr.payload directly - after mutation, we can't check AST nodes
2758+
try writer.print(" 0x{x}", .{expr.payload.int_literal_i32});
27652759
},
27662760
.frac_literal_small => {
27672761
// Use the expr.payload directly - it's the same as the AST payload
@@ -2770,28 +2764,17 @@ fn outputCIR2ExprAsSExpr(writer: anytype, cir: *const CIR, env: *const base.Comm
27702764
try writer.print(" {d}", .{decimal_value});
27712765
},
27722766
.frac_literal_big => {
2773-
// Check if the original AST node actually has a frac_literal_big payload
2774-
const frac_node_idx = @as(AST.Node.Idx, @enumFromInt(@intFromEnum(expr_idx)));
2775-
const ast_node = cir.ast.*.nodes.get(@enumFromInt(@intFromEnum(frac_node_idx)));
2776-
2777-
const has_frac_big = switch (ast_node.tag) {
2778-
.frac_literal_big => true,
2779-
else => false,
2780-
};
2781-
2782-
if (has_frac_big) {
2783-
// Use the expr.payload directly
2784-
const idx = expr.payload.frac_literal_big;
2785-
// Check if ByteSlices is empty or index is out of bounds
2786-
const cir_ast = cir.ast.*;
2787-
const byte_slices_len = cir_ast.byte_slices.entries.items.items.len;
2788-
const idx_usize = @as(usize, @intCast(@intFromEnum(idx)));
2789-
if (byte_slices_len == 0 or idx_usize >= byte_slices_len) {
2790-
try writer.print(" big:<idx:{}>", .{@intFromEnum(idx)});
2791-
} else {
2792-
const slice = cir_ast.byte_slices.slice(idx);
2793-
try writer.print(" {s}", .{slice});
2794-
}
2767+
// Use the expr.payload directly - after mutation, we can't check AST nodes
2768+
const idx = expr.payload.frac_literal_big;
2769+
// Check if ByteSlices is empty or index is out of bounds
2770+
const cir_ast = cir.ast.*;
2771+
const byte_slices_len = cir_ast.byte_slices.entries.items.items.len;
2772+
const idx_usize = @as(usize, @intCast(@intFromEnum(idx)));
2773+
if (byte_slices_len == 0 or idx_usize >= byte_slices_len) {
2774+
try writer.print(" big:<idx:{}>", .{@intFromEnum(idx)});
2775+
} else {
2776+
const slice = cir_ast.byte_slices.slice(idx);
2777+
try writer.print(" {s}", .{slice});
27952778
}
27962779
},
27972780
.lookup => {
@@ -2800,6 +2783,30 @@ fn outputCIR2ExprAsSExpr(writer: anytype, cir: *const CIR, env: *const base.Comm
28002783
const ident_name = env.idents.getText(ident_idx);
28012784
try writer.print(" \"{s}\"", .{ident_name});
28022785
},
2786+
.module_access => {
2787+
// Module access has binop payload
2788+
const binop = cir.getBinOp(CIR.Expr.Idx, expr.payload.binop);
2789+
try writer.writeAll("\n");
2790+
try outputCIR2ExprAsSExpr(writer, cir, env, binop.lhs, indent + 1);
2791+
try outputCIR2ExprAsSExpr(writer, cir, env, binop.rhs, indent + 1);
2792+
for (0..indent) |_| {
2793+
try writer.writeAll(" ");
2794+
}
2795+
},
2796+
.crash => {
2797+
// Crash has nodes payload with the message expression
2798+
const nodes_idx = expr.payload.nodes;
2799+
var iter = cir.ast.node_slices.nodes(&nodes_idx);
2800+
try writer.writeAll("\n");
2801+
if (iter.next()) |msg_node| {
2802+
// Cast the AST node to CIR expr
2803+
const msg_expr = CIR.asExprIdx(msg_node);
2804+
try outputCIR2ExprAsSExpr(writer, cir, env, msg_expr, indent + 1);
2805+
}
2806+
for (0..indent) |_| {
2807+
try writer.writeAll(" ");
2808+
}
2809+
},
28032810
.binop_plus, .binop_minus, .binop_star, .binop_slash, .binop_double_slash, .binop_double_question, .binop_double_equals, .binop_not_equals, .binop_gt, .binop_gte, .binop_lt, .binop_lte, .binop_and, .binop_or, .binop_thick_arrow, .binop_thin_arrow, .binop_colon, .binop_equals => {
28042811
// After mutation, binop nodes still have their binop payloads
28052812
// The mutation only changes the tag, not the payload

test/snapshots/app_header__nonempty_multiline__commented.md

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -28,19 +28,14 @@ app # Comment after keyword
2828
pf: "../main.roc" platform [ # Comment after provides open
2929
main],
3030
# Comment after platform
31-
other: ("../../other/main.roc"),
31+
other: "../../other/main.roc",
3232
}
3333
3434
~~~
3535
# EXPECTED
3636
NIL
3737
# PROBLEMS
38-
**Parse Error**
39-
at 7:2 to 7:2
40-
41-
**Expected Close Curly Brace**
42-
at 1:1 to 7:3
43-
38+
NIL
4439
# CANONICALIZE
4540
~~~clojure
4641
(empty)

0 commit comments

Comments
 (0)