From 2699aa7eff53cb54489c996b9c743ab26b44c2d5 Mon Sep 17 00:00:00 2001 From: nojaf Date: Wed, 30 Jul 2025 21:10:08 +0200 Subject: [PATCH 01/13] Add token dump printer --- compiler/syntax/cli/res_cli.ml | 11 +- compiler/syntax/src/res_token_debugger.ml | 142 +++++++++++++++++++++ compiler/syntax/src/res_token_debugger.mli | 1 + 3 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 compiler/syntax/src/res_token_debugger.ml create mode 100644 compiler/syntax/src/res_token_debugger.mli diff --git a/compiler/syntax/cli/res_cli.ml b/compiler/syntax/cli/res_cli.ml index 572ddd95f3..ab3a5bb93e 100644 --- a/compiler/syntax/cli/res_cli.ml +++ b/compiler/syntax/cli/res_cli.ml @@ -239,11 +239,12 @@ module CliArgProcessor = struct | "ast" -> Res_ast_debugger.print_engine | "sexp" -> Res_ast_debugger.sexp_print_engine | "comments" -> Res_ast_debugger.comments_print_engine + | "tokens" -> Res_token_debugger.token_print_engine | "res" -> Res_driver.print_engine | target -> print_endline - ("-print needs to be either binary, ml, ast, sexp, comments or res. \ - You provided " ^ target); + ("-print needs to be either binary, ml, ast, sexp, comments, tokens \ + or res. 
You provided " ^ target); exit 1 in @@ -256,7 +257,11 @@ module CliArgProcessor = struct let (Parser backend) = parsing_engine in (* This is the whole purpose of the Color module above *) Color.setup None; - if process_interface then + + (* Special case for tokens - bypass parsing entirely *) + if target = "tokens" then + print_engine.print_implementation ~width ~filename ~comments:[] [] + else if process_interface then let parse_result = backend.parse_interface ~for_printer ~filename in if parse_result.invalid then ( backend.string_of_diagnostics ~source:parse_result.source diff --git a/compiler/syntax/src/res_token_debugger.ml b/compiler/syntax/src/res_token_debugger.ml new file mode 100644 index 0000000000..a484399dcb --- /dev/null +++ b/compiler/syntax/src/res_token_debugger.ml @@ -0,0 +1,142 @@ +let dump_tokens filename = + let src = + try + let ic = open_in filename in + let content = really_input_string ic (in_channel_length ic) in + close_in ic; + content + with e -> + Printf.printf "Error reading file %s: %s\n" filename + (Printexc.to_string e); + exit 1 + in + + let scanner = Res_scanner.make ~filename src in + + let rec visit scanner = + let start_pos, end_pos, token = Res_scanner.scan scanner in + let token_str = + match token with + | Res_token.Await -> "Await" + | Res_token.Open -> "Open" + | Res_token.True -> "True" + | Res_token.False -> "False" + | Res_token.Codepoint {original} -> "Codepoint(\"" ^ original ^ "\")" + | Res_token.Int {i} -> "Int(\"" ^ i ^ "\")" + | Res_token.Float {f} -> "Float(\"" ^ f ^ "\")" + | Res_token.String s -> "String(\"" ^ s ^ "\")" + | Res_token.Lident str -> "Lident(\"" ^ str ^ "\")" + | Res_token.Uident str -> "Uident(\"" ^ str ^ "\")" + | Res_token.As -> "As" + | Res_token.Dot -> "Dot" + | Res_token.DotDot -> "DotDot" + | Res_token.DotDotDot -> "DotDotDot" + | Res_token.Bang -> "Bang" + | Res_token.Semicolon -> "Semicolon" + | Res_token.Let -> "Let" + | Res_token.And -> "And" + | Res_token.Rec -> "Rec" + | 
Res_token.Underscore -> "Underscore" + | Res_token.SingleQuote -> "SingleQuote" + | Res_token.Equal -> "Equal" + | Res_token.EqualEqual -> "EqualEqual" + | Res_token.EqualEqualEqual -> "EqualEqualEqual" + | Res_token.Bar -> "Bar" + | Res_token.Lparen -> "Lparen" + | Res_token.Rparen -> "Rparen" + | Res_token.Lbracket -> "Lbracket" + | Res_token.Rbracket -> "Rbracket" + | Res_token.Lbrace -> "Lbrace" + | Res_token.Rbrace -> "Rbrace" + | Res_token.Colon -> "Colon" + | Res_token.Comma -> "Comma" + | Res_token.Eof -> "Eof" + | Res_token.Exception -> "Exception" + | Res_token.Backslash -> "Backslash" + | Res_token.Forwardslash -> "Forwardslash" + | Res_token.ForwardslashDot -> "ForwardslashDot" + | Res_token.Regex (pattern, flags) -> + "Regex(\"" ^ pattern ^ "\", \"" ^ flags ^ "\")" + | Res_token.Asterisk -> "Asterisk" + | Res_token.AsteriskDot -> "AsteriskDot" + | Res_token.Exponentiation -> "Exponentiation" + | Res_token.Minus -> "Minus" + | Res_token.MinusDot -> "MinusDot" + | Res_token.Plus -> "Plus" + | Res_token.PlusDot -> "PlusDot" + | Res_token.PlusPlus -> "PlusPlus" + | Res_token.PlusEqual -> "PlusEqual" + | Res_token.ColonGreaterThan -> "ColonGreaterThan" + | Res_token.GreaterThan -> "GreaterThan" + | Res_token.LessThan -> "LessThan" + | Res_token.LessThanSlash -> "LessThanSlash" + | Res_token.Hash -> "Hash" + | Res_token.HashEqual -> "HashEqual" + | Res_token.Assert -> "Assert" + | Res_token.Tilde -> "Tilde" + | Res_token.Question -> "Question" + | Res_token.If -> "If" + | Res_token.Else -> "Else" + | Res_token.For -> "For" + | Res_token.In -> "In" + | Res_token.While -> "While" + | Res_token.Switch -> "Switch" + | Res_token.When -> "When" + | Res_token.EqualGreater -> "EqualGreater" + | Res_token.MinusGreater -> "MinusGreater" + | Res_token.External -> "External" + | Res_token.Typ -> "Typ" + | Res_token.Private -> "Private" + | Res_token.Constraint -> "Constraint" + | Res_token.Mutable -> "Mutable" + | Res_token.Include -> "Include" + | Res_token.Module -> 
"Module" + | Res_token.Of -> "Of" + | Res_token.Land -> "Land" + | Res_token.Lor -> "Lor" + | Res_token.Band -> "Band" + | Res_token.Caret -> "Caret" + | Res_token.BangEqual -> "BangEqual" + | Res_token.BangEqualEqual -> "BangEqualEqual" + | Res_token.LessEqual -> "LessEqual" + | Res_token.GreaterEqual -> "GreaterEqual" + | Res_token.ColonEqual -> "ColonEqual" + | Res_token.At -> "At" + | Res_token.AtAt -> "AtAt" + | Res_token.Percent -> "Percent" + | Res_token.PercentPercent -> "PercentPercent" + | Res_token.Comment c -> "Comment(" ^ Res_comment.to_string c ^ ")" + | Res_token.List -> "List" + | Res_token.Dict -> "Dict" + | Res_token.TemplateTail (text, _) -> "TemplateTail(\"" ^ text ^ "\")" + | Res_token.TemplatePart (text, _) -> "TemplatePart(\"" ^ text ^ "\")" + | Res_token.Backtick -> "Backtick" + | Res_token.Try -> "Try" + | Res_token.DocComment (_, s) -> "DocComment(\"" ^ s ^ "\")" + | Res_token.ModuleComment (_, s) -> "ModuleComment(\"" ^ s ^ "\")" + | Res_token.LeftShift -> "LeftShift" + | Res_token.RightShift -> "RightShift" + | Res_token.RightShiftUnsigned -> "RightShiftUnsigned" + in + + let start_line = start_pos.Lexing.pos_lnum in + let start_col = start_pos.Lexing.pos_cnum - start_pos.Lexing.pos_bol + 1 in + let end_line = end_pos.Lexing.pos_lnum in + let end_col = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol + 1 in + + Printf.printf "%s (%d,%d-%d,%d)\n" token_str start_line start_col end_line + end_col; + + match token with + | Res_token.Eof -> () + | _ -> visit scanner + in + visit scanner + +let token_print_engine = + { + Res_driver.print_implementation = + (fun ~width:_ ~filename ~comments:_ _ -> dump_tokens filename); + print_interface = + (fun ~width:_ ~filename ~comments:_ _ -> dump_tokens filename); + } diff --git a/compiler/syntax/src/res_token_debugger.mli b/compiler/syntax/src/res_token_debugger.mli new file mode 100644 index 0000000000..f8dea0742c --- /dev/null +++ b/compiler/syntax/src/res_token_debugger.mli @@ -0,0 +1 @@ +val 
token_print_engine : Res_driver.print_engine From b60bcae0854e29fa6e6170477c5c6ccd8ad190eb Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 11:09:17 +0200 Subject: [PATCH 02/13] Remove ForwardSlash token --- compiler/syntax/src/res_core.ml | 169 ++++++++-------------- compiler/syntax/src/res_scanner.ml | 66 +++------ compiler/syntax/src/res_scanner.mli | 14 +- compiler/syntax/src/res_token.ml | 2 - compiler/syntax/src/res_token_debugger.ml | 1 - 5 files changed, 89 insertions(+), 163 deletions(-) diff --git a/compiler/syntax/src/res_core.ml b/compiler/syntax/src/res_core.ml index 30adeea5ee..eaa30bd9e0 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -2539,7 +2539,41 @@ and parse_let_bindings ~attrs ~start_pos p = in (rec_flag, loop p [first]) +(* jsx allows for `-` token in the name, we need to combine some tokens into a single ident *) +and parse_jsx_ident p = + (* check if the next tokens are minus and ident, if so, add them to the buffer *) + let rec visit buffer = + match p.Parser.token with + | Minus -> ( + Parser.next p; + match p.Parser.token with + | Lident txt | Uident txt -> + Buffer.add_char buffer '-'; + Buffer.add_string buffer txt; + if Scanner.peekMinus p.scanner then visit buffer else buffer + | _ -> buffer) + | _ -> buffer + in + match p.Parser.token with + | Lident txt when Scanner.peekMinus p.scanner -> + let buffer = Buffer.create (String.length txt) in + Buffer.add_string buffer txt; + Parser.next p; + let name = visit buffer |> Buffer.contents in + let token = Token.Lident name in + p.token <- token + | Uident txt when Scanner.peekMinus p.scanner -> + let buffer = Buffer.create (String.length txt) in + Buffer.add_string buffer txt; + Parser.next p; + let name = visit buffer |> Buffer.contents in + let token = Token.Uident name in + p.token <- token + | _ -> () + and parse_jsx_name p : Longident.t Location.loc = + (* jsx allows for `-` token in the name, we need to combine some tokens *) + 
parse_jsx_ident p; match p.Parser.token with | Lident ident -> let ident_start = p.start_pos in @@ -2566,7 +2600,6 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) | Forwardslash -> (* *) Parser.next p; - Scanner.pop_mode p.scanner Jsx; let jsx_end_pos = p.end_pos in Parser.expect GreaterThan p; let loc = mk_loc start_pos jsx_end_pos in @@ -2578,24 +2611,25 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) let children = parse_jsx_children p in let closing_tag_start = match p.token with - | LessThanSlash -> + | LessThan when Scanner.peekSlash p.scanner -> let pos = p.start_pos in + (* Move to slash *) Parser.next p; - Some pos - | LessThan -> - let pos = p.start_pos in + (* Move to ident *) Parser.next p; - Parser.expect Forwardslash p; Some pos | token when Grammar.is_structure_item_start token -> None | _ -> - Parser.expect LessThanSlash p; + Parser.expect LessThan p; + Parser.expect Forwardslash p; None in + (* Again, the ident in the closing tag can have a minus. 
+ We combine these tokens into a single ident *) + parse_jsx_ident p; match p.Parser.token with | (Lident _ | Uident _) when verify_jsx_opening_closing_name p name -> let end_tag_name = {name with loc = mk_loc p.start_pos p.end_pos} in - Scanner.pop_mode p.scanner Jsx; let closing_tag_end = p.start_pos in Parser.expect GreaterThan p; let loc = mk_loc start_pos p.prev_end_pos in @@ -2612,7 +2646,6 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) Ast_helper.Exp.jsx_container_element ~loc name jsx_props opening_tag_end children closing_tag | token -> - Scanner.pop_mode p.scanner Jsx; let () = if Grammar.is_structure_item_start token then let closing = "" in @@ -2632,91 +2665,11 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) ~loc:(mk_loc start_pos p.prev_end_pos) name jsx_props opening_tag_end children None) | token -> - Scanner.pop_mode p.scanner Jsx; Parser.err p (Diagnostics.unexpected token p.breadcrumbs); Ast_helper.Exp.jsx_unary_element ~loc:(mk_loc start_pos p.prev_end_pos) name jsx_props -(* and parse_jsx_opening_or_self_closing_element_old ~start_pos p = - let jsx_start_pos = p.Parser.start_pos in - let name = parse_jsx_name p in - let jsx_props = parse_jsx_props p in - let children = - match p.Parser.token with - | Forwardslash -> - (* *) - let children_start_pos = p.Parser.start_pos in - Parser.next p; - let children_end_pos = p.Parser.start_pos in - Scanner.pop_mode p.scanner Jsx; - Parser.expect GreaterThan p; - let loc = mk_loc children_start_pos children_end_pos in - Ast_helper.Exp.make_list_expression loc [] None (* no children *) - | GreaterThan -> ( - (* bar *) - let children_start_pos = p.Parser.start_pos in - Parser.next p; - let spread, children = parse_jsx_children p in - let children_end_pos = p.Parser.start_pos in - let () = - match p.token with - | LessThanSlash -> Parser.next p - | LessThan -> - Parser.next p; - Parser.expect Forwardslash p - | token when 
Grammar.is_structure_item_start token -> () - | _ -> Parser.expect LessThanSlash p - in - match p.Parser.token with - | (Lident _ | Uident _) when verify_jsx_opening_closing_name p name -> ( - Scanner.pop_mode p.scanner Jsx; - Parser.expect GreaterThan p; - let loc = mk_loc children_start_pos children_end_pos in - match (spread, children) with - | true, child :: _ -> child - | _ -> Ast_helper.Exp.make_list_expression loc children None) - | token -> ( - Scanner.pop_mode p.scanner Jsx; - let () = - if Grammar.is_structure_item_start token then - let closing = "" in - let msg = Diagnostics.message ("Missing " ^ closing) in - Parser.err ~start_pos ~end_pos:p.prev_end_pos p msg - else - let opening = "" in - let msg = - "Closing jsx name should be the same as the opening name. Did \ - you mean " ^ opening ^ " ?" - in - Parser.err ~start_pos ~end_pos:p.prev_end_pos p - (Diagnostics.message msg); - Parser.expect GreaterThan p - in - let loc = mk_loc children_start_pos children_end_pos in - match (spread, children) with - | true, child :: _ -> child - | _ -> Ast_helper.Exp.make_list_expression loc children None)) - | token -> - Scanner.pop_mode p.scanner Jsx; - Parser.err p (Diagnostics.unexpected token p.breadcrumbs); - Ast_helper.Exp.make_list_expression Location.none [] None - in - let jsx_end_pos = p.prev_end_pos in - let loc = mk_loc jsx_start_pos jsx_end_pos in - Ast_helper.Exp.apply ~loc name - (List.concat - [ - jsx_props; - [ - (Asttypes.Labelled {txt = "children"; loc = Location.none}, children); - ( Asttypes.Nolabel, - Ast_helper.Exp.construct - (Location.mknoloc (Longident.Lident "()")) - None ); - ]; - ]) *) - (* * jsx ::= * | <> jsx-children @@ -2726,7 +2679,6 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) * jsx-children ::= primary-expr* * => 0 or more *) and parse_jsx p = - Scanner.set_jsx_mode p.Parser.scanner; Parser.leave_breadcrumb p Grammar.Jsx; let start_pos = p.Parser.start_pos in Parser.expect LessThan p; @@ -2754,10 
+2706,9 @@ and parse_jsx_fragment start_pos p = Parser.expect GreaterThan p; let children = parse_jsx_children p in let children_end_pos = p.Parser.start_pos in - if p.token = LessThan then p.token <- Scanner.reconsider_less_than p.scanner; - Parser.expect LessThanSlash p; - Scanner.pop_mode p.scanner Jsx; + Parser.expect LessThan p; let end_pos = p.Parser.end_pos in + Parser.expect Forwardslash p; Parser.expect GreaterThan p; (* location is from starting < till closing > *) let loc = mk_loc start_pos end_pos in @@ -2773,6 +2724,8 @@ and parse_jsx_fragment start_pos p = * | {...jsx_expr} *) and parse_jsx_prop p : Parsetree.jsx_prop option = + (* prop can have `-`, we need to combine some tokens into a single ident *) + parse_jsx_ident p; match p.Parser.token with | Question | Lident _ -> ( let optional = Parser.optional p Question in @@ -2785,18 +2738,15 @@ and parse_jsx_prop p : Parsetree.jsx_prop option = Parser.next p; (* no punning *) let optional = Parser.optional p Question in - Scanner.pop_mode p.scanner Jsx; let attr_expr = parse_primary_expr ~operand:(parse_atomic_expr p) p in Some (Parsetree.JSXPropValue ({txt = name; loc}, optional, attr_expr)) | _ -> Some (Parsetree.JSXPropPunning (false, {txt = name; loc}))) (* {...props} *) | Lbrace -> ( - Scanner.pop_mode p.scanner Jsx; let spread_start = p.Parser.start_pos in Parser.next p; match p.Parser.token with | DotDotDot -> ( - Scanner.pop_mode p.scanner Jsx; Parser.next p; let attr_expr = parse_primary_expr ~operand:(parse_expr p) p in match p.Parser.token with @@ -2804,7 +2754,6 @@ and parse_jsx_prop p : Parsetree.jsx_prop option = let spread_end = p.Parser.end_pos in let loc = mk_loc spread_start spread_end in Parser.next p; - Scanner.set_jsx_mode p.scanner; Some (Parsetree.JSXPropSpreading (loc, attr_expr)) (* Some (label, attr_expr) *) | _ -> None) @@ -2815,26 +2764,27 @@ and parse_jsx_props p : Parsetree.jsx_prop list = parse_region ~grammar:Grammar.JsxAttribute ~f:parse_jsx_prop p and 
parse_jsx_children p : Parsetree.jsx_children = - Scanner.pop_mode p.scanner Jsx; let rec loop p children = match p.Parser.token with - | Token.Eof | LessThanSlash -> children + | Token.Eof -> children + | LessThan when Scanner.peekSlash p.scanner -> children | LessThan -> (* Imagine:
< * is `<` the start of a jsx-child?
* reconsiderLessThan peeks at the next token and * determines the correct token to disambiguate *) - let token = Scanner.reconsider_less_than p.scanner in - if token = LessThan then - let child = - parse_primary_expr ~operand:(parse_atomic_expr p) ~no_call:true p - in - loop p (child :: children) - else - (* LessThanSlash *) - let () = p.token <- token in - children + (* let token = Scanner.reconsider_less_than p.scanner in *) + (* let token = p.Parser.token in *) + (* if token = LessThan then *) + let child = + parse_primary_expr ~operand:(parse_atomic_expr p) ~no_call:true p + in + loop p (child :: children) + (* else *) + (* LessThanSlash *) + (* let () = p.token <- token in *) + (* children *) | token when Grammar.is_jsx_child_start token -> let child = parse_primary_expr ~operand:(parse_atomic_expr p) ~no_call:true p @@ -2854,7 +2804,6 @@ and parse_jsx_children p : Parsetree.jsx_children = let children = List.rev (loop p []) in Parsetree.JSXChildrenItems children in - Scanner.set_jsx_mode p.scanner; children and parse_braced_or_record_expr p = diff --git a/compiler/syntax/src/res_scanner.ml b/compiler/syntax/src/res_scanner.ml index 3c66812bc1..77062ea431 100644 --- a/compiler/syntax/src/res_scanner.ml +++ b/compiler/syntax/src/res_scanner.ml @@ -2,7 +2,7 @@ module Diagnostics = Res_diagnostics module Token = Res_token module Comment = Res_comment -type mode = Jsx | Diamond +type mode = Diamond (* We hide the implementation detail of the scanner reading character. Our char will also contain the special -1 value to indicate end-of-file. 
This isn't @@ -29,8 +29,6 @@ type t = { let set_diamond_mode scanner = scanner.mode <- Diamond :: scanner.mode -let set_jsx_mode scanner = scanner.mode <- Jsx :: scanner.mode - let pop_mode scanner mode = match scanner.mode with | m :: ms when m = mode -> scanner.mode <- ms @@ -41,11 +39,6 @@ let in_diamond_mode scanner = | Diamond :: _ -> true | _ -> false -let in_jsx_mode scanner = - match scanner.mode with - | Jsx :: _ -> true - | _ -> false - let position scanner = Lexing. { @@ -145,6 +138,20 @@ let peek3 scanner = String.unsafe_get scanner.src (scanner.offset + 3) else hacky_eof_char +let peekChar scanner target_char = + let rec skip_whitespace_and_check offset = + if offset < String.length scanner.src then + let ch = String.unsafe_get scanner.src offset in + match ch with + | ' ' | '\t' | '\n' | '\r' -> skip_whitespace_and_check (offset + 1) + | c -> c = target_char + else false + in + skip_whitespace_and_check scanner.offset + +let peekMinus scanner = peekChar scanner '-' +let peekSlash scanner = peekChar scanner '/' + let make ~filename src = { filename; @@ -182,11 +189,9 @@ let digit_value ch = let scan_identifier scanner = let start_off = scanner.offset in let rec skip_good_chars scanner = - match (scanner.ch, in_jsx_mode scanner) with - | ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\''), false -> - next scanner; - skip_good_chars scanner - | ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\'' | '-'), true -> + (* TODO: figure out what to replace the parser with here *) + match scanner.ch with + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' | '_' | '\'' -> next scanner; skip_good_chars scanner | _ -> () @@ -902,7 +907,8 @@ let rec scan scanner = | '>' -> next scanner; Token.GreaterThan - | '<' when not (in_jsx_mode scanner) -> ( + | '<' -> ( + (* skip_whitespace scanner; *) match peek scanner with | '<' when not (in_diamond_mode scanner) -> next2 scanner; @@ -913,25 +919,6 @@ let rec scan scanner = | _ -> next scanner; Token.LessThan) - (* special handling for JSX < *) - | '<' -> ( - (* Imagine the following:
< - * < indicates the start of a new jsx-element, the parser expects - * the name of a new element after the < - * Example:
- * This signals a closing element. To simulate the two-token lookahead, - * the - next scanner; - Token.LessThanSlash - | '=' -> - next scanner; - Token.LessEqual - | _ -> Token.LessThan) (* peeking 2 chars *) | '.' -> ( match (peek scanner, peek2 scanner) with @@ -1028,19 +1015,6 @@ let rec scan scanner = (* misc helpers used elsewhere *) -(* Imagine:
< - * is `<` the start of a jsx-child?
- * reconsiderLessThan peeks at the next token and - * determines the correct token to disambiguate *) -let reconsider_less_than scanner = - (* < consumed *) - skip_whitespace scanner; - if scanner.ch == '/' then - let () = next scanner in - Token.LessThanSlash - else Token.LessThan - (* If an operator has whitespace around both sides, it's a binary operator *) (* TODO: this helper seems out of place *) let is_binary_op src start_cnum end_cnum = diff --git a/compiler/syntax/src/res_scanner.mli b/compiler/syntax/src/res_scanner.mli index a5b9c122ba..38fee7512e 100644 --- a/compiler/syntax/src/res_scanner.mli +++ b/compiler/syntax/src/res_scanner.mli @@ -1,4 +1,4 @@ -type mode = Jsx | Diamond +type mode = Diamond type char_encoding @@ -26,13 +26,19 @@ val scan : t -> Lexing.position * Lexing.position * Res_token.t val is_binary_op : string -> int -> int -> bool -val set_jsx_mode : t -> unit val set_diamond_mode : t -> unit val pop_mode : t -> mode -> unit -val reconsider_less_than : t -> Res_token.t - val scan_template_literal_token : t -> Lexing.position * Lexing.position * Res_token.t val scan_regex : t -> Lexing.position * Lexing.position * Res_token.t + +(* Look ahead to see if the next non-whitespace character is a minus *) +val peekMinus : t -> bool + +(* Look ahead to see if the next non-whitespace character is a slash *) +val peekSlash : t -> bool +(** +Checks if the next token is a minus +*) diff --git a/compiler/syntax/src/res_token.ml b/compiler/syntax/src/res_token.ml index 312af0c423..5fc89658c0 100644 --- a/compiler/syntax/src/res_token.ml +++ b/compiler/syntax/src/res_token.ml @@ -52,7 +52,6 @@ type t = | ColonGreaterThan | GreaterThan | LessThan - | LessThanSlash | Hash | HashEqual | Assert @@ -169,7 +168,6 @@ let to_string = function | HashEqual -> "#=" | GreaterThan -> ">" | LessThan -> "<" - | LessThanSlash -> " "*" | AsteriskDot -> "*." 
| Exponentiation -> "**" diff --git a/compiler/syntax/src/res_token_debugger.ml b/compiler/syntax/src/res_token_debugger.ml index a484399dcb..6f3631ec20 100644 --- a/compiler/syntax/src/res_token_debugger.ml +++ b/compiler/syntax/src/res_token_debugger.ml @@ -69,7 +69,6 @@ let dump_tokens filename = | Res_token.ColonGreaterThan -> "ColonGreaterThan" | Res_token.GreaterThan -> "GreaterThan" | Res_token.LessThan -> "LessThan" - | Res_token.LessThanSlash -> "LessThanSlash" | Res_token.Hash -> "Hash" | Res_token.HashEqual -> "HashEqual" | Res_token.Assert -> "Assert" From 0e8b54252ad7c397c8752ac59986fc3a9a755d0a Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 11:24:24 +0200 Subject: [PATCH 03/13] Document RESCRIPT_BSC_EXE for local usage --- CONTRIBUTING.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4fbb98c17c..2f236fd0bb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,6 +148,16 @@ npm install npm link rescript ``` +#### Use Local BSC with Existing ReScript Installation + +Alternatively, you can set the `RESCRIPT_BSC_EXE` environment variable to point to your locally compiled `bsc.exe`. + +```sh +RESCRIPT_BSC_EXE=your-rescript-repo/packages/@rescript/darwin-arm64/bin/bsc.exe npx rescript +``` + +This will test the local compiler while still using the build system from the installed Node module. + ### Running Automatic Tests We provide different test suites for different levels of the compiler and build system infrastructure. Always make sure to locally build your compiler before running any tests. 
From 7d6a5632cecc1f670d1946df0b1c7cc046475c12 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 11:47:50 +0200 Subject: [PATCH 04/13] Process hyphens in parse_module_long_ident for jsx --- compiler/syntax/src/res_core.ml | 72 ++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/compiler/syntax/src/res_core.ml b/compiler/syntax/src/res_core.ml index eaa30bd9e0..5097b496b8 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -715,10 +715,42 @@ let parse_module_long_ident_tail ~lowercase p start_pos ident = in loop p ident +(* jsx allows for `-` token in the name, we need to combine some tokens into a single ident *) +let parse_jsx_ident (p : Parser.t) : unit = + (* check if the next tokens are minus and ident, if so, add them to the buffer *) + let rec visit buffer = + match p.Parser.token with + | Minus -> ( + Parser.next p; + match p.Parser.token with + | Lident txt | Uident txt -> + Buffer.add_char buffer '-'; + Buffer.add_string buffer txt; + if Scanner.peekMinus p.scanner then visit buffer else buffer + | _ -> buffer) + | _ -> buffer + in + match p.Parser.token with + | Lident txt when Scanner.peekMinus p.scanner -> + let buffer = Buffer.create (String.length txt) in + Buffer.add_string buffer txt; + Parser.next p; + let name = visit buffer |> Buffer.contents in + let token = Token.Lident name in + p.token <- token + | Uident txt when Scanner.peekMinus p.scanner -> + let buffer = Buffer.create (String.length txt) in + Buffer.add_string buffer txt; + Parser.next p; + let name = visit buffer |> Buffer.contents in + let token = Token.Uident name in + p.token <- token + | _ -> () + (* Parses module identifiers: Foo Foo.Bar *) -let parse_module_long_ident ~lowercase p = +let parse_module_long_ident ~lowercase ?(is_jsx_name : bool = false) p = (* Parser.leaveBreadcrumb p Reporting.ModuleLongIdent; *) let start_pos = p.Parser.start_pos in let module_ident = @@ -735,6 +767,7 @@ let 
parse_module_long_ident ~lowercase p = match p.Parser.token with | Dot -> Parser.next p; + if is_jsx_name then parse_jsx_ident p; parse_module_long_ident_tail ~lowercase p start_pos lident | _ -> Location.mkloc lident (mk_loc start_pos end_pos)) | t -> @@ -751,7 +784,8 @@ let verify_jsx_opening_closing_name p | Lident lident -> Parser.next p; Longident.Lident lident - | Uident _ -> (parse_module_long_ident ~lowercase:true p).txt + | Uident _ -> + (parse_module_long_ident ~lowercase:true ~is_jsx_name:true p).txt | _ -> Longident.Lident "" in let opening = name_longident.txt in @@ -2540,36 +2574,6 @@ and parse_let_bindings ~attrs ~start_pos p = (rec_flag, loop p [first]) (* jsx allows for `-` token in the name, we need to combine some tokens into a single ident *) -and parse_jsx_ident p = - (* check if the next tokens are minus and ident, if so, add them to the buffer *) - let rec visit buffer = - match p.Parser.token with - | Minus -> ( - Parser.next p; - match p.Parser.token with - | Lident txt | Uident txt -> - Buffer.add_char buffer '-'; - Buffer.add_string buffer txt; - if Scanner.peekMinus p.scanner then visit buffer else buffer - | _ -> buffer) - | _ -> buffer - in - match p.Parser.token with - | Lident txt when Scanner.peekMinus p.scanner -> - let buffer = Buffer.create (String.length txt) in - Buffer.add_string buffer txt; - Parser.next p; - let name = visit buffer |> Buffer.contents in - let token = Token.Lident name in - p.token <- token - | Uident txt when Scanner.peekMinus p.scanner -> - let buffer = Buffer.create (String.length txt) in - Buffer.add_string buffer txt; - Parser.next p; - let name = visit buffer |> Buffer.contents in - let token = Token.Uident name in - p.token <- token - | _ -> () and parse_jsx_name p : Longident.t Location.loc = (* jsx allows for `-` token in the name, we need to combine some tokens *) @@ -2582,7 +2586,9 @@ and parse_jsx_name p : Longident.t Location.loc = let loc = mk_loc ident_start ident_end in Location.mkloc 
(Longident.Lident ident) loc | Uident _ -> - let longident = parse_module_long_ident ~lowercase:true p in + let longident = + parse_module_long_ident ~lowercase:true ~is_jsx_name:true p + in longident | _ -> let msg = From b0f4806974471072b984893e610e23623a64f462 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 11:53:44 +0200 Subject: [PATCH 05/13] Update test snapshot --- .../data/parsing/errors/expressions/expected/jsx.res.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/syntax_tests/data/parsing/errors/expressions/expected/jsx.res.txt b/tests/syntax_tests/data/parsing/errors/expressions/expected/jsx.res.txt index 09444d5759..17475d382b 100644 --- a/tests/syntax_tests/data/parsing/errors/expressions/expected/jsx.res.txt +++ b/tests/syntax_tests/data/parsing/errors/expressions/expected/jsx.res.txt @@ -7,7 +7,7 @@ 3 │ let x = ; 4 │ let x = - Did you forget a ` Date: Sat, 2 Aug 2025 12:00:52 +0200 Subject: [PATCH 06/13] Add how to view tokens. --- CONTRIBUTING.md | 12 ++++++++++++ compiler/syntax/cli/res_cli.ml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2f236fd0bb..b8d03b9a13 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,12 +126,24 @@ make lib # Build compiler and standard library ./cli/bsc.js myTestFile.res ``` +To view the tokens of a file run: + +```sh +dune exec res_parser -- -print tokens myTestFile.res +``` + To view the untyped tree of the file run: ```sh ./cli/bsc.js -dparsetree myTestFile.res ``` +or + +```sh +dune exec res_parser -- -print ast -recover myTestFile.res +``` + To view the typed tree of the file run: ```sh diff --git a/compiler/syntax/cli/res_cli.ml b/compiler/syntax/cli/res_cli.ml index ab3a5bb93e..fc16a85b40 100644 --- a/compiler/syntax/cli/res_cli.ml +++ b/compiler/syntax/cli/res_cli.ml @@ -194,7 +194,7 @@ end = struct ("-recover", Arg.Unit (fun () -> recover := true), "Emit partial ast"); ( "-print", Arg.String (fun txt -> 
print := txt), - "Print either binary, ml, ast, sexp, comments or res. Default: res" ); + "Print either binary, ml, ast, sexp, comments, tokens or res. Default: res" ); ( "-width", Arg.Int (fun w -> width := w), "Specify the line length for the printer (formatter)" ); From 849d19f0eddc5151724e64ec427b782dc6a571a5 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 12:01:05 +0200 Subject: [PATCH 07/13] fmt --- compiler/syntax/cli/res_cli.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/syntax/cli/res_cli.ml b/compiler/syntax/cli/res_cli.ml index fc16a85b40..201a3ac31e 100644 --- a/compiler/syntax/cli/res_cli.ml +++ b/compiler/syntax/cli/res_cli.ml @@ -194,7 +194,8 @@ end = struct ("-recover", Arg.Unit (fun () -> recover := true), "Emit partial ast"); ( "-print", Arg.String (fun txt -> print := txt), - "Print either binary, ml, ast, sexp, comments, tokens or res. Default: res" ); + "Print either binary, ml, ast, sexp, comments, tokens or res. Default: \ + res" ); ( "-width", Arg.Int (fun w -> width := w), "Specify the line length for the printer (formatter)" ); From 28cfceb426ee9c625db866c8d915e429f306b685 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 12:08:14 +0200 Subject: [PATCH 08/13] Clean up --- compiler/syntax/src/res_core.ml | 9 --------- compiler/syntax/src/res_scanner.ml | 2 -- compiler/syntax/src/res_scanner.mli | 3 --- 3 files changed, 14 deletions(-) diff --git a/compiler/syntax/src/res_core.ml b/compiler/syntax/src/res_core.ml index 5097b496b8..f872177ed0 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -2573,8 +2573,6 @@ and parse_let_bindings ~attrs ~start_pos p = in (rec_flag, loop p [first]) -(* jsx allows for `-` token in the name, we need to combine some tokens into a single ident *) - and parse_jsx_name p : Longident.t Location.loc = (* jsx allows for `-` token in the name, we need to combine some tokens *) parse_jsx_ident p; @@ -2780,17 +2778,10 @@ and 
parse_jsx_children p : Parsetree.jsx_children = * or is it the start of a closing tag?
* reconsiderLessThan peeks at the next token and * determines the correct token to disambiguate *) - (* let token = Scanner.reconsider_less_than p.scanner in *) - (* let token = p.Parser.token in *) - (* if token = LessThan then *) let child = parse_primary_expr ~operand:(parse_atomic_expr p) ~no_call:true p in loop p (child :: children) - (* else *) - (* LessThanSlash *) - (* let () = p.token <- token in *) - (* children *) | token when Grammar.is_jsx_child_start token -> let child = parse_primary_expr ~operand:(parse_atomic_expr p) ~no_call:true p diff --git a/compiler/syntax/src/res_scanner.ml b/compiler/syntax/src/res_scanner.ml index 77062ea431..c404d36cc2 100644 --- a/compiler/syntax/src/res_scanner.ml +++ b/compiler/syntax/src/res_scanner.ml @@ -189,7 +189,6 @@ let digit_value ch = let scan_identifier scanner = let start_off = scanner.offset in let rec skip_good_chars scanner = - (* TODO: figure out what to replace the parser with here *) match scanner.ch with | 'A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' | '_' | '\'' -> next scanner; @@ -908,7 +907,6 @@ let rec scan scanner = next scanner; Token.GreaterThan | '<' -> ( - (* skip_whitespace scanner; *) match peek scanner with | '<' when not (in_diamond_mode scanner) -> next2 scanner; diff --git a/compiler/syntax/src/res_scanner.mli b/compiler/syntax/src/res_scanner.mli index 38fee7512e..e55896796d 100644 --- a/compiler/syntax/src/res_scanner.mli +++ b/compiler/syntax/src/res_scanner.mli @@ -39,6 +39,3 @@ val peekMinus : t -> bool (* Look ahead to see if the next non-whitespace character is a slash *) val peekSlash : t -> bool -(** -Checks if the next token is a minus -*) From bd46a3ed645210cc73e1cb455f0d0224c7720db2 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 12:27:49 +0200 Subject: [PATCH 09/13] Correct fragment range --- compiler/syntax/src/res_core.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/syntax/src/res_core.ml b/compiler/syntax/src/res_core.ml index f872177ed0..c55e110545 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -2711,8 +2711,8 @@ and parse_jsx_fragment start_pos p = let children = parse_jsx_children p in let children_end_pos = p.Parser.start_pos in Parser.expect LessThan p; - let end_pos = p.Parser.end_pos in Parser.expect Forwardslash p; + let end_pos = p.Parser.end_pos in Parser.expect GreaterThan p; (* location is from starting < till closing > *) let loc = mk_loc start_pos end_pos in From 8bb4e41482ebc7033c42d6c1ba2fef87df65060c Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 13:51:53 +0200 Subject: [PATCH 10/13] Use legacy clean for analysis projects --- tests/analysis_tests/tests-generic-jsx-transform/package.json | 2 +- .../analysis_tests/tests-incremental-typechecking/package.json | 2 +- tests/analysis_tests/tests-reanalyze/deadcode/package.json | 2 +- tests/analysis_tests/tests-reanalyze/termination/package.json | 2 +- tests/analysis_tests/tests/package.json | 2 +- 5 files changed, 5 
insertions(+), 5 deletions(-) diff --git a/tests/analysis_tests/tests-generic-jsx-transform/package.json b/tests/analysis_tests/tests-generic-jsx-transform/package.json index fd016eeb9e..7a4f4861f8 100644 --- a/tests/analysis_tests/tests-generic-jsx-transform/package.json +++ b/tests/analysis_tests/tests-generic-jsx-transform/package.json @@ -3,7 +3,7 @@ "private": true, "scripts": { "build": "rescript legacy build", - "clean": "rescript clean" + "clean": "rescript legacy clean" }, "dependencies": { "rescript": "workspace:^" diff --git a/tests/analysis_tests/tests-incremental-typechecking/package.json b/tests/analysis_tests/tests-incremental-typechecking/package.json index 559bffd744..cd410d8afa 100644 --- a/tests/analysis_tests/tests-incremental-typechecking/package.json +++ b/tests/analysis_tests/tests-incremental-typechecking/package.json @@ -3,7 +3,7 @@ "private": true, "scripts": { "build": "rescript legacy build", - "clean": "rescript clean" + "clean": "rescript legacy clean" }, "dependencies": { "rescript": "workspace:^" diff --git a/tests/analysis_tests/tests-reanalyze/deadcode/package.json b/tests/analysis_tests/tests-reanalyze/deadcode/package.json index a73dcaf9bc..c9ff3996f7 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode/package.json +++ b/tests/analysis_tests/tests-reanalyze/deadcode/package.json @@ -3,7 +3,7 @@ "private": true, "scripts": { "build": "rescript legacy build", - "clean": "rescript clean" + "clean": "rescript legacy clean" }, "dependencies": { "@rescript/react": "link:../../../dependencies/rescript-react", diff --git a/tests/analysis_tests/tests-reanalyze/termination/package.json b/tests/analysis_tests/tests-reanalyze/termination/package.json index 06c0593f42..02b2adf3e5 100644 --- a/tests/analysis_tests/tests-reanalyze/termination/package.json +++ b/tests/analysis_tests/tests-reanalyze/termination/package.json @@ -3,7 +3,7 @@ "private": true, "scripts": { "build": "rescript legacy build", - "clean": "rescript clean" + 
"clean": "rescript legacy clean" }, "dependencies": { "rescript": "workspace:^" diff --git a/tests/analysis_tests/tests/package.json b/tests/analysis_tests/tests/package.json index 601884c23c..01bbe0ecbb 100644 --- a/tests/analysis_tests/tests/package.json +++ b/tests/analysis_tests/tests/package.json @@ -3,7 +3,7 @@ "private": true, "scripts": { "build": "rescript legacy build", - "clean": "rescript clean" + "clean": "rescript legacy clean" }, "dependencies": { "@rescript/react": "link:../../dependencies/rescript-react", From 9d8641ed5eaaa2bf5b66b6fc8dc21876459a7975 Mon Sep 17 00:00:00 2001 From: nojaf Date: Sat, 2 Aug 2025 13:52:15 +0200 Subject: [PATCH 11/13] Update analysis snapshot --- tests/analysis_tests/tests/src/expected/CompletionJsx.res.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/analysis_tests/tests/src/expected/CompletionJsx.res.txt b/tests/analysis_tests/tests/src/expected/CompletionJsx.res.txt index d89ae7cde6..cd4f01377d 100644 --- a/tests/analysis_tests/tests/src/expected/CompletionJsx.res.txt +++ b/tests/analysis_tests/tests/src/expected/CompletionJsx.res.txt @@ -560,8 +560,8 @@ posCursor:[30:12] posNoWhite:[30:11] Found expr:[11:4->32:10] posCursor:[30:12] posNoWhite:[30:11] Found expr:[12:4->32:10] posCursor:[30:12] posNoWhite:[30:11] Found expr:[15:4->32:10] JSX 15:8] > _children:16:7 -posCursor:[30:12] posNoWhite:[30:11] Found expr:[30:9->32:10] -JSX 30:12] div[32:6->32:9]=...[32:6->32:9]> _children:None +posCursor:[30:12] posNoWhite:[30:11] Found expr:[30:9->30:12] +JSX 30:12] > _children:None Completable: ChtmlElement Date: Tue, 5 Aug 2025 15:34:47 +0200 Subject: [PATCH 12/13] Copilot review suggestion --- compiler/syntax/src/res_core.ml | 5 ++++- .../data/parsing/grammar/expressions/expected/jsx.res.txt | 4 +++- tests/syntax_tests/data/parsing/grammar/expressions/jsx.res | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/compiler/syntax/src/res_core.ml 
b/compiler/syntax/src/res_core.ml index c55e110545..0949679ba6 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -726,7 +726,10 @@ let parse_jsx_ident (p : Parser.t) : unit = | Lident txt | Uident txt -> Buffer.add_char buffer '-'; Buffer.add_string buffer txt; - if Scanner.peekMinus p.scanner then visit buffer else buffer + if Scanner.peekMinus p.scanner then ( + Parser.next p; + visit buffer) + else buffer | _ -> buffer) | _ -> buffer in diff --git a/tests/syntax_tests/data/parsing/grammar/expressions/expected/jsx.res.txt b/tests/syntax_tests/data/parsing/grammar/expressions/expected/jsx.res.txt index 9ff19ccbd7..871fa887dd 100644 --- a/tests/syntax_tests/data/parsing/grammar/expressions/expected/jsx.res.txt +++ b/tests/syntax_tests/data/parsing/grammar/expressions/expected/jsx.res.txt @@ -342,4 +342,6 @@ let _ = ;;<>[|a|] ;;<>(1, 2) ;;<>((array -> f)[@res.braces ]) -let _ = \ No newline at end of file +let _ = +let _ = +let _ = \ No newline at end of file diff --git a/tests/syntax_tests/data/parsing/grammar/expressions/jsx.res b/tests/syntax_tests/data/parsing/grammar/expressions/jsx.res index 1c95cbce53..4805013406 100644 --- a/tests/syntax_tests/data/parsing/grammar/expressions/jsx.res +++ b/tests/syntax_tests/data/parsing/grammar/expressions/jsx.res @@ -516,4 +516,7 @@ let _ = <> ...(1, 2) <> ...{array->f} -let _ = \ No newline at end of file +let _ = + +let _ = +let _ = \ No newline at end of file From 452301a5105ccf9e7367357061aa9d64c66a3340 Mon Sep 17 00:00:00 2001 From: nojaf Date: Tue, 5 Aug 2025 15:43:35 +0200 Subject: [PATCH 13/13] Return original error directly --- compiler/syntax/src/res_core.ml | 4 ++-- .../data/parsing/errors/expressions/expected/jsx.res.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/syntax/src/res_core.ml b/compiler/syntax/src/res_core.ml index 0949679ba6..af8f0211ca 100644 --- a/compiler/syntax/src/res_core.ml +++ b/compiler/syntax/src/res_core.ml @@ -2627,8 
+2627,8 @@ and parse_jsx_opening_or_self_closing_element (* start of the opening < *) Some pos | token when Grammar.is_structure_item_start token -> None | _ -> - Parser.expect LessThan p; - Parser.expect Forwardslash p; + Parser.err ~start_pos:p.start_pos ~end_pos:p.end_pos p + (Diagnostics.message "Did you forget a `; 4 │ let x = - Did you forget a `<` here? + Did you forget a `