@@ -353,11 +353,11 @@ tokenize("=>" ++ Rest, Line, Column, Scope, Tokens) ->
   tokenize(Rest, Line, Column + 2, Scope, add_token_with_eol(Token, Tokens));
 
 tokenize("..//" ++ Rest = String, Line, Column, Scope, Tokens) ->
-  case strip_horizontal_space(Rest, 0) of
-    {[$/ | _] = Remaining, Extra} ->
+  case strip_horizontal_space(Rest, Line, Column + 4, Scope) of
+    {[$/ | _] = Remaining, NewLine, NewColumn} ->
       Token = {identifier, {Line, Column, nil}, '..//'},
-      tokenize(Remaining, Line, Column + 4 + Extra, Scope, [Token | Tokens]);
-    {_, _} ->
+      tokenize(Remaining, NewLine, NewColumn, Scope, [Token | Tokens]);
+    {_, _, _} ->
       unexpected_token(String, Line, Column, Scope, Tokens)
   end;
 
@@ -464,17 +464,17 @@ tokenize([T1, T2 | Rest], Line, Column, Scope, Tokens) when ?stab_op(T1, T2) ->
 
 tokenize([$& | Rest], Line, Column, Scope, Tokens) ->
   Kind =
-    case strip_horizontal_space(Rest, 0) of
-      {[Int | _], 0} when ?is_digit(Int) ->
+    case strip_horizontal_space(Rest, Line, 0, Scope) of
+      {[Int | _], Line, 0} when ?is_digit(Int) ->
         capture_int;
 
-      {[$/ | NewRest], _} ->
-        case strip_horizontal_space(NewRest, 0) of
-          {[$/ | _], _} -> capture_op;
-          {_, _} -> identifier
+      {[$/ | NewRest], _, _} ->
+        case strip_horizontal_space(NewRest, Line, 0, Scope) of
+          {[$/ | _], _, _} -> capture_op;
+          {_, _, _} -> identifier
         end;
 
-      {_, _} ->
+      {_, _, _} ->
         capture_op
     end,
 
@@ -612,8 +612,8 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) ->
 % Spaces
 
 tokenize([T | Rest], Line, Column, Scope, Tokens) when ?is_horizontal_space(T) ->
-  {Remaining, Stripped} = strip_horizontal_space(Rest, 0),
-  handle_space_sensitive_tokens(Remaining, Line, Column + 1 + Stripped, Scope, Tokens);
+  {Remaining, NewLine, NewColumn} = strip_horizontal_space(Rest, Line, Column + 1, Scope),
+  handle_space_sensitive_tokens(Remaining, NewLine, NewColumn, Scope, Tokens);
 
 % End of line
 
@@ -735,32 +735,39 @@ unexpected_token([T | Rest], Line, Column, Scope, Tokens) ->
   error({?LOC(Line, Column), "unexpected token: ", Message}, Rest, Scope, Tokens).
 
 tokenize_eol(Rest, Line, Scope, Tokens) ->
-  {StrippedRest, Column} = strip_horizontal_space(Rest, Scope#elixir_tokenizer.column),
-  IndentedScope = Scope#elixir_tokenizer{indentation = Column - 1},
-  tokenize(StrippedRest, Line + 1, Column, IndentedScope, Tokens).
-
-strip_horizontal_space([H | T], Counter) when ?is_horizontal_space(H) ->
-  strip_horizontal_space(T, Counter + 1);
-strip_horizontal_space(T, Counter) ->
-  {T, Counter}.
+  {StrippedRest, NewLine, NewColumn} =
+    strip_horizontal_space(Rest, Line + 1, Scope#elixir_tokenizer.column, Scope),
+  IndentedScope = Scope#elixir_tokenizer{indentation = NewColumn - 1},
+  tokenize(StrippedRest, NewLine, NewColumn, IndentedScope, Tokens).
+
+strip_horizontal_space([H | T], Line, Counter, Scope) when ?is_horizontal_space(H) ->
+  strip_horizontal_space(T, Line, Counter + 1, Scope);
+%% \\ at the end of lines is treated as horizontal whitespace
+%% except at the very end of the buffer, which we treat as incomplete
+strip_horizontal_space("\\\n" ++ T, Line, _Counter, Scope) when T /= [] ->
+  strip_horizontal_space(T, Line + 1, Scope#elixir_tokenizer.column, Scope);
+strip_horizontal_space("\\\r\n" ++ T, Line, _Counter, Scope) when T /= [] ->
+  strip_horizontal_space(T, Line + 1, Scope#elixir_tokenizer.column, Scope);
+strip_horizontal_space(T, Line, Counter, _Scope) ->
+  {T, Line, Counter}.
 
 tokenize_dot(T, Line, Column, DotInfo, Scope, Tokens) ->
-  case strip_horizontal_space(T, 0) of
-    {[$# | R], _} ->
+  case strip_horizontal_space(T, Line, Column, Scope) of
+    {[$# | R], NewLine, NewColumn} ->
       case tokenize_comment(R, [$#]) of
         {error, Char, Reason} ->
-          error_comment(Char, Reason, [$# | R], Line, Column, Scope, Tokens);
+          error_comment(Char, Reason, [$# | R], NewLine, NewColumn, Scope, Tokens);
 
         {Rest, Comment} ->
-          preserve_comments(Line, Column, Tokens, Comment, Rest, Scope),
-          tokenize_dot(Rest, Line, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens)
+          preserve_comments(NewLine, NewColumn, Tokens, Comment, Rest, Scope),
+          tokenize_dot(Rest, NewLine, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens)
       end;
-    {"\r\n" ++ Rest, _} ->
-      tokenize_dot(Rest, Line + 1, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens);
-    {"\n" ++ Rest, _} ->
-      tokenize_dot(Rest, Line + 1, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens);
-    {Rest, Length} ->
-      handle_dot([$. | Rest], Line, Column + Length, DotInfo, Scope, Tokens)
+    {"\r\n" ++ Rest, NewLine, _NewColumn} ->
+      tokenize_dot(Rest, NewLine + 1, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens);
+    {"\n" ++ Rest, NewLine, _NewColumn} ->
+      tokenize_dot(Rest, NewLine + 1, Scope#elixir_tokenizer.column, DotInfo, Scope, Tokens);
+    {Rest, NewLine, NewColumn} ->
+      handle_dot([$. | Rest], NewLine, NewColumn, DotInfo, Scope, Tokens)
   end.
 
 handle_char(0) -> {"\\0", "null byte"};
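The heart of this hunk is the new strip_horizontal_space/4: it threads the current line and column through the scan and treats a backslash immediately followed by a newline as plain horizontal whitespace, except when the backslash ends the buffer. Below is a minimal standalone sketch of that contract, assuming a restart column of 1 instead of the real Scope#elixir_tokenizer.column; the module and function names are illustrative, not part of the codebase.

    %% Illustrative sketch only: an approximation of the new
    %% strip_horizontal_space/4 contract without the Scope record.
    -module(hspace_sketch).
    -export([strip/3]).

    strip([H | T], Line, Column) when H =:= $\s; H =:= $\t ->
      strip(T, Line, Column + 1);
    %% A backslash at the very end of a line continues the scan on the
    %% next line, unless it is the last thing in the buffer.
    strip("\\\n" ++ T, Line, _Column) when T =/= [] ->
      strip(T, Line + 1, 1);
    strip("\\\r\n" ++ T, Line, _Column) when T =/= [] ->
      strip(T, Line + 1, 1);
    strip(Rest, Line, Column) ->
      {Rest, Line, Column}.

    %% hspace_sketch:strip("  \\\nfoo", 1, 3) =:= {"foo", 2, 1}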
@@ -871,25 +878,25 @@ handle_unary_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) whe
   tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]);
 
 handle_unary_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
-  case strip_horizontal_space(Rest, 0) of
-    {[$/ | _] = Remaining, Extra} ->
+  case strip_horizontal_space(Rest, Line, Column + Length, Scope) of
+    {[$/ | _] = Remaining, NewLine, NewColumn} ->
       Token = {identifier, {Line, Column, nil}, Op},
-      tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]);
-    {Remaining, Extra} ->
+      tokenize(Remaining, NewLine, NewColumn, Scope, [Token | Tokens]);
+    {Remaining, NewLine, NewColumn} ->
       Token = {Kind, {Line, Column, nil}, Op},
-      tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens])
+      tokenize(Remaining, NewLine, NewColumn, Scope, [Token | Tokens])
   end.
 
 handle_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
   Token = {kw_identifier, {Line, Column, nil}, Op},
   tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]);
 
 handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
-  case strip_horizontal_space(Rest, 0) of
-    {[$/ | _] = Remaining, Extra} ->
+  case strip_horizontal_space(Rest, Line, Column + Length, Scope) of
+    {[$/ | _] = Remaining, NewLine, NewColumn} ->
       Token = {identifier, {Line, Column, nil}, Op},
-      tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]);
-    {Remaining, Extra} ->
+      tokenize(Remaining, NewLine, NewColumn, Scope, [Token | Tokens]);
+    {Remaining, NewLine, NewColumn} ->
       NewScope =
         %% TODO: Remove these deprecations on Elixir v2.0
         case Op of
@@ -910,7 +917,7 @@ handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
         end,
 
       Token = {Kind, {Line, Column, previous_was_eol(Tokens)}, Op},
-      tokenize(Remaining, Line, Column + Length + Extra, NewScope, add_token_with_eol(Token, Tokens))
+      tokenize(Remaining, NewLine, NewColumn, NewScope, add_token_with_eol(Token, Tokens))
   end.
 
 % ## Three Token Operators
@@ -996,12 +1003,21 @@ handle_call_identifier(Rest, Line, Column, DotInfo, Length, UnencodedOp, Scope,
 
 % ## Ambiguous unary/binary operators tokens
 % Keywords are not ambiguous operators
-handle_space_sensitive_tokens([Sign, $:, Space | _] = String, Line, Column, Scope, Tokens) when ?dual_op(Sign), ?is_space(Space) ->
+handle_space_sensitive_tokens([Sign, $:, Space | _] = String, Line, Column, Scope, Tokens) when
+    ?dual_op(Sign), ?is_space(Space) ->
   tokenize(String, Line, Column, Scope, Tokens);
 
 % But everything else, except other operators, are
 handle_space_sensitive_tokens([Sign, NotMarker | T], Line, Column, Scope, [{identifier, _, _} = H | Tokens]) when
-    ?dual_op(Sign), not(?is_space(NotMarker)), NotMarker =/= Sign, NotMarker =/= $/, NotMarker =/= $> ->
+    ?dual_op(Sign), not(?is_space(NotMarker)),
+    %% Do not match ++ or --
+    NotMarker =/= Sign,
+    %% Do not match +/2 or -/2
+    NotMarker =/= $/,
+    %% Do not match ->
+    NotMarker =/= $>,
+    %% Do not match +\\n or -\\n (it should be treated as if a space is there)
+    NotMarker =/= $\\ ->
   Rest = [NotMarker | T],
   DualOpToken = {dual_op, {Line, Column, nil}, list_to_atom([Sign])},
   tokenize(Rest, Line, Column + 1, Scope, [DualOpToken, setelement(1, H, op_identifier) | Tokens]);
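The extra NotMarker =/= $\\ guard ties this clause to the whitespace change above: when a + or - is glued to a backslash-newline, the preceding identifier is no longer re-tagged as an op_identifier (the ambiguous paren-less call case) and the operator reads as if a space followed it. A minimal sketch of the guard's decision follows; the module name, ambiguous/2, and is_space/1 are illustrative stand-ins for the tokenizer's macros, not part of elixir_tokenizer.

    %% Illustrative only; mirrors the guard above rather than calling into
    %% elixir_tokenizer. Returns true when Sign (a $+ or $-) glued to
    %% NotMarker right after an identifier should trigger the ambiguous
    %% unary/binary handling (dual_op plus op_identifier retag).
    -module(dual_op_sketch).
    -export([ambiguous/2]).

    is_space(C) -> C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n.

    ambiguous(Sign, NotMarker) when Sign =:= $+; Sign =:= $- ->
      not is_space(NotMarker) andalso
        NotMarker =/= Sign andalso  %% not ++ or --
        NotMarker =/= $/ andalso    %% not +/2 or -/2
        NotMarker =/= $> andalso    %% not ->
        NotMarker =/= $\\.          %% backslash-newline acts like a space

    %% dual_op_sketch:ambiguous($-, $x)  =:= true   (e.g. "foo -x")
    %% dual_op_sketch:ambiguous($-, $\\) =:= false  (treated like "foo - x")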
@@ -1664,8 +1680,8 @@ tokenize_keyword(block, Rest, Line, Column, Atom, Length, Scope, Tokens) ->
 
 tokenize_keyword(Kind, Rest, Line, Column, Atom, Length, Scope, Tokens) ->
   NewTokens =
-    case strip_horizontal_space(Rest, 0) of
-      {[$/ | _], _} ->
+    case strip_horizontal_space(Rest, Line, Column, Scope) of
+      {[$/ | _], _, _} ->
         [{identifier, {Line, Column, nil}, Atom} | Tokens];
 
       _ ->