Skip to content

Commit 54321de

Browse files
committed
Update bidi/line break character checks according to UTS #55
1 parent 6ee313a commit 54321de

File tree

4 files changed

+51
-11
lines changed

4 files changed

+51
-11
lines changed

lib/elixir/src/elixir_interpolation.erl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,12 @@ extract(Rest, Buffer, Output, Line, Column, Scope, Interpol, Last) ->
8989

9090
extract_char(Rest, Buffer, Output, Line, Column, Scope, Interpol, Last) ->
9191
case unicode_util:gc(Rest) of
92-
[Char | _] when ?bidi(Char) ->
92+
[Char | _] when ?bidi(Char); ?break(Char) ->
9393
Token = io_lib:format("\\u~4.16.0B", [Char]),
94-
Pre = "invalid bidirectional formatting character in string: ",
94+
Pre = if
95+
?bidi(Char) -> "invalid bidirectional formatting character in string: ";
96+
true -> "invalid line break character in string: "
97+
end,
9598
Pos = io_lib:format(". If you want to use such character, use it in its escaped ~ts form instead", [Token]),
9699
{error, {?LOC(Line, Column), {Pre, Pos}, Token}};
97100

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
198198

199199
tokenize([$# | String], Line, Column, Scope, Tokens) ->
200200
case tokenize_comment(String, [$#]) of
201-
{error, Char} ->
202-
error_comment(Char, [$# | String], Line, Column, Scope, Tokens);
201+
{error, Char, Reason} ->
202+
error_comment(Char, Reason, [$# | String], Line, Column, Scope, Tokens);
203203
{Rest, Comment} ->
204204
preserve_comments(Line, Column, Tokens, Comment, Rest, Scope),
205205
tokenize(Rest, Line, Column, Scope, reset_eol(Tokens))
@@ -748,8 +748,8 @@ tokenize_dot(T, Line, Column, DotInfo, Scope, Tokens) ->
748748
case strip_horizontal_space(T, 0) of
749749
{[$# | R], _} ->
750750
case tokenize_comment(R, [$#]) of
751-
{error, Char} ->
752-
error_comment(Char, [$# | R], Line, Column, Scope, Tokens);
751+
{error, Char, Reason} ->
752+
error_comment(Char, Reason, [$# | R], Line, Column, Scope, Tokens);
753753

754754
{Rest, Comment} ->
755755
preserve_comments(Line, Column, Tokens, Comment, Rest, Scope),
@@ -1315,16 +1315,17 @@ tokenize_comment("\r\n" ++ _ = Rest, Acc) ->
13151315
tokenize_comment("\n" ++ _ = Rest, Acc) ->
13161316
{Rest, lists:reverse(Acc)};
13171317
tokenize_comment([H | _Rest], _) when ?bidi(H) ->
1318-
{error, H};
1318+
{error, H, "invalid bidirectional formatting character in comment: "};
1319+
tokenize_comment([H | _Rest], _) when ?break(H) ->
1320+
{error, H, "invalid line break character in comment: "};
13191321
tokenize_comment([H | Rest], Acc) ->
13201322
tokenize_comment(Rest, [H | Acc]);
13211323
tokenize_comment([], Acc) ->
13221324
{[], lists:reverse(Acc)}.
13231325

1324-
error_comment(H, Comment, Line, Column, Scope, Tokens) ->
1325-
Token = io_lib:format("\\u~4.16.0B", [H]),
1326-
Reason = {?LOC(Line, Column), "invalid bidirectional formatting character in comment: ", Token},
1327-
error(Reason, Comment, Scope, Tokens).
1326+
error_comment(Char, Reason, Comment, Line, Column, Scope, Tokens) ->
1327+
Token = io_lib:format("\\u~4.16.0B", [Char]),
1328+
error({?LOC(Line, Column), Reason, Token}, Comment, Scope, Tokens).
13281329

13291330
preserve_comments(Line, Column, Tokens, Comment, Rest, Scope) ->
13301331
case Scope#elixir_tokenizer.preserve_comments of

lib/elixir/src/elixir_tokenizer.hrl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,11 @@
3333
C =:= 16#2068;
3434
C =:= 16#202C;
3535
C =:= 16#2069).
36+
37+
%% Unsupported newlines
38+
%% https://www.unicode.org/reports/tr55/
39+
-define(break(C), C =:= 16#000B;
40+
C =:= 16#000C;
41+
C =:= 16#0085;
42+
C =:= 16#2028;
43+
C =:= 16#2029).

lib/elixir/test/elixir/kernel/parser_test.exs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,34 @@ defmodule Kernel.ParserTest do
982982
)
983983
end
984984

985+
test "invalid newline in source" do
986+
assert_syntax_error(
987+
["nofile:1:1:", ~s/invalid line break character in comment: \\u2028/],
988+
~c"# This is a \u2028"
989+
)
990+
991+
assert_syntax_error(
992+
["nofile:1:5:", "invalid line break character in comment: \\u2028"],
993+
~c"foo. # This is a \u2028"
994+
)
995+
996+
assert_syntax_error(
997+
[
998+
"nofile:1:12:",
999+
"invalid line break character in string: \\u2028. If you want to use such character, use it in its escaped \\u2028 form instead"
1000+
],
1001+
~c"\"this is a \u2028\""
1002+
)
1003+
1004+
assert_syntax_error(
1005+
[
1006+
"nofile:1:13:",
1007+
"invalid line break character in string: \\u2028. If you want to use such character, use it in its escaped \\u2028 form instead"
1008+
],
1009+
~c"\"this is a \\\u2028\""
1010+
)
1011+
end
1012+
9851013
test "reserved tokens" do
9861014
assert_syntax_error(["nofile:1:1:", "reserved token: __aliases__"], ~c"__aliases__")
9871015
assert_syntax_error(["nofile:1:1:", "reserved token: __block__"], ~c"__block__")

0 commit comments

Comments
 (0)