Skip to content

Commit

Permalink
kernel: Support sending byte stream on standard in
Browse files Browse the repository at this point in the history
As group now acts as the proxy when running "oldshell" or
"noshell" it needs to be able to read and write raw binaries.
Latin1 encoding allows all possible bytes, so by fixing latin1
we allow any bytes to be passed into and out of Erlang unmodified.

fixes erlang#7230
  • Loading branch information
frazze-jobb authored and garazdawi committed Jun 16, 2023
1 parent 538bac4 commit ba07147
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 47 deletions.
20 changes: 19 additions & 1 deletion lib/kernel/doc/src/kernel_app.xml
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,25 @@ MaxT = NetTickTime + NetTickTime / NetTickIntensity</code>
</p>

</item>
</taglist>
<tag><marker id="standard_io_encoding"/><c>standard_io_encoding = Encoding</c></tag>
<item>
<p>Sets whether bytes sent or received via standard_io are interpreted as unicode or latin1.
By default, input and output are interpreted as Unicode if it is supported on the host. With this flag
you can configure the encoding at startup.</p>
<p>This works similarly to <seemfa marker="stdlib:io#setopts/2"><c>io:setopts(standard_io, {encoding, Encoding})</c></seemfa>
but is applied before any bytes on standard_io may have been read.</p>
<p>Encoding is one of:</p>
<taglist>
<tag><c>unicode</c></tag>
<item><p>Configure standard_io to use unicode mode.</p></item>
<tag><c>latin1</c></tag>
<item><p>Configure standard_io to use latin1 mode.</p></item>
<tag><c>_</c></tag>
<item><p>Anything other than unicode or latin1 will be ignored and the system will
configure the encoding by itself, typically unicode on modern systems.</p></item>
</taglist>
</item>
</taglist>
</section>

<section>
Expand Down
70 changes: 36 additions & 34 deletions lib/kernel/src/group.erl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
-module(group).

%% A group leader process for user io.
%% This process receives input data from user_drv in this format
%% {Drv,{data,unicode:chardata()}}
%% It then keeps that data as unicode in its state and converts it
%% to latin1/unicode on a per request basis. If any data is left after
%% a request, that data is again kept as unicode.

-export([start/2, start/3, whereis_shell/0, server/4]).

Expand Down Expand Up @@ -108,6 +113,9 @@ start_shell1(Fun) ->
exit(Error) % let the group process crash
end.

-spec server_loop(UserDrv :: pid(), Shell:: pid(),
Buffer :: unicode:chardata()) ->
no_return().
server_loop(Drv, Shell, Buf0) ->
receive
{io_request,From,ReplyAs,Req} when is_pid(From) ->
Expand Down Expand Up @@ -499,14 +507,12 @@ get_chars_loop(Pbs, M, F, Xa, Drv, Shell, Buf0, State, LineCont0, Encoding) ->
false ->
%% get_line_echo_off only deals with lists,
%% so convert to list before calling it.
get_line_echo_off(
if Buf0 =:= eof -> eof;
true -> unicode:characters_to_list(Buf0, Encoding)
end, Pbs, Drv, Shell)
get_line_echo_off(cast(Buf0, list, Encoding), Pbs, Drv, Shell)
end,
case Result of
{done,LineCont1,Buf} ->
get_chars_apply(Pbs, M, F, Xa, Drv, Shell, Buf, State, LineCont1, Encoding);
get_chars_apply(Pbs, M, F, Xa, Drv, Shell, append(Buf, [], Encoding),
State, LineCont1, Encoding);

interrupted ->
{error,{error,interrupted},[]};
Expand Down Expand Up @@ -541,7 +547,7 @@ get_chars_apply(Pbs, M, F, Xa, Drv, Shell, Buf, State0, LineCont, Encoding) ->
get_chars_n_loop(Pbs, M, F, Xa, Drv, Shell, Buf0, State, Encoding) ->
try M:F(State, cast(Buf0, get(read_mode), Encoding), Encoding, Xa) of
{stop,Result,Rest} ->
{ok, Result, Rest};
{ok, Result, append(Rest,[],Encoding)};
State1 ->
case get_chars_echo_off(Pbs, Drv, Shell) of
interrupted ->
Expand Down Expand Up @@ -605,13 +611,12 @@ get_line1({undefined,{_A,Mode,Char},Cs,Cont,Rs}, Drv, Shell, Ls0, Encoding)
send_drv_reqs(Drv, edlin:erase_line()),
{more_chars,Ncont,Nrs} = edlin:start(edlin:prompt(Cont)),
send_drv_reqs(Drv, Nrs),
get_line1(edlin:edit_line1(string:to_graphemes(lists:sublist(Lcs,
1,
length(Lcs)-1)),
Ncont),
Drv,
Shell,
Ls, Encoding)
get_line1(
edlin:edit_line1(
string:to_graphemes(
lists:sublist(Lcs, 1, length(Lcs)-1)),
Ncont),
Drv, Shell, Ls, Encoding)
end;
get_line1({undefined,{_A,Mode,Char},Cs,Cont,Rs}, Drv, Shell, Ls0, Encoding)
when Mode =:= none, Char =:= $\^N;
Expand Down Expand Up @@ -801,7 +806,8 @@ more_data(What, Cont0, Drv, Shell, Ls, Encoding) ->
send_drv_reqs(Drv, edlin:redraw_line(Cont0)),
more_data(What, Cont0, Drv, Shell, Ls, Encoding);
{Drv,{data,Cs}} ->
get_line1(edlin:edit_line(Cs, Cont0), Drv, Shell, Ls, Encoding);
get_line1(edlin:edit_line(cast(Cs, list), Cont0),
Drv, Shell, Ls, Encoding);
{Drv,eof} ->
get_line1(edlin:edit_line(eof, Cont0), Drv, Shell, Ls, Encoding);
{io_request,From,ReplyAs,Req} when is_pid(From) ->
Expand Down Expand Up @@ -832,7 +838,7 @@ get_line_echo_off(Chars, Pbs, Drv, Shell) ->
get_line_echo_off1({Chars,[]}, Drv, Shell) ->
receive
{Drv,{data,Cs}} ->
get_line_echo_off1(edit_line(Cs, Chars), Drv, Shell);
get_line_echo_off1(edit_line(cast(Cs, list), Chars), Drv, Shell);
{Drv,eof} ->
get_line_echo_off1(edit_line(eof, Chars), Drv, Shell);
{io_request,From,ReplyAs,Req} when is_pid(From) ->
Expand Down Expand Up @@ -861,7 +867,7 @@ get_chars_echo_off(Pbs, Drv, Shell) ->
get_chars_echo_off1(Drv, Shell) ->
receive
{Drv, {data, Cs}} ->
Cs;
cast(Cs, list);
{Drv, eof} ->
eof;
{io_request,From,ReplyAs,Req} when is_pid(From) ->
Expand Down Expand Up @@ -1009,7 +1015,7 @@ get_password_line(Chars, Drv, Shell) ->
get_password1({Chars,[]}, Drv, Shell) ->
receive
{Drv,{data,Cs}} ->
get_password1(edit_password(Cs,Chars),Drv,Shell);
get_password1(edit_password(Cs,cast(Chars,list)),Drv,Shell);
{io_request,From,ReplyAs,Req} when is_pid(From) ->
io_request(Req, From, ReplyAs, Drv, Shell, []), %WRONG!!!
%% I guess the reason the above line is wrong is that Buf is
Expand Down Expand Up @@ -1049,20 +1055,16 @@ edit_password([Char|Cs],Chars) ->
%% Format Prompt with io_lib:format_prompt/2 for the given Encoding and
%% return the outcome as a single flat list.
prompt_bytes(Prompt, Encoding) ->
    Formatted = io_lib:format_prompt(Prompt, Encoding),
    lists:flatten(Formatted).

cast(L, binary,latin1) when is_list(L) ->
list_to_binary(L);
cast(L, list, latin1) when is_list(L) ->
binary_to_list(list_to_binary(L)); %% Exception if not bytes
cast(L, binary,unicode) when is_list(L) ->
unicode:characters_to_binary(L,utf8);
cast(Other, _, _) ->
Other.

append(B, L, latin1) when is_binary(B) ->
binary_to_list(B)++L;
append(B, L, unicode) when is_binary(B) ->
unicode:characters_to_list(B,utf8)++L;
append(L1, L2, _) when is_list(L1) ->
L1++L2;
append(_Eof, L, _) ->
L.
%% Convert buffered input into the representation a reader asked for.
%% cast/2 is cast/3 with utf8 as the target encoding.
cast(Data, Kind) ->
    cast(Data, Kind, utf8).

%% Data is handed to the unicode module as utf8 chardata. The atom eof
%% is passed through untouched. With Kind =:= list the data goes through
%% unicode:characters_to_list/2 and OutEnc is not consulted; with
%% Kind =:= binary it goes through unicode:characters_to_binary/3 with
%% OutEnc as the output encoding.
cast(eof, _Kind, _OutEnc) ->
    eof;
cast(Data, list, _OutEnc) ->
    unicode:characters_to_list(Data, utf8);
cast(Data, binary, OutEnc) ->
    unicode:characters_to_binary(Data, utf8, OutEnc).

%% Put buffered data in front of the list Tail. Anything other than eof
%% is decoded from FromEnc with unicode:characters_to_list/2 first; eof
%% contributes nothing, so Tail is returned as it is.
append(eof, Tail, _FromEnc) ->
    Tail;
append(Data, Tail, FromEnc) ->
    Decoded = unicode:characters_to_list(Data, FromEnc),
    Decoded ++ Tail.
23 changes: 17 additions & 6 deletions lib/kernel/src/prim_tty.erl
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ init(UserOptions) when is_map(UserOptions) ->
{ok, TTY} = tty_create(),

%% Initialize the locale to see if we support utf-8 or not
UnicodeMode =
UnicodeSupported =
case setlocale(TTY) of
primitive ->
lists:any(
Expand All @@ -228,6 +228,11 @@ init(UserOptions) when is_map(UserOptions) ->
UnicodeLocale when is_boolean(UnicodeLocale) ->
UnicodeLocale
end,
IOEncoding = application:get_env(kernel, standard_io_encoding, default),
UnicodeMode = if IOEncoding =:= latin1 -> false;
IOEncoding =:= unicode -> true;
true -> UnicodeSupported
end,
{ok, ANSI_RE_MP} = re:compile(?ANSI_REGEXP, [unicode]),
init_term(#state{ tty = TTY, unicode = UnicodeMode, options = Options, ansi_regexp = ANSI_RE_MP }).
init_term(State = #state{ tty = TTY, options = Options }) ->
Expand All @@ -252,7 +257,12 @@ init_term(State = #state{ tty = TTY, options = Options }) ->
ReaderState =
case {maps:get(input, Options), TTYState#state.reader} of
{true, undefined} ->
{ok, Reader} = proc_lib:start_link(?MODULE, reader, [[State#state.tty, self()]]),
DefaultReaderEncoding = if State#state.unicode -> utf8;
not State#state.unicode -> latin1
end,
{ok, Reader} = proc_lib:start_link(
?MODULE, reader,
[[State#state.tty, DefaultReaderEncoding, self()]]),
WriterState#state{ reader = Reader };
{true, _} ->
WriterState;
Expand Down Expand Up @@ -421,22 +431,23 @@ call(Pid, Msg) ->
{error, Reason}
end.

reader([TTY, Parent]) ->
reader([TTY, Encoding, Parent]) ->
register(user_drv_reader, self()),
ReaderRef = make_ref(),
SignalRef = make_ref(),

ok = tty_select(TTY, SignalRef, ReaderRef),
proc_lib:init_ack({ok, {self(), ReaderRef}}),
FromEnc = case os:type() of
{unix, _} -> utf8;
{unix, _} -> Encoding;
{win32, _} ->
case isatty(stdin) of
true ->
{utf16, little};
_ ->
%% When not reading from a console the data
%% read is in the configured default encoding
utf8
Encoding
end
end,
reader_loop(TTY, Parent, SignalRef, ReaderRef, FromEnc, <<>>).
Expand Down Expand Up @@ -486,7 +497,7 @@ reader_loop(TTY, Parent, SignalRef, ReaderRef, FromEnc, Acc) ->
Alias ! {Alias, true}
end,
receive
{Parent, set_unicode_state, true} -> ok
{Parent, set_unicode_state, _} -> ok
end,
Latin1Chars = unicode:characters_to_binary(Error, latin1, utf8),
{<<B/binary,Latin1Chars/binary>>, <<>>, latin1};
Expand Down
5 changes: 2 additions & 3 deletions lib/kernel/src/user_drv.erl
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,7 @@ server({call, From}, {start_shell, _Args}, _State) ->
keep_state_and_data;
server(info, {ReadHandle,{data,UTF8Binary}}, State = #state{ read = ReadHandle })
when State#state.current_group =:= State#state.user ->
State#state.current_group !
{self(), {data, unicode:characters_to_list(UTF8Binary, utf8)}},
State#state.current_group ! {self(), {data,UTF8Binary}},
keep_state_and_data;
server(info, {ReadHandle,{data,UTF8Binary}}, State = #state{ read = ReadHandle }) ->
case contains_ctrl_g_or_ctrl_c(UTF8Binary) of
Expand All @@ -435,7 +434,7 @@ server(info, {ReadHandle,{data,UTF8Binary}}, State = #state{ read = ReadHandle }
keep_state_and_data;
none ->
State#state.current_group !
{self(), {data, unicode:characters_to_list(UTF8Binary, utf8)}},
{self(), {data, UTF8Binary}},
keep_state_and_data
end;
server(info, {ReadHandle,eof}, State = #state{ read = ReadHandle }) ->
Expand Down
47 changes: 44 additions & 3 deletions lib/stdlib/test/io_proto_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
unicode_prompt/1, shell_slogan/1, raw_stdout/1, raw_stdout_isatty/1,
file_read_stdin_binary_mode/1, file_read_stdin_list_mode/1,
io_get_chars_stdin_binary_mode/1, io_get_chars_stdin_list_mode/1,
io_get_chars_file_read_stdin_binary_mode/1
io_get_chars_file_read_stdin_binary_mode/1,
file_read_stdin_latin1_mode/1
]).


Expand Down Expand Up @@ -62,7 +63,8 @@ all() ->
file_read_stdin_list_mode,
io_get_chars_stdin_binary_mode,
io_get_chars_stdin_list_mode,
io_get_chars_file_read_stdin_binary_mode
io_get_chars_file_read_stdin_binary_mode,
file_read_stdin_latin1_mode
].

groups() ->
Expand Down Expand Up @@ -361,12 +363,47 @@ io_get_chars_file_read_stdin_binary_mode(_Config) ->

ok.

%% Check that bytes written to stdin reach file:read_line/1 and file:read/2
%% unmodified when the node is started with
%% "-kernel standard_io_encoding latin1", both for input that is not valid
%% utf8 and for input that is.
file_read_stdin_latin1_mode(_Config) ->
    Latin1Args = "-kernel standard_io_encoding latin1",
    Thrice = lists:seq(1, 3),

    %% Line-wise reads; the input is invalid utf8
    {ok, LineSock, LinePort} =
        start_stdin_node(fun() -> file:read_line(standard_io) end,
                         [binary],
                         Latin1Args),
    erlang:port_command(LinePort, <<192,128,10,192,128,10,192,128,10>>),
    lists:foreach(
      fun(_) ->
              {ok, "got: <<192,128,10>>\n"} = gen_tcp:recv(LineSock, 0, 5000)
      end,
      Thrice),
    LinePort ! {self(), close},
    {ok, "got: eof"} = gen_tcp:recv(LineSock, 0, 5000),

    %% Reads of five bytes at a time; the input is valid utf8
    {ok, ReadSock, ReadPort} =
        start_stdin_node(fun() -> file:read(standard_io, 5) end,
                         [binary],
                         Latin1Args),
    erlang:port_command(ReadPort, <<"duπaduπaduπa"/utf8>>),
    lists:foreach(
      fun(_) ->
              {ok, "got: <<100,117,207,128,97>>\n"} = gen_tcp:recv(ReadSock, 0, 5000)
      end,
      Thrice),
    ReadPort ! {self(), close},
    {ok, "got: eof"} = gen_tcp:recv(ReadSock, 0, 5000),

    ok.

%% Same as start_stdin_node/3 with no extra command line arguments.
start_stdin_node(ReadFun, IoOptions) ->
    NoExtraArgs = "",
    start_stdin_node(ReadFun, IoOptions, NoExtraArgs).
start_stdin_node(ReadFun, IoOptions, ExtraArgs) ->
{ok, L} = gen_tcp:listen(0,[{active, false},{packet,4}]),
{ok, Port} = inet:port(L),
Cmd = lists:append(
[ct:get_progname(),
" -noshell",
" -noshell ",
ExtraArgs,
" -pa ", filename:dirname(code:which(?MODULE)),
" -s ", atom_to_list(?MODULE), " read_raw_from_stdin ", integer_to_list(Port)]),
ct:log("~p~n", [Cmd]),
Expand All @@ -383,6 +420,10 @@ read_raw_from_stdin([Port]) ->
{ok, OptionsBin} = gen_tcp:recv(P, 0),
io:setopts(standard_io, binary_to_term(OptionsBin)),
{ok, ReadFunBin} = gen_tcp:recv(P, 0),
spawn(fun() ->
gen_tcp:recv(P, 0),
init:stop("crash")
end),
read_raw_from_stdin(binary_to_term(ReadFunBin), P)
catch E:R:ST ->
io:format(standard_error, "~p ~p",[Port,{E,R,ST}])
Expand Down

0 comments on commit ba07147

Please sign in to comment.