From 22362e9192049e7cf245e44f196337144a120824 Mon Sep 17 00:00:00 2001 From: George Brocklehurst Date: Sun, 8 Jan 2017 10:14:23 -0500 Subject: [PATCH] Support for multi-line commands. Line breaks are supported in similar places to sh(1). Line breaks can be escaped anywhere by ending a line with a `\` character. Unescaped line breaks are also supported: - after logical operators (`&&` and `||`), - within strings, - between commands wrapped in parentheses, and - between commands in subshells. The Lexer has been expanded to insert a `MISSING` token in all situations where the input is known to be incomplete (i.e. when the input ends with an escape character, or the input ends in any of the places where an unescaped line break can be used). After invoking the `Lexer`, the `Interpreter` checks the token stream for `MISSING` tokens. If it finds any, it requests another line of input from the current input strategy, appends it to the current input, and tries again. A new `EOL` token has been introduced to represent line breaks between commands. In the `Parser` it's treated exactly like the `SEMICOLON` token. Lexical analysis of comments needed to be improved to allow for comments at the end of lines in a multi-line command. For example, the following input is valid: (:echo 1 # comment :echo 2) It is semantically equivalent to: (:echo 1; :echo 2) To support this, the Lexer will now: - ignore whitespace before a comment's initial `#` character. This prevents extraneous `SPACE` tokens from being produced. A trailing `SPACE` token in a single-line command is fine, but it can cause problems in a multi-line command. - pop the `:comment` state without consuming the newline character at the end of a comment, allowing the lexer's default rules to handle the newline and produce an `EOL` token. 
--- lib/gitsh/input_strategies/file.rb | 10 +- lib/gitsh/input_strategies/interactive.rb | 14 +++ lib/gitsh/interpreter.rb | 24 ++++- lib/gitsh/lexer.rb | 15 ++- lib/gitsh/parser.rb | 2 + man/man1/gitsh.1 | 4 + spec/integration/error_handling_spec.rb | 2 +- spec/integration/multi_line_input_spec.rb | 98 +++++++++++++++++++ spec/support/tokens.rb | 15 +++ spec/units/input_strategies/file_spec.rb | 34 ++++++- .../input_strategies/interactive_spec.rb | 22 ++++- spec/units/interpreter_spec.rb | 71 +++++++++++++- spec/units/lexer_spec.rb | 45 ++++++++- spec/units/parser_spec.rb | 17 ++-- 14 files changed, 347 insertions(+), 26 deletions(-) create mode 100644 spec/integration/multi_line_input_spec.rb create mode 100644 spec/support/tokens.rb diff --git a/lib/gitsh/input_strategies/file.rb b/lib/gitsh/input_strategies/file.rb index 82082ce9..245de6d6 100644 --- a/lib/gitsh/input_strategies/file.rb +++ b/lib/gitsh/input_strategies/file.rb @@ -25,11 +25,15 @@ def teardown end def read_command - file.readline + next_line rescue EOFError nil end + def read_continuation + next_line + end + def handle_parse_error(message) raise ParseError, message end @@ -45,6 +49,10 @@ def open_file ::File.open(path) end end + + def next_line + file.readline.chomp + end end end end diff --git a/lib/gitsh/input_strategies/interactive.rb b/lib/gitsh/input_strategies/interactive.rb index f651c712..648dcee2 100644 --- a/lib/gitsh/input_strategies/interactive.rb +++ b/lib/gitsh/input_strategies/interactive.rb @@ -13,6 +13,7 @@ module Gitsh module InputStrategies class Interactive BLANK_LINE_REGEX = /^\s*$/ + CONTINUATION_PROMPT = '> '.freeze def initialize(opts) @line_editor = opts.fetch(:line_editor) do @@ -48,6 +49,19 @@ def read_command retry end + def read_continuation + input = begin + line_editor.readline(CONTINUATION_PROMPT, true) + rescue Interrupt + nil + end + + if input.nil? 
+ env.print "\n" + end + + input + end def handle_parse_error(message) env.puts_error("gitsh: #{message}") diff --git a/lib/gitsh/interpreter.rb b/lib/gitsh/interpreter.rb index bb81cac5..2ad81e21 100644 --- a/lib/gitsh/interpreter.rb +++ b/lib/gitsh/interpreter.rb @@ -1,4 +1,5 @@ require 'rltk' +require 'gitsh/commands/noop' require 'gitsh/error' require 'gitsh/lexer' require 'gitsh/parser' @@ -27,12 +28,31 @@ def run def execute(input) build_command(input).execute(env) - rescue RLTK::LexingError, RLTK::NotInLanguage, RLTK::BadToken + rescue RLTK::LexingError, RLTK::NotInLanguage, RLTK::BadToken, EOFError input_strategy.handle_parse_error('parse error') end def build_command(input) - parser.parse(lexer.lex(input)) + tokens = lexer.lex(input) + + if incomplete_command?(tokens) + continuation = input_strategy.read_continuation + build_multi_line_command(input, continuation) + else + parser.parse(tokens) + end + end + + def incomplete_command?(tokens) + tokens.reverse_each.detect { |token| token.type == :MISSING } + end + + def build_multi_line_command(previous_lines, new_line) + if new_line.nil? 
+ Commands::Noop.new + else + build_command([previous_lines, new_line].join("\n")) + end end end end diff --git a/lib/gitsh/lexer.rb b/lib/gitsh/lexer.rb index d63fcce5..c2c75ef8 100644 --- a/lib/gitsh/lexer.rb +++ b/lib/gitsh/lexer.rb @@ -54,15 +54,18 @@ def initialize(*args) right_paren_stack.pop || :RIGHT_PAREN end - rule(/\s+/) { :SPACE } + rule(/[ \t\f]+/) { :SPACE } + rule(/\s+/) { :EOL } rule(/#{UNQUOTED_STRING_ESCAPABLES.to_negative_regexp}+/) { |t| [:WORD, t] } + rule(/\\[\r\n]/) { |_| } + rule(/\\\z/) { |_| [:MISSING, :continuation] } rule(/\\#{UNQUOTED_STRING_ESCAPABLES.to_regexp}/) { |t| [:WORD, t[1]] } rule(/\\/) { |t| [:WORD, t] } - rule(/#/) { push_state :comment } - rule(/.*/, :comment) {} - rule(/$/, :comment) { pop_state } + rule(/\s*#/) { push_state :comment } + rule(/(?=[\r\n])/, :comment) { pop_state } + rule(/.*/, :comment) rule(/''/) { [:WORD, ''] } rule(/'/) { push_state :hard_string } @@ -105,6 +108,10 @@ def self.lex(string, file_name = nil, env = self::Environment.new(@start_state)) tokens.insert(-2, RLTK::Token.new(:MISSING, ')')) end + if tokens.length > 1 && [:AND, :OR].include?(tokens[-2].type) + tokens.insert(-2, RLTK::Token.new(:MISSING, 'command')) + end + tokens end end diff --git a/lib/gitsh/parser.rb b/lib/gitsh/parser.rb index 7ffb4102..a059c486 100644 --- a/lib/gitsh/parser.rb +++ b/lib/gitsh/parser.rb @@ -19,6 +19,7 @@ class Parser < RLTK::Parser '!' 
=> Gitsh::Commands::ShellCommand, }.freeze + left :EOL left :SEMICOLON left :OR left :AND @@ -31,6 +32,7 @@ class Parser < RLTK::Parser production(:commands) do clause('command') { |c| c } clause('LEFT_PAREN .commands RIGHT_PAREN') { |c| c } + clause('.commands EOL .commands') { |c1, c2| Commands::Tree::Multi.new(c1, c2) } clause('.commands SEMICOLON .commands') { |c1, c2| Commands::Tree::Multi.new(c1, c2) } clause('.commands OR .commands') { |c1, c2| Commands::Tree::Or.new(c1, c2) } clause('.commands AND .commands') { |c1, c2| Commands::Tree::And.new(c1, c2) } diff --git a/man/man1/gitsh.1 b/man/man1/gitsh.1 index a4ba8343..c4d2db81 100644 --- a/man/man1/gitsh.1 +++ b/man/man1/gitsh.1 @@ -333,6 +333,10 @@ string delimiter .Pf ( Ic ' Ns ) can be escaped. .Pp +Line-breaks can be escaped by ending a line with a +.Ic \e +character. This is useful for splitting long commands over multiple lines. +.Pp A literal .Ic \e character can always be produced by repeating it diff --git a/spec/integration/error_handling_spec.rb b/spec/integration/error_handling_spec.rb index 6c2dfb24..864ad516 100644 --- a/spec/integration/error_handling_spec.rb +++ b/spec/integration/error_handling_spec.rb @@ -11,7 +11,7 @@ it 'does not explode when given a badly formatted command' do GitshRunner.interactive do |gitsh| - gitsh.type('commit -m "Unclosed quote') + gitsh.type('add . 
&& || commit') expect(gitsh).to output_error /gitsh: parse error/ end diff --git a/spec/integration/multi_line_input_spec.rb b/spec/integration/multi_line_input_spec.rb new file mode 100644 index 00000000..3c415325 --- /dev/null +++ b/spec/integration/multi_line_input_spec.rb @@ -0,0 +1,98 @@ +require 'spec_helper' + +describe 'Multi-line input' do + it 'supports escaped line breaks within commands' do + GitshRunner.interactive do |gitsh| + gitsh.type(':echo Hello \\') + + expect(gitsh).to output_no_errors + expect(gitsh).to prompt_with('> ') + + gitsh.type('world') + + expect(gitsh).to output_no_errors + expect(gitsh).to output(/Hello world/) + end + end + + it 'supports line breaks after logical operators' do + GitshRunner.interactive do |gitsh| + gitsh.type(':echo Hello &&') + + expect(gitsh).to output_no_errors + expect(gitsh).to prompt_with('> ') + + gitsh.type(':echo World') + + expect(gitsh).to output_no_errors + expect(gitsh).to output(/Hello\nWorld/) + end + end + + it 'supports line breaks within strings' do + GitshRunner.interactive do |gitsh| + gitsh.type(':echo "Hello, world') + + expect(gitsh).to output_no_errors + expect(gitsh).to prompt_with('> ') + + gitsh.type('') + gitsh.type('Goodbye, world"') + + expect(gitsh).to output(/\AHello, world\n\nGoodbye, world\Z/) + end + end + + it 'supports line breaks within parentheses' do + GitshRunner.interactive do |gitsh| + gitsh.type('(:echo 1') + + expect(gitsh).to output_no_errors + expect(gitsh).to prompt_with('> ') + + gitsh.type(':echo 2') + gitsh.type(':echo 3)') + + expect(gitsh).to output_no_errors + expect(gitsh).to output(/1\n2\n3/) + end + end + + it 'supports line breaks within subshells' do + GitshRunner.interactive do |gitsh| + gitsh.type(':echo $(') + gitsh.type(' :set greeting Hello') + gitsh.type(' :echo $greeting') + gitsh.type(')') + + expect(gitsh).to output_no_errors + expect(gitsh).to output(/Hello/) + end + end + + it 'supports comments in the middle of multi-line commands' do + 
GitshRunner.interactive do |gitsh| + gitsh.type('(:echo 1 # comment') + + expect(gitsh).to output_no_errors + expect(gitsh).to prompt_with('> ') + + gitsh.type(':echo 2') + gitsh.type('# another comment') + gitsh.type(')') + + expect(gitsh).to output_no_errors + expect(gitsh).to output(/1\n2/) + end + end + + it 'supports line breaks within strings in scripts' do + in_a_temporary_directory do + write_file('multiline.gitsh', ":echo 'foo\nbar'") + + expect("#{gitsh_path} multiline.gitsh"). + to execute.successfully. + with_output_matching(/foo\nbar/) + end + end +end diff --git a/spec/support/tokens.rb b/spec/support/tokens.rb new file mode 100644 index 00000000..0ae61e55 --- /dev/null +++ b/spec/support/tokens.rb @@ -0,0 +1,15 @@ +require 'rltk' + +module Tokens + def tokens(*tokens) + tokens.map.with_index do |token, i| + type, value = token + pos = RLTK::StreamPosition.new(i, 1, i, 10, nil) + RLTK::Token.new(type, value, pos) + end + end +end + +RSpec.configure do |config| + config.include Tokens +end diff --git a/spec/units/input_strategies/file_spec.rb b/spec/units/input_strategies/file_spec.rb index 5fa07249..f955448d 100644 --- a/spec/units/input_strategies/file_spec.rb +++ b/spec/units/input_strategies/file_spec.rb @@ -58,8 +58,8 @@ ) input_strategy.setup - expect(input_strategy.read_command).to eq "commit -m 'Changes'\n" - expect(input_strategy.read_command).to eq "push -f\n" + expect(input_strategy.read_command).to eq 'commit -m \'Changes\'' + expect(input_strategy.read_command).to eq 'push -f' expect(input_strategy.read_command).to be_nil end @@ -73,13 +73,39 @@ ) input_strategy.setup - expect(input_strategy.read_command).to eq "push\n" - expect(input_strategy.read_command).to eq "pull\n" + expect(input_strategy.read_command).to eq 'push' + expect(input_strategy.read_command).to eq 'pull' expect(input_strategy.read_command).to be_nil end end end + describe '#read_continuation' do + it 'returns the next line of the file' do + script = temp_file('script', 
"commit -m 'Changes'\npush -f") + input_strategy = described_class.new( + path: script.path, + ) + input_strategy.setup + input_strategy.read_command + + expect(input_strategy.read_continuation).to eq 'push -f' + end + + context 'with no lines left to return' do + it 'raises' do + script = temp_file('script', 'commit -m \'Changes\'') + input_strategy = described_class.new( + path: script.path, + ) + input_strategy.setup + input_strategy.read_command + + expect { input_strategy.read_continuation }.to raise_exception(EOFError) + end + end + end + describe '#handle_parse_error' do it 'raises' do input_strategy = described_class.new(path: double) diff --git a/spec/units/input_strategies/interactive_spec.rb b/spec/units/input_strategies/interactive_spec.rb index bd6abc3c..ff06cdce 100644 --- a/spec/units/input_strategies/interactive_spec.rb +++ b/spec/units/input_strategies/interactive_spec.rb @@ -70,7 +70,7 @@ expect(input_strategy.read_command).to eq 'my default command' end - it 'handles a SIGINT' do + it 'handles a SIGINT by retrying' do input_strategy = build_input_strategy line_editor_results = StubbedMethodResult.new. raises(Interrupt). @@ -107,6 +107,26 @@ end end + describe '#read_continuation' do + it 'returns the user input' do + input_strategy = build_input_strategy + allow(line_editor).to receive(:readline).and_return('user input') + input_strategy.setup + + expect(input_strategy.read_continuation).to eq 'user input' + expect(line_editor).to have_received(:readline). 
+ with(described_class::CONTINUATION_PROMPT, true) + end + + it 'handles a SIGINT by returning nil' do + input_strategy = build_input_strategy + allow(line_editor).to receive(:readline).and_raise(Interrupt) + input_strategy.setup + + expect(input_strategy.read_continuation).to be_nil + end + end + describe '#handle_parse_error' do it 'outputs the error' do input_strategy = build_input_strategy diff --git a/spec/units/interpreter_spec.rb b/spec/units/interpreter_spec.rb index 64b1a3e1..fef4d191 100644 --- a/spec/units/interpreter_spec.rb +++ b/spec/units/interpreter_spec.rb @@ -8,8 +8,7 @@ env = double command = double(:command, execute: nil) parser = double(:parser, parse: command) - tokens = double(:tokens) - lexer = double('Lexer', lex: tokens) + lexer = double('Lexer', lex: tokens([:WORD, 'commit'])) input_strategy = double(:input_strategy, setup: nil, teardown: nil) allow(input_strategy).to receive(:read_command).and_return( 'first command', @@ -37,7 +36,7 @@ parser = double(:parser) allow(parser).to receive(:parse). and_raise(RLTK::NotInLanguage.new([], double(:token), [])) - lexer = double('Lexer', lex: double(:tokens)) + lexer = double('Lexer', lex: tokens([:WORD, 'commit'])) input_strategy = double( :input_strategy, setup: nil, @@ -59,5 +58,71 @@ expect(input_strategy). to have_received(:handle_parse_error).with('parse error') end + + it 'handles incomplete input by requesting a completion' do + env = double + command = double(:command, execute: nil) + parser = double(:parser, parse: command) + lexer = double('Lexer') + allow(lexer).to receive(:lex).with('(commit'). + and_return(tokens([:LEFT_PAREN], [:WORD, 'commit'], [:MISSING, ')'])) + allow(lexer).to receive(:lex).with("(commit\n)"). 
+ and_return(tokens([:LEFT_PAREN], [:WORD, 'commit'], [:RIGHT_PAREN])) + input_strategy = double( + :input_strategy, + setup: nil, + teardown: nil, + read_continuation: ')', + ) + allow(input_strategy).to receive(:read_command).and_return( + '(commit', + nil, + ) + interpreter = described_class.new( + env: env, + parser: parser, + input_strategy: input_strategy, + lexer: lexer, + ) + + interpreter.run + + expect(lexer).to have_received(:lex). + with('(commit').ordered + expect(lexer).to have_received(:lex). + with("(commit\n)").ordered + expect(parser).to have_received(:parse).once + expect(command).to have_received(:execute) + end + + it 'drops the command if the completion is nil' do + env = double + parser = double(:parser, parse: nil) + lexer = double( + 'Lexer', + lex: tokens([:LEFT_PAREN], [:WORD, 'commit'], [:MISSING, ')']), + ) + input_strategy = double( + :input_strategy, + setup: nil, + teardown: nil, + read_continuation: nil, + ) + allow(input_strategy).to receive(:read_command).and_return( + 'first line', + nil, + ) + interpreter = described_class.new( + env: env, + parser: parser, + input_strategy: input_strategy, + lexer: lexer, + ) + + interpreter.run + + expect(lexer).to have_received(:lex).once + expect(parser).not_to have_received(:parse) + end end end diff --git a/spec/units/lexer_spec.rb b/spec/units/lexer_spec.rb index 81ea1a6b..4979583e 100644 --- a/spec/units/lexer_spec.rb +++ b/spec/units/lexer_spec.rb @@ -39,6 +39,13 @@ to produce_tokens ['WORD(foo)', 'OR', 'WORD(bar)', 'EOS'] end + it 'recognises newlines' do + expect("foo\nbar"). + to produce_tokens ['WORD(foo)', 'EOL', 'WORD(bar)', 'EOS'] + expect("foo \n\t\fbar"). + to produce_tokens ['WORD(foo)', 'EOL', 'WORD(bar)', 'EOS'] + end + it 'recognises parentheses' do expect('(foo)'). 
to produce_tokens ['LEFT_PAREN', 'WORD(foo)', 'RIGHT_PAREN', 'EOS'] @@ -46,13 +53,20 @@ to produce_tokens ['LEFT_PAREN', 'WORD(foo)', 'RIGHT_PAREN', 'EOS'] end - [' ', "\t", "\r", "\n", "\f", '\'', '"', '\\', '$', '#', ';', '&', '|', '(', ')'].each do |char| + [' ', "\t", "\f", '\'', '"', '\\', '$', '#', ';', '&', '|', '(', ')'].each do |char| it "recognises unquoted words containing an escaped #{char.inspect}" do expect("foo\\#{char}bar"). to produce_tokens ['WORD(foo)', "WORD(#{char})", 'WORD(bar)', 'EOS'] end end + ["\r", "\n"].each do |char| + it "ignored escaped line breaks using #{char.inspect}" do + expect("foo\\#{char}bar"). + to produce_tokens ['WORD(foo)', 'WORD(bar)', 'EOS'] + end + end + it 'does not treat all \ characters in unquoted words as escapes' do expect('\\a').to produce_tokens ['WORD(\\)', 'WORD(a)', 'EOS'] end @@ -169,9 +183,36 @@ ] end + it 'adds an error token for trailing logical operators' do + expect(':echo first &&').to produce_tokens [ + 'WORD(:echo)', 'SPACE', 'WORD(first)', 'AND', 'MISSING(command)', 'EOS' + ] + expect(':echo first ||').to produce_tokens [ + 'WORD(:echo)', 'SPACE', 'WORD(first)', 'OR', 'MISSING(command)', 'EOS' + ] + end + + it 'adds an error token for a trailing escape character' do + expect('foo\\'). + to produce_tokens ['WORD(foo)', 'MISSING(continuation)', 'EOS'] + expect('foo\\\\'). + to produce_tokens ['WORD(foo)', 'WORD(\\)', 'EOS'] + expect('foo\\\\\\'). 
+ to produce_tokens ['WORD(foo)', 'WORD(\\)', 'MISSING(continuation)', 'EOS'] + end + it 'ignores comments' do expect('# all one big comment').to produce_tokens ['EOS'] - expect('pre #post').to produce_tokens ['WORD(pre)', 'SPACE', 'EOS'] + expect('pre #post').to produce_tokens ['WORD(pre)', 'EOS'] + end + + it 'ignores comments in multi-line input' do + expect("(:echo 1 #comment\n:echo 2)").to produce_tokens [ + 'LEFT_PAREN', + 'WORD(:echo)', 'SPACE', 'WORD(1)', 'EOL', + 'WORD(:echo)', 'SPACE', 'WORD(2)', + 'RIGHT_PAREN', 'EOS', + ] end end end diff --git a/spec/units/parser_spec.rb b/spec/units/parser_spec.rb index ea9d9a9c..abade65c 100644 --- a/spec/units/parser_spec.rb +++ b/spec/units/parser_spec.rb @@ -169,6 +169,15 @@ expect(result).to be_a(Gitsh::Commands::Tree::Multi) end + it 'parses two commands combined with newlines' do + result = parse(tokens( + [:WORD, 'add'], [:SPACE], [:WORD, '.'], + [:EOL], [:WORD, 'commit'], [:EOS], + )) + + expect(result).to be_a(Gitsh::Commands::Tree::Multi) + end + it 'parses a command with a trailing semicolon' do command = stub_command_factory @@ -199,14 +208,6 @@ def parse(tokens) described_class.new.parse(tokens) end - def tokens(*tokens) - tokens.map.with_index do |token, i| - type, value = token - pos = RLTK::StreamPosition.new(i, 1, i, 10, nil) - RLTK::Token.new(type, value, pos) - end - end - def stub_command_factory command = double(:command) allow(Gitsh::Commands::Factory).to receive(:build).and_return(command)