From d3d0ebf17a94ff43bffcc8e4d4c3c127925ab597 Mon Sep 17 00:00:00 2001 From: panage Date: Thu, 23 Jun 2016 21:53:06 +0900 Subject: [PATCH 1/9] Add test code to check from_encoding param Add configuration check new param "from_encoding", and test convert from input source is "Hello world" in japanese Hiragana that is encoded in cp932 to UTF-8. --- test/plugin/test_in_tail.rb | 58 +++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/test/plugin/test_in_tail.rb b/test/plugin/test_in_tail.rb index 385ead6f16..191e64aa59 100644 --- a/test/plugin/test_in_tail.rb +++ b/test/plugin/test_in_tail.rb @@ -71,6 +71,17 @@ def test_configure_encoding end end + def test_configure_from_encoding + # valid from_encoding + d = create_driver(SINGLE_LINE_CONFIG + 'from_encoding utf-8') + assert_equal Encoding::UTF_8, d.instance.from_encoding + + # invalid from_encoding + assert_raise(Fluent::ConfigError) do + create_driver(SINGLE_LINE_CONFIG + 'from_encoding no-such-encoding') + end + end + # TODO: Should using more better approach instead of sleep wait def test_emit @@ -403,6 +414,28 @@ def test_encoding(data) assert_equal(encoding, emits[0][2]['message'].encoding) end + def test_from_encoding + d = create_driver %[ + format /(?.*)/ + read_from_head true + from_encoding cp932 + encoding utf-8 + ] + + d.run do + sleep 1 + + File.open("#{TMP_DIR}/tail.txt", "w:cp932") {|f| + f.puts "\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932) + } + sleep 1 + end + + emits = d.emits + assert_equal("\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932).encode(Encoding::UTF_8), emits[0][2]['message']) + assert_equal(Encoding::UTF_8, emits[0][2]['message'].encoding) + end + # multiline mode test def test_multiline @@ -507,6 +540,31 @@ def test_multiline_encoding_of_flushed_record(data) end end + def test_multiline_from_encoding_of_flushed_record + d = create_driver %[ + format multiline + format1 /^s (?[^\\n]+)(\\nf (?[^\\n]+))?(\\nf (?.*))?/ + format_firstline /^[s]/ + multiline_flush_interval 2s + read_from_head true + from_encoding cp932 + encoding utf-8 + ] + + d.run do + sleep 1 + File.open("#{TMP_DIR}/tail.txt", "w:cp932") { |f| + f.puts "s \x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932) + } + + sleep 4 + emits = d.emits + assert_equal(1, emits.length) + assert_equal("\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932).encode(Encoding::UTF_8), emits[0][2]['message1']) + assert_equal(Encoding::UTF_8, emits[0][2]['message1'].encoding) + end + end + def test_multiline_with_multiple_formats File.open("#{TMP_DIR}/tail.txt", "wb") { |f| } From 94582d450d9b5a3746bcdfe3d9e694c2a221afff Mon Sep 17 00:00:00 2001 From: panage Date: Thu, 23 Jun 2016 21:54:49 +0900 Subject: [PATCH 2/9] Add from_encoding param to in_tail plugin Add new param "from_encoding". If "encoding" param is only specified, process is same way as ever to keep backword compatibility. If two params, "encoding" and "from_encoding" are specified, process uses ```String.encode!(to, from)```. --- lib/fluent/plugin/in_tail.rb | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index ef920c081d..8e8a3bdcb9 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -64,14 +64,10 @@ def initialize config_param :multiline_flush_interval, :time, default: nil desc 'Enable the additional watch timer.' config_param :enable_watch_timer, :bool, default: true + desc 'The encoding after conversion of the input.' + config_param :encoding, default: nil desc 'The encoding of the input.' - config_param :encoding, default: nil do |encoding_name| - begin - Encoding.find(encoding_name) - rescue ArgumentError => e - raise ConfigError, e.message - end - end + config_param :from_encoding, default: nil desc 'Add the log path being tailed to records. Specify the field name to be used.' config_param :path_key, :string, default: nil @@ -92,6 +88,8 @@ def configure(conf) configure_parser(conf) configure_tag + @encoding = parse_encoding_param(conf['encoding']) + @from_encoding = parse_encoding_param(conf['from_encoding']) @multiline_mode = conf['format'] =~ /multiline/ @receive_handler = if @multiline_mode @@ -117,6 +115,14 @@ def configure_tag end end + def parse_encoding_param(encoding_name) + begin + Encoding.find(encoding_name) if encoding_name + rescue ArgumentError => e + raise ConfigError, e.message + end + end + def start super @@ -251,7 +257,11 @@ def close_watcher_after_rotate_wait(tw) def flush_buffer(tw) if lb = tw.line_buffer lb.chomp! - lb.force_encoding(@encoding) if @encoding + if @encoding && @from_encoding + lb.encode!(@encoding, @from_encoding) + elsif @encoding + lb.force_encoding(@encoding) + end @parser.parse(lb) { |time, record| if time && record tag = if @tag_prefix || @tag_suffix @@ -300,7 +310,11 @@ def receive_lines(lines, tail_watcher) def convert_line_to_event(line, es, tail_watcher) begin line.chomp! # remove \n - line.force_encoding(@encoding) if @encoding + if @encoding && @from_encoding + line.encode!(@encoding, @from_encoding) + elsif @encoding + line.force_encoding(@encoding) + end @parser.parse(line) { |time, record| if time && record record[@path_key] ||= tail_watcher.path unless @path_key.nil? From 6da73b9347a6c2062544650eca6911ea5bcf9a7b Mon Sep 17 00:00:00 2001 From: panage Date: Tue, 19 Jul 2016 23:23:28 +0000 Subject: [PATCH 3/9] Specify type of encoding and from_encoding --- lib/fluent/plugin/in_tail.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index 8e8a3bdcb9..e6734fbf92 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -65,9 +65,9 @@ def initialize desc 'Enable the additional watch timer.' config_param :enable_watch_timer, :bool, default: true desc 'The encoding after conversion of the input.' - config_param :encoding, default: nil + config_param :encoding, :string, default: nil desc 'The encoding of the input.' - config_param :from_encoding, default: nil + config_param :from_encoding, :string, default: nil desc 'Add the log path being tailed to records. Specify the field name to be used.' config_param :path_key, :string, default: nil From 100abf83439d3e5d6ada46fb0402dc638eed11c1 Mon Sep 17 00:00:00 2001 From: panage Date: Tue, 19 Jul 2016 23:25:03 +0000 Subject: [PATCH 4/9] Fix configuration encoding and from_encoding --- lib/fluent/plugin/in_tail.rb | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index e6734fbf92..bbfd5e7ad9 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -88,8 +88,7 @@ def configure(conf) configure_parser(conf) configure_tag - @encoding = parse_encoding_param(conf['encoding']) - @from_encoding = parse_encoding_param(conf['from_encoding']) + configure_encoding @multiline_mode = conf['format'] =~ /multiline/ @receive_handler = if @multiline_mode @@ -115,6 +114,22 @@ def configure_tag end end + def configure_encoding + if @encoding + @encoding = parse_encoding_param(@encoding) + if @from_encoding + @from_encoding = parse_encoding_param(@from_encoding) + end + else + if @from_encoding + $log.warn "'from_encoding' parameter must be specified in conjunction with 'encoding' paramter." + $log.warn "these parameter set to nil." + @encoding = nil + @from_encoding = nil + end + end + end + def parse_encoding_param(encoding_name) begin Encoding.find(encoding_name) if encoding_name From 0a5b8ef61eaa96996e52be916f5155b3563a10ce Mon Sep 17 00:00:00 2001 From: panage Date: Tue, 19 Jul 2016 23:29:20 +0000 Subject: [PATCH 5/9] Fix test code Add test pattern from_encoding is only specified. --- test/plugin/test_in_tail.rb | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test/plugin/test_in_tail.rb b/test/plugin/test_in_tail.rb index 191e64aa59..3edc251b0f 100644 --- a/test/plugin/test_in_tail.rb +++ b/test/plugin/test_in_tail.rb @@ -72,13 +72,27 @@ def test_configure_encoding end def test_configure_from_encoding - # valid from_encoding + # If only specified from_encoding set to nil d = create_driver(SINGLE_LINE_CONFIG + 'from_encoding utf-8') + assert_equal nil, d.instance.from_encoding + + # valid setting + d = create_driver %[ + format /(?.*)/ + read_from_head true + from_encoding utf-8 + encoding utf-8 + ] assert_equal Encoding::UTF_8, d.instance.from_encoding # invalid from_encoding assert_raise(Fluent::ConfigError) do - create_driver(SINGLE_LINE_CONFIG + 'from_encoding no-such-encoding') + d = create_driver %[ + format /(?.*)/ + read_from_head true + from_encoding no-such-encoding + encoding utf-8 + ] end end From 3e0fb5a8d724af49800a1e43a6179d65713d9a10 Mon Sep 17 00:00:00 2001 From: panage Date: Tue, 2 Aug 2016 01:06:05 +0000 Subject: [PATCH 6/9] Fix configure_encoding Use log instead of $log. Change the log messages so that the users are easy to understand. Return as soon as possible, if encoding parameters are not specified. --- lib/fluent/plugin/in_tail.rb | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index bbfd5e7ad9..aa9bcdf0a5 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -115,19 +115,18 @@ def configure_tag end def configure_encoding - if @encoding - @encoding = parse_encoding_param(@encoding) + unless @encoding if @from_encoding - @from_encoding = parse_encoding_param(@from_encoding) - end - else - if @from_encoding - $log.warn "'from_encoding' parameter must be specified in conjunction with 'encoding' paramter." - $log.warn "these parameter set to nil." + log.warn "'from_encoding' parameter must be specified with 'encoding' parameter." + log.warn "'from_encoding' is ignored" @encoding = nil @from_encoding = nil + return end end + + @encoding = parse_encoding_param(@encoding) if @encoding + @from_encoding = parse_encoding_param(@from_encoding) if @from_encoding end def parse_encoding_param(encoding_name) From 46a54e1b24b283588c7ec848ca4e34a64796039b Mon Sep 17 00:00:00 2001 From: panage Date: Thu, 4 Aug 2016 06:24:17 +0000 Subject: [PATCH 7/9] Use ConfigError instead of warnning-log I deleted 2nd log message. Changed to raise error if 'encoding' and 'from_encoding' paramters are bad cofiguration. --- lib/fluent/plugin/in_tail.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index aa9bcdf0a5..07e2e87ef0 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -117,11 +117,7 @@ def configure_tag def configure_encoding unless @encoding if @from_encoding - log.warn "'from_encoding' parameter must be specified with 'encoding' parameter." - log.warn "'from_encoding' is ignored" - @encoding = nil - @from_encoding = nil - return + raise ConfigError, "tail: 'from_encoding' parameter must be specified with 'encoding' parameter." end end From 0696130c68fd6923b231b9555dc8a3f083212713 Mon Sep 17 00:00:00 2001 From: panage Date: Thu, 4 Aug 2016 06:47:18 +0000 Subject: [PATCH 8/9] Fix test code Corresponding to ConfigError --- test/plugin/test_in_tail.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/plugin/test_in_tail.rb b/test/plugin/test_in_tail.rb index 3edc251b0f..872f0fdfde 100644 --- a/test/plugin/test_in_tail.rb +++ b/test/plugin/test_in_tail.rb @@ -72,9 +72,10 @@ def test_configure_encoding end def test_configure_from_encoding - # If only specified from_encoding set to nil - d = create_driver(SINGLE_LINE_CONFIG + 'from_encoding utf-8') - assert_equal nil, d.instance.from_encoding + # If only specified from_encoding raise ConfigError + assert_raise(Fluent::ConfigError) do + d = create_driver(SINGLE_LINE_CONFIG + 'from_encoding utf-8') + end # valid setting d = create_driver %[ From 57cb9da3d547e35e5d543e6744c4de9f85af70e9 Mon Sep 17 00:00:00 2001 From: panage Date: Wed, 17 Aug 2016 02:30:28 +0000 Subject: [PATCH 9/9] Change implementation of encode statement For almost users, default setting should be faster than the setting is specified encoding. --- lib/fluent/plugin/in_tail.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index 07e2e87ef0..914617b3e4 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -267,10 +267,12 @@ def close_watcher_after_rotate_wait(tw) def flush_buffer(tw) if lb = tw.line_buffer lb.chomp! - if @encoding && @from_encoding - lb.encode!(@encoding, @from_encoding) - elsif @encoding - lb.force_encoding(@encoding) + if @encoding + if @from_encoding + lb.encode!(@encoding, @from_encoding) + else + lb.force_encoding(@encoding) + end end @parser.parse(lb) { |time, record| if time && record @@ -320,10 +322,12 @@ def receive_lines(lines, tail_watcher) def convert_line_to_event(line, es, tail_watcher) begin line.chomp! # remove \n - if @encoding && @from_encoding - line.encode!(@encoding, @from_encoding) - elsif @encoding - line.force_encoding(@encoding) + if @encoding + if @from_encoding + line.encode!(@encoding, @from_encoding) + else + line.force_encoding(@encoding) + end end @parser.parse(line) { |time, record| if time && record