From 6921b470792bbed9a4322570758779ded8511aa9 Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Thu, 5 Jul 2018 09:33:31 +0900 Subject: [PATCH 1/8] Consider timezone when calculate timekey Because the timestamp of the midnight cannot be divisible by 86400 in localtime (JST). The timestamp of the midnight is divisible by 86400 only in UTC. Therefore we must consider offset from UTC in localtime. For example, at `2018-07-04 01:23:23 +0900`: If timekey is 86400 and path template is `/log/%Y%m%d.log`. In previous version, extract path template to `/log/20180703.log`. In this version, extract path template to `/log/20180704.log`. Fix #1986 Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/output.rb | 19 +++++++++++++------ test/plugin/test_out_file.rb | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index 3d64839264..feaa8b0c20 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -803,20 +803,17 @@ def metadata(tag, time, record) if !@chunk_key_time && !@chunk_key_tag @buffer.metadata() elsif @chunk_key_time && @chunk_key_tag - time_int = time.to_i - timekey = (time_int - (time_int % @buffer_config.timekey)).to_i + timekey = calculate_timekey(time) @buffer.metadata(timekey: timekey, tag: tag) elsif @chunk_key_time - time_int = time.to_i - timekey = (time_int - (time_int % @buffer_config.timekey)).to_i + timekey = calculate_timekey(time) @buffer.metadata(timekey: timekey) else @buffer.metadata(tag: tag) end else timekey = if @chunk_key_time - time_int = time.to_i - (time_int - (time_int % @buffer_config.timekey)).to_i + calculate_timekey(time) else nil end @@ -825,6 +822,16 @@ def metadata(tag, time, record) end end + def calculate_timekey(time) + time_int = time.to_i + if @buffer_config.timekey_use_utc + (time_int - (time_int % @buffer_config.timekey)).to_i + else + offset = Time.at(time_int).utc_offset + (time_int - ((time_int + offset)% @buffer_config.timekey)).to_i + end + end + def chunk_for_test(tag, time, record) require 'fluent/plugin/buffer/memory_chunk' diff --git a/test/plugin/test_out_file.rb b/test/plugin/test_out_file.rb index f68624effb..59d3080e42 100644 --- a/test/plugin/test_out_file.rb +++ b/test/plugin/test_out_file.rb @@ -557,6 +557,39 @@ def parse_system(text) check_gzipped_result(path, formatted_lines * 3) end + test 'append when JST' do + time = event_time("2011-01-02 03:14:15+09:00") + formatted_lines = %[2011-01-02T03:14:15+09:00\ttest\t{"a":1}\n] + %[2011-01-02T03:14:15+09:00\ttest\t{"a":2}\n] + + write_once = ->(){ + d = create_driver %[ + path #{TMP_DIR}/out_file_test + compress gz + append true + + timekey_use_utc false + + ] + d.run(default_tag: 'test'){ + d.feed(time, {"a"=>1}) + d.feed(time, {"a"=>2}) + } + d.instance.last_written_path + } + + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines) + + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines * 2) + + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines * 3) + end + test '${chunk_id}' do time = event_time("2011-01-02 13:14:15 UTC") formatted_lines = %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] + %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n] From a5cf3b0269a0ab221ca8493ef0f7dc71ccf96ea0 Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Thu, 5 Jul 2018 17:22:53 +0900 Subject: [PATCH 2/8] Use with_timezone to set proper timezone during test Signed-off-by: Kenji Okimoto --- test/plugin/test_out_file.rb | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/test/plugin/test_out_file.rb b/test/plugin/test_out_file.rb index 59d3080e42..ddf53b8a58 100644 --- a/test/plugin/test_out_file.rb +++ b/test/plugin/test_out_file.rb @@ -558,36 +558,38 @@ def parse_system(text) end test 'append when JST' do - time = event_time("2011-01-02 03:14:15+09:00") - formatted_lines = %[2011-01-02T03:14:15+09:00\ttest\t{"a":1}\n] + %[2011-01-02T03:14:15+09:00\ttest\t{"a":2}\n] + with_timezone("JST") do + time = event_time("2011-01-02 03:14:15+09:00") + formatted_lines = %[2011-01-02T03:14:15+09:00\ttest\t{"a":1}\n] + %[2011-01-02T03:14:15+09:00\ttest\t{"a":2}\n] - write_once = ->(){ - d = create_driver %[ - path #{TMP_DIR}/out_file_test - compress gz - append true - - timekey_use_utc false - - ] - d.run(default_tag: 'test'){ - d.feed(time, {"a"=>1}) - d.feed(time, {"a"=>2}) + write_once = ->(){ + d = create_driver %[ + path #{TMP_DIR}/out_file_test + compress gz + append true + + timekey_use_utc false + + ] + d.run(default_tag: 'test'){ + d.feed(time, {"a"=>1}) + d.feed(time, {"a"=>2}) + } + d.instance.last_written_path } - d.instance.last_written_path - } - path = write_once.call - assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path - check_gzipped_result(path, formatted_lines) + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines) - path = write_once.call - assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path - check_gzipped_result(path, formatted_lines * 2) + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines * 2) - path = write_once.call - assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path - check_gzipped_result(path, formatted_lines * 3) + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110102.log.gz", path + check_gzipped_result(path, formatted_lines * 3) + end end test '${chunk_id}' do From 34fb65a6dc56475af09a87ab46bfdee2e38d441d Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Thu, 5 Jul 2018 18:58:15 +0900 Subject: [PATCH 3/8] Set proper value "Asia/Tokyo" to TZ Signed-off-by: Kenji Okimoto --- test/plugin/test_out_file.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/plugin/test_out_file.rb b/test/plugin/test_out_file.rb index ddf53b8a58..30ac1e43eb 100644 --- a/test/plugin/test_out_file.rb +++ b/test/plugin/test_out_file.rb @@ -558,7 +558,7 @@ def parse_system(text) end test 'append when JST' do - with_timezone("JST") do + with_timezone("Asia/Tokyo") do time = event_time("2011-01-02 03:14:15+09:00") formatted_lines = %[2011-01-02T03:14:15+09:00\ttest\t{"a":1}\n] + %[2011-01-02T03:14:15+09:00\ttest\t{"a":2}\n] @@ -569,6 +569,7 @@ def parse_system(text) append true timekey_use_utc false + timekey_zone Asia/Tokyo ] d.run(default_tag: 'test'){ From 92225ca513dee17d5429fa53ed1df3d670d1579f Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 6 Jul 2018 16:18:40 +0900 Subject: [PATCH 4/8] Set TZ properly on Windows Signed-off-by: Kenji Okimoto --- test/plugin/test_out_file.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/plugin/test_out_file.rb b/test/plugin/test_out_file.rb index 30ac1e43eb..16471468e9 100644 --- a/test/plugin/test_out_file.rb +++ b/test/plugin/test_out_file.rb @@ -558,7 +558,7 @@ def parse_system(text) end test 'append when JST' do - with_timezone("Asia/Tokyo") do + with_timezone(Fluent.windows? ? "JST-9" : "Asia/Tokyo") do time = event_time("2011-01-02 03:14:15+09:00") formatted_lines = %[2011-01-02T03:14:15+09:00\ttest\t{"a":1}\n] + %[2011-01-02T03:14:15+09:00\ttest\t{"a":2}\n] From 1e9d52d95c43881c67127e3819790032ee004368 Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Thu, 12 Jul 2018 10:00:31 +0900 Subject: [PATCH 5/8] Use instance variables instead of `@buffer_config` instance methods This can improve performance. Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/output.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index feaa8b0c20..070feff795 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -304,6 +304,8 @@ def configure(conf) raise Fluent::ConfigError, " argument includes 'time', but timekey is not configured" unless @buffer_config.timekey Fluent::Timezone.validate!(@buffer_config.timekey_zone) @timekey_zone = @buffer_config.timekey_use_utc ? '+0000' : @buffer_config.timekey_zone + @timekey = @buffer_config.timekey + @timekey_use_utc = @buffer_config.timekey_use_utc @output_time_formatter_cache = {} end @@ -824,11 +826,11 @@ def metadata(tag, time, record) def calculate_timekey(time) time_int = time.to_i - if @buffer_config.timekey_use_utc - (time_int - (time_int % @buffer_config.timekey)).to_i + if @timekey_use_utc + (time_int - (time_int % @timekey)).to_i else offset = Time.at(time_int).utc_offset - (time_int - ((time_int + offset)% @buffer_config.timekey)).to_i + (time_int - ((time_int + offset)% @timekey)).to_i end end From 72339a23125182400eaa0c3e9c24c9bdf839c915 Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Thu, 12 Jul 2018 10:01:39 +0900 Subject: [PATCH 6/8] Use Fluent::Timezone.utc_offset to consider `@buffer_config.timekey_zone` Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/output.rb | 2 +- lib/fluent/timezone.rb | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index 070feff795..d2591cce85 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -829,7 +829,7 @@ def calculate_timekey(time) if @timekey_use_utc (time_int - (time_int % @timekey)).to_i else - offset = Time.at(time_int).utc_offset + offset = Fluent::Timezone.utc_offset(time, @timekey_zone) (time_int - ((time_int + offset)% @timekey)).to_i end end diff --git a/lib/fluent/timezone.rb b/lib/fluent/timezone.rb index bb6fa86893..0e79dd0387 100644 --- a/lib/fluent/timezone.rb +++ b/lib/fluent/timezone.rb @@ -139,5 +139,17 @@ def self.formatter(timezone = nil, format = nil) return nil end + + def self.utc_offset(time, timezone) + return 0 if timezone.nil? + + case timezone + when NUMERIC_PATTERN + Time.zone_offset(timezone) + when NAME_PATTERN + tz = TZInfo::Timezone.get(timezone) + tz.period_for_utc(time).utc_total_offset + end + end end end From f178b87f1a2b62eb24404dd200eeb24987eaf9af Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Tue, 17 Jul 2018 14:32:21 +0900 Subject: [PATCH 7/8] Add test case when machine timezone and timekey_zone is different Signed-off-by: Kenji Okimoto --- test/plugin/test_out_file.rb | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/plugin/test_out_file.rb b/test/plugin/test_out_file.rb index 16471468e9..48329acef0 100644 --- a/test/plugin/test_out_file.rb +++ b/test/plugin/test_out_file.rb @@ -593,6 +593,43 @@ def parse_system(text) end end + test 'append when UTC-02 but timekey_zone is +0900' do + with_timezone("UTC-02") do # +0200 + time = event_time("2011-01-02 17:14:15+02:00") + formatted_lines = %[2011-01-02T17:14:15+02:00\ttest\t{"a":1}\n] + %[2011-01-02T17:14:15+02:00\ttest\t{"a":2}\n] + + write_once = ->(){ + d = create_driver %[ + path #{TMP_DIR}/out_file_test + compress gz + append true + + timekey_use_utc false + timekey_zone +0900 + + ] + d.run(default_tag: 'test'){ + d.feed(time, {"a"=>1}) + d.feed(time, {"a"=>2}) + } + d.instance.last_written_path + } + + path = write_once.call + # Rotated at 2011-01-02 17:00:00+02:00 + assert_equal "#{TMP_DIR}/out_file_test.20110103.log.gz", path + check_gzipped_result(path, formatted_lines) + + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110103.log.gz", path + check_gzipped_result(path, formatted_lines * 2) + + path = write_once.call + assert_equal "#{TMP_DIR}/out_file_test.20110103.log.gz", path + check_gzipped_result(path, formatted_lines * 3) + end + end + test '${chunk_id}' do time = event_time("2011-01-02 13:14:15 UTC") formatted_lines = %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] + %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n] From 54bda48657f157bef10c389fa83989d1e19a9972 Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Tue, 17 Jul 2018 14:35:35 +0900 Subject: [PATCH 8/8] Add cache for calculating timezone offset For `NUMERIC_PATTERN`, just return static value. For `NAME_PATTERN`, return lambda to calculate offset later. This version is 5-10% faster than the previous version. Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/output.rb | 4 +++- lib/fluent/timezone.rb | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index d2591cce85..0da22cb5bc 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -306,6 +306,8 @@ def configure(conf) @timekey_zone = @buffer_config.timekey_use_utc ? '+0000' : @buffer_config.timekey_zone @timekey = @buffer_config.timekey @timekey_use_utc = @buffer_config.timekey_use_utc + @offset = Fluent::Timezone.utc_offset(@timekey_zone) + @calculate_offset = @offset.respond_to?(:call) ? @offset : nil @output_time_formatter_cache = {} end @@ -829,7 +831,7 @@ def calculate_timekey(time) if @timekey_use_utc (time_int - (time_int % @timekey)).to_i else - offset = Fluent::Timezone.utc_offset(time, @timekey_zone) + offset = @calculate_offset ? @calculate_offset.call(time) : @offset (time_int - ((time_int + offset)% @timekey)).to_i end end diff --git a/lib/fluent/timezone.rb b/lib/fluent/timezone.rb index 0e79dd0387..53a155153b 100644 --- a/lib/fluent/timezone.rb +++ b/lib/fluent/timezone.rb @@ -140,7 +140,7 @@ def self.formatter(timezone = nil, format = nil) return nil end - def self.utc_offset(time, timezone) + def self.utc_offset(timezone) return 0 if timezone.nil? case timezone @@ -148,7 +148,9 @@ def self.utc_offset(time, timezone) Time.zone_offset(timezone) when NAME_PATTERN tz = TZInfo::Timezone.get(timezone) - tz.period_for_utc(time).utc_total_offset + ->(time) { + tz.period_for_utc(time).utc_total_offset + } end end end