From 9be8e86edca890ccfb799a24cf2e9b80dd706de9 Mon Sep 17 00:00:00 2001 From: Fujimoto Seiji Date: Fri, 16 Mar 2018 10:04:53 +0900 Subject: [PATCH] output: Prevent flushing threads from consuming too much CPU. This fixes the CPU spike issue reported by several users. If one flush thread sets the retry state, the other threads wait until the next retry time. This behaviour is fine in itself. However, in this case the `interval` variable has a meaningless (zero or negative) value because `state.next_clock` is already behind the current clock. And since the sleep call gets skipped in such a case (output.rb#L1343): sleep interval if interval > 0 the waiting threads effectively spin in a busy loop and consume CPU aggressively. --- lib/fluent/plugin/output.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index db49644948..4a8b42d9b6 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -1313,9 +1313,17 @@ def flush_thread_run(state) # This thread don't use `thread_current_running?` because this thread should run in `before_shutdown` phase while @output_flush_threads_running current_clock = Fluent::Clock.now - interval = state.next_clock - current_clock + next_retry_time = nil - if state.next_clock <= current_clock && @retry_mutex.synchronize { @retry ? @retry.next_time <= Time.now : true } + @retry_mutex.synchronize do + next_retry_time = @retry ? @retry.next_time : nil + end + + if state.next_clock > current_clock + interval = state.next_clock - current_clock + elsif next_retry_time && next_retry_time > Time.now + interval = next_retry_time.to_f - Time.now.to_f + else try_flush # next_flush_time uses flush_thread_interval or flush_thread_burst_interval (or retrying)