-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add shutdown controller to force exit on stalled shutdown #4051
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,6 +143,7 @@ def filter(event) | |
# @return [Array<LogStash::Event] filtered events and any new events generated by the filter | ||
public | ||
def multi_filter(events) | ||
LogStash::Util.set_thread_plugin(self) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
result = [] | ||
events.each do |event| | ||
unless event.cancelled? | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,11 @@ | |
require "logstash/filters/base" | ||
require "logstash/inputs/base" | ||
require "logstash/outputs/base" | ||
require "logstash/util/reporter" | ||
require "logstash/config/cpu_core_strategy" | ||
require "logstash/util/defaults_printer" | ||
require "logstash/shutdown_controller" | ||
|
||
class LogStash::Pipeline | ||
module LogStash; class Pipeline | ||
attr_reader :inputs, :filters, :outputs, :input_to_filter, :filter_to_output | ||
|
||
def initialize(configstr) | ||
|
@@ -25,6 +25,7 @@ def initialize(configstr) | |
|
||
grammar = LogStashConfigParser.new | ||
@config = grammar.parse(configstr) | ||
|
||
if @config.nil? | ||
raise LogStash::ConfigurationError, grammar.failure_reason | ||
end | ||
|
@@ -150,8 +151,11 @@ def start_inputs | |
def start_filters | ||
@filters.each(&:register) | ||
to_start = @settings["filter-workers"] | ||
@filter_threads = to_start.times.collect do | ||
Thread.new { filterworker } | ||
@filter_threads = to_start.times.collect do |i| | ||
Thread.new do | ||
LogStash::Util.set_thread_name("|filterworker.#{i}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
filterworker | ||
end | ||
end | ||
actually_started = @filter_threads.select(&:alive?).size | ||
msg = "Worker threads expected: #{to_start}, worker threads started: #{actually_started}" | ||
|
@@ -175,7 +179,8 @@ def start_input(plugin) | |
end | ||
|
||
def inputworker(plugin) | ||
LogStash::Util::set_thread_name("<#{plugin.class.config_name}") | ||
LogStash::Util.set_thread_name("<#{plugin.class.config_name}") | ||
LogStash::Util.set_thread_plugin(plugin) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Love having this around for troubleshooting! |
||
begin | ||
plugin.run(@input_to_filter) | ||
rescue => e | ||
|
@@ -208,7 +213,6 @@ def inputworker(plugin) | |
end # def inputworker | ||
|
||
def filterworker | ||
LogStash::Util.set_thread_name("|worker") | ||
begin | ||
while true | ||
event = @input_to_filter.pop | ||
|
@@ -250,6 +254,7 @@ def outputworker | |
event = @filter_to_output.pop | ||
break if event == LogStash::SHUTDOWN | ||
output_func(event) | ||
LogStash::Util.set_thread_plugin(nil) | ||
end | ||
ensure | ||
@outputs.each do |output| | ||
|
@@ -309,4 +314,49 @@ def flush_filters_to_output!(options = {}) | |
end | ||
end # flush_filters_to_output! | ||
|
||
end # class Pipeline | ||
def inflight_count | ||
data = {} | ||
total = 0 | ||
|
||
input_to_filter = @input_to_filter.size | ||
total += input_to_filter | ||
filter_to_output = @filter_to_output.size | ||
total += filter_to_output | ||
|
||
data["input_to_filter"] = input_to_filter if input_to_filter > 0 | ||
data["filter_to_output"] = filter_to_output if filter_to_output > 0 | ||
|
||
output_worker_queues = [] | ||
@outputs.each do |output| | ||
next unless output.worker_queue && output.worker_queue.size > 0 | ||
plugin_info = output.debug_info | ||
size = output.worker_queue.size | ||
total += size | ||
plugin_info << size | ||
output_worker_queues << plugin_info | ||
end | ||
data["output_worker_queues"] = output_worker_queues unless output_worker_queues.empty? | ||
data["total"] = total | ||
data | ||
end | ||
|
||
def stalling_threads | ||
plugin_threads | ||
.reject {|t| t["blocked_on"] } # known begnin blocking statuses | ||
.each {|t| t.delete("backtrace") } | ||
.each {|t| t.delete("blocked_on") } | ||
.each {|t| t.delete("status") } | ||
end | ||
|
||
def plugin_threads | ||
input_threads = @input_threads.select {|t| t.alive? }.map {|t| thread_info(t) } | ||
filter_threads = @filter_threads.select {|t| t.alive? }.map {|t| thread_info(t) } | ||
output_threads = @output_threads.select {|t| t.alive? }.map {|t| thread_info(t) } | ||
output_worker_threads = @outputs.flat_map {|output| output.worker_threads }.map {|t| thread_info(t) } | ||
input_threads + filter_threads + output_threads + output_worker_threads | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you think to refactor |
||
end | ||
|
||
def thread_info(thread) | ||
LogStash::Util.thread_info(thread) | ||
end | ||
end; end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
# encoding: utf-8 | ||
|
||
module LogStash | ||
class ShutdownController | ||
|
||
CHECK_EVERY = 1 # second | ||
REPORT_EVERY = 5 # checks | ||
ABORT_AFTER = 3 # stalled reports | ||
|
||
attr_reader :cycle_period, :report_every, :abort_threshold | ||
|
||
def initialize(pipeline, cycle_period=CHECK_EVERY, report_every=REPORT_EVERY, abort_threshold=ABORT_AFTER) | ||
@pipeline = pipeline | ||
@cycle_period = cycle_period | ||
@report_every = report_every | ||
@abort_threshold = abort_threshold | ||
@reports = [] | ||
end | ||
|
||
def self.unsafe_shutdown=(boolean) | ||
@unsafe_shutdown = boolean | ||
end | ||
|
||
def self.unsafe_shutdown? | ||
@unsafe_shutdown | ||
end | ||
|
||
def self.logger=(logger) | ||
@logger = logger | ||
end | ||
|
||
def self.logger | ||
@logger ||= Cabin::Channel.get(LogStash) | ||
end | ||
|
||
def self.start(pipeline, cycle_period=CHECK_EVERY, report_every=REPORT_EVERY, abort_threshold=ABORT_AFTER) | ||
controller = self.new(pipeline, cycle_period, report_every, abort_threshold) | ||
Thread.new(controller) { |controller| controller.start } | ||
end | ||
|
||
def logger | ||
self.class.logger | ||
end | ||
|
||
def start | ||
sleep(@cycle_period) | ||
cycle_number = 0 | ||
stalled_count = 0 | ||
Stud.interval(@cycle_period) do | ||
@reports << Report.from_pipeline(@pipeline) | ||
@reports.delete_at(0) if @reports.size > @report_every # expire old report | ||
if cycle_number == (@report_every - 1) # it's report time! | ||
logger.warn(@reports.last.to_hash) | ||
|
||
if shutdown_stalled? | ||
logger.error("The shutdown process appears to be stalled due to busy or blocked plugins. Check the logs for more information.") if stalled_count == 0 | ||
stalled_count += 1 | ||
|
||
if self.class.unsafe_shutdown? && @abort_threshold == stalled_count | ||
logger.fatal("Forcefully quitting logstash..") | ||
force_exit() | ||
break | ||
end | ||
else | ||
stalled_count = 0 | ||
end | ||
end | ||
cycle_number = (cycle_number + 1) % @report_every | ||
end | ||
end | ||
|
||
# A pipeline shutdown is stalled if | ||
# * at least REPORT_EVERY reports have been created | ||
# * the inflight event count is in monotonically increasing | ||
# * there are worker threads running which aren't blocked on SizedQueue pop/push | ||
# * the stalled thread list is constant in the previous REPORT_EVERY reports | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 for the comment |
||
def shutdown_stalled? | ||
return false unless @reports.size == @report_every # | ||
# is stalled if inflight count is either constant or increasing | ||
stalled_event_count = @reports.each_cons(2).all? do |prev_report, next_report| | ||
prev_report.inflight_count["total"] <= next_report.inflight_count["total"] | ||
end | ||
if stalled_event_count | ||
@reports.each_cons(2).all? do |prev_report, next_report| | ||
prev_report.stalling_threads == next_report.stalling_threads | ||
end | ||
else | ||
false | ||
end | ||
end | ||
|
||
def force_exit | ||
exit(-1) | ||
end | ||
end | ||
|
||
class Report | ||
|
||
attr_reader :inflight_count, :stalling_threads | ||
|
||
def self.from_pipeline(pipeline) | ||
new(pipeline.inflight_count, pipeline.stalling_threads) | ||
end | ||
|
||
def initialize(inflight_count, stalling_threads) | ||
@inflight_count = inflight_count | ||
@stalling_threads = format_threads_by_plugin(stalling_threads) | ||
end | ||
|
||
def to_hash | ||
{ | ||
"INFLIGHT_EVENT_COUNT" => @inflight_count, | ||
"STALLING_THREADS" => @stalling_threads | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could use the accessor here for theses values. |
||
} | ||
end | ||
|
||
def format_threads_by_plugin(stalling_threads) | ||
stalled_plugins = {} | ||
stalling_threads.each do |thr| | ||
key = (thr.delete("plugin") || "other") | ||
stalled_plugins[key] ||= [] | ||
stalled_plugins[key] << thr | ||
end | ||
stalled_plugins | ||
end | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we are removing most the content of this method, is there any reason not to call pipeline.shutdown directly and remove the inderection?