From 1a3beb5c4b7e689d4e7b612a34ce5fe55c4fbe35 Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Mon, 27 Jan 2025 07:49:08 -0800 Subject: [PATCH] Oomd: fix pid counting Summary: Previously, oomd counted the number of processes that were present in a cgroup by recursively going down and aggregating cgroup.pids. However, since this required recursive travel, oomd switched over to using pids.current instead. This change caused problems on cgroups that did not have the pid controller enabled, and caused oomd to be unable to kill. This diff fixes this behavior by using cgroup.events to determine whether a cgroup is empty (and therefore, whether to select this cgroup), and uses either pids.current to increment the number of processes killed if possible, or returns 1 as the number of processes killed. Reviewed By: lnyng Differential Revision: D68645177 fbshipit-source-id: 8f8b331d3880f47bafafb19fc8c1e7a5e82b4d2b --- src/oomd/plugins/BaseKillPlugin.cpp | 14 ++++++++++---- src/oomd/plugins/CorePluginsTest.cpp | 25 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/oomd/plugins/BaseKillPlugin.cpp b/src/oomd/plugins/BaseKillPlugin.cpp index e02d268f..2583fd6d 100644 --- a/src/oomd/plugins/BaseKillPlugin.cpp +++ b/src/oomd/plugins/BaseKillPlugin.cpp @@ -564,14 +564,20 @@ int BaseKillPlugin::tryToKillCgroup( } auto procsBeforeKill = Fs::readPidsCurrentAt(target.fd()); - if (!procsBeforeKill) { - OLOG << "Failed to read pids from " << target.cgroup().absolutePath() - << ", skip killing cgroup"; - } else if (*procsBeforeKill == 0) { + auto populated = Fs::readIsPopulatedAt(target.fd()); + + if (!populated) { + OLOG << "Failed to read cgroup.events in " + << target.cgroup().absolutePath() + << ", something is wrong. Skip killing cgroup."; + } else if (!populated.value()) { OLOG << "No pids in " << target.cgroup().absolutePath() << ", skip killing cgroup"; } else if (kernelKillCgroup(target)) { OLOG << "Failed to kill cgroup " << target.cgroup().absolutePath(); + } else if (!procsBeforeKill || procsBeforeKill.value() == 0) { + // Placeholder if we cannot figure out how many we actually killed + nrKilled = 1; } else { nrKilled = procsBeforeKill.value(); } diff --git a/src/oomd/plugins/CorePluginsTest.cpp b/src/oomd/plugins/CorePluginsTest.cpp index b2b4f54a..9a5da270 100644 --- a/src/oomd/plugins/CorePluginsTest.cpp +++ b/src/oomd/plugins/CorePluginsTest.cpp @@ -714,7 +714,8 @@ TEST_F(DoubleKillTest, KillsTwice) { tempdir_, {F::makeDir( "A", - {F::makeFile("pids.current", "1\n"), + {F::makeFile("cgroup.events", "populated 1"), + F::makeFile("pids.current", "1\n"), F::makeFile("cgroup.kill", "0")})})); // Should do nothing, since it will write to cgroup.kill with no side effect. @@ -734,6 +735,28 @@ TEST_F(DoubleKillTest, KillsTwice) { EXPECT_EQ(plugin->run(ctx_), Engine::PluginRet::STOP); } +class NoPidControllerTest : public CorePluginsTest {}; + +TEST_F(NoPidControllerTest, KillsWithoutPidController) { + // Despite not having a pid controller, we should still be able to kill + F::materialize(F::makeDir( + tempdir_, + {F::makeDir( + "A", + {F::makeFile("cgroup.kill", "0"), + F::makeFile("cgroup.events", "populated 1")})})); + auto plugin = std::make_shared(); + ASSERT_NE(plugin, nullptr); + const PluginConstructionContext compile_context(tempdir_); + Engine::PluginArgs args; + args["cgroup"] = "*"; + args["recursive"] = "true"; + args["kernelkill"] = "true"; + + ASSERT_EQ(plugin->init(args, compile_context), 0); + EXPECT_EQ(plugin->run(ctx_), Engine::PluginRet::STOP); +} + class PressureRisingBeyondTest : public CorePluginsTest {}; TEST_F(PressureRisingBeyondTest, DetectsHighMemPressure) {