Skip to content

Commit

Permalink
Oomd: fix pid counting
Browse files Browse the repository at this point in the history
Summary:
Previously, oomd counted the number of processes that were present in a cgroup by recursively going down and aggregating cgroup.pids. However, since this required recursive travel, oomd switched over to using pids.current instead. This change caused problems on cgroups that did not have the pid controller enabled, and caused oomd to be unable to kill.

This diff fixes this behavior by using cgroup.events to determine whether a cgroup is empty (and therefore, whether to select this cgroup), and uses either pids.current to increment the number of processes killed if possible, or returns 1 as the number of processes killed.

Reviewed By: lnyng

Differential Revision: D68645177

fbshipit-source-id: 8f8b331d3880f47bafafb19fc8c1e7a5e82b4d2b
  • Loading branch information
Joshua Hahn authored and facebook-github-bot committed Jan 27, 2025
1 parent c3b5f13 commit 1a3beb5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 5 deletions.
14 changes: 10 additions & 4 deletions src/oomd/plugins/BaseKillPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,14 +564,20 @@ int BaseKillPlugin::tryToKillCgroup(
}

auto procsBeforeKill = Fs::readPidsCurrentAt(target.fd());
if (!procsBeforeKill) {
OLOG << "Failed to read pids from " << target.cgroup().absolutePath()
<< ", skip killing cgroup";
} else if (*procsBeforeKill == 0) {
auto populated = Fs::readIsPopulatedAt(target.fd());

if (!populated) {
OLOG << "Failed to read cgroup.events in "
<< target.cgroup().absolutePath()
<< ", something is wrong. Skip killing cgroup.";
} else if (!populated.value()) {
OLOG << "No pids in " << target.cgroup().absolutePath()
<< ", skip killing cgroup";
} else if (kernelKillCgroup(target)) {
OLOG << "Failed to kill cgroup " << target.cgroup().absolutePath();
} else if (!procsBeforeKill || procsBeforeKill.value() == 0) {
// Placeholder if we cannot figure out how many we actually killed
nrKilled = 1;
} else {
nrKilled = procsBeforeKill.value();
}
Expand Down
25 changes: 24 additions & 1 deletion src/oomd/plugins/CorePluginsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,8 @@ TEST_F(DoubleKillTest, KillsTwice) {
tempdir_,
{F::makeDir(
"A",
{F::makeFile("pids.current", "1\n"),
{F::makeFile("cgroup.events", "populated 1"),
F::makeFile("pids.current", "1\n"),
F::makeFile("cgroup.kill", "0")})}));

// Should do nothing, since it will write to cgroup.kill with no side effect.
Expand All @@ -734,6 +735,28 @@ TEST_F(DoubleKillTest, KillsTwice) {
EXPECT_EQ(plugin->run(ctx_), Engine::PluginRet::STOP);
}

class NoPidControllerTest : public CorePluginsTest {};

TEST_F(NoPidControllerTest, KillsWithoutPidController) {
// Despite not having a pid controller, we should still be able to kill
F::materialize(F::makeDir(
tempdir_,
{F::makeDir(
"A",
{F::makeFile("cgroup.kill", "0"),
F::makeFile("cgroup.events", "populated 1")})}));
auto plugin = std::make_shared<KernelKillPlugin>();
ASSERT_NE(plugin, nullptr);
const PluginConstructionContext compile_context(tempdir_);
Engine::PluginArgs args;
args["cgroup"] = "*";
args["recursive"] = "true";
args["kernelkill"] = "true";

ASSERT_EQ(plugin->init(args, compile_context), 0);
EXPECT_EQ(plugin->run(ctx_), Engine::PluginRet::STOP);
}

class PressureRisingBeyondTest : public CorePluginsTest {};

TEST_F(PressureRisingBeyondTest, DetectsHighMemPressure) {
Expand Down

0 comments on commit 1a3beb5

Please sign in to comment.