Skip to content

Commit 40fe92e

Browse files
minhancaoczentgr
authored andcommitted
[native] Add LinuxMemoryChecker check/warning to ensure system-mem-limit-gb is reasonably set
Add checks and warnings to ensure system-memory-gb <= system-mem-limit-gb <= available machine memory of the deployment. For cgroup v1, the available machine memory of the deployment is the smaller of MemTotal in /proc/meminfo and memory.limit_in_bytes. For cgroup v2, it is the smaller of MemTotal in /proc/meminfo and memory.max; if memory.max contains the string "max" (no limit set), MemTotal from /proc/meminfo is used.
1 parent bd29a38 commit 40fe92e

File tree

7 files changed

+418
-132
lines changed

7 files changed

+418
-132
lines changed

presto-docs/src/main/sphinx/presto_cpp/properties.rst

+9-4
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,15 @@ The configuration properties of Presto C++ workers are described here, in alphab
146146
^^^^^^^^^^^^^^^^^^^^
147147

148148
* **Type:** ``integer``
149-
* **Default value:** ``40``
149+
* **Default value:** ``57``
150150

151151
Memory allocation limit enforced by an internal memory allocator. It consists of two parts:
152152
1) Memory used by the queries as specified in ``query-memory-gb``; 2) Memory used by the
153153
system, such as disk spilling and cache prefetch.
154154

155-
Set ``system-memory-gb`` to the available machine memory of the deployment.
155+
Set ``system-memory-gb`` to about 90% of the available machine memory of the deployment.
156+
This allows some buffer room to handle unaccounted memory in order to prevent out-of-memory conditions.
157+
The default value of 57 GB is calculated based on an available machine memory of 64 GB.
156158

157159

158160
``query-memory-gb``
@@ -321,11 +323,14 @@ server is under low memory pressure.
321323
^^^^^^^^^^^^^^^^^^^^^^^
322324

323325
* **Type:** ``integer``
324-
* **Default value:** ``55``
326+
* **Default value:** ``60``
325327

326328
Specifies the system memory limit that triggers the memory pushback or heap dump if
327329
the server memory usage is beyond this limit. A value of zero means no limit is set.
328-
This only applies if ``system-mem-pushback-enabled`` is ``true``.
330+
This only applies if ``system-mem-pushback-enabled`` is ``true``.
331+
Set ``system-mem-limit-gb`` to be greater than or equal to ``system-memory-gb`` but not
332+
higher than the available machine memory of the deployment.
333+
The default value of 60 GB is calculated based on an available machine memory of 64 GB.
329334

330335
``system-mem-shrink-gb``
331336
^^^^^^^^^^^^^^^^^^^^^^^^

presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp

+138-23
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
#include <sys/stat.h>
1919
#include "presto_cpp/main/PeriodicMemoryChecker.h"
2020
#include "presto_cpp/main/common/Configs.h"
21+
#include "presto_cpp/main/common/Utils.h"
2122

2223
namespace facebook::presto {
2324

25+
using int128_t = __int128_t;
26+
2427
class LinuxMemoryChecker : public PeriodicMemoryChecker {
2528
public:
2629
explicit LinuxMemoryChecker(const PeriodicMemoryChecker::Config& config)
@@ -29,13 +32,32 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
2932
// it's mounted.
3033
struct stat buffer;
3134
if ((stat(kCgroupV1Path, &buffer) == 0)) {
32-
statFile_ = kCgroupV1Path;
33-
} else if ((stat(kCgroupV2Path, &buffer) == 0)) {
34-
statFile_ = kCgroupV2Path;
35-
} else {
36-
statFile_ = "None";
35+
PRESTO_STARTUP_LOG(INFO) << "Using cgroup v1.";
36+
if (stat(kCgroupV1MemStatFile, &buffer) == 0) {
37+
memStatFile_ = kCgroupV1MemStatFile;
38+
}
39+
if ((stat(kCgroupV1MaxMemFile, &buffer) == 0)) {
40+
memMaxFile_ = kCgroupV1MaxMemFile;
41+
}
42+
}
43+
44+
// In cgroup v2.
45+
else {
46+
PRESTO_STARTUP_LOG(INFO) << "Using cgroup v2.";
47+
if (stat(kCgroupV2MemStatFile, &buffer) == 0) {
48+
memStatFile_ = kCgroupV2MemStatFile;
49+
}
50+
if ((stat(kCgroupV2MaxMemFile, &buffer) == 0)) {
51+
memMaxFile_ = kCgroupV2MaxMemFile;
52+
}
3753
}
38-
LOG(INFO) << fmt::format("Using memory stat file {}", statFile_);
54+
55+
PRESTO_STARTUP_LOG(INFO) << fmt::format(
56+
"Using memory stat file: {}",
57+
memStatFile_.empty() ? memInfoFile_ : memStatFile_);
58+
PRESTO_STARTUP_LOG(INFO) << fmt::format(
59+
"Using memory max file {}",
60+
memMaxFile_.empty() ? memInfoFile_ : memMaxFile_);
3961
}
4062

4163
~LinuxMemoryChecker() override {}
@@ -45,8 +67,92 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
4567
}
4668

4769
void setStatFile(std::string statFile) {
48-
statFile_ = statFile;
49-
LOG(INFO) << fmt::format("Changed to using memory stat file {}", statFile_);
70+
memStatFile_ = statFile;
71+
LOG(INFO) << fmt::format(
72+
"Changed to using memory stat file {}", memStatFile_);
73+
}
74+
75+
// Test-only hook: overrides the path of the cgroup memory limit file that
// getAvailableMemoryOfDeployment() reads.
76+
void setMemMaxFile(const std::string& memMaxFile) {
77+
memMaxFile_ = memMaxFile;
78+
}
79+
80+
// Test-only hook: overrides the path of the meminfo file read by this
// checker (the member defaults to "/proc/meminfo").
81+
void setMemInfoFile(const std::string& memInfoFile) {
82+
memInfoFile_ = memInfoFile;
83+
}
84+
85+
void start() override {
86+
// Check system-memory-gb < system-mem-limit-gb < available machine memory
87+
// of deployment.
88+
auto* systemConfig = SystemConfig::instance();
89+
int64_t systemMemoryInBytes =
90+
static_cast<int64_t>(systemConfig->systemMemoryGb()) << 30;
91+
PRESTO_STARTUP_LOG(INFO)
92+
<< fmt::format("System memory in bytes: {}", systemMemoryInBytes);
93+
94+
PRESTO_STARTUP_LOG(INFO) << fmt::format(
95+
"System memory limit in bytes: {}", config_.systemMemLimitBytes);
96+
97+
auto availableMemoryOfDeployment = getAvailableMemoryOfDeployment();
98+
PRESTO_STARTUP_LOG(INFO) << fmt::format(
99+
"Available machine memory of deployment in bytes: {}",
100+
availableMemoryOfDeployment);
101+
102+
VELOX_CHECK_LE(
103+
config_.systemMemLimitBytes,
104+
availableMemoryOfDeployment,
105+
"system memory limit = {} bytes is higher than the available machine memory of deployment = {} bytes.",
106+
config_.systemMemLimitBytes,
107+
availableMemoryOfDeployment);
108+
109+
if (config_.systemMemLimitBytes < systemMemoryInBytes) {
110+
LOG(WARNING) << "system-mem-limit-gb is smaller than system-memory-gb. "
111+
<< "Expected: system-mem-limit-gb >= system-memory-gb.";
112+
}
113+
114+
PeriodicMemoryChecker::start();
115+
}
116+
117+
/// Returns the memory, in bytes, available to this deployment: the smaller of
/// MemTotal from 'memInfoFile_' and the cgroup limit found in 'memMaxFile_'.
///
/// When 'memMaxFile_' is empty, or holds no usable numeric limit, MemTotal is
/// returned. When MemTotal could not be determined (it is still 0 after
/// scanning 'memInfoFile_'), the cgroup limit alone is returned instead of
/// being clamped down to 0. Returns 0 only if neither source yields a value.
int128_t getAvailableMemoryOfDeployment() {
  // meminfo reports MemTotal in kB. Only the first non-zero extraction is
  // kept; this presumes extractNumericConfigValueWithRegex yields 0 for lines
  // that do not match (TODO confirm against its definition).
  int128_t memTotalBytes = 0;
  folly::gen::byLine(memInfoFile_.c_str()) |
      [&](const folly::StringPiece& line) -> void {
        if (memTotalBytes != 0) {
          return;
        }
        // Widen before scaling so the kB -> bytes multiply is done in 128 bits.
        memTotalBytes =
            static_cast<int128_t>(
                extractNumericConfigValueWithRegex(line, kMemTotalRegex)) *
            1024;
      };

  if (memMaxFile_.empty()) {
    return memTotalBytes;
  }

  // For cgroup v1, memory.limit_in_bytes can default to a really big numeric
  // value in bytes like 9223372036854771712 to represent that no limit is
  // set. The exact default depends on the platform's PAGE_SIZE. Taking the
  // minimum with MemTotal makes such a value fall back to MemTotal.
  //
  // For cgroup v2, memory.max contains either a numeric value in bytes or the
  // string "max", which means no limit is set; MemTotal is used in that case.
  int128_t availableMemoryOfDeployment = memTotalBytes;
  folly::gen::byLine(memMaxFile_.c_str()) |
      [&](const folly::StringPiece& line) -> void {
        if (line == "max") {
          return;
        }
        // Ignore blank or malformed lines instead of throwing from the
        // conversion; they carry no limit information.
        const auto limit = folly::tryTo<int128_t>(line);
        if (!limit.hasValue()) {
          return;
        }
        // If MemTotal is unknown (0), the cgroup limit is the only signal.
        availableMemoryOfDeployment = availableMemoryOfDeployment == 0
            ? limit.value()
            : std::min(availableMemoryOfDeployment, limit.value());
      };

  // Unit is in bytes.
  return availableMemoryOfDeployment;
}
51157

52158
protected:
@@ -80,8 +186,8 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
80186
size_t inactiveAnon = 0;
81187
size_t activeAnon = 0;
82188

83-
if (statFile_ != "None") {
84-
folly::gen::byLine(statFile_.c_str()) |
189+
if (!memStatFile_.empty()) {
190+
folly::gen::byLine(memStatFile_.c_str()) |
85191
[&](const folly::StringPiece& line) -> void {
86192
if (inactiveAnon == 0) {
87193
inactiveAnon =
@@ -103,7 +209,7 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
103209
}
104210

105211
// Last resort use host machine info.
106-
folly::gen::byLine("/proc/meminfo") |
212+
folly::gen::byLine(memInfoFile_.c_str()) |
107213
[&](const folly::StringPiece& line) -> void {
108214
if (memAvailable == 0) {
109215
memAvailable =
@@ -143,10 +249,16 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
143249
const boost::regex kInactiveAnonRegex{R"!(inactive_anon\s*(\d+)\s*)!"};
144250
const boost::regex kActiveAnonRegex{R"!(active_anon\s*(\d+)\s*)!"};
145251
const boost::regex kMemAvailableRegex{R"!(MemAvailable:\s*(\d+)\s*kB)!"};
146-
const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s*kB)!"};
147-
const char* kCgroupV1Path = "/sys/fs/cgroup/memory/memory.stat";
148-
const char* kCgroupV2Path = "/sys/fs/cgroup/memory.stat";
149-
std::string statFile_;
252+
const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s+kB)!"};
253+
const char* kCgroupV1Path = "/sys/fs/cgroup/memory";
254+
const char* kCgroupV1MemStatFile = "/sys/fs/cgroup/memory/memory.stat";
255+
const char* kCgroupV2MemStatFile = "/sys/fs/cgroup/memory.stat";
256+
const char* kCgroupV1MaxMemFile =
257+
"/sys/fs/cgroup/memory/memory.limit_in_bytes";
258+
const char* kCgroupV2MaxMemFile = "/sys/fs/cgroup/memory.max";
259+
std::string memInfoFile_ = "/proc/meminfo";
260+
std::string memStatFile_;
261+
std::string memMaxFile_;
150262

151263
size_t extractNumericConfigValueWithRegex(
152264
const folly::StringPiece& line,
@@ -164,15 +276,18 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker {
164276

165277
folly::Singleton<facebook::presto::PeriodicMemoryChecker> checker(
166278
[]() -> facebook::presto::PeriodicMemoryChecker* {
167-
PeriodicMemoryChecker::Config config;
168279
auto* systemConfig = SystemConfig::instance();
169-
config.systemMemPushbackEnabled =
170-
systemConfig->systemMemPushbackEnabled();
171-
config.systemMemLimitBytes =
172-
static_cast<uint64_t>(systemConfig->systemMemLimitGb()) << 30;
173-
config.systemMemShrinkBytes =
174-
static_cast<uint64_t>(systemConfig->systemMemShrinkGb()) << 30;
175-
return std::make_unique<LinuxMemoryChecker>(config).release();
280+
if (systemConfig->systemMemPushbackEnabled()) {
281+
PeriodicMemoryChecker::Config config;
282+
config.systemMemPushbackEnabled =
283+
systemConfig->systemMemPushbackEnabled();
284+
config.systemMemLimitBytes =
285+
static_cast<uint64_t>(systemConfig->systemMemLimitGb()) << 30;
286+
config.systemMemShrinkBytes =
287+
static_cast<uint64_t>(systemConfig->systemMemShrinkGb()) << 30;
288+
return std::make_unique<LinuxMemoryChecker>(config).release();
289+
}
290+
return nullptr;
176291
});
177292

178293
} // namespace facebook::presto

presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class PeriodicMemoryChecker {
7171

7272
/// Starts the 'PeriodicMemoryChecker'. A background scheduler will be
7373
/// launched to perform the checks. This should only be called once.
74-
void start();
74+
virtual void start();
7575

7676
/// Stops the 'PeriodicMemoryChecker'.
7777
void stop();

presto-native-execution/presto_cpp/main/common/Configs.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ SystemConfig::SystemConfig() {
168168
STR_PROP(kSpillerDirectoryCreateConfig, ""),
169169
NONE_PROP(kSpillerSpillPath),
170170
NUM_PROP(kShutdownOnsetSec, 10),
171-
NUM_PROP(kSystemMemoryGb, 40),
171+
NUM_PROP(kSystemMemoryGb, 57),
172172
BOOL_PROP(kSystemMemPushbackEnabled, false),
173-
NUM_PROP(kSystemMemLimitGb, 55),
173+
NUM_PROP(kSystemMemLimitGb, 60),
174174
NUM_PROP(kSystemMemShrinkGb, 8),
175175
BOOL_PROP(kMallocMemHeapDumpEnabled, false),
176176
BOOL_PROP(kSystemMemPushbackAbortEnabled, false),

0 commit comments

Comments
 (0)