Skip to content

Commit

Permalink
cfgen: implement devserver template
Browse files Browse the repository at this point in the history
Summary: This diff implements the template for devservers.

Differential Revision: D57588751

fbshipit-source-id: 6f1956984171f79242737fa9c8d312f3f6b88fc0
  • Loading branch information
Chengxiong Ruan authored and facebook-github-bot committed May 22, 2024
1 parent 3a28688 commit faf5964
Show file tree
Hide file tree
Showing 7 changed files with 434 additions and 15 deletions.
172 changes: 161 additions & 11 deletions src/oomd/cfgen/src/cfgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@ mod types;
use libcfgen::prelude::*;
use types::*;

/// Version string written to the `"version"` field of the generated JSON configs.
const CONFIG_VERSION: &str = "1.0.0";

/// Builds the oomd JSON configuration for `node`.
///
/// The node's attributes are resolved once through `get_attributes`; the
/// resulting `host_type` then selects the template that renders the config:
///
/// * `HostType::DevServer` -> `devserver_json_config`
/// * `HostType::OnDemand`  -> `od_json_config`
/// * anything else         -> `default_json_config`
fn oomd_json(node: &Node) -> json::JsonValue {
    let attrs = get_attributes(node);
    match attrs.host_type {
        HostType::DevServer => devserver_json_config(node, &attrs),
        HostType::OnDemand => od_json_config(&attrs),
        // TODO(chengxiong) add other templates
        _ => default_json_config(&attrs),
    }
}

Expand All @@ -29,24 +35,45 @@ fn default_json_config(attrs: &ConfigParams) -> json::JsonValue {
rulesets.push(rule_protection_against_wdb_io_thrashing(attrs));
}
if !attrs.fbtax2.disable_swap_protection {
rulesets.push(rule_protection_against_low_swap(attrs));
rulesets.push(rule_fbtax2_protection_against_low_swap(attrs));
}
if attrs.senpai.target.is_some() {
rulesets.push(rule_senpai_ruleset(attrs));
}
rulesets.append(&mut attrs.fbtax2.oomd_extra_rulesets.clone());
rulesets.push(rule_senpai_drop_in_ruleset(attrs));
rulesets.push(rule_tw_container_drop_in_ruleset());
rulesets.push(rule_tw_container_drop_in_ruleset(attrs));

// TODO(chengxiong): add more rule sections
json::object! {
"rulesets": rulesets,
"version": "1.0.0",
"version": CONFIG_VERSION,
}
}

/// Renders the oomd config template used for devservers.
///
/// Rulesets are emitted in this order: system overview, user session
/// protection, low-swap protection (omitted when
/// `attrs.oomd2.disable_swap_protection` is set), the senpai drop-in, the
/// restart-on-memory-threshold rules, and finally the tw_container drop-in.
fn devserver_json_config(node: &Node, attrs: &ConfigParams) -> json::JsonValue {
    // The first two rules are unconditional, so seed the list with them.
    let mut rules: json::Array = vec![
        rule_system_overview(attrs),
        rule_user_session_protection(node, attrs),
    ];

    let swap_protection_enabled = !attrs.oomd2.disable_swap_protection;
    if swap_protection_enabled {
        rules.push(rule_oomd2_protection_against_low_swap(attrs));
    }

    rules.push(rule_senpai_drop_in_ruleset(attrs));
    rules.extend(rules_restart_cgroup_on_mem_threshold(attrs));
    rules.push(rule_tw_container_drop_in_ruleset(attrs));

    json::object! {
        "rulesets": rules,
        "version": CONFIG_VERSION,
    }
}

/// Placeholder for the on-demand host template.
///
/// Currently ignores `_attrs` and returns an empty JSON object.
fn od_json_config(_attrs: &ConfigParams) -> json::JsonValue {
    // TODO(chengxiong): implement this.
    json::JsonValue::new_object()
}

fn rule_system_overview(attrs: &ConfigParams) -> json::JsonValue {
let cgroup = if [HostType::ShellServer, HostType::OnDemand].contains(&attrs.host_type) {
let cgroup = if [HostType::DevServer, HostType::OnDemand].contains(&attrs.host_type) {
attrs.oomd2.oomd_target.as_str()
} else {
"workload.slice"
Expand Down Expand Up @@ -288,7 +315,7 @@ fn rule_protection_against_wdb_io_thrashing(attrs: &ConfigParams) -> json::JsonV
}
}

fn rule_protection_against_low_swap(attrs: &ConfigParams) -> json::JsonValue {
fn rule_fbtax2_protection_against_low_swap(attrs: &ConfigParams) -> json::JsonValue {
let mut detector = json::array! {
format!("free swap goes below {} percent", attrs.fbtax2.low_swap_threshold)
};
Expand Down Expand Up @@ -322,6 +349,33 @@ fn rule_protection_against_low_swap(attrs: &ConfigParams) -> json::JsonValue {
}
}

/// Builds the "protection against low swap" ruleset from the oomd2 attributes.
///
/// The single detector group uses the `swap_free` plugin with
/// `swap_protection_detect_threshold` as its `threshold_pct`. The single
/// action is `kill_by_swap_usage`, applied recursively to `kill_target` with
/// `swap_protection_kill_threshold` as its threshold.
fn rule_oomd2_protection_against_low_swap(attrs: &ConfigParams) -> json::JsonValue {
    let oomd2 = &attrs.oomd2;
    let detect_pct = oomd2.swap_protection_detect_threshold.as_str();

    // One detector group: a human-readable label followed by the plugin spec.
    let low_swap_detector = json::array! [
        format!("free swap goes below {}%", detect_pct),
        {
            "name": "swap_free",
            "args": {
                "threshold_pct": detect_pct,
            }
        }
    ];

    let kill_action = json::object! {
        "name": "kill_by_swap_usage",
        "args": {
            "cgroup": oomd2.kill_target.as_str(),
            "threshold": oomd2.swap_protection_kill_threshold.as_str(),
            "recursive": true,
        }
    };

    json::object! {
        "name": "protection against low swap",
        "detectors": [low_swap_detector],
        "actions": [kill_action]
    }
}

fn rule_senpai_ruleset(attrs: &ConfigParams) -> json::JsonValue {
let mut action_args = json::object! {
"io_pressure_pct": attrs.senpai.io_pressure_pct.as_str(),
Expand Down Expand Up @@ -399,10 +453,9 @@ fn rule_senpai_drop_in_ruleset(attrs: &ConfigParams) -> json::JsonValue {
}
}

fn rule_tw_container_drop_in_ruleset() -> json::JsonValue {
json::object! {
fn rule_tw_container_drop_in_ruleset(attrs: &ConfigParams) -> json::JsonValue {
let mut rule = json::object! {
"name": "tw_container drop-in ruleset",
"prekill_hook_timeout": "45",
"drop-in": {
"detectors": true,
"actions": true,
Expand All @@ -423,6 +476,93 @@ fn rule_tw_container_drop_in_ruleset() -> json::JsonValue {
"args": {}
}
],
};

if attrs.host_type != HostType::DevServer {
rule["prekill_hook_timeout"] = json::JsonValue::String(String::from("45"));
}

rule
}

/// Builds the "user session protection" ruleset for devservers.
///
/// Two detector groups are produced, one for the user-facing slices
/// (`user.slice,workload.slice,www.slice`) and one for `system.slice`. Each
/// group holds a `pressure_above` memory check with a 300s duration, an
/// `nr_dying_descendants` check when the node is in the `devbig` dynamic SMC
/// tier, and a trailing `memory_reclaim` check. The single action is
/// `kill_by_memory_size_or_growth`, applied recursively to
/// `attrs.oomd2.kill_target`.
fn rule_user_session_protection(node: &Node, attrs: &ConfigParams) -> json::JsonValue {
    /// Assembles one detector group: label, `pressure_above`, the optional
    /// `nr_dying_descendants` check, then `memory_reclaim`.
    fn pressure_detector(
        description: String,
        cgroup: &str,
        threshold: &str,
        check_dying_descendants: bool,
    ) -> json::JsonValue {
        let mut detector = json::array! [
            description,
            {
                "name": "pressure_above",
                "args": {
                    "cgroup": cgroup,
                    "resource": "memory",
                    "threshold": threshold,
                    "duration": "300",
                }
            },
        ];

        // The result of `push` is deliberately discarded, as in the inline form.
        if check_dying_descendants {
            let _ = detector.push(json::object! {
                "name": "nr_dying_descendants",
                "args": {
                    "cgroup": "/",
                    "count": "30000",
                    "lte": "true"
                }
            });
        }

        let _ = detector.push(json::object! {
            "name": "memory_reclaim",
            "args": {
                "cgroup": cgroup,
                "duration": "30"
            }
        });

        detector
    }

    let devserver = &attrs.devserver;
    let is_devbig = node.in_dynamic_smc_tier("devbig");

    let user_group = pressure_detector(
        format!("user pressure above {} for 300s", devserver.user_mempress),
        "user.slice,workload.slice,www.slice",
        devserver.user_mempress.as_str(),
        is_devbig,
    );
    let system_group = pressure_detector(
        format!("system pressure above {} for 300s", devserver.system_mempress),
        "system.slice",
        devserver.system_mempress.as_str(),
        is_devbig,
    );

    json::object! {
        "name": "user session protection",
        "detectors": [
            user_group,
            system_group,
        ],
        "actions": [
            {
                "name": "kill_by_memory_size_or_growth",
                "args": {
                    "cgroup": attrs.oomd2.kill_target.as_str(),
                    "recursive": true,
                }
            }
        ]
    }
}

Expand All @@ -445,6 +585,7 @@ fn get_attributes(node: &Node) -> ConfigParams {
oomd2: Oomd2Attributes {
blacklisted_jobs: Vec::new(),
disable_swap_protection: false,
kill_target: String::from("user.slice/,system.slice/,workload.slice/,www.slice/"),
plugins: convert_args!(btreemap!(
"pressure_above" => "pressure_above",
"pressure_rising_beyond" => "pressure_rising_beyond",
Expand All @@ -467,10 +608,14 @@ fn get_attributes(node: &Node) -> ConfigParams {
oomd_restart_threshold: oomd2_oomd_restart_threshold(),
oomd_reclaim_duation: String::from("10"),
oomd_post_action_delay: String::from("15"),
swap_protection_detect_threshold: String::from("5"),
swap_protection_kill_threshold: String::from("5"),
},
devserver: DevServerAttributes {
user_mempress: String::from("60"),
system_mempress: String::from("80"),
// TODO(chengxiong): add overriding logic for user_mempress and system_mempress.
// Like this: https://fburl.com/code/rjcg895c
user_mempress: String::from("40"),
system_mempress: String::from("60"),
},
senpai: SenpaiAttributes {
silence_logs: String::from("engine"),
Expand Down Expand Up @@ -567,6 +712,10 @@ fn get_host_type(node: &Node) -> HostType {
if node.hostname_prefix() == "twshared".into() {
return HostType::TwShared;
}

if node.is_devserver() {
return HostType::DevServer;
}
HostType::Default
}

Expand All @@ -586,6 +735,7 @@ mod tests {

#[rstest]
#[case::shard99("twshared2434.02.cco1", HostType::TwShared)]
#[case::shard99("devvm3170.cln0", HostType::DevServer)]
fn test_get_host_type(#[case] hostname: &str, #[case] expected: HostType) {
let node = FakeNodeBuilder::new().hostname(hostname).build();
assert_eq!(get_host_type(&node), expected);
Expand Down
5 changes: 4 additions & 1 deletion src/oomd/cfgen/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use libcfgen::prelude::json::JsonValue;
/// Category of host a config is generated for.
///
/// The value is compared against and matched on when choosing which config
/// template and rule variants to emit. It is a field-less enum, so it also
/// derives `Clone` and `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostType {
    /// Fallback used when no more specific host type applies.
    Default,
    /// NOTE(review): appears to be superseded by `DevServer` in this change;
    /// confirm whether this variant is still needed.
    ShellServer,
    /// Devserver hosts.
    DevServer,
    /// On-demand hosts.
    OnDemand,
    /// Hosts whose hostname prefix is `twshared`.
    TwShared,
}
Expand Down Expand Up @@ -36,6 +36,7 @@ pub struct FBTax2Attributes {
pub struct Oomd2Attributes {
pub blacklisted_jobs: Vec<String>,
pub disable_swap_protection: bool,
pub kill_target: String,
pub plugins: BTreeMap<String, String>,
pub oomd_dry: bool,
pub oomd_disable_on_drop_in: bool,
Expand All @@ -49,6 +50,8 @@ pub struct Oomd2Attributes {
pub oomd_restart_threshold: BTreeMap<String, OomdRestartThreshold>,
pub oomd_reclaim_duation: String,
pub oomd_post_action_delay: String,
pub swap_protection_detect_threshold: String,
pub swap_protection_kill_threshold: String,
}

pub struct DevServerAttributes {
Expand Down
85 changes: 85 additions & 0 deletions src/oomd/cfgen/test/cfgen_test_inputs/devvm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
@generated SignedSource<<a0b9ff8a6a1b096881c6f45b66fb9c8e>>
@codegen-command arc cfgen update-inputs fb-oomd
{
"fqdn": "devvm3170.cln0.facebook.com",
"region": "ireland",
"clusterType": "DEV",
"modelId": 4077,
"kernelRelease": "5.19.0-0_fbk11_hardened_11538_g61e82533119f",
"serverType": "TYPE_VIRTUAL_MACHINE",
"experiments": [],
"cpuArchitecture": "",
"metalosRootfs": false,
"provisioningConfig": {
"ethtoolByInterface": {
"eth0": {
"maxChannelsCombined": 8
}
},
"cpuCoreCount": 72,
"parentModelId": 333282,
"recoveryEnvironment": false,
"deviceType": "SERVER",
"datacenter": "cln0",
"cluster": "01",
"memTotal": 240305004544,
"osVersion": {
"distribution_name": "CentOS Stream release",
"version": 9,
"is_in_ramdisk": false,
"is_metalos": false
},
"pciByAddress": {
"0000:00:02.0": {
"vendor_id": 6900,
"device_id": 4096,
"class_code": 131072
}
},
"static_smc_tiers": [],
"machine": "x86_64"
},
"bootConfig": {
"ethtoolByInterface": {
"eth0": {
"driver": "virtio_net",
"driver_version": "1.0.0",
"firmware_version": "",
"bus_info": "0000:00:02.0"
}
}
},
"runtimeConfig": {
"hasHighPrivCert": true,
"regionRoutableCluster": "cln1.02",
"block_devices": {
"block_devices": {
"vda": {
"vendor": "0x1af4",
"size_bytes": 1717986918400,
"is_rotational": true,
"physical_block_size": 512,
"logical_block_size": 512,
"is_root": true
}
}
},
"dynamic_smc_tiers": [],
"cluster_state": "CLUSTER_IN_USE",
"installed_platforms": [
"platform009",
"platform010",
"platform010-aarch64",
"platform010-compat"
],
"device_nics_enum": [
"ETH0",
"ETH1",
"SVC0"
]
},
"reservationConfig": {
"active_machine_materialization_id": "",
"current_reservation_host_profile_id": "NEWLY_PROVISIONED_PROFILE"
}
}
1 change: 1 addition & 0 deletions src/oomd/cfgen/test/cfgen_test_manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
# You're only going to need it in rare cases, e.g. when new samples are added.
# `arc cfgen ui fb-oomd --remcmd-use-globalcert`
library_samples:
- devvm
- twshared_vll_shard00
# Add more samples from https://fburl.com/code/vjwmkoa1 if needed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@generated SignedSource<<31b3f2f747768088bd5523d8e690bfac>>
@codegen-command arc cfgen update-outputs fb-oomd
[Service]
[Unit]
Loading

0 comments on commit faf5964

Please sign in to comment.