From b2f1c295fff5848fa286403d955fe2bce4607ef5 Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky Date: Fri, 9 Sep 2022 14:34:10 -0700 Subject: [PATCH 1/7] fix: Configure containerd runc plugin to use systemd cgroup driver konvoy-image-builder installs kubelet as a systemd service, and kubelet therefore uses the systemd cgroup driver. The container runtime must use the same cgroup driver. > Note: In v1.22, if the user is not setting the cgroupDriver field > under KubeletConfiguration, kubeadm will default it to systemd. > -- https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/ --- ansible/roles/config/templates/config.toml.tmpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index 5be9d8e1e..1eaacfa90 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -64,7 +64,6 @@ imports = ["/etc/containerd/conf.d/*.toml"] enable_selinux = false sandbox_image = "{{ pause_image }}" stats_collect_period = 10 - systemd_cgroup = false enable_tls_streaming = false max_container_log_line_size = 16384 disable_cgroup = false @@ -96,6 +95,8 @@ imports = ["/etc/containerd/conf.d/*.toml"] runtime_engine = "" runtime_root = "" privileged_without_host_devices = false + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + systemd_cgroup = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime] runtime_type = "io.containerd.runc.v1" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options] From eb6788cf1d97d88888e99513bb7b0296cc022fcd Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky Date: Mon, 12 Sep 2022 13:02:00 -0700 Subject: [PATCH 2/7] Use runtime type 'io.containerd.runc.v2' The 'io.containerd.runc.v1' runtime type was deprecated in containerd 1.4, and does not support the systemd cgroup 
driver. --- ansible/roles/config/templates/config.toml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index 1eaacfa90..c5fc06bf1 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -91,7 +91,7 @@ imports = ["/etc/containerd/conf.d/*.toml"] privileged_without_host_devices = false [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] - runtime_type = "io.containerd.runc.v1" + runtime_type = "io.containerd.runc.v2" runtime_engine = "" runtime_root = "" privileged_without_host_devices = false From 42810c6471f2ddab0bed238d845a76abc931c498 Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky Date: Mon, 12 Sep 2022 13:02:55 -0700 Subject: [PATCH 3/7] Use systemd cgroup driver with the nvidia runtime The option should work, because the nvidia runtime is just a repackaged runc runtime. --- ansible/roles/config/templates/config.toml.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index c5fc06bf1..25e4f155e 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -101,6 +101,7 @@ imports = ["/etc/containerd/conf.d/*.toml"] runtime_type = "io.containerd.runc.v1" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options] BinaryName = "{{ sysusr_prefix }}/bin/nvidia-container-runtime" + systemd_cgroup = true [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" From d9e4d09cfb5a83b09c3eea8bb1f0251151193fda Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky Date: Tue, 13 Sep 2022 10:25:25 -0700 Subject: [PATCH 4/7] Use SystemdCgroup instead of systemd_cgroup Using snake case has no effect. 
Confirmed independently by both @faiq and @dlipovetsky. --- ansible/roles/config/templates/config.toml.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index 25e4f155e..82873eac8 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -96,12 +96,12 @@ imports = ["/etc/containerd/conf.d/*.toml"] runtime_root = "" privileged_without_host_devices = false [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - systemd_cgroup = true + SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime] runtime_type = "io.containerd.runc.v1" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options] BinaryName = "{{ sysusr_prefix }}/bin/nvidia-container-runtime" - systemd_cgroup = true + SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" From d9b1737cd84447df44a70d760e34e56951a6184c Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky Date: Tue, 13 Sep 2022 10:26:43 -0700 Subject: [PATCH 5/7] Use "io.containerd.runc.v2" for nvidia container runtime Only "io.containerd.runc.v2" supports the SystemdCgroup option we need. I have seen multiple examples of this configuration in the wild, and since nvidia-container-runtime is a wrapper around runc, there should be no issues with using v2 of the containerd wrapper to runc. 
--- ansible/roles/config/templates/config.toml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index 82873eac8..cc343c3cb 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -98,7 +98,7 @@ imports = ["/etc/containerd/conf.d/*.toml"] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime] - runtime_type = "io.containerd.runc.v1" + runtime_type = "io.containerd.runc.v2" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options] BinaryName = "{{ sysusr_prefix }}/bin/nvidia-container-runtime" SystemdCgroup = true From 5c9997f1480a396d9c243076f769a7d2c65ccd60 Mon Sep 17 00:00:00 2001 From: faiq Date: Wed, 14 Sep 2022 10:21:03 -0700 Subject: [PATCH 6/7] fix: restore runc SystemdCgroup option lost in a poor merge --- ansible/roles/config/templates/config.toml.tmpl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/config/templates/config.toml.tmpl b/ansible/roles/config/templates/config.toml.tmpl index 36cf63675..a2fcd80df 100644 --- a/ansible/roles/config/templates/config.toml.tmpl +++ b/ansible/roles/config/templates/config.toml.tmpl @@ -91,6 +91,8 @@ imports = ["/etc/containerd/conf.d/*.toml"] runtime_engine = "" runtime_root = "" privileged_without_host_devices = false + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" From c8906212c9ce7cffa43c252526fec96c30e06382 Mon Sep 17 00:00:00 2001 From: faiq Date: Thu, 15 Sep 2022 08:12:23 -0700 Subject: [PATCH 7/7] fix: allow cgroups v2 by default on Flatcar --- ansible/roles/containerd/tasks/install-flatcar.yaml | 13 ------------- 1 file changed, 13 deletions(-) diff --git
a/ansible/roles/containerd/tasks/install-flatcar.yaml b/ansible/roles/containerd/tasks/install-flatcar.yaml index 23fc63214..28350e66c 100644 --- a/ansible/roles/containerd/tasks/install-flatcar.yaml +++ b/ansible/roles/containerd/tasks/install-flatcar.yaml @@ -28,16 +28,3 @@ copy: content: "export PATH=$PATH:/run/torcx/unpack/docker/bin/" dest: "/etc/profile.d/my_path.sh" - -- name: ensure cgroups v2 are disabled - command: grep -q systemd.unified_cgroup_hierarchy=0 /usr/share/oem/grub.cfg - changed_when: no - failed_when: false - register: cgroupsv2_check - -- name: cgroups setup - when: - - cgroupsv2_check.rc != 0 - block: - - name: disable cgroups v2 - command: sed -E -i 's/^(set linux_append=.*)"$/\1 systemd.unified_cgroup_hierarchy=0 systemd.legacy_systemd_cgroup_controller"/g' /usr/share/oem/grub.cfg