From 424bac56d5f1813bddfbd89cfaa1fb61f3f72ac0 Mon Sep 17 00:00:00 2001 From: subhamkrai Date: Wed, 7 Sep 2022 10:22:42 +0530 Subject: [PATCH 1/2] core: check node count of 3 not pod count When checking whether mon/osd pods are running on three different nodes, we should count unique nodes, not unique pods; see `{print $7}` instead of `{print $2}` Closes: #51 Signed-off-by: subhamkrai --- kubectl-rook-ceph.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubectl-rook-ceph.sh b/kubectl-rook-ceph.sh index 2d805e4e..e8b69dc9 100755 --- a/kubectl-rook-ceph.sh +++ b/kubectl-rook-ceph.sh @@ -309,7 +309,7 @@ function run_cluster_health() { function check_mon_pods_nodes() { info_msg " Checking if at least three mon pods are running on different nodes" - mon_unique_node_count=$(KUBECTL_NS_CLUSTER get pod | grep mon | grep -v canary | awk '{print $2}' | sort | uniq | wc -l) + mon_unique_node_count=$(KUBECTL_NS_CLUSTER get pod -o wide | grep mon | grep -v canary | awk '{print $7}' | sort | uniq | wc -l) if [ "$mon_unique_node_count" -lt 3 ]; then warn_msg " At least three mon pods should running on different nodes" fi @@ -330,7 +330,7 @@ function check_mon_quorum() { function check_osd_pod_count_and_nodes() { info_msg " Checking if at least three osd pods are running on different nodes" - osd_unique_node_count=$(KUBECTL_NS_CLUSTER get pod | grep osd | grep -v prepare | awk '{print $2}' | sort | uniq | wc -l) + osd_unique_node_count=$(KUBECTL_NS_CLUSTER get pod -o wide | grep osd | grep -v prepare | awk '{print $7}' | sort | uniq | wc -l) if [ "$osd_unique_node_count" -lt 3 ]; then warn_msg " At least three osd pods should running on different nodes" fi From a20f76f9ea132d36198351bef9898dbf4aab60c5 Mon Sep 17 00:00:00 2001 From: subhamkrai Date: Wed, 7 Sep 2022 10:27:04 +0530 Subject: [PATCH 2/2] clean: use method `run_ceph_command` to run ceph commands When running ceph commands separately, we can forget to add the --connect-timeout arg, so it is better to use the method 
`run_ceph_command` to run ceph commands with the `--connect-timeout` arg. This helps the command exit when it is stuck. Signed-off-by: subhamkrai --- kubectl-rook-ceph.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubectl-rook-ceph.sh b/kubectl-rook-ceph.sh index e8b69dc9..ec5eef51 100755 --- a/kubectl-rook-ceph.sh +++ b/kubectl-rook-ceph.sh @@ -318,7 +318,7 @@ function check_mon_pods_nodes() { function check_mon_quorum() { info_msg " Checking mon quorum and ceph health details" - ceph_health_details=$(KUBECTL_NS_OPERATOR exec deploy/rook-ceph-operator -- ceph health detail --conf="$CEPH_CONF_PATH") + ceph_health_details=$(run_ceph_command health detail) if [[ "$ceph_health_details" = "HEALTH_OK" ]]; then echo -e "$ceph_health_details" elif [[ "$ceph_health_details" =~ "HEALTH_WARN" ]]; then @@ -354,7 +354,7 @@ function check_all_pods_status() { function check_pg_are_active_clean() { info_msg " checking placement group status" - pg_state=$(KUBECTL_NS_OPERATOR exec deploy/rook-ceph-operator -- ceph pg stat --conf="$CEPH_CONF_PATH") + pg_state=$(run_ceph_command pg stat) pg_state_code=$(echo "${pg_state}" | awk '{print $4}') if [[ "$pg_state_code" = *"active+clean;"* ]]; then info_msg " $pg_state"