From afb654a8e7fa38d1f99b43cabdd52a94adf21a44 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 10:57:02 +0100 Subject: [PATCH 001/147] Add project skeleton --- rethinkdb/CHANGELOG.md | 2 + rethinkdb/MANIFEST.in | 10 ++++ rethinkdb/README.md | 50 +++++++++++++++++ rethinkdb/assets/configuration/spec.yaml | 10 ++++ rethinkdb/assets/service_checks.json | 1 + rethinkdb/datadog_checks/__init__.py | 4 ++ .../datadog_checks/rethinkdb/__about__.py | 4 ++ .../datadog_checks/rethinkdb/__init__.py | 7 +++ .../rethinkdb/data/conf.yaml.example | 30 +++++++++++ .../datadog_checks/rethinkdb/rethinkdb.py | 9 ++++ rethinkdb/manifest.json | 33 ++++++++++++ rethinkdb/metadata.csv | 1 + rethinkdb/requirements-dev.txt | 1 + rethinkdb/requirements.in | 0 rethinkdb/setup.py | 54 +++++++++++++++++++ rethinkdb/tests/__init__.py | 3 ++ rethinkdb/tests/conftest.py | 14 +++++ rethinkdb/tests/test_rethinkdb.py | 11 ++++ rethinkdb/tox.ini | 22 ++++++++ 19 files changed, 266 insertions(+) create mode 100644 rethinkdb/CHANGELOG.md create mode 100644 rethinkdb/MANIFEST.in create mode 100644 rethinkdb/README.md create mode 100644 rethinkdb/assets/configuration/spec.yaml create mode 100644 rethinkdb/assets/service_checks.json create mode 100644 rethinkdb/datadog_checks/__init__.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/__about__.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/__init__.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example create mode 100644 rethinkdb/datadog_checks/rethinkdb/rethinkdb.py create mode 100644 rethinkdb/manifest.json create mode 100644 rethinkdb/metadata.csv create mode 100644 rethinkdb/requirements-dev.txt create mode 100644 rethinkdb/requirements.in create mode 100644 rethinkdb/setup.py create mode 100644 rethinkdb/tests/__init__.py create mode 100644 rethinkdb/tests/conftest.py create mode 100644 rethinkdb/tests/test_rethinkdb.py create mode 100644 rethinkdb/tox.ini diff --git 
a/rethinkdb/CHANGELOG.md b/rethinkdb/CHANGELOG.md new file mode 100644 index 0000000000000..77e25dcd26c5c --- /dev/null +++ b/rethinkdb/CHANGELOG.md @@ -0,0 +1,2 @@ +# CHANGELOG - RethinkDB + diff --git a/rethinkdb/MANIFEST.in b/rethinkdb/MANIFEST.in new file mode 100644 index 0000000000000..6fa1c2388a4eb --- /dev/null +++ b/rethinkdb/MANIFEST.in @@ -0,0 +1,10 @@ +graft datadog_checks +graft tests + +include MANIFEST.in +include README.md +include requirements.in +include requirements-dev.txt +include manifest.json + +global-exclude *.py[cod] __pycache__ diff --git a/rethinkdb/README.md b/rethinkdb/README.md new file mode 100644 index 0000000000000..1b27f43411b8d --- /dev/null +++ b/rethinkdb/README.md @@ -0,0 +1,50 @@ +# Agent Check: RethinkDB + +## Overview + +This check monitors [RethinkDB][1] through the Datadog Agent. + +## Setup + +Follow the instructions below to install and configure this check for an Agent running on a host. For containerized environments, see the [Autodiscovery Integration Templates][2] for guidance on applying these instructions. + +### Installation + +The RethinkDB check is included in the [Datadog Agent][2] package. +No additional installation is needed on your server. + +### Configuration + +1. Edit the `rethinkdb.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory to start collecting your rethinkdb performance data. See the [sample rethinkdb.d/conf.yaml][3] for all available configuration options. + +2. [Restart the Agent][4]. + +### Validation + +[Run the Agent's status subcommand][5] and look for `rethinkdb` under the Checks section. + +## Data Collected + +### Metrics + +See [metadata.csv][6] for a list of metrics provided by this check. + +### Service Checks + +RethinkDB does not include any service checks. + +### Events + +RethinkDB does not include any events. + +## Troubleshooting + +Need help? Contact [Datadog support][7]. 
+ +[1]: **LINK_TO_INTEGRATION_SITE** +[2]: https://docs.datadoghq.com/agent/autodiscovery/integrations +[3]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +[4]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent +[5]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information +[6]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/metadata.csv +[7]: https://docs.datadoghq.com/help diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml new file mode 100644 index 0000000000000..d06d5e70c68c4 --- /dev/null +++ b/rethinkdb/assets/configuration/spec.yaml @@ -0,0 +1,10 @@ +name: RethinkDB +files: +- name: rethinkdb.yaml + options: + - template: init_config + options: [] + - template: instances + options: + - template: instances/tags + - template: instances/global diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json new file mode 100644 index 0000000000000..fe51488c7066f --- /dev/null +++ b/rethinkdb/assets/service_checks.json @@ -0,0 +1 @@ +[] diff --git a/rethinkdb/datadog_checks/__init__.py b/rethinkdb/datadog_checks/__init__.py new file mode 100644 index 0000000000000..cdddf032324d5 --- /dev/null +++ b/rethinkdb/datadog_checks/__init__.py @@ -0,0 +1,4 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +__path__ = __import__('pkgutil').extend_path(__path__, __name__) # type: ignore diff --git a/rethinkdb/datadog_checks/rethinkdb/__about__.py b/rethinkdb/datadog_checks/rethinkdb/__about__.py new file mode 100644 index 0000000000000..e675c84da2568 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/__about__.py @@ -0,0 +1,4 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +__version__ = '0.0.1' diff --git a/rethinkdb/datadog_checks/rethinkdb/__init__.py b/rethinkdb/datadog_checks/rethinkdb/__init__.py new file mode 100644 index 0000000000000..38642a2b1799b --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/__init__.py @@ -0,0 +1,7 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from .__about__ import __version__ +from .rethinkdb import RethinkdbCheck + +__all__ = ['__version__', 'RethinkdbCheck'] diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example new file mode 100644 index 0000000000000..745b4afccafd9 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -0,0 +1,30 @@ +## All options defined here are available to all instances. +# +init_config: + +## Every instance is scheduled independent of the others. +# +instances: + + - + ## @param tags - list of strings - optional + ## A list of tags to attach to every metric and service check emitted by this instance. + ## + ## Learn more about tagging at https://docs.datadoghq.com/tagging + # + # tags: + # - : + # - : + + ## @param min_collection_interval - number - optional - default: 15 + ## This changes the collection interval of the check. For more information, see: + ## https://docs.datadoghq.com/developers/write_agent_check/#collection-interval + # + # min_collection_interval: 15 + + ## @param empty_default_hostname - boolean - optional - default: false + ## This forces the check to send metrics with no hostname. + ## + ## This is useful for cluster-level checks. 
+ # + # empty_default_hostname: false diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py new file mode 100644 index 0000000000000..85756a4aa410b --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -0,0 +1,9 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from datadog_checks.base import AgentCheck + + +class RethinkdbCheck(AgentCheck): + def check(self, instance): + pass diff --git a/rethinkdb/manifest.json b/rethinkdb/manifest.json new file mode 100644 index 0000000000000..65b6e0d56f06f --- /dev/null +++ b/rethinkdb/manifest.json @@ -0,0 +1,33 @@ +{ + "display_name": "RethinkDB", + "maintainer": "help@datadoghq.com", + "manifest_version": "1.0.0", + "name": "rethinkdb", + "metric_prefix": "rethinkdb.", + "metric_to_check": "", + "creates_events": false, + "short_description": "", + "guid": "a09f3ed3-c947-413c-a9c6-0dcb641ea890", + "support": "core", + "supported_os": [ + "linux", + "mac_os", + "windows" + ], + "public_title": "Datadog-RethinkDB Integration", + "categories": [ + "" + ], + "type": "check", + "is_public": false, + "integration_id": "rethinkdb", + "assets": { + "configuration": { + "spec": "assets/configuration/spec.yaml" + }, + "dashboards": {}, + "monitors": {}, + "saved_views": {}, + "service_checks": "assets/service_checks.json" + } +} diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv new file mode 100644 index 0000000000000..ae0af074191ec --- /dev/null +++ b/rethinkdb/metadata.csv @@ -0,0 +1 @@ +metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name diff --git a/rethinkdb/requirements-dev.txt b/rethinkdb/requirements-dev.txt new file mode 100644 index 0000000000000..98b5456bbd0e2 --- /dev/null +++ b/rethinkdb/requirements-dev.txt @@ -0,0 +1 @@ +-e ../datadog_checks_dev diff --git a/rethinkdb/requirements.in b/rethinkdb/requirements.in new 
file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/rethinkdb/setup.py b/rethinkdb/setup.py new file mode 100644 index 0000000000000..114b8ac239325 --- /dev/null +++ b/rethinkdb/setup.py @@ -0,0 +1,54 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from codecs import open # To use a consistent encoding +from os import path + +from setuptools import setup + +HERE = path.dirname(path.abspath(__file__)) + +# Get version info +ABOUT = {} +with open(path.join(HERE, 'datadog_checks', 'rethinkdb', '__about__.py')) as f: + exec(f.read(), ABOUT) + +# Get the long description from the README file +with open(path.join(HERE, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + + +CHECKS_BASE_REQ = 'datadog-checks-base>=4.2.0' + + +setup( + name='datadog-rethinkdb', + version=ABOUT['__version__'], + description='The RethinkDB check', + long_description=long_description, + long_description_content_type='text/markdown', + keywords='datadog agent rethinkdb check', + # The project's main homepage. 
+ url='https://github.com/DataDog/integrations-core', + # Author details + author='Datadog', + author_email='packages@datadoghq.com', + # License + license='BSD-3-Clause', + # See https://pypi.org/classifiers + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'Topic :: System :: Monitoring', + 'License :: OSI Approved :: BSD License', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.7', + ], + # The package we're going to ship + packages=['datadog_checks.rethinkdb'], + # Run-time dependencies + install_requires=[CHECKS_BASE_REQ], + # Extra files to ship with the wheel package + include_package_data=True, +) diff --git a/rethinkdb/tests/__init__.py b/rethinkdb/tests/__init__.py new file mode 100644 index 0000000000000..46dd167dcde48 --- /dev/null +++ b/rethinkdb/tests/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py new file mode 100644 index 0000000000000..63c9071c99816 --- /dev/null +++ b/rethinkdb/tests/conftest.py @@ -0,0 +1,14 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import pytest + + +@pytest.fixture(scope='session') +def dd_environment(): + yield + + +@pytest.fixture +def instance(): + return {} diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py new file mode 100644 index 0000000000000..b67804196d10b --- /dev/null +++ b/rethinkdb/tests/test_rethinkdb.py @@ -0,0 +1,11 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from datadog_checks.rethinkdb import RethinkdbCheck + + +def test_check(aggregator, instance): + check = RethinkdbCheck('rethinkdb', {}, {}) + check.check(instance) + + aggregator.assert_all_metrics_covered() diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini new file mode 100644 index 0000000000000..ac9110708622e --- /dev/null +++ b/rethinkdb/tox.ini @@ -0,0 +1,22 @@ +[tox] +minversion = 2.0 +skip_missing_interpreters = true +basepython = py37 +envlist = + py{27,37} + +[testenv] +dd_check_style = true +dd_check_types = true +dd_mypy_args = --py2 datadog_checks/ +usedevelop = true +platform = linux|darwin|win32 +deps = + -e../datadog_checks_base[deps] + -rrequirements-dev.txt +passenv = + DOCKER* + COMPOSE* +commands = + pip install -r requirements.in + pytest -v {posargs} From 324b4730def4ed7a4aeea5b7c4f67208737008f3 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 12:19:54 +0100 Subject: [PATCH 002/147] Add initial type hints --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 3 +++ rethinkdb/tests/test_rethinkdb.py | 4 ++++ rethinkdb/tox.ini | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 85756a4aa410b..1a37073dee713 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -1,9 +1,12 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Any, Dict + from datadog_checks.base import AgentCheck class RethinkdbCheck(AgentCheck): def check(self, instance): + # type: (Dict[str, Any]) -> None pass diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index b67804196d10b..e1b7357ae57d2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -1,10 +1,14 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Any, Dict + +from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkdbCheck def test_check(aggregator, instance): + # type: (AggregatorStub, Dict[str, Any]) -> None check = RethinkdbCheck('rethinkdb', {}, {}) check.check(instance) diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index ac9110708622e..738ed85b98720 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -8,7 +8,7 @@ envlist = [testenv] dd_check_style = true dd_check_types = true -dd_mypy_args = --py2 datadog_checks/ +dd_mypy_args = --py2 datadog_checks/ tests/ usedevelop = true platform = linux|darwin|win32 deps = From 78e79e1e915989d22cadc9b8840e85ae69458a8a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 12:37:21 +0100 Subject: [PATCH 003/147] Add rethinkdb to test-all-checks.yml --- .azure-pipelines/templates/test-all-checks.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.azure-pipelines/templates/test-all-checks.yml b/.azure-pipelines/templates/test-all-checks.yml index 8862fc3babfd7..5ebeba602da04 100644 --- a/.azure-pipelines/templates/test-all-checks.yml +++ b/.azure-pipelines/templates/test-all-checks.yml @@ -305,6 +305,9 @@ jobs: - checkName: redisdb displayName: Redis os: linux + - checkName: rethinkdb + displayName: RethinkDB + os: linux - checkName: riak displayName: Riak os: linux 
From 2f34ca998f7971122356d295b358bdd74f165765 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 13:44:19 +0100 Subject: [PATCH 004/147] Add short description --- rethinkdb/manifest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/manifest.json b/rethinkdb/manifest.json index 65b6e0d56f06f..c1def93c6f792 100644 --- a/rethinkdb/manifest.json +++ b/rethinkdb/manifest.json @@ -6,7 +6,7 @@ "metric_prefix": "rethinkdb.", "metric_to_check": "", "creates_events": false, - "short_description": "", + "short_description": "Collect status, performance and other metrics from a RethinkDB cluster.", "guid": "a09f3ed3-c947-413c-a9c6-0dcb641ea890", "support": "core", "supported_os": [ From 1791c1f5eb8f532de823e5b19e66f72be9efa885 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 13:53:40 +0100 Subject: [PATCH 005/147] Rename check class: Rethinkdb -> RethinkDB --- rethinkdb/datadog_checks/rethinkdb/__init__.py | 4 ++-- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 2 +- rethinkdb/tests/test_rethinkdb.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/__init__.py b/rethinkdb/datadog_checks/rethinkdb/__init__.py index 38642a2b1799b..bb253cfad3fda 100644 --- a/rethinkdb/datadog_checks/rethinkdb/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/__init__.py @@ -2,6 +2,6 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from .__about__ import __version__ -from .rethinkdb import RethinkdbCheck +from .rethinkdb import RethinkDBCheck -__all__ = ['__version__', 'RethinkdbCheck'] +__all__ = ['__version__', 'RethinkDBCheck'] diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 1a37073dee713..002ffc7749268 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,7 +6,7 @@ from datadog_checks.base 
import AgentCheck -class RethinkdbCheck(AgentCheck): +class RethinkDBCheck(AgentCheck): def check(self, instance): # type: (Dict[str, Any]) -> None pass diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index e1b7357ae57d2..4dabdef346978 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -4,12 +4,12 @@ from typing import Any, Dict from datadog_checks.base.stubs.aggregator import AggregatorStub -from datadog_checks.rethinkdb import RethinkdbCheck +from datadog_checks.rethinkdb import RethinkDBCheck def test_check(aggregator, instance): # type: (AggregatorStub, Dict[str, Any]) -> None - check = RethinkdbCheck('rethinkdb', {}, {}) + check = RethinkDBCheck('rethinkdb', {}, {}) check.check(instance) aggregator.assert_all_metrics_covered() From a86ff28eff0ce1d5c9432c9a3c80484a3606cc28 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 13:56:30 +0100 Subject: [PATCH 006/147] Add dependency on rethinkdb (Python driver) --- .../datadog_checks/base/data/agent_requirements.in | 1 + rethinkdb/requirements.in | 1 + 2 files changed, 2 insertions(+) diff --git a/datadog_checks_base/datadog_checks/base/data/agent_requirements.in b/datadog_checks_base/datadog_checks/base/data/agent_requirements.in index 55b6fc1b2cf92..edbd40b0383d8 100644 --- a/datadog_checks_base/datadog_checks/base/data/agent_requirements.in +++ b/datadog_checks_base/datadog_checks/base/data/agent_requirements.in @@ -59,6 +59,7 @@ redis==3.3.11 requests==2.22.0 requests-kerberos==0.12.0 requests_ntlm==1.1.0 +rethinkdb==2.4.4 scandir==1.8 securesystemslib[crypto,pynacl]==0.14.0 selectors34==1.2.0; sys_platform == 'win32' and python_version < '3.4' diff --git a/rethinkdb/requirements.in b/rethinkdb/requirements.in index e69de29bb2d1d..5b339f2f0d300 100644 --- a/rethinkdb/requirements.in +++ b/rethinkdb/requirements.in @@ -0,0 +1 @@ +rethinkdb==2.4.4 From fbd6ff294ebe2d138a9c4254c5e374ab7d600269 Mon Sep 17 00:00:00 2001 From: 
Florimond Manca Date: Wed, 12 Feb 2020 14:03:52 +0100 Subject: [PATCH 007/147] Update signature used to instantiate check --- rethinkdb/tests/test_rethinkdb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 4dabdef346978..5fde082dea4d2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -7,9 +7,10 @@ from datadog_checks.rethinkdb import RethinkDBCheck -def test_check(aggregator, instance): - # type: (AggregatorStub, Dict[str, Any]) -> None - check = RethinkDBCheck('rethinkdb', {}, {}) +def test_check(aggregator): + # type: (AggregatorStub) -> None + instance = {} # type: Dict[str, Any] + check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) aggregator.assert_all_metrics_covered() From 0f6a392e30b2374ab38bdddc8426fff41fd8f3e1 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 14:59:57 +0100 Subject: [PATCH 008/147] Use Python 3.8 instead of 3.7 for Tox --- rethinkdb/tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index 738ed85b98720..d3fcb0fed7b8f 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -1,9 +1,9 @@ [tox] minversion = 2.0 skip_missing_interpreters = true -basepython = py37 +basepython = py38 envlist = - py{27,37} + py{27,38} [testenv] dd_check_style = true From 3ae186536d3f47bf0eacd608571c6c69844388f0 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 15:02:33 +0100 Subject: [PATCH 009/147] Setup Docker Compose --- rethinkdb/tests/common.py | 17 ++++++++++++++++ rethinkdb/tests/compose/docker-compose.yaml | 9 +++++++++ rethinkdb/tests/conftest.py | 22 +++++++++++++++++---- 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 rethinkdb/tests/common.py create mode 100644 rethinkdb/tests/compose/docker-compose.yaml diff --git a/rethinkdb/tests/common.py 
b/rethinkdb/tests/common.py new file mode 100644 index 0000000000000..58f066947f947 --- /dev/null +++ b/rethinkdb/tests/common.py @@ -0,0 +1,17 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import os + +from datadog_checks.utils.common import get_docker_hostname + +HERE = os.path.dirname(os.path.abspath(__file__)) +ROOT = os.path.dirname(os.path.dirname(HERE)) + +CHECK_NAME = 'rethinkdb' + +IMAGE = 'rethinkdb:2.4.0' +CONTAINER_NAME = 'rethinkdb' + +HOST = get_docker_hostname() +PORT = 28015 diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml new file mode 100644 index 0000000000000..676ffbfbe44c9 --- /dev/null +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -0,0 +1,9 @@ +version: "3" + +services: + rethinkdb: + tty: true # Required otherwise RethinkDB won't output any logs. + image: ${RETHINKDB_IMAGE} + container_name: ${RETHINKDB_CONTAINER_NAME} + ports: + - ${RETHINKDB_PORT}:28015 diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 63c9071c99816..f82df441520cb 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -1,14 +1,28 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import os +from typing import Any, Dict, Iterator + import pytest +from datadog_checks.dev import docker_run + +from .common import CONTAINER_NAME, HERE, IMAGE, PORT + @pytest.fixture(scope='session') def dd_environment(): - yield + # type: () -> Iterator[dict] + compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') + env_vars = { + 'RETHINKDB_PORT': str(PORT), + 'RETHINKDB_IMAGE': IMAGE, + 'RETHINKDB_CONTAINER_NAME': CONTAINER_NAME, + } -@pytest.fixture -def instance(): - return {} + with docker_run(compose_file, env_vars=env_vars, log_patterns=[r'Server ready.*']): + instance = {} # type: Dict[str, Any] + config = {'instances': [instance]} + yield config From 9d9a6e5ba7b30831c52166d9b8ba9c168fe741a4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 15:08:17 +0100 Subject: [PATCH 010/147] Add E2E test --- rethinkdb/tests/test_e2e.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 rethinkdb/tests/test_e2e.py diff --git a/rethinkdb/tests/test_e2e.py b/rethinkdb/tests/test_e2e.py new file mode 100644 index 0000000000000..a979810fb7154 --- /dev/null +++ b/rethinkdb/tests/test_e2e.py @@ -0,0 +1,11 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import pytest + + +@pytest.mark.e2e +def test_check_ok(dd_agent_check): + aggregator = dd_agent_check(rate=True) + aggregator.assert_all_metrics_covered() From 72e91309cfb69423ab2bb630ea9cadb2ae58e99d Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 16:13:19 +0100 Subject: [PATCH 011/147] Submit service check --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 17 +++++++++++++++-- rethinkdb/tests/common.py | 1 + rethinkdb/tests/compose/docker-compose.yaml | 1 + rethinkdb/tests/conftest.py | 3 ++- rethinkdb/tests/test_rethinkdb.py | 7 +++++++ 5 files changed, 26 insertions(+), 3 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 002ffc7749268..5d51061206a01 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -1,7 +1,10 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Dict +from contextlib import contextmanager +from typing import Any, Dict, Iterator + +import rethinkdb from datadog_checks.base import AgentCheck @@ -9,4 +12,14 @@ class RethinkDBCheck(AgentCheck): def check(self, instance): # type: (Dict[str, Any]) -> None - pass + with _connect(database='rethinkdb', host='localhost', port=28015) as conn: + server = conn.server() # type: Dict[str, Any] + tags = ['server:{}'.format(server['name'])] + self.service_check('rethinkdb.can_connect', self.OK, tags=tags) + + +@contextmanager +def _connect(database, host, port): + # type: (str, str, int) -> Iterator[rethinkdb.net.Connection] + with rethinkdb.r.connect(db=database, host=host, port=port) as conn: + yield conn diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 58f066947f947..925f39e55973f 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -12,6 +12,7 @@ IMAGE = 'rethinkdb:2.4.0' CONTAINER_NAME = 'rethinkdb' +SERVER_NAME = 'server0' HOST = get_docker_hostname() PORT = 28015 diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 676ffbfbe44c9..2d626858a70e4 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -5,5 +5,6 @@ services: tty: true # Required otherwise RethinkDB won't output any logs. 
image: ${RETHINKDB_IMAGE} container_name: ${RETHINKDB_CONTAINER_NAME} + command: rethinkdb --bind all --server-name ${RETHINKDB_SERVER_NAME} ports: - ${RETHINKDB_PORT}:28015 diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index f82df441520cb..a4b50ddc9d3f4 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -8,7 +8,7 @@ from datadog_checks.dev import docker_run -from .common import CONTAINER_NAME, HERE, IMAGE, PORT +from .common import CONTAINER_NAME, HERE, IMAGE, PORT, SERVER_NAME @pytest.fixture(scope='session') @@ -20,6 +20,7 @@ def dd_environment(): 'RETHINKDB_PORT': str(PORT), 'RETHINKDB_IMAGE': IMAGE, 'RETHINKDB_CONTAINER_NAME': CONTAINER_NAME, + 'RETHINKDB_SERVER_NAME': SERVER_NAME, } with docker_run(compose_file, env_vars=env_vars, log_patterns=[r'Server ready.*']): diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 5fde082dea4d2..2debc84d4ceb7 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -3,10 +3,16 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from typing import Any, Dict +import pytest + from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck +from .common import SERVER_NAME + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') def test_check(aggregator): # type: (AggregatorStub) -> None instance = {} # type: Dict[str, Any] @@ -14,3 +20,4 @@ def test_check(aggregator): check.check(instance) aggregator.assert_all_metrics_covered() + aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=['server:{}'.format(SERVER_NAME)]) From 699b0732a70e69daebfea26d2c9e5a2b8b7b4c05 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 16:18:55 +0100 Subject: [PATCH 012/147] Allow viewing admin UI on localhost --- rethinkdb/tests/compose/docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 2d626858a70e4..3d0d43c266b01 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -8,3 +8,4 @@ services: command: rethinkdb --bind all --server-name ${RETHINKDB_SERVER_NAME} ports: - ${RETHINKDB_PORT}:28015 + - 8080:8080 # Not used by tests, but allows accessing the RethinkDB admin web UI at 'localhost:8080' on the host machine. From 79c5d6949147cfcefd9db553a707a8182fb873bc Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 17:02:34 +0100 Subject: [PATCH 013/147] =?UTF-8?q?=F0=9F=94=A5=F0=9F=90=8D2=EF=B8=8F?= =?UTF-8?q?=E2=83=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 5d51061206a01..901a6bc470f2f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -5,9 +5,25 @@ from typing import Any, Dict, Iterator import rethinkdb +import six from datadog_checks.base import AgentCheck +try: + rethinkdb.r +except AttributeError: + if not six.PY2: + # This would be unexpected. + raise + + # HACK: running `import rethinkdb` on Python 2.7 made it import our `rethinkdb` package, + # instead of the RethinkDB Python client package. Let's hack our way around this. + # NOTE: we deal with this edge case in an 'except' block (instead of proactively checking for `six.PY2`) so that + # IDEs and linters don't get confused. 
+ import importlib + + rethinkdb = importlib.import_module('rethinkdb') # type: ignore + class RethinkDBCheck(AgentCheck): def check(self, instance): From cc5a2e1d564a1b58e4cfcc1fecd98f896673c44f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 17:07:37 +0100 Subject: [PATCH 014/147] Simplify Python 2 import fix --- .../datadog_checks/rethinkdb/rethinkdb.py | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 901a6bc470f2f..bf124132a6624 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -1,29 +1,17 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) + +# Required for `import rethinkdb` to import the Python client instead of this package on Python 2. +from __future__ import absolute_import + from contextlib import contextmanager from typing import Any, Dict, Iterator import rethinkdb -import six from datadog_checks.base import AgentCheck -try: - rethinkdb.r -except AttributeError: - if not six.PY2: - # This would be unexpected. - raise - - # HACK: running `import rethinkdb` on Python 2.7 made it import our `rethinkdb` package, - # instead of the RethinkDB Python client package. Let's hack our way around this. - # NOTE: we deal with this edge case in an 'except' block (instead of proactively checking for `six.PY2`) so that - # IDEs and linters don't get confused. 
- import importlib - - rethinkdb = importlib.import_module('rethinkdb') # type: ignore - class RethinkDBCheck(AgentCheck): def check(self, instance): From 678ed5759550f75c037525c7f3027dd369ae9bb3 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 12 Feb 2020 17:45:24 +0100 Subject: [PATCH 015/147] Refactor submission of service check --- .../datadog_checks/rethinkdb/rethinkdb.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index bf124132a6624..6c53159bc779e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -14,12 +14,22 @@ class RethinkDBCheck(AgentCheck): + @contextmanager + def _submit_service_check(self): + # type: () -> Iterator[None] + try: + yield + except rethinkdb.errors.ReqlDriverError: + self.service_check('rethinkdb.can_connect', self.CRITICAL) + raise + else: + self.service_check('rethinkdb.can_connect', self.OK) + def check(self, instance): # type: (Dict[str, Any]) -> None - with _connect(database='rethinkdb', host='localhost', port=28015) as conn: - server = conn.server() # type: Dict[str, Any] - tags = ['server:{}'.format(server['name'])] - self.service_check('rethinkdb.can_connect', self.OK, tags=tags) + with self._submit_service_check(): + with _connect(database='rethinkdb', host='localhost', port=28015): + pass @contextmanager From 2f481a774c9a611cfac629738948e78ee30ae172 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 13 Feb 2020 10:54:41 +0100 Subject: [PATCH 016/147] Collect cluster and server statistics --- .../datadog_checks/rethinkdb/rethinkdb.py | 118 ++++++++++++++++-- rethinkdb/datadog_checks/rethinkdb/types.py | 51 ++++++++ rethinkdb/tests/common.py | 61 +++++++++ rethinkdb/tests/test_rethinkdb.py | 8 +- 4 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/types.py 
diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 6c53159bc779e..ca68cc320c6c7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,12 +6,14 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Dict, Iterator +from typing import Any, Dict, Iterator, Tuple import rethinkdb from datadog_checks.base import AgentCheck +from .types import ClusterStats, EqJoinRow, Server, ServerStats, Table, TableStats + class RethinkDBCheck(AgentCheck): @contextmanager @@ -28,12 +30,112 @@ def _submit_service_check(self): def check(self, instance): # type: (Dict[str, Any]) -> None with self._submit_service_check(): - with _connect(database='rethinkdb', host='localhost', port=28015): - pass + with rethinkdb.r.connect(db='rethinkdb', host='localhost', port=28015) as conn: + self._collect_statistics(conn) + self._collect_statuses(conn) + self._collect_jobs(conn) + self._collect_current_issues(conn) + + def _collect_statistics(self, conn): + # type: (rethinkdb.net.Connection) -> None + self._collect_cluster_statistics(conn) + self._collect_servers_statistics(conn) + self._collect_table_statistics(conn) + self._collect_replicas_statistics(conn) + + def _collect_cluster_statistics(self, conn): + # type: (rethinkdb.net.Connection) -> None + stats = rethinkdb.r.table('stats').get(['cluster']).run(conn) # type: ClusterStats + query_engine = stats['query_engine'] + + self.rate('rethinkdb.stats.cluster.queries_per_sec', value=query_engine['queries_per_sec']) + self.rate('rethinkdb.stats.cluster.read_docs_per_sec', value=query_engine['read_docs_per_sec']) + self.rate('rethinkdb.stats.cluster.written_docs_per_sec', value=query_engine['written_docs_per_sec']) + + def _collect_servers_statistics(self, conn): + # type: (rethinkdb.net.Connection) -> None + for server, stats in _query_server_stats(conn): + name = 
server['name'] + server_tags = server['tags'] + query_engine = stats['query_engine'] + + tags = ['server:{}'.format(name)] + server_tags + + self.gauge('rethinkdb.stats.server.client_connections', value=query_engine['client_connections'], tags=tags) + self.gauge('rethinkdb.stats.server.clients_active', value=query_engine['clients_active'], tags=tags) + + self.rate('rethinkdb.stats.server.queries_per_sec', value=query_engine['queries_per_sec'], tags=tags) + self.monotonic_count('rethinkdb.stats.server.queries_total', query_engine['queries_total'], tags=tags) + + self.rate('rethinkdb.stats.server.read_docs_per_sec', value=query_engine['read_docs_per_sec'], tags=tags) + self.monotonic_count( + 'rethinkdb.stats.server.read_docs_total', value=query_engine['read_docs_total'], tags=tags + ) + + self.rate( + 'rethinkdb.stats.server.written_docs_per_sec', value=query_engine['written_docs_per_sec'], tags=tags + ) + self.monotonic_count( + 'rethinkdb.stats.server.written_docs_total', value=query_engine['written_docs_total'], tags=tags + ) + + def _collect_table_statistics(self, conn): + # type: (rethinkdb.net.Connection) -> None + tables = rethinkdb.r.table('table_config').run(conn) # type: Iterator[Table] + + for table in tables: + # TODO: get rid of N+1 query problem. 
+ stats = rethinkdb.r.table('stats').get(['table', table['id']]).run(conn) # type: TableStats + + name = table['name'] + database = table['db'] + query_engine = stats['query_engine'] + + tags = ['table:{}'.format(name), 'database:{}'.format(database)] + + self.rate('rethinkdb.stats.table.read_docs_per_sec', value=query_engine['read_docs_per_sec'], tags=tags) + self.rate( + 'rethinkdb.stats.table.written_docs_per_sec', value=query_engine['written_docs_per_sec'], tags=tags + ) + + def _collect_replicas_statistics(self, conn): + # type: (rethinkdb.net.Connection) -> None + pass # TODO + + def _collect_statuses(self, conn): + # type: (rethinkdb.net.Connection) -> None + pass # TODO + + def _collect_jobs(self, conn): + # type: (rethinkdb.net.Connection) -> None + pass # TODO + + def _collect_current_issues(self, conn): + # type: (rethinkdb.net.Connection) -> None + pass # TODO + + # TODO: version metadata. + + +def _query_server_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] + + # Here, we want to retrieve each server in the cluster along with its statistics. + # A naive approach would be to query 'server_config', then for each server find the row in 'stats' that + # corresponds to each server's ID. This would lead to the N+1 query problem. + # Instead, we make a single (but more complex) query by joining 'stats' with 'server_config' on the server ID. 
+ # See: https://rethinkdb.com/api/python/eq_join/ + + def _join_on_server_id(server_stats): + # type: (rethinkdb.ast.RqlQuery) -> str + server_stats_id = server_stats['id'] # ['server', ''] + return server_stats_id.nth(1) + rows = ( + rethinkdb.r.table('stats').eq_join(_join_on_server_id, rethinkdb.r.table('server_config')).run(conn) + ) # type: Iterator[EqJoinRow] -@contextmanager -def _connect(database, host, port): - # type: (str, str, int) -> Iterator[rethinkdb.net.Connection] - with rethinkdb.r.connect(db=database, host=host, port=port) as conn: - yield conn + for row in rows: + stats = row['left'] # type: ServerStats + server = row['right'] # type: Server + yield server, stats diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py new file mode 100644 index 0000000000000..842386a0633fc --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -0,0 +1,51 @@ +""" +Types used to represent JSON documents returned by RethinkDB queries. Used for type checking our own code. +""" + +from typing import Any, List, Literal, Tuple, TypedDict + +# Configuration documents. +# See: https://rethinkdb.com/docs/system-tables/#configuration-tables + +Server = TypedDict('Server', {'id': str, 'name': str, 'cache_size_mb': str, 'tags': List[str]}) + +Table = TypedDict('Table', {'id': str, 'name': str, 'db': str}) # TODO: more fields + +# System statistics documents. 
+# See: https://rethinkdb.com/docs/system-stats/ + +ClusterQueryEngine = TypedDict( + 'ClusterQueryEngine', {'queries_per_sec': int, 'read_docs_per_sec': int, 'written_docs_per_sec': int}, +) + +ClusterStats = TypedDict('ClusterStats', {'id': Tuple[Literal['cluster']], 'query_engine': ClusterQueryEngine}) + +ServerQueryEngine = TypedDict( + 'ServerQueryEngine', + { + 'client_connections': int, + 'clients_active': int, + 'queries_per_sec': int, + 'queries_total': int, + 'read_docs_per_sec': int, + 'read_docs_total': int, + 'written_docs_per_sec': int, + 'written_docs_total': int, + }, +) + +ServerStats = TypedDict( + 'ServerStats', {'id': Tuple[Literal['server'], str], 'server': str, 'query_engine': ServerQueryEngine}, +) + +TableQueryEngine = TypedDict('TableQueryEngine', {'read_docs_per_sec': int, 'written_docs_per_sec': int}) + +TableStats = TypedDict( + 'TableStats', {'id': Tuple[Literal['table'], str], 'table': str, 'db': str, 'query_engine': TableQueryEngine}, +) + +# ReQL commands. + +# NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. 
+# See: https://github.com/python/mypy/issues/3863 +EqJoinRow = TypedDict('EqJoinRow', {'left': Any, 'right': Any}) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 925f39e55973f..2e731d3bddbca 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -16,3 +16,64 @@ HOST = get_docker_hostname() PORT = 28015 + +SYSTEM_STATISTICS_METRICS = ( + 'rethinkdb.stats.cluster.queries_per_sec', + 'rethinkdb.stats.cluster.read_docs_per_sec', + 'rethinkdb.stats.cluster.written_docs_per_sec', + 'rethinkdb.stats.server.queries_per_sec', + 'rethinkdb.stats.server.queries_total', + 'rethinkdb.stats.server.read_docs_per_sec', + 'rethinkdb.stats.server.read_docs_total', + 'rethinkdb.stats.server.written_docs_per_sec', + 'rethinkdb.stats.server.written_docs_total', + 'rethinkdb.stats.server.client_connections', + 'rethinkdb.stats.server.clients_active', # NOTE: sent, but not documented on the RethinkDB website. + # WIP + # TODO: add a database, tables and replicas to the Docker Compose setup. 
+ # 'rethinkdb.stats.table.read_docs_per_sec', + # 'rethinkdb.stats.table.written_docs_per_sec', + # 'rethinkdb.stats.table_server.read_docs_per_sec', + # 'rethinkdb.stats.table_server.read_docs_total', + # 'rethinkdb.stats.table_server.written_docs_per_sec', + # 'rethinkdb.stats.table_server.written_docs_total', + # 'rethinkdb.stats.table_server.cache.in_use_bytes', + # 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', + # 'rethinkdb.stats.table_server.disk.read_bytes_total', + # 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', + # 'rethinkdb.stats.table_server.disk.written_bytes_total', + # 'rethinkdb.stats.table_server.disk.space_usage.metadata_bytes', + # 'rethinkdb.stats.table_server.disk.space_usage.data_bytes', + # 'rethinkdb.stats.table_server.disk.space_usage.garbage_bytes', + # 'rethinkdb.stats.table_server.disk.space_usage.preallocated_bytes', +) + +STATUS_METRICS = ( + 'rethinkdb.table_status.ready_for_outdated_reads', 'rethinkdb.table_status.ready_for_reads', + 'rethinkdb.table_status.ready_for_writes', + 'rethinkdb.table_status.all_replicas_ready', + 'rethinkdb.table_status.shards.total', + 'rethinkdb.table_status.shards.replicas.total', + 'rethinkdb.table_status.shards.replicas.state', + 'rethinkdb.server_status.network.time_connected', + 'rethinkdb.server_status.network.connected_to', + 'rethinkdb.server_status.process.time_started', +) + +SYSTEM_JOBS_METRICS = ( + 'rethinkdb.jobs.duration_sec', +) + +SYSTEM_CURRENT_ISSUES_METRICS = ( + 'rethinkdb.current_issues.log_write_error', + 'rethinkdb.current_issues.server_name_collision', + 'rethinkdb.current_issues.db_name_collision', + 'rethinkdb.current_issues.table_name_collision', + 'rethinkdb.current_issues.outdated_index', + 'rethinkdb.current_issues.table_availability', + 'rethinkdb.current_issues.memory_error', + 'rethinkdb.current_issues.non_transitive_error', +) + +# WIP +METRICS = SYSTEM_STATISTICS_METRICS diff --git a/rethinkdb/tests/test_rethinkdb.py 
b/rethinkdb/tests/test_rethinkdb.py index 2debc84d4ceb7..db5cb85ba8f52 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -8,7 +8,7 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck -from .common import SERVER_NAME +from .common import METRICS @pytest.mark.integration @@ -19,5 +19,9 @@ def test_check(aggregator): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) + for metric in METRICS: + aggregator.assert_metric(metric) + aggregator.assert_all_metrics_covered() - aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=['server:{}'.format(SERVER_NAME)]) + + aggregator.assert_service_check('rethinkdb.can_connect', count=1) From a1a65b56da18e5a4e216b648574bf0ecbc6a8c7b Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 13 Feb 2020 10:56:18 +0100 Subject: [PATCH 017/147] Document more TODOs --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index ca68cc320c6c7..0169de1ce14eb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -115,6 +115,9 @@ def _collect_current_issues(self, conn): pass # TODO # TODO: version metadata. + # TODO: custom queries. (Hint: look at `QueryManager`.) + # TODO: allow not sending default metrics. 
+ # TODO: decide if and how to deal with `identifier_format`: https://rethinkdb.com/api/python/table/#description def _query_server_stats(conn): From 25ce3f08a8922640f2b3541eeb454772e45f9f7d Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 13 Feb 2020 14:49:19 +0100 Subject: [PATCH 018/147] Update jobs and current issues metrics listings --- rethinkdb/tests/common.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 2e731d3bddbca..2c5e69126b509 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -61,18 +61,22 @@ ) SYSTEM_JOBS_METRICS = ( - 'rethinkdb.jobs.duration_sec', + 'rethinkdb.jobs.query.duration', + 'rethinkdb.jobs.index_construction.duration', + 'rethinkdb.jobs.index_construction.progress', + 'rethinkdb.jobs.backfill.duration', + 'rethinkdb.jobs.backfill.progress', ) SYSTEM_CURRENT_ISSUES_METRICS = ( - 'rethinkdb.current_issues.log_write_error', - 'rethinkdb.current_issues.server_name_collision', - 'rethinkdb.current_issues.db_name_collision', - 'rethinkdb.current_issues.table_name_collision', - 'rethinkdb.current_issues.outdated_index', - 'rethinkdb.current_issues.table_availability', - 'rethinkdb.current_issues.memory_error', - 'rethinkdb.current_issues.non_transitive_error', + 'rethinkdb.current_issues.log_write_error.total', + 'rethinkdb.current_issues.server_name_collision.total', + 'rethinkdb.current_issues.db_name_collision.total', + 'rethinkdb.current_issues.table_name_collision.total', + 'rethinkdb.current_issues.outdated_index.total', + 'rethinkdb.current_issues.table_availability.total', + 'rethinkdb.current_issues.memory_error.total', + 'rethinkdb.current_issues.non_transitive_error.total', ) # WIP From 1db54283612245b5fdc36211047390ac27fd1568 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 13 Feb 2020 15:37:45 +0100 Subject: [PATCH 019/147] Refactor collection of default metrics --- 
.../rethinkdb/_default_metrics/__init__.py | 31 ++++ .../_default_metrics/_current_issues.py | 21 +++ .../rethinkdb/_default_metrics/_jobs.py | 21 +++ .../rethinkdb/_default_metrics/_statistics.py | 159 ++++++++++++++++++ .../rethinkdb/_default_metrics/_statuses.py | 21 +++ .../datadog_checks/rethinkdb/_queries.py | 45 +++++ .../rethinkdb/{types.py => _types.py} | 17 +- .../datadog_checks/rethinkdb/rethinkdb.py | 138 +++------------ 8 files changed, 337 insertions(+), 116 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_queries.py rename rethinkdb/datadog_checks/rethinkdb/{types.py => _types.py} (76%) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py new file mode 100644 index 0000000000000..127caa8eb0824 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py @@ -0,0 +1,31 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +import itertools +from typing import Iterator + +import rethinkdb + +from .._types import Metric +from ._current_issues import collect_current_issues +from ._jobs import collect_jobs +from ._statistics import collect_statistics +from ._statuses import collect_statuses + + +def collect_default_metrics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect default metrics from various system tables. 
+ + See: https://rethinkdb.com/docs/system-tables/ + """ + metrics = itertools.chain( + collect_statistics(conn), collect_statuses(conn), collect_jobs(conn), collect_current_issues(conn) + ) + + for metric in metrics: + yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py new file mode 100644 index 0000000000000..12f668f75a719 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py @@ -0,0 +1,21 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +from typing import Iterator + +import rethinkdb + +from .._types import Metric + + +def collect_current_issues(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics about current system issues. + + See: https://rethinkdb.com/docs/system-issues/ + """ + return iter(()) # TODO diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py new file mode 100644 index 0000000000000..812795f28f51e --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py @@ -0,0 +1,21 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +from typing import Iterator + +import rethinkdb + +from .._types import Metric + + +def collect_jobs(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics about system jobs. 
+ + See: https://rethinkdb.com/docs/system-jobs/ + """ + return iter(()) # TODO diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py new file mode 100644 index 0000000000000..5fca013d084ab --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -0,0 +1,159 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +import itertools +from typing import Iterator + +import rethinkdb + +from .._queries import query_cluster_stats, query_servers_with_stats +from .._types import Metric, Table, TableStats + + +def collect_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics about system statistics. + + See: https://rethinkdb.com/docs/system-stats/ + """ + metrics = itertools.chain( + _collect_cluster_statistics(conn), + _collect_servers_statistics(conn), + _collect_table_statistics(conn), + _collect_replicas_statistics(conn), + ) + + for metric in metrics: + yield metric + + +def _collect_cluster_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + stats = query_cluster_stats(conn) + + query_engine = stats['query_engine'] + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.cluster.queries_per_sec', + 'value': query_engine['queries_per_sec'], + 'tags': [], + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.cluster.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': [], + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.cluster.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': [], + } + + +def _collect_servers_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + for server, stats in query_servers_with_stats(conn): + name = server['name'] + server_tags = server['tags'] + 
query_engine = stats['query_engine'] + + tags = ['server:{}'.format(name)] + server_tags + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.client_connections', + 'value': query_engine['client_connections'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.clients_active', + 'value': query_engine['clients_active'], + 'tags': tags, + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.server.queries_per_sec', + 'value': query_engine['queries_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.queries_total', + 'value': query_engine['queries_total'], + 'tags': tags, + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.server.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.read_docs_total', + 'value': query_engine['read_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.server.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.written_docs_total', + 'value': query_engine['written_docs_total'], + 'tags': tags, + } + + +def _collect_table_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + tables = rethinkdb.r.table('table_config').run(conn) # type: Iterator[Table] + + for table in tables: + # TODO: get rid of N+1 query problem. 
+ stats = rethinkdb.r.table('stats').get(['table', table['id']]).run(conn) # type: TableStats + + name = table['name'] + database = table['db'] + query_engine = stats['query_engine'] + + tags = ['table:{}'.format(name), 'database:{}'.format(database)] + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.table.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'rate', + 'name': 'rethinkdb.stats.table.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + +def _collect_replicas_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + return iter(()) # TODO diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py new file mode 100644 index 0000000000000..b58e29eefdb69 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -0,0 +1,21 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +from typing import Iterator + +import rethinkdb + +from .._types import Metric + + +def collect_statuses(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics about server and table statuses. + + See: https://rethinkdb.com/docs/system-tables/#status-tables + """ + return iter(()) # TODO diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py new file mode 100644 index 0000000000000..3a455da2da93f --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -0,0 +1,45 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + +from typing import Iterator, Tuple + +import rethinkdb + +from ._types import ClusterStats, EqJoinRow, Server, ServerStats + + +def query_cluster_stats(conn): + # type: (rethinkdb.net.Connection) -> ClusterStats + """ + Retrieve statistics about the cluster. + """ + return rethinkdb.r.table('stats').get(['cluster']).run(conn) + + +def query_servers_with_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] + """ + Retrieve each server in the cluster along with its statistics. + """ + + # A naive approach would be to query 'server_config', then for each server find the row in 'stats' that + # corresponds to each server's ID. This would lead to the N+1 query problem. + # Instead, we make a single (but more complex) query by joining 'stats' with 'server_config' on the server ID. + # See: https://rethinkdb.com/api/python/eq_join/ + + def _join_on_server_id(server_stats): + # type: (rethinkdb.ast.RqlQuery) -> str + server_stats_id = server_stats['id'] # ['server', ''] + return server_stats_id.nth(1) + + rows = ( + rethinkdb.r.table('stats').eq_join(_join_on_server_id, rethinkdb.r.table('server_config')).run(conn) + ) # type: Iterator[EqJoinRow] + + for row in rows: + stats = row['left'] # type: ServerStats + server = row['right'] # type: Server + yield server, stats diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py similarity index 76% rename from rethinkdb/datadog_checks/rethinkdb/types.py rename to rethinkdb/datadog_checks/rethinkdb/_types.py index 842386a0633fc..a9854ff8f87cb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -1,9 +1,19 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + """ -Types used to represent JSON documents returned by RethinkDB queries. Used for type checking our own code. +Declarations used for type checking our code, including our manipulation of JSON documents returned by RethinkDB. """ from typing import Any, List, Literal, Tuple, TypedDict +# Lightweight shim to decouple collection functions from the check class. +Metric = TypedDict( + 'Metric', {'type': Literal['rate', 'gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} +) + + # Configuration documents. # See: https://rethinkdb.com/docs/system-tables/#configuration-tables @@ -11,6 +21,7 @@ Table = TypedDict('Table', {'id': str, 'name': str, 'db': str}) # TODO: more fields + # System statistics documents. # See: https://rethinkdb.com/docs/system-stats/ @@ -44,7 +55,9 @@ 'TableStats', {'id': Tuple[Literal['table'], str], 'table': str, 'db': str, 'query_engine': TableQueryEngine}, ) -# ReQL commands. + +# ReQL command results. +# See: https://rethinkdb.com/api/python/ # NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. # See: https://github.com/python/mypy/issues/3863 diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 0169de1ce14eb..67de86273e71e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -2,22 +2,38 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -# Required for `import rethinkdb` to import the Python client instead of this package on Python 2. +# Required for `import rethinkdb` to correctly import the client package (instead of this package) on Python 2. 
from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Dict, Iterator, Tuple +from typing import Any, Callable, Dict, Iterator import rethinkdb from datadog_checks.base import AgentCheck -from .types import ClusterStats, EqJoinRow, Server, ServerStats, Table, TableStats +from ._default_metrics import collect_default_metrics +from ._types import Metric class RethinkDBCheck(AgentCheck): + """ + Collect metrics from a RethinkDB cluster. + + A set of default metrics is collected from system tables. + """ + + def check(self, instance): + # type: (Dict[str, Any]) -> None + with self.__submit_service_check(): + with rethinkdb.r.connect(db='rethinkdb', host='localhost', port=28015) as conn: + for metric in collect_default_metrics(conn): + self.__submit_metric(metric) + + # NOTE: usage of private methods (double underscores) prevents name clashes with the base class. + @contextmanager - def _submit_service_check(self): + def __submit_service_check(self): # type: () -> Iterator[None] try: yield @@ -27,118 +43,12 @@ def _submit_service_check(self): else: self.service_check('rethinkdb.can_connect', self.OK) - def check(self, instance): - # type: (Dict[str, Any]) -> None - with self._submit_service_check(): - with rethinkdb.r.connect(db='rethinkdb', host='localhost', port=28015) as conn: - self._collect_statistics(conn) - self._collect_statuses(conn) - self._collect_jobs(conn) - self._collect_current_issues(conn) - - def _collect_statistics(self, conn): - # type: (rethinkdb.net.Connection) -> None - self._collect_cluster_statistics(conn) - self._collect_servers_statistics(conn) - self._collect_table_statistics(conn) - self._collect_replicas_statistics(conn) - - def _collect_cluster_statistics(self, conn): - # type: (rethinkdb.net.Connection) -> None - stats = rethinkdb.r.table('stats').get(['cluster']).run(conn) # type: ClusterStats - query_engine = stats['query_engine'] - - self.rate('rethinkdb.stats.cluster.queries_per_sec', 
value=query_engine['queries_per_sec']) - self.rate('rethinkdb.stats.cluster.read_docs_per_sec', value=query_engine['read_docs_per_sec']) - self.rate('rethinkdb.stats.cluster.written_docs_per_sec', value=query_engine['written_docs_per_sec']) - - def _collect_servers_statistics(self, conn): - # type: (rethinkdb.net.Connection) -> None - for server, stats in _query_server_stats(conn): - name = server['name'] - server_tags = server['tags'] - query_engine = stats['query_engine'] - - tags = ['server:{}'.format(name)] + server_tags - - self.gauge('rethinkdb.stats.server.client_connections', value=query_engine['client_connections'], tags=tags) - self.gauge('rethinkdb.stats.server.clients_active', value=query_engine['clients_active'], tags=tags) - - self.rate('rethinkdb.stats.server.queries_per_sec', value=query_engine['queries_per_sec'], tags=tags) - self.monotonic_count('rethinkdb.stats.server.queries_total', query_engine['queries_total'], tags=tags) - - self.rate('rethinkdb.stats.server.read_docs_per_sec', value=query_engine['read_docs_per_sec'], tags=tags) - self.monotonic_count( - 'rethinkdb.stats.server.read_docs_total', value=query_engine['read_docs_total'], tags=tags - ) - - self.rate( - 'rethinkdb.stats.server.written_docs_per_sec', value=query_engine['written_docs_per_sec'], tags=tags - ) - self.monotonic_count( - 'rethinkdb.stats.server.written_docs_total', value=query_engine['written_docs_total'], tags=tags - ) - - def _collect_table_statistics(self, conn): - # type: (rethinkdb.net.Connection) -> None - tables = rethinkdb.r.table('table_config').run(conn) # type: Iterator[Table] - - for table in tables: - # TODO: get rid of N+1 query problem. 
- stats = rethinkdb.r.table('stats').get(['table', table['id']]).run(conn) # type: TableStats - - name = table['name'] - database = table['db'] - query_engine = stats['query_engine'] - - tags = ['table:{}'.format(name), 'database:{}'.format(database)] - - self.rate('rethinkdb.stats.table.read_docs_per_sec', value=query_engine['read_docs_per_sec'], tags=tags) - self.rate( - 'rethinkdb.stats.table.written_docs_per_sec', value=query_engine['written_docs_per_sec'], tags=tags - ) - - def _collect_replicas_statistics(self, conn): - # type: (rethinkdb.net.Connection) -> None - pass # TODO - - def _collect_statuses(self, conn): - # type: (rethinkdb.net.Connection) -> None - pass # TODO - - def _collect_jobs(self, conn): - # type: (rethinkdb.net.Connection) -> None - pass # TODO - - def _collect_current_issues(self, conn): - # type: (rethinkdb.net.Connection) -> None - pass # TODO + def __submit_metric(self, metric): + # type: (Metric) -> None + submit = getattr(self, metric['type']) # type: Callable + submit(metric['name'], value=metric['value'], tags=metric['tags']) # TODO: version metadata. # TODO: custom queries. (Hint: look at `QueryManager`.) # TODO: allow not sending default metrics. # TODO: decide if and how to deal with `identifier_format`: https://rethinkdb.com/api/python/table/#description - - -def _query_server_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] - - # Here, we want to retrieve each server in the cluster along with its statistics. - # A naive approach would be to query 'server_config', then for each server find the row in 'stats' that - # corresponds to each server's ID. This would lead to the N+1 query problem. - # Instead, we make a single (but more complex) query by joining 'stats' with 'server_config' on the server ID. 
- # See: https://rethinkdb.com/api/python/eq_join/ - - def _join_on_server_id(server_stats): - # type: (rethinkdb.ast.RqlQuery) -> str - server_stats_id = server_stats['id'] # ['server', ''] - return server_stats_id.nth(1) - - rows = ( - rethinkdb.r.table('stats').eq_join(_join_on_server_id, rethinkdb.r.table('server_config')).run(conn) - ) # type: Iterator[EqJoinRow] - - for row in rows: - stats = row['left'] # type: ServerStats - server = row['right'] # type: Server - yield server, stats From 9079a9a0e7becd0e2d650c84c452329239eb5e1e Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 14:00:00 +0100 Subject: [PATCH 020/147] Add more nodes to cluster in Compose setup --- rethinkdb/tests/common.py | 39 ++++++++++++------ rethinkdb/tests/compose/docker-compose.yaml | 44 ++++++++++++++++++--- rethinkdb/tests/conftest.py | 22 +++++++---- rethinkdb/tests/test_rethinkdb.py | 12 ++++-- 4 files changed, 87 insertions(+), 30 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 2c5e69126b509..5e284b52f091e 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -11,16 +11,21 @@ CHECK_NAME = 'rethinkdb' IMAGE = 'rethinkdb:2.4.0' -CONTAINER_NAME = 'rethinkdb' -SERVER_NAME = 'server0' HOST = get_docker_hostname() -PORT = 28015 -SYSTEM_STATISTICS_METRICS = ( +SERVERS = ['server0', 'server1', 'server2'] +CONNECT_SERVER_NAME = 'server0' +CONNECT_SERVER_PORT = 28015 +PROXY_PORT = 28018 + +CLUSTER_STATISTICS_METRICS = ( 'rethinkdb.stats.cluster.queries_per_sec', 'rethinkdb.stats.cluster.read_docs_per_sec', 'rethinkdb.stats.cluster.written_docs_per_sec', +) + +SERVER_STATISTICS_METRICS = ( 'rethinkdb.stats.server.queries_per_sec', 'rethinkdb.stats.server.queries_total', 'rethinkdb.stats.server.read_docs_per_sec', @@ -29,10 +34,17 @@ 'rethinkdb.stats.server.written_docs_total', 'rethinkdb.stats.server.client_connections', 'rethinkdb.stats.server.clients_active', # NOTE: sent, but not documented on the RethinkDB 
website. - # WIP - # TODO: add a database, tables and replicas to the Docker Compose setup. +) + +# WIP +# TODO: add a database, tables and replicas to the Docker Compose setup. + +TABLE_STATISTICS_METRICS = ( # 'rethinkdb.stats.table.read_docs_per_sec', # 'rethinkdb.stats.table.written_docs_per_sec', +) + +REPLICA_STATISTICS_METRICS = ( # 'rethinkdb.stats.table_server.read_docs_per_sec', # 'rethinkdb.stats.table_server.read_docs_total', # 'rethinkdb.stats.table_server.written_docs_per_sec', @@ -48,19 +60,23 @@ # 'rethinkdb.stats.table_server.disk.space_usage.preallocated_bytes', ) -STATUS_METRICS = ( - 'rethinkdb.table_status.ready_for_outdated_reads' 'rethinkdb.table_status.ready_for_reads', +TABLE_STATUS_METRICS = ( + 'rethinkdb.table_status.ready_for_outdated_reads', + 'rethinkdb.table_status.ready_for_reads', 'rethinkdb.table_status.ready_for_writes', 'rethinkdb.table_status.all_replicas_ready', 'rethinkdb.table_status.shards.total', 'rethinkdb.table_status.shards.replicas.total', 'rethinkdb.table_status.shards.replicas.state', +) + +SERVER_STATUS_METRICS = ( 'rethinkdb.server_status.network.time_connected', 'rethinkdb.server_status.network.connected_to', 'rethinkdb.server_status.process.time_started', ) -SYSTEM_JOBS_METRICS = ( +JOBS_METRICS = ( 'rethinkdb.jobs.query.duration', 'rethinkdb.jobs.index_construction.duration', 'rethinkdb.jobs.index_construction.progress', @@ -68,7 +84,7 @@ 'rethinkdb.jobs.backfill.progress', ) -SYSTEM_CURRENT_ISSUES_METRICS = ( +CURRENT_ISSUES_METRICS = ( 'rethinkdb.current_issues.log_write_error.total', 'rethinkdb.current_issues.server_name_collision.total', 'rethinkdb.current_issues.db_name_collision.total', @@ -78,6 +94,3 @@ 'rethinkdb.current_issues.memory_error.total', 'rethinkdb.current_issues.non_transitive_error.total', ) - -# WIP -METRICS = SYSTEM_STATISTICS_METRICS diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 3d0d43c266b01..016652b927ae5 100644 --- 
a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -1,11 +1,45 @@ version: "3" services: - rethinkdb: + # 3-node RethinkDB cluster with 1 proxy node. + + rethinkdb-server0: tty: true # Required otherwise RethinkDB won't output any logs. image: ${RETHINKDB_IMAGE} - container_name: ${RETHINKDB_CONTAINER_NAME} - command: rethinkdb --bind all --server-name ${RETHINKDB_SERVER_NAME} + container_name: rethinkdb-server0 + command: rethinkdb --bind all --server-name server0 --server-tag default --server-tag us + ports: + - ${RETHINKDB_CONNECT_SERVER_PORT}:28015 # Client driver port. + - 8080:8080 # Port for the web UI. Debugging only (not used by tests). + + rethinkdb-server1: + tty: true + image: ${RETHINKDB_IMAGE} + container_name: rethinkdb-server1 + command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server1 --server-tag us + links: + - rethinkdb-server0 + depends_on: + - rethinkdb-server0 + + rethinkdb-server2: + tty: true + image: ${RETHINKDB_IMAGE} + container_name: rethinkdb-server2 + command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server2 --server-tag us + links: + - rethinkdb-server0 + depends_on: + - rethinkdb-server0 + + rethinkdb-proxy0: + tty: true + image: ${RETHINKDB_IMAGE} + container_name: rethinkdb-proxy0 + command: rethinkdb proxy --join rethinkdb-server0:29015 --bind all + links: + - rethinkdb-server0 + depends_on: + - rethinkdb-server0 ports: - - ${RETHINKDB_PORT}:28015 - - 8080:8080 # Not used by tests, but allows accessing the RethinkDB admin web UI at 'localhost:8080' on the host machine. 
+ - ${RETHINKDB_PROXY_PORT}:28015 diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index a4b50ddc9d3f4..9d96f020776a4 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -2,28 +2,34 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import os -from typing import Any, Dict, Iterator +from typing import Any, Dict, Iterator, List import pytest from datadog_checks.dev import docker_run -from .common import CONTAINER_NAME, HERE, IMAGE, PORT, SERVER_NAME +from .common import HERE, IMAGE, CONNECT_SERVER_PORT, PROXY_PORT @pytest.fixture(scope='session') def dd_environment(): - # type: () -> Iterator[dict] + # type: () -> Iterator[Dict[str, Any]] compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') env_vars = { - 'RETHINKDB_PORT': str(PORT), 'RETHINKDB_IMAGE': IMAGE, - 'RETHINKDB_CONTAINER_NAME': CONTAINER_NAME, - 'RETHINKDB_SERVER_NAME': SERVER_NAME, - } + 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), + 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), + } # type: Dict[str, str] - with docker_run(compose_file, env_vars=env_vars, log_patterns=[r'Server ready.*']): + log_patterns = [ + r'Server ready, "server0".*', + r'Connected to server "server1".*', + r'Connected to server "server2".*', + r'Connected to proxy.*', + ] # type: List[str] + + with docker_run(compose_file, env_vars=env_vars, log_patterns=log_patterns): instance = {} # type: Dict[str, Any] config = {'instances': [instance]} yield config diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index db5cb85ba8f52..b8716eca41c98 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -8,7 +8,7 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck -from .common import METRICS +from .common import CLUSTER_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVERS, CONNECT_SERVER 
@pytest.mark.integration @@ -19,9 +19,13 @@ def test_check(aggregator): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) - for metric in METRICS: - aggregator.assert_metric(metric) + for metric in CLUSTER_STATISTICS_METRICS: + aggregator.assert_metric(metric, tags=[]) + + for metric in SERVER_STATISTICS_METRICS: + for server in SERVERS: + aggregator.assert_metric(metric, tags=['server:{}'.format(server)]) aggregator.assert_all_metrics_covered() - aggregator.assert_service_check('rethinkdb.can_connect', count=1) + aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=['server:{}'.format(CONNECT_SERVER)]) From 2c8cbc741def3906ffe6dfc762057df5220d394f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 14:00:14 +0100 Subject: [PATCH 021/147] Turn rates into gauges --- .../rethinkdb/_default_metrics/_statistics.py | 16 ++++++++-------- rethinkdb/datadog_checks/rethinkdb/_types.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index 5fca013d084ab..f8de1b0f365eb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -38,21 +38,21 @@ def _collect_cluster_statistics(conn): query_engine = stats['query_engine'] yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.cluster.queries_per_sec', 'value': query_engine['queries_per_sec'], 'tags': [], } yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.cluster.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': [], } yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.cluster.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': [], @@ -83,7 +83,7 @@ def _collect_servers_statistics(conn): } yield { - 'type': 'rate', + 
'type': 'gauge', 'name': 'rethinkdb.stats.server.queries_per_sec', 'value': query_engine['queries_per_sec'], 'tags': tags, @@ -97,7 +97,7 @@ def _collect_servers_statistics(conn): } yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.server.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': tags, @@ -111,7 +111,7 @@ def _collect_servers_statistics(conn): } yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.server.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': tags, @@ -140,14 +140,14 @@ def _collect_table_statistics(conn): tags = ['table:{}'.format(name), 'database:{}'.format(database)] yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.table.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': tags, } yield { - 'type': 'rate', + 'type': 'gauge', 'name': 'rethinkdb.stats.table.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': tags, diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index a9854ff8f87cb..05b5146620265 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -10,7 +10,7 @@ # Lightweight shim to decouple collection functions from the check class. 
Metric = TypedDict( - 'Metric', {'type': Literal['rate', 'gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} + 'Metric', {'type': Literal['gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} ) From a32961a90c3f494684f133fea554b12f432cb898 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 14:27:33 +0100 Subject: [PATCH 022/147] Add logging to stats collection funcs --- .../rethinkdb/_default_metrics/_statistics.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index f8de1b0f365eb..9249d749f1e99 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -5,6 +5,7 @@ from __future__ import absolute_import import itertools +import logging from typing import Iterator import rethinkdb @@ -12,6 +13,8 @@ from .._queries import query_cluster_stats, query_servers_with_stats from .._types import Metric, Table, TableStats +logger = logging.getLogger(__name__) + def collect_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] @@ -34,6 +37,7 @@ def collect_statistics(conn): def _collect_cluster_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] stats = query_cluster_stats(conn) + logger.debug('cluster_statistics stats=%r', stats) query_engine = stats['query_engine'] @@ -62,6 +66,8 @@ def _collect_cluster_statistics(conn): def _collect_servers_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] for server, stats in query_servers_with_stats(conn): + logger.debug('server_statistics server=%r, stats=%r', server, stats) + name = server['name'] server_tags = server['tags'] query_engine = stats['query_engine'] From 9ade5d3a8750dd98fdf32eed9e28f4738bbfef2e Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 
2020 14:29:39 +0100 Subject: [PATCH 023/147] Check server tags, submit server tag in service check --- rethinkdb/datadog_checks/rethinkdb/_types.py | 3 +++ .../datadog_checks/rethinkdb/rethinkdb.py | 23 +++++++++++++------ rethinkdb/tests/common.py | 1 + rethinkdb/tests/compose/docker-compose.yaml | 4 ++-- rethinkdb/tests/conftest.py | 2 +- rethinkdb/tests/test_rethinkdb.py | 8 ++++--- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 05b5146620265..6ed7fec9df4d2 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -62,3 +62,6 @@ # NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. # See: https://github.com/python/mypy/issues/3863 EqJoinRow = TypedDict('EqJoinRow', {'left': Any, 'right': Any}) + +# See: https://rethinkdb.com/api/python/server +ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 67de86273e71e..d1da8b18a0e8b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,14 +6,14 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Callable, Dict, Iterator +from typing import Any, Callable, Dict, Iterator, List import rethinkdb from datadog_checks.base import AgentCheck from ._default_metrics import collect_default_metrics -from ._types import Metric +from ._types import ConnectionServer, Metric class RethinkDBCheck(AgentCheck): @@ -25,8 +25,9 @@ class RethinkDBCheck(AgentCheck): def check(self, instance): # type: (Dict[str, Any]) -> None - with self.__submit_service_check(): + with self.__submit_service_check() as on_connection_established: with 
rethinkdb.r.connect(db='rethinkdb', host='localhost', port=28015) as conn: + on_connection_established(conn) for metric in collect_default_metrics(conn): self.__submit_metric(metric) @@ -34,14 +35,22 @@ def check(self, instance): @contextmanager def __submit_service_check(self): - # type: () -> Iterator[None] + # type: () -> Iterator[Callable[[rethinkdb.net.Connection], None]] + tags = [] # type: List[str] + + def on_connection_established(conn): + # type: (rethinkdb.net.Connection) -> None + server = conn.server() # type: ConnectionServer + tags.append('server:{}'.format(server['name'])) + # TODO: add a 'proxy' tag if server is a proxy? + try: - yield + yield on_connection_established except rethinkdb.errors.ReqlDriverError: - self.service_check('rethinkdb.can_connect', self.CRITICAL) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) raise else: - self.service_check('rethinkdb.can_connect', self.OK) + self.service_check('rethinkdb.can_connect', self.OK, tags=tags) def __submit_metric(self, metric): # type: (Metric) -> None diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 5e284b52f091e..3ce0cc00f5c5b 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -15,6 +15,7 @@ HOST = get_docker_hostname() SERVERS = ['server0', 'server1', 'server2'] +SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'us'], 'server2': ['default', 'eu']} CONNECT_SERVER_NAME = 'server0' CONNECT_SERVER_PORT = 28015 PROXY_PORT = 28018 diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 016652b927ae5..b1115b0cce1cd 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -7,7 +7,7 @@ services: tty: true # Required otherwise RethinkDB won't output any logs. 
image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server0 - command: rethinkdb --bind all --server-name server0 --server-tag default --server-tag us + command: rethinkdb --bind all --server-name server0 --server-tag us ports: - ${RETHINKDB_CONNECT_SERVER_PORT}:28015 # Client driver port. - 8080:8080 # Port for the web UI. Debugging only (not used by tests). @@ -26,7 +26,7 @@ services: tty: true image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server2 - command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server2 --server-tag us + command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server2 --server-tag eu links: - rethinkdb-server0 depends_on: diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 9d96f020776a4..7e08f513aaf78 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -8,7 +8,7 @@ from datadog_checks.dev import docker_run -from .common import HERE, IMAGE, CONNECT_SERVER_PORT, PROXY_PORT +from .common import CONNECT_SERVER_PORT, HERE, IMAGE, PROXY_PORT @pytest.fixture(scope='session') diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index b8716eca41c98..8528e36bcc9cb 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -8,7 +8,7 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck -from .common import CLUSTER_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVERS, CONNECT_SERVER +from .common import CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, SERVER_STATISTICS_METRICS, SERVER_TAGS, SERVERS @pytest.mark.integration @@ -24,8 +24,10 @@ def test_check(aggregator): for metric in SERVER_STATISTICS_METRICS: for server in SERVERS: - aggregator.assert_metric(metric, tags=['server:{}'.format(server)]) + tags = ['server:{}'.format(server)] + SERVER_TAGS[server] + aggregator.assert_metric(metric, tags=tags) 
aggregator.assert_all_metrics_covered() - aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=['server:{}'.format(CONNECT_SERVER)]) + service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=service_check_tags) From 592766c498ad8e4b9142d83841bc500bab186989 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 14:49:05 +0100 Subject: [PATCH 024/147] Make host and port instance-configurable, improve service check status --- rethinkdb/datadog_checks/rethinkdb/_config.py | 27 +++++++++++++++++++ .../datadog_checks/rethinkdb/rethinkdb.py | 23 ++++++++++++---- rethinkdb/tests/conftest.py | 16 ++++++++--- rethinkdb/tests/test_rethinkdb.py | 7 +++-- 4 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/_config.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py new file mode 100644 index 0000000000000..e83e95cd83155 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -0,0 +1,27 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from typing import Any, Dict + +from datadog_checks.base import ConfigurationError + + +class Config: + def __init__(self, instance): + # type: (Dict[str, Any]) -> None + host = instance.get('host', 'localhost') + port = instance.get('port', 28015) + + if not isinstance(host, str): + raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) + + if not isinstance(port, int): + raise ConfigurationError('port must be an integer (got {!r})'.format(type(port))) + + self.host = host # type: str + self.port = port # type: int + + def __repr__(self): + # type: () -> str + return ''.format(host=self.host, port=self.port) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index d1da8b18a0e8b..a13ea79d5fd54 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -12,6 +12,7 @@ from datadog_checks.base import AgentCheck +from ._config import Config from ._default_metrics import collect_default_metrics from ._types import ConnectionServer, Metric @@ -23,16 +24,25 @@ class RethinkDBCheck(AgentCheck): A set of default metrics is collected from system tables. """ + # NOTE: use of private names (double underscores, e.g. '__member') prevents name clashes with the base class. + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__config = Config(self.instance) # type: Config # (Mypy is confused without this hint... 
:wtf:) + def check(self, instance): # type: (Dict[str, Any]) -> None + self.log.debug('check config=%r', self.__config) + + host = self.__config.host + port = self.__config.port + with self.__submit_service_check() as on_connection_established: - with rethinkdb.r.connect(db='rethinkdb', host='localhost', port=28015) as conn: + with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: on_connection_established(conn) for metric in collect_default_metrics(conn): self.__submit_metric(metric) - # NOTE: usage of private methods (double underscores) prevents name clashes with the base class. - @contextmanager def __submit_service_check(self): # type: () -> Iterator[Callable[[rethinkdb.net.Connection], None]] @@ -46,9 +56,12 @@ def on_connection_established(conn): try: yield on_connection_established - except rethinkdb.errors.ReqlDriverError: + except rethinkdb.errors.ReqlDriverError as exc: + self.log.error('Could not connect to RethinkDB server: %r', exc) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) + except Exception as exc: + self.log.error('Unexpected error while executing RethinkDB check: %r', exc) self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) - raise else: self.service_check('rethinkdb.can_connect', self.OK, tags=tags) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 7e08f513aaf78..1c6dc8d4eb570 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -8,12 +8,21 @@ from datadog_checks.dev import docker_run -from .common import CONNECT_SERVER_PORT, HERE, IMAGE, PROXY_PORT +from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT @pytest.fixture(scope='session') -def dd_environment(): - # type: () -> Iterator[Dict[str, Any]] +def instance(): + # type: () -> Dict[str, Any] + return { + 'host': HOST, + 'port': CONNECT_SERVER_PORT, + } + + +@pytest.fixture(scope='session') +def dd_environment(instance): + # type: (Dict[str, Any]) -> 
Iterator[Dict[str, Any]] compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') env_vars = { @@ -30,6 +39,5 @@ def dd_environment(): ] # type: List[str] with docker_run(compose_file, env_vars=env_vars, log_patterns=log_patterns): - instance = {} # type: Dict[str, Any] config = {'instances': [instance]} yield config diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 8528e36bcc9cb..2e9f338bf9e85 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -13,9 +13,8 @@ @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_check(aggregator): - # type: (AggregatorStub) -> None - instance = {} # type: Dict[str, Any] +def test_check(aggregator, instance): + # type: (AggregatorStub, Dict[str, Any]) -> None check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) @@ -30,4 +29,4 @@ def test_check(aggregator): aggregator.assert_all_metrics_covered() service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] - aggregator.assert_service_check('rethinkdb.can_connect', count=1, tags=service_check_tags) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) From 46c28b60d85a9698d94a1354d4d61d48243bbc1c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 14:58:01 +0100 Subject: [PATCH 025/147] Add config unit tests --- rethinkdb/datadog_checks/rethinkdb/_config.py | 7 ++- rethinkdb/tests/unit/__init__.py | 3 ++ rethinkdb/tests/unit/test_config.py | 44 +++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 rethinkdb/tests/unit/__init__.py create mode 100644 rethinkdb/tests/unit/test_config.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index e83e95cd83155..b50515ebe03b0 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -16,12 
+16,15 @@ def __init__(self, instance): if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) - if not isinstance(port, int): + if isinstance(port, bool) or not isinstance(port, int): raise ConfigurationError('port must be an integer (got {!r})'.format(type(port))) + if port < 0: + raise ConfigurationError('port must be positive (got {!r})'.format(port)) + self.host = host # type: str self.port = port # type: int def __repr__(self): # type: () -> str - return ''.format(host=self.host, port=self.port) + return 'Config(host={host!r}, port={port!r})'.format(host=self.host, port=self.port) diff --git a/rethinkdb/tests/unit/__init__.py b/rethinkdb/tests/unit/__init__.py new file mode 100644 index 0000000000000..46dd167dcde48 --- /dev/null +++ b/rethinkdb/tests/unit/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py new file mode 100644 index 0000000000000..de418188481e2 --- /dev/null +++ b/rethinkdb/tests/unit/test_config.py @@ -0,0 +1,44 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from typing import Any + +import pytest + +from datadog_checks.base import ConfigurationError +from datadog_checks.rethinkdb._config import Config + + +def test_default_config(): + # type: () -> None + config = Config(instance={}) + assert config.host == 'localhost' + assert config.port == 28015 + + +def test_config(): + # type: () -> None + config = Config(instance={'host': '192.168.121.1', 'port': 28016}) + assert config.host == '192.168.121.1' + assert config.port == 28016 + + +def test_config_repr(): + # type: () -> None + config = Config(instance={}) + assert repr(config) == "Config(host='localhost', port=28015)" + + +@pytest.mark.parametrize('host', [42, True, object()]) +def test_invalid_host(host): + # type: (Any) -> None + with pytest.raises(ConfigurationError): + Config(instance={'host': host}) + + +@pytest.mark.parametrize('port', [42.42, -42, True, object()]) +def test_invalid_port(port): + # type: (Any) -> None + with pytest.raises(ConfigurationError): + Config(instance={'port': port}) From a1f7d7ea4282c2531047a230119972066586a994 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 15:28:56 +0100 Subject: [PATCH 026/147] Test service check failure modes, add more logging --- rethinkdb/datadog_checks/rethinkdb/_config.py | 12 ++++-- rethinkdb/datadog_checks/rethinkdb/_types.py | 3 ++ .../datadog_checks/rethinkdb/rethinkdb.py | 31 +++++++++++---- rethinkdb/tests/conftest.py | 5 ++- rethinkdb/tests/test_rethinkdb.py | 39 ++++++++++++++++++- rethinkdb/tests/unit/test_config.py | 2 + 6 files changed, 77 insertions(+), 15 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index b50515ebe03b0..adc51e6402738 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -2,14 +2,20 @@ # All rights reserved # Licensed under a 
3-clause BSD style license (see LICENSE) -from typing import Any, Dict - from datadog_checks.base import ConfigurationError +from ._types import Instance + class Config: + """ + Hold instance configuration for a RethinkDB check. + + Encapsulates the validation of an `instance` dictionary while improving type information. + """ + def __init__(self, instance): - # type: (Dict[str, Any]) -> None + # type: (Instance) -> None host = instance.get('host', 'localhost') port = instance.get('port', 28015) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 6ed7fec9df4d2..f4f1aa5e4fe70 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -13,6 +13,9 @@ 'Metric', {'type': Literal['gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} ) +# Expected shape of an `instance` dictionary. +Instance = TypedDict('Instance', {'host': str, 'port': int}, total=False) + # Configuration documents. # See: https://rethinkdb.com/docs/system-tables/#configuration-tables diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index a13ea79d5fd54..4a7cac8c356ca 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Callable, Dict, Iterator, List +from typing import Callable, Iterator, List import rethinkdb @@ -14,7 +14,7 @@ from ._config import Config from ._default_metrics import collect_default_metrics -from ._types import ConnectionServer, Metric +from ._types import ConnectionServer, Instance, Metric class RethinkDBCheck(AgentCheck): @@ -27,22 +27,34 @@ class RethinkDBCheck(AgentCheck): # NOTE: use of private names (double underscores, e.g. '__member') prevents name clashes with the base class. 
def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + super(RethinkDBCheck, self).__init__(*args, **kwargs) + self.__config = Config(self.instance) # type: Config # (Mypy is confused without this hint... :wtf:) + # NOTE: this list is exposed for testing purposes. + self._metric_collectors = [] # type: List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] + self._metric_collectors.append(collect_default_metrics) + def check(self, instance): - # type: (Dict[str, Any]) -> None - self.log.debug('check config=%r', self.__config) + # type: (Instance) -> None + config = self.__config + self.log.debug('check config=%r', config) - host = self.__config.host - port = self.__config.port + host = config.host + port = config.port with self.__submit_service_check() as on_connection_established: with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: on_connection_established(conn) - for metric in collect_default_metrics(conn): + for metric in self.__collect_metrics(conn): self.__submit_metric(metric) + def __collect_metrics(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + for collect in self._metric_collectors: + for metric in collect(conn): + yield metric + @contextmanager def __submit_service_check(self): # type: () -> Iterator[Callable[[rethinkdb.net.Connection], None]] @@ -51,6 +63,7 @@ def __submit_service_check(self): def on_connection_established(conn): # type: (rethinkdb.net.Connection) -> None server = conn.server() # type: ConnectionServer + self.log.debug('connected server=%r', server) tags.append('server:{}'.format(server['name'])) # TODO: add a 'proxy' tag if server is a proxy? 
@@ -63,10 +76,12 @@ def on_connection_established(conn): self.log.error('Unexpected error while executing RethinkDB check: %r', exc) self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) else: + self.log.debug('service_check OK') self.service_check('rethinkdb.can_connect', self.OK, tags=tags) def __submit_metric(self, metric): # type: (Metric) -> None + self.log.debug('submit_metric metric=%r', metric) submit = getattr(self, metric['type']) # type: Callable submit(metric['name'], value=metric['value'], tags=metric['tags']) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 1c6dc8d4eb570..7f4912707b549 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -7,13 +7,14 @@ import pytest from datadog_checks.dev import docker_run +from datadog_checks.rethinkdb._types import Instance from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT @pytest.fixture(scope='session') def instance(): - # type: () -> Dict[str, Any] + # type: () -> Instance return { 'host': HOST, 'port': CONNECT_SERVER_PORT, @@ -22,7 +23,7 @@ def instance(): @pytest.fixture(scope='session') def dd_environment(instance): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Instance) -> Iterator[Dict[str, Any]] compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') env_vars = { diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 2e9f338bf9e85..51b10beffa801 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -1,12 +1,17 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Dict +from __future__ import absolute_import + +import copy +from typing import Iterator import pytest +import rethinkdb from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck +from datadog_checks.rethinkdb._types import Instance, Metric from .common import CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, SERVER_STATISTICS_METRICS, SERVER_TAGS, SERVERS @@ -14,7 +19,7 @@ @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_check(aggregator, instance): - # type: (AggregatorStub, Dict[str, Any]) -> None + # type: (AggregatorStub, Instance) -> None check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) @@ -30,3 +35,33 @@ def test_check(aggregator, instance): service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_cannot_connect_unknown_host(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = copy.deepcopy(instance) + instance['host'] = 'doesnotexist' + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check(instance) + + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=[]) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_connected_but_check_failed(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + def collect_and_fail(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} + raise RuntimeError('Oops!') + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check._metric_collectors.append(collect_and_fail) + 
check.check(instance) + + service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index de418188481e2..05e6d23ccc625 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -9,6 +9,8 @@ from datadog_checks.base import ConfigurationError from datadog_checks.rethinkdb._config import Config +pytestmark = pytest.mark.unit + def test_default_config(): # type: () -> None From 106b73752f6dc1388a60c1544a675c54ca914e8a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 15:31:32 +0100 Subject: [PATCH 027/147] Fix mypy Config type detection --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 4a7cac8c356ca..aa31cffc62585 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Callable, Iterator, List +from typing import Any, Callable, Iterator, List import rethinkdb @@ -27,9 +27,10 @@ class RethinkDBCheck(AgentCheck): # NOTE: use of private names (double underscores, e.g. '__member') prevents name clashes with the base class. def __init__(self, *args, **kwargs): + # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) - self.__config = Config(self.instance) # type: Config # (Mypy is confused without this hint... :wtf:) + self.__config = Config(self.instance) # NOTE: this list is exposed for testing purposes. 
self._metric_collectors = [] # type: List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] From 55524aa4c59e5f6ab6f3c7de152a1d78bf4a2f27 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 16:36:22 +0100 Subject: [PATCH 028/147] Setup test data, fix and test table metrics --- .../rethinkdb/_default_metrics/_statistics.py | 11 +-- .../datadog_checks/rethinkdb/_queries.py | 25 +++++- rethinkdb/tests/common.py | 35 ++++++++- rethinkdb/tests/conftest.py | 78 +++++++++++++++++-- rethinkdb/tests/test_rethinkdb.py | 16 +++- 5 files changed, 146 insertions(+), 19 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index 9249d749f1e99..090f50e03610e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -10,8 +10,8 @@ import rethinkdb -from .._queries import query_cluster_stats, query_servers_with_stats -from .._types import Metric, Table, TableStats +from .._queries import query_cluster_stats, query_servers_with_stats, query_tables_with_stats +from .._types import Metric logger = logging.getLogger(__name__) @@ -133,11 +133,8 @@ def _collect_servers_statistics(conn): def _collect_table_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - tables = rethinkdb.r.table('table_config').run(conn) # type: Iterator[Table] - - for table in tables: - # TODO: get rid of N+1 query problem. 
- stats = rethinkdb.r.table('stats').get(['table', table['id']]).run(conn) # type: TableStats + for table, stats in query_tables_with_stats(conn): + logger.debug('table_statistics table=%r, stats=%r', table, stats) name = table['name'] database = table['db'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 3a455da2da93f..dbb6625018950 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -8,7 +8,7 @@ import rethinkdb -from ._types import ClusterStats, EqJoinRow, Server, ServerStats +from ._types import ClusterStats, EqJoinRow, Server, ServerStats, Table, TableStats def query_cluster_stats(conn): @@ -33,7 +33,7 @@ def query_servers_with_stats(conn): def _join_on_server_id(server_stats): # type: (rethinkdb.ast.RqlQuery) -> str server_stats_id = server_stats['id'] # ['server', ''] - return server_stats_id.nth(1) + return server_stats_id.nth(1) # '' rows = ( rethinkdb.r.table('stats').eq_join(_join_on_server_id, rethinkdb.r.table('server_config')).run(conn) @@ -43,3 +43,24 @@ def _join_on_server_id(server_stats): stats = row['left'] # type: ServerStats server = row['right'] # type: Server yield server, stats + + +def query_tables_with_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] + """ + Retrieve each table in the cluster along with its statistics. 
+ """ + + def _join_on_table_id(table_stats): + # type: (rethinkdb.ast.RqlQuery) -> str + table_stats_id = table_stats['id'] # ['table', ''] + return table_stats_id.nth(1) # '' + + rows = ( + rethinkdb.r.table('stats').eq_join(_join_on_table_id, rethinkdb.r.table('table_config')).run(conn) + ) # type: Iterator[EqJoinRow] + + for row in rows: + stats = row['left'] # type: TableStats + table = row['right'] # type: Table + yield table, stats diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 3ce0cc00f5c5b..bafd78a567678 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -16,10 +16,40 @@ SERVERS = ['server0', 'server1', 'server2'] SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'us'], 'server2': ['default', 'eu']} + CONNECT_SERVER_NAME = 'server0' CONNECT_SERVER_PORT = 28015 + PROXY_PORT = 28018 +DATABASE = 'doghouse' + +HEROES_TABLE = 'heroes' +# TODO: add some indexes +HEROES_TABLE_OPTIONS = {'shards': 2, 'replicas': 3} +HEROES_INITIAL_DOCUMENTS = [ + { + "hero": "Magneto", + "name": "Max Eisenhardt", + "aka": ["Magnus", "Erik Lehnsherr", "Lehnsherr"], + "magazine_titles": ["Alpha Flight", "Avengers", "Avengers West Coast"], + "appearances_count": 42, + }, + { + "hero": "Professor Xavier", + "name": "Charles Francis Xavier", + "magazine_titles": ["Alpha Flight", "Avengers", "Bishop", "Defenders"], + "appearances_count": 72, + }, + { + "hero": "Storm", + "name": "Ororo Monroe", + "magazine_titles": ["Amazing Spider-Man vs. Wolverine", "Excalibur", "Fantastic Four", "Iron Fist"], + "appearances_count": 72, + }, +] +NUM_FAMOUS_HEROES = 2 + CLUSTER_STATISTICS_METRICS = ( 'rethinkdb.stats.cluster.queries_per_sec', 'rethinkdb.stats.cluster.read_docs_per_sec', @@ -38,11 +68,10 @@ ) # WIP -# TODO: add a database, tables and replicas to the Docker Compose setup. 
TABLE_STATISTICS_METRICS = ( - # 'rethinkdb.stats.table.read_docs_per_sec', - # 'rethinkdb.stats.table.written_docs_per_sec', + 'rethinkdb.stats.table.read_docs_per_sec', + 'rethinkdb.stats.table.written_docs_per_sec', ) REPLICA_STATISTICS_METRICS = ( diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 7f4912707b549..2314a8b895444 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -1,15 +1,32 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import absolute_import + import os -from typing import Any, Dict, Iterator, List +from typing import Dict, Iterator, List import pytest +import rethinkdb -from datadog_checks.dev import docker_run +from datadog_checks.dev import WaitFor, docker_run from datadog_checks.rethinkdb._types import Instance -from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT +from .common import ( + CONNECT_SERVER_PORT, + DATABASE, + HERE, + HEROES_INITIAL_DOCUMENTS, + HEROES_TABLE, + HEROES_TABLE_OPTIONS, + HOST, + IMAGE, + NUM_FAMOUS_HEROES, + PROXY_PORT, +) + +E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} @pytest.fixture(scope='session') @@ -21,9 +38,52 @@ def instance(): } +def create_tables(): + # type: () -> None + with rethinkdb.r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + # See: https://rethinkdb.com/api/python/db_create + response = rethinkdb.r.db_create(DATABASE).run(conn) + assert response['dbs_created'] == 1 + + table = HEROES_TABLE + options = HEROES_TABLE_OPTIONS + + # See: https://rethinkdb.com/api/python/table_create/ + response = rethinkdb.r.db(DATABASE).table_create(table, **options).run(conn) + assert response['tables_created'] == 1 + + +def simulate_client_writes(): + # type: () -> None + """ + Simulate a client application that inserts rows by connecting via the proxy node. 
+ """ + with rethinkdb.r.connect(host=HOST, port=PROXY_PORT) as conn: + table = HEROES_TABLE + documents = HEROES_INITIAL_DOCUMENTS + + # See: https://rethinkdb.com/api/python/insert + response = rethinkdb.r.db(DATABASE).table(table).insert(documents).run(conn) + assert response['errors'] == 0 + assert response['inserted'] == len(documents) + + +def simulate_client_reads(): + # type: () -> None + """ + Simulate a client application that reads rows by connecting via the proxy node. + """ + with rethinkdb.r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: + all_heroes = list(rethinkdb.r.table('heroes').run(conn)) + assert len(all_heroes) == len(HEROES_INITIAL_DOCUMENTS) + + famous_heroes = list(rethinkdb.r.table('heroes').filter(rethinkdb.r.row['appearances_count'] >= 50).run(conn)) + assert len(famous_heroes) == NUM_FAMOUS_HEROES + + @pytest.fixture(scope='session') def dd_environment(instance): - # type: (Instance) -> Iterator[Dict[str, Any]] + # type: (Instance) -> Iterator compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') env_vars = { @@ -32,6 +92,12 @@ def dd_environment(instance): 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), } # type: Dict[str, str] + conditions = [ + WaitFor(create_tables, attempts=1), + WaitFor(simulate_client_writes, attempts=1), + WaitFor(simulate_client_reads, attempts=1), + ] + log_patterns = [ r'Server ready, "server0".*', r'Connected to server "server1".*', @@ -39,6 +105,6 @@ def dd_environment(instance): r'Connected to proxy.*', ] # type: List[str] - with docker_run(compose_file, env_vars=env_vars, log_patterns=log_patterns): + with docker_run(compose_file, env_vars=env_vars, conditions=conditions, log_patterns=log_patterns): config = {'instances': [instance]} - yield config + yield config, E2E_METADATA diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 51b10beffa801..ed11dd7dd708c 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -13,7 
+13,16 @@ from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb._types import Instance, Metric -from .common import CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, SERVER_STATISTICS_METRICS, SERVER_TAGS, SERVERS +from .common import ( + CLUSTER_STATISTICS_METRICS, + CONNECT_SERVER_NAME, + DATABASE, + HEROES_TABLE, + SERVER_STATISTICS_METRICS, + SERVER_TAGS, + SERVERS, + TABLE_STATISTICS_METRICS, +) @pytest.mark.integration @@ -31,6 +40,11 @@ def test_check(aggregator, instance): tags = ['server:{}'.format(server)] + SERVER_TAGS[server] aggregator.assert_metric(metric, tags=tags) + for metric in TABLE_STATISTICS_METRICS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + aggregator.assert_metric(metric, tags=tags) + # TODO: test shards/replicas. + aggregator.assert_all_metrics_covered() service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] From 58bb8b895aa35531b808c9579561e4cc4eadfe91 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 18:10:00 +0100 Subject: [PATCH 029/147] Simplify usage of of eq_join --- .../datadog_checks/rethinkdb/_queries.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index dbb6625018950..365695b9e1e3e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -25,18 +25,13 @@ def query_servers_with_stats(conn): Retrieve each server in the cluster along with its statistics. """ - # A naive approach would be to query 'server_config', then for each server find the row in 'stats' that - # corresponds to each server's ID. This would lead to the N+1 query problem. - # Instead, we make a single (but more complex) query by joining 'stats' with 'server_config' on the server ID. 
# See: https://rethinkdb.com/api/python/eq_join/ - def _join_on_server_id(server_stats): - # type: (rethinkdb.ast.RqlQuery) -> str - server_stats_id = server_stats['id'] # ['server', ''] - return server_stats_id.nth(1) # '' + # stats['id'] = ['server', ''] -> '' (= server_config['id']) + server_id = rethinkdb.r.row['id'].nth(1) rows = ( - rethinkdb.r.table('stats').eq_join(_join_on_server_id, rethinkdb.r.table('server_config')).run(conn) + rethinkdb.r.table('stats').eq_join(server_id, rethinkdb.r.table('server_config')).run(conn) ) # type: Iterator[EqJoinRow] for row in rows: @@ -51,13 +46,13 @@ def query_tables_with_stats(conn): Retrieve each table in the cluster along with its statistics. """ - def _join_on_table_id(table_stats): - # type: (rethinkdb.ast.RqlQuery) -> str - table_stats_id = table_stats['id'] # ['table', ''] - return table_stats_id.nth(1) # '' + # See: https://rethinkdb.com/api/python/eq_join/ + + # stats['id'] = ['table', ''] -> '' (= table_config['id']) + table_id = rethinkdb.r.row['id'].nth(1) rows = ( - rethinkdb.r.table('stats').eq_join(_join_on_table_id, rethinkdb.r.table('table_config')).run(conn) + rethinkdb.r.table('stats').eq_join(table_id, rethinkdb.r.table('table_config')).run(conn) ) # type: Iterator[EqJoinRow] for row in rows: From 95af7c751531c25bb8835201074b1ab545d17263 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 14 Feb 2020 19:12:38 +0100 Subject: [PATCH 030/147] Collect first replica metric --- .../rethinkdb/_default_metrics/_statistics.py | 27 +++++++++- .../datadog_checks/rethinkdb/_queries.py | 52 ++++++++++++++++--- rethinkdb/datadog_checks/rethinkdb/_types.py | 38 +++++++++++++- rethinkdb/tests/common.py | 7 +-- rethinkdb/tests/compose/docker-compose.yaml | 2 +- rethinkdb/tests/test_rethinkdb.py | 22 ++++++-- 6 files changed, 130 insertions(+), 18 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index 
090f50e03610e..d34c6ff2086ce 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -10,7 +10,7 @@ import rethinkdb -from .._queries import query_cluster_stats, query_servers_with_stats, query_tables_with_stats +from .._queries import query_cluster_stats, query_replica_stats, query_servers_with_stats, query_tables_with_stats from .._types import Metric logger = logging.getLogger(__name__) @@ -159,4 +159,27 @@ def _collect_table_statistics(conn): def _collect_replicas_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - return iter(()) # TODO + for table, server, stats in query_replica_stats(conn): + logger.debug('replica_statistics table=%r server=%r stats=%r', table, server, stats) + + database = stats['db'] + server_name = server['name'] + table_name = table['name'] + server_tags = server['tags'] + query_engine = stats['query_engine'] + # storage_engine = stats['storage_engine'] + + tags = [ + 'table:{}'.format(table_name), + 'database:{}'.format(database), + 'server:{}'.format(server_name), + ] + server_tags + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } + + # TODO: add the rest of metrics diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 365695b9e1e3e..dd1127bb71323 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -8,7 +8,7 @@ import rethinkdb -from ._types import ClusterStats, EqJoinRow, Server, ServerStats, Table, TableStats +from ._types import ClusterStats, JoinRow, ReplicaStats, Server, ServerStats, Table, TableStats def query_cluster_stats(conn): @@ -27,12 +27,16 @@ def query_servers_with_stats(conn): # See: https://rethinkdb.com/api/python/eq_join/ - # stats['id'] = ['server', ''] -> '' (= 
server_config['id']) + # For servers: stats['id'] = ['server', ''] + is_server_stats_row = rethinkdb.r.row['id'].nth(0) == 'server' server_id = rethinkdb.r.row['id'].nth(1) rows = ( - rethinkdb.r.table('stats').eq_join(server_id, rethinkdb.r.table('server_config')).run(conn) - ) # type: Iterator[EqJoinRow] + rethinkdb.r.table('stats') + .filter(is_server_stats_row) + .eq_join(server_id, rethinkdb.r.table('server_config')) + .run(conn) + ) # type: Iterator[JoinRow] for row in rows: stats = row['left'] # type: ServerStats @@ -48,14 +52,48 @@ def query_tables_with_stats(conn): # See: https://rethinkdb.com/api/python/eq_join/ - # stats['id'] = ['table', ''] -> '' (= table_config['id']) + # For tables: stats['id'] = ['table', ''] + + is_table_stats_row = rethinkdb.r.row['id'].nth(0) == 'table' table_id = rethinkdb.r.row['id'].nth(1) rows = ( - rethinkdb.r.table('stats').eq_join(table_id, rethinkdb.r.table('table_config')).run(conn) - ) # type: Iterator[EqJoinRow] + rethinkdb.r.table('stats') + .filter(is_table_stats_row) + .eq_join(table_id, rethinkdb.r.table('table_config')) + .run(conn) + ) # type: Iterator[JoinRow] for row in rows: stats = row['left'] # type: TableStats table = row['right'] # type: Table yield table, stats + + +def query_replica_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ReplicaStats]] + """ + Retrieve each replica (table/server pair) in the cluster along with its statistics. 
+ """ + + # For replicas: stats['id'] = ['table_server', '', 'SERVER_ID'] + + is_table_server_stats_row = rethinkdb.r.row['id'].nth(0) == 'table_server' + table_id = rethinkdb.r.row['id'].nth(1) + server_id = rethinkdb.r.row['left']['id'].nth(2) + + rows = ( + rethinkdb.r.table('stats') + .filter(is_table_server_stats_row) + .eq_join(table_id, rethinkdb.r.table('table_config')) + .eq_join(server_id, rethinkdb.r.table('server_config')) + # TODO: filter entries where + .run(conn) + ) # type: Iterator[JoinRow] + + for row in rows: + join_row = row['left'] # type: JoinRow + stats = join_row['left'] # type: ReplicaStats + table = join_row['right'] # type: Table + server = row['right'] # type: Server + yield table, server, stats diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index f4f1aa5e4fe70..6c4edd5324d59 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -58,13 +58,49 @@ 'TableStats', {'id': Tuple[Literal['table'], str], 'table': str, 'db': str, 'query_engine': TableQueryEngine}, ) +ReplicaQueryEngine = TypedDict( + 'ReplicaQueryEngine', + {'read_docs_per_sec': int, 'read_docs_total': int, 'writen_docs_per_sec': int, 'written_docs_total': int}, +) + +ReplicaCache = TypedDict('ReplicaCache', {'in_use_bytes': int}) + +ReplicaDiskSpaceUsage = TypedDict( + 'ReplicaDiskSpaceUsage', {'metadata_bytes': int, 'data_bytes': int, 'garbage_bytes': int, 'preallocated_bytes': int} +) + +ReplicaDisk = TypedDict( + 'ReplicaDisk', + { + 'read_bytes_per_sec': int, + 'read_bytes_total': int, + 'written_bytes_per_sec': int, + 'written_bytes_total': int, + 'space_usage': ReplicaDiskSpaceUsage, + }, +) + +ReplicaStorageEngine = TypedDict('ReplicaStorageEngine', {'cache': ReplicaCache, 'disk': ReplicaDisk}) + +ReplicaStats = TypedDict( + 'ReplicaStats', + { + 'id': Tuple[Literal['table_server'], str, str], + 'server': str, + 'table': str, + 'db': str, + 'query_engine': 
ReplicaQueryEngine, + 'storage_engine': ReplicaStorageEngine, + }, +) + # ReQL command results. # See: https://rethinkdb.com/api/python/ # NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. # See: https://github.com/python/mypy/issues/3863 -EqJoinRow = TypedDict('EqJoinRow', {'left': Any, 'right': Any}) +JoinRow = TypedDict('JoinRow', {'left': Any, 'right': Any}) # See: https://rethinkdb.com/api/python/server ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index bafd78a567678..aff31f7dc5027 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -15,7 +15,7 @@ HOST = get_docker_hostname() SERVERS = ['server0', 'server1', 'server2'] -SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'us'], 'server2': ['default', 'eu']} +SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'primary', 'us'], 'server2': ['default', 'eu']} CONNECT_SERVER_NAME = 'server0' CONNECT_SERVER_PORT = 28015 @@ -26,7 +26,8 @@ HEROES_TABLE = 'heroes' # TODO: add some indexes -HEROES_TABLE_OPTIONS = {'shards': 2, 'replicas': 3} +HEROES_TABLE_OPTIONS = {'shards': 1, 'replicas': {'primary': 1, 'eu': 1}, 'primary_replica_tag': 'primary'} +HEROES_TABLE_REPLICAS = ['server1', 'server2'] HEROES_INITIAL_DOCUMENTS = [ { "hero": "Magneto", @@ -75,7 +76,7 @@ ) REPLICA_STATISTICS_METRICS = ( - # 'rethinkdb.stats.table_server.read_docs_per_sec', + 'rethinkdb.stats.table_server.read_docs_per_sec', # 'rethinkdb.stats.table_server.read_docs_total', # 'rethinkdb.stats.table_server.written_docs_per_sec', # 'rethinkdb.stats.table_server.written_docs_total', diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index b1115b0cce1cd..fe4bcb086e7d7 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -16,7 
+16,7 @@ services: tty: true image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server1 - command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server1 --server-tag us + command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server1 --server-tag us --server-tag primary links: - rethinkdb-server0 depends_on: diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index ed11dd7dd708c..8c19062240570 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -18,6 +18,8 @@ CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, + HEROES_TABLE_REPLICAS, + REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_TAGS, SERVERS, @@ -33,17 +35,29 @@ def test_check(aggregator, instance): check.check(instance) for metric in CLUSTER_STATISTICS_METRICS: - aggregator.assert_metric(metric, tags=[]) + aggregator.assert_metric(metric, count=1, tags=[]) for metric in SERVER_STATISTICS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] + SERVER_TAGS[server] - aggregator.assert_metric(metric, tags=tags) + aggregator.assert_metric(metric, count=1, tags=tags) for metric in TABLE_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_metric(metric, tags=tags) - # TODO: test shards/replicas. + aggregator.assert_metric(metric, count=1, tags=tags) + + for metric in REPLICA_STATISTICS_METRICS: + for server in HEROES_TABLE_REPLICAS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] + tags.extend(SERVER_TAGS[server]) + aggregator.assert_metric(metric, count=1, tags=tags) + + for server in SERVERS: + if server not in HEROES_TABLE_REPLICAS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] + tags.extend(SERVER_TAGS[server]) + # Make sure servers that aren't replicas for the table don't yield metrics. 
+ aggregator.assert_metric(metric, count=0, tags=tags) aggregator.assert_all_metrics_covered() From b49c4a7cf8ee1eb881f76c7fc7f301f78ad60b42 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 10:39:03 +0100 Subject: [PATCH 031/147] Update disk metric names --- rethinkdb/tests/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index aff31f7dc5027..d635b1ee32e3b 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -85,10 +85,10 @@ # 'rethinkdb.stats.table_server.disk.read_bytes_total', # 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', # 'rethinkdb.stats.table_server.disk.written_bytes_total', - # 'rethinkdb.stats.table_server.disk.space_usage.metadata_bytes', - # 'rethinkdb.stats.table_server.disk.space_usage.data_bytes', - # 'rethinkdb.stats.table_server.disk.space_usage.garbage_bytes', - # 'rethinkdb.stats.table_server.disk.space_usage.preallocated_bytes', + # 'rethinkdb.stats.table_server.disk.metadata_bytes', + # 'rethinkdb.stats.table_server.disk.data_bytes', + # 'rethinkdb.stats.table_server.disk.garbage_bytes', + # 'rethinkdb.stats.table_server.disk.preallocated_bytes', ) TABLE_STATUS_METRICS = ( From 97dce7d7d6baf8c4a6ec90d5eb5e9140390cfe09 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 10:47:50 +0100 Subject: [PATCH 032/147] Add the rest of replica stats metrics --- .../rethinkdb/_default_metrics/_statistics.py | 86 ++++++++++++++++++- rethinkdb/datadog_checks/rethinkdb/_types.py | 2 +- rethinkdb/tests/common.py | 30 +++---- rethinkdb/tests/test_rethinkdb.py | 14 +-- 4 files changed, 107 insertions(+), 25 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index d34c6ff2086ce..22336788d09ea 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ 
b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -167,7 +167,7 @@ def _collect_replicas_statistics(conn): table_name = table['name'] server_tags = server['tags'] query_engine = stats['query_engine'] - # storage_engine = stats['storage_engine'] + storage_engine = stats['storage_engine'] tags = [ 'table:{}'.format(table_name), @@ -182,4 +182,86 @@ def _collect_replicas_statistics(conn): 'tags': tags, } - # TODO: add the rest of metrics + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.read_docs_total', + 'value': query_engine['read_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.written_docs_total', + 'value': query_engine['written_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.cache.in_use_bytes', + 'value': storage_engine['cache']['in_use_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', + 'value': storage_engine['disk']['read_bytes_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.read_bytes_total', + 'value': storage_engine['disk']['read_bytes_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', + 'value': storage_engine['disk']['written_bytes_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.written_bytes_total', + 'value': storage_engine['disk']['written_bytes_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.metadata_bytes', + 'value': storage_engine['disk']['space_usage']['metadata_bytes'], + 'tags': tags, + } + + yield { + 
'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.data_bytes', + 'value': storage_engine['disk']['space_usage']['data_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.garbage_bytes', + 'value': storage_engine['disk']['space_usage']['garbage_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.preallocated_bytes', + 'value': storage_engine['disk']['space_usage']['preallocated_bytes'], + 'tags': tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 6c4edd5324d59..ea28b95d47ea5 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -60,7 +60,7 @@ ReplicaQueryEngine = TypedDict( 'ReplicaQueryEngine', - {'read_docs_per_sec': int, 'read_docs_total': int, 'writen_docs_per_sec': int, 'written_docs_total': int}, + {'read_docs_per_sec': int, 'read_docs_total': int, 'written_docs_per_sec': int, 'written_docs_total': int}, ) ReplicaCache = TypedDict('ReplicaCache', {'in_use_bytes': int}) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index d635b1ee32e3b..e2b54900db569 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -14,7 +14,7 @@ HOST = get_docker_hostname() -SERVERS = ['server0', 'server1', 'server2'] +SERVERS = {'server0', 'server1', 'server2'} SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'primary', 'us'], 'server2': ['default', 'eu']} CONNECT_SERVER_NAME = 'server0' @@ -27,7 +27,7 @@ HEROES_TABLE = 'heroes' # TODO: add some indexes HEROES_TABLE_OPTIONS = {'shards': 1, 'replicas': {'primary': 1, 'eu': 1}, 'primary_replica_tag': 'primary'} -HEROES_TABLE_REPLICAS = ['server1', 'server2'] +HEROES_TABLE_REPLICAS = {'server1', 'server2'} HEROES_INITIAL_DOCUMENTS = [ { "hero": "Magneto", @@ -68,8 +68,6 @@ 'rethinkdb.stats.server.clients_active', # NOTE: sent, but not 
documented on the RethinkDB website. ) -# WIP - TABLE_STATISTICS_METRICS = ( 'rethinkdb.stats.table.read_docs_per_sec', 'rethinkdb.stats.table.written_docs_per_sec', @@ -77,18 +75,18 @@ REPLICA_STATISTICS_METRICS = ( 'rethinkdb.stats.table_server.read_docs_per_sec', - # 'rethinkdb.stats.table_server.read_docs_total', - # 'rethinkdb.stats.table_server.written_docs_per_sec', - # 'rethinkdb.stats.table_server.written_docs_total', - # 'rethinkdb.stats.table_server.cache.in_use_bytes', - # 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', - # 'rethinkdb.stats.table_server.disk.read_bytes_total', - # 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', - # 'rethinkdb.stats.table_server.disk.written_bytes_total', - # 'rethinkdb.stats.table_server.disk.metadata_bytes', - # 'rethinkdb.stats.table_server.disk.data_bytes', - # 'rethinkdb.stats.table_server.disk.garbage_bytes', - # 'rethinkdb.stats.table_server.disk.preallocated_bytes', + 'rethinkdb.stats.table_server.read_docs_total', + 'rethinkdb.stats.table_server.written_docs_per_sec', + 'rethinkdb.stats.table_server.written_docs_total', + 'rethinkdb.stats.table_server.cache.in_use_bytes', + 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', + 'rethinkdb.stats.table_server.disk.read_bytes_total', + 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', + 'rethinkdb.stats.table_server.disk.written_bytes_total', + 'rethinkdb.stats.table_server.disk.metadata_bytes', + 'rethinkdb.stats.table_server.disk.data_bytes', + 'rethinkdb.stats.table_server.disk.garbage_bytes', + 'rethinkdb.stats.table_server.disk.preallocated_bytes', ) TABLE_STATUS_METRICS = ( diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 8c19062240570..88f82edd05aa1 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -46,18 +46,20 @@ def test_check(aggregator, instance): tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] 
aggregator.assert_metric(metric, count=1, tags=tags) + assert len(HEROES_TABLE_REPLICAS) > 0 + NON_REPLICA_SERVERS = SERVERS - HEROES_TABLE_REPLICAS + assert len(NON_REPLICA_SERVERS) > 0 + for metric in REPLICA_STATISTICS_METRICS: for server in HEROES_TABLE_REPLICAS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] tags.extend(SERVER_TAGS[server]) aggregator.assert_metric(metric, count=1, tags=tags) - for server in SERVERS: - if server not in HEROES_TABLE_REPLICAS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] - tags.extend(SERVER_TAGS[server]) - # Make sure servers that aren't replicas for the table don't yield metrics. - aggregator.assert_metric(metric, count=0, tags=tags) + for server in NON_REPLICA_SERVERS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] + tags.extend(SERVER_TAGS[server]) + aggregator.assert_metric(metric, count=0, tags=tags) aggregator.assert_all_metrics_covered() From 2934e0a4a6534444aeb591180932a3790a0a63b9 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 10:58:57 +0100 Subject: [PATCH 033/147] Drop index todo --- rethinkdb/tests/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index e2b54900db569..9410cb52a7bfe 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -25,7 +25,6 @@ DATABASE = 'doghouse' HEROES_TABLE = 'heroes' -# TODO: add some indexes HEROES_TABLE_OPTIONS = {'shards': 1, 'replicas': {'primary': 1, 'eu': 1}, 'primary_replica_tag': 'primary'} HEROES_TABLE_REPLICAS = {'server1', 'server2'} HEROES_INITIAL_DOCUMENTS = [ From 4a568e674476e5c3e2531aff5df2b6436115ddd9 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 11:34:48 +0100 Subject: [PATCH 034/147] Add table status metrics --- .../rethinkdb/_default_metrics/__init__.py | 4 +- 
.../rethinkdb/_default_metrics/_statuses.py | 96 ++++++++++++++++++- .../datadog_checks/rethinkdb/_queries.py | 10 +- rethinkdb/datadog_checks/rethinkdb/_types.py | 16 ++++ rethinkdb/tests/common.py | 22 ++++- rethinkdb/tests/test_rethinkdb.py | 25 +++++ 6 files changed, 166 insertions(+), 7 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py index 127caa8eb0824..32d52f1cb8f58 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py @@ -13,7 +13,7 @@ from ._current_issues import collect_current_issues from ._jobs import collect_jobs from ._statistics import collect_statistics -from ._statuses import collect_statuses +from ._statuses import collect_status_metrics def collect_default_metrics(conn): @@ -24,7 +24,7 @@ def collect_default_metrics(conn): See: https://rethinkdb.com/docs/system-tables/ """ metrics = itertools.chain( - collect_statistics(conn), collect_statuses(conn), collect_jobs(conn), collect_current_issues(conn) + collect_statistics(conn), collect_status_metrics(conn), collect_jobs(conn), collect_current_issues(conn) ) for metric in metrics: diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py index b58e29eefdb69..e8d48c058e8a8 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -4,18 +4,110 @@ from __future__ import absolute_import +import itertools from typing import Iterator import rethinkdb -from .._types import Metric +from .._queries import query_table_status +from .._types import Metric, ReplicaState -def collect_statuses(conn): +def collect_status_metrics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server and table 
statuses. See: https://rethinkdb.com/docs/system-tables/#status-tables """ + metrics = itertools.chain(_collect_table_status_metrics(conn), _collect_server_status_metrics(conn)) + + for metric in metrics: + yield metric + + +def _collect_table_status_metrics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + for table_status in query_table_status(conn): + table = table_status['name'] + database = table_status['db'] + + tags = ['table:{}'.format(table), 'database:{}'.format(database)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.ready_for_outdated_reads', + 'value': 1 if table_status['status']['ready_for_outdated_reads'] else 0, + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.ready_for_reads', + 'value': 1 if table_status['status']['ready_for_reads'] else 0, + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.ready_for_writes', + 'value': 1 if table_status['status']['ready_for_writes'] else 0, + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.all_replicas_ready', + 'value': 1 if table_status['status']['all_replicas_ready'] else 0, + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.total', + 'value': len(table_status['shards']), + 'tags': tags, + } + + for index, shard in enumerate(table_status['shards']): + shard_tags = tags + ['shard:{}'.format(index)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.replicas.total', + 'value': len(shard['replicas']), + 'tags': shard_tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.replicas.primary.total', + 'value': len(shard['primary_replicas']), + 'tags': shard_tags, + } + + for replica in shard['replicas']: + server = replica['server'] + replica_tags = shard_tags + ['server:{}'.format(server)] + + # Helper function to benefit from type checking on 'ReplicaState' literals. 
+ def _replica_state(state): + # type: (ReplicaState) -> Metric + return { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.replicas.state.{}'.format(state), + 'value': 1 if replica['state'] == state else 0, + 'tags': replica_tags, + } + + yield _replica_state('ready') + yield _replica_state('transitioning') + yield _replica_state('backfilling') + yield _replica_state('disconnected') + yield _replica_state('waiting_for_primary') + yield _replica_state('waiting_for_quorum') + + +def _collect_server_status_metrics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] return iter(()) # TODO diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index dd1127bb71323..9a01702c9d658 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -8,7 +8,7 @@ import rethinkdb -from ._types import ClusterStats, JoinRow, ReplicaStats, Server, ServerStats, Table, TableStats +from ._types import ClusterStats, JoinRow, ReplicaStats, Server, ServerStats, Table, TableStats, TableStatus def query_cluster_stats(conn): @@ -97,3 +97,11 @@ def query_replica_stats(conn): table = join_row['right'] # type: Table server = row['right'] # type: Server yield table, server, stats + + +def query_table_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] + """ + Retrieve the status of each table in the cluster. + """ + return rethinkdb.r.table('table_status').run(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index ea28b95d47ea5..22c46fa93a81d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -95,6 +95,22 @@ ) +# Status documents. 
+# See: https://rethinkdb.com/docs/system-tables/#status-tables + +ReplicaState = Literal[ + 'ready', 'transitioning', 'backfilling', 'disconnected', 'waiting_for_primary', 'waiting_for_quorum' +] +ShardReplica = TypedDict('ShardReplica', {'server': str, 'state': ReplicaState}) +Shard = TypedDict('Shard', {'primary_replicas': List[str], 'replicas': List[ShardReplica]}) +TableStatusFlags = TypedDict( + 'TableStatusFlags', + {'ready_for_outdated_reads': bool, 'ready_for_reads': bool, 'ready_for_writes': bool, 'all_replicas_ready': bool}, +) +TableStatus = TypedDict( + 'TableStatus', {'id': str, 'name': str, 'db': str, 'status': TableStatusFlags, 'shards': List[Shard]} +) + # ReQL command results. # See: https://rethinkdb.com/api/python/ diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 9410cb52a7bfe..e158cd0c8c32e 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -25,8 +25,14 @@ DATABASE = 'doghouse' HEROES_TABLE = 'heroes' -HEROES_TABLE_OPTIONS = {'shards': 1, 'replicas': {'primary': 1, 'eu': 1}, 'primary_replica_tag': 'primary'} +HEROES_TABLE_NUM_SHARDS = 1 +HEROES_TABLE_OPTIONS = { + 'shards': HEROES_TABLE_NUM_SHARDS, + 'replicas': {'primary': 1, 'eu': 1}, + 'primary_replica_tag': 'primary', +} HEROES_TABLE_REPLICAS = {'server1', 'server2'} +HEROES_TABLE_SHARD_REPLICAS = {0: {'server1', 'server2'}} HEROES_INITIAL_DOCUMENTS = [ { "hero": "Magneto", @@ -94,8 +100,20 @@ 'rethinkdb.table_status.ready_for_writes', 'rethinkdb.table_status.all_replicas_ready', 'rethinkdb.table_status.shards.total', +) + +TABLE_STATUS_REPLICA_COUNT_METRICS = ( 'rethinkdb.table_status.shards.replicas.total', - 'rethinkdb.table_status.shards.replicas.state', + 'rethinkdb.table_status.shards.replicas.primary.total', +) + +TABLE_STATUS_REPLICA_STATE_METRICS = ( + 'rethinkdb.table_status.shards.replicas.state.ready', + 'rethinkdb.table_status.shards.replicas.state.transitioning', + 'rethinkdb.table_status.shards.replicas.state.backfilling', + 
'rethinkdb.table_status.shards.replicas.state.disconnected', + 'rethinkdb.table_status.shards.replicas.state.waiting_for_primary', + 'rethinkdb.table_status.shards.replicas.state.waiting_for_quorum', ) SERVER_STATUS_METRICS = ( diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 88f82edd05aa1..5fdbd8f4f699e 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -18,12 +18,17 @@ CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, + HEROES_TABLE_NUM_SHARDS, HEROES_TABLE_REPLICAS, + HEROES_TABLE_SHARD_REPLICAS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_TAGS, SERVERS, TABLE_STATISTICS_METRICS, + TABLE_STATUS_METRICS, + TABLE_STATUS_REPLICA_COUNT_METRICS, + TABLE_STATUS_REPLICA_STATE_METRICS, ) @@ -61,6 +66,26 @@ def test_check(aggregator, instance): tags.extend(SERVER_TAGS[server]) aggregator.assert_metric(metric, count=0, tags=tags) + for metric in TABLE_STATUS_METRICS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + + for shard in range(HEROES_TABLE_NUM_SHARDS): + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] + for metric in TABLE_STATUS_REPLICA_COUNT_METRICS: + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + + for server in HEROES_TABLE_SHARD_REPLICAS[shard]: + tags = [ + 'table:{}'.format(HEROES_TABLE), + 'database:{}'.format(DATABASE), + 'shard:{}'.format(shard), + 'server:{}'.format(server), + ] + for metric in TABLE_STATUS_REPLICA_STATE_METRICS: + value = 1 if metric.endswith('.ready') else 0 # All servers in our test cluster are available. 
+ aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) + aggregator.assert_all_metrics_covered() service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] From 4d32e580c6709b939267f2647ddb97c0f98a5077 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 11:53:44 +0100 Subject: [PATCH 035/147] Add server status metrics --- .../rethinkdb/_default_metrics/_statuses.py | 37 ++++++++++++++++++- .../datadog_checks/rethinkdb/_queries.py | 20 +++++++++- rethinkdb/datadog_checks/rethinkdb/_types.py | 9 ++++- rethinkdb/tests/common.py | 3 +- rethinkdb/tests/test_rethinkdb.py | 6 +++ 5 files changed, 70 insertions(+), 5 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py index e8d48c058e8a8..cf98e9bc01872 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -9,7 +9,7 @@ import rethinkdb -from .._queries import query_table_status +from .._queries import query_server_status, query_table_status from .._types import Metric, ReplicaState @@ -110,4 +110,37 @@ def _replica_state(state): def _collect_server_status_metrics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - return iter(()) # TODO + for server in query_server_status(conn): + name = server['name'] + network = server['network'] + process = server['process'] + + tags = ['server:{}'.format(name)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server_status.network.time_connected', + 'value': network['time_connected'].timestamp(), + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server_status.network.connected_to.total', + 'value': len([other for other, connected in network['connected_to'].items() if connected]), + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 
'rethinkdb.server_status.network.connected_to.pending.total', + 'value': len([other for other, connected in network['connected_to'].items() if not connected]), + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server_status.process.time_started', + 'value': process['time_started'].timestamp(), + 'tags': tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 9a01702c9d658..af97fd41f80e2 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -8,7 +8,17 @@ import rethinkdb -from ._types import ClusterStats, JoinRow, ReplicaStats, Server, ServerStats, Table, TableStats, TableStatus +from ._types import ( + ClusterStats, + JoinRow, + ReplicaStats, + Server, + ServerStats, + ServerStatus, + Table, + TableStats, + TableStatus, +) def query_cluster_stats(conn): @@ -105,3 +115,11 @@ def query_table_status(conn): Retrieve the status of each table in the cluster. """ return rethinkdb.r.table('table_status').run(conn) + + +def query_server_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] + """ + Retrieve the status of each server in the cluster. + """ + return rethinkdb.r.table('server_status').run(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 22c46fa93a81d..6bc960d7e3fd6 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -6,7 +6,8 @@ Declarations used for type checking our code, including our manipulation of JSON documents returned by RethinkDB. """ -from typing import Any, List, Literal, Tuple, TypedDict +import datetime as dt +from typing import Any, Dict, List, Literal, Tuple, TypedDict # Lightweight shim to decouple collection functions from the check class. 
Metric = TypedDict( @@ -111,6 +112,12 @@ 'TableStatus', {'id': str, 'name': str, 'db': str, 'status': TableStatusFlags, 'shards': List[Shard]} ) +# vvv NOTE: only fields of interest are listed here. +ServerNetwork = TypedDict('ServerNetwork', {'time_connected': dt.datetime, 'connected_to': Dict[str, bool]}) +ServerProcess = TypedDict('ServerProcess', {'time_started': dt.datetime, 'version': str}) +# ^^^ +ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) + # ReQL command results. # See: https://rethinkdb.com/api/python/ diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index e158cd0c8c32e..c4bdf2031e36e 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -118,7 +118,8 @@ SERVER_STATUS_METRICS = ( 'rethinkdb.server_status.network.time_connected', - 'rethinkdb.server_status.network.connected_to', + 'rethinkdb.server_status.network.connected_to.total', + 'rethinkdb.server_status.network.connected_to.pending.total', 'rethinkdb.server_status.process.time_started', ) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 5fdbd8f4f699e..7e80166902c2a 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -23,6 +23,7 @@ HEROES_TABLE_SHARD_REPLICAS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, + SERVER_STATUS_METRICS, SERVER_TAGS, SERVERS, TABLE_STATISTICS_METRICS, @@ -86,6 +87,11 @@ def test_check(aggregator, instance): value = 1 if metric.endswith('.ready') else 0 # All servers in our test cluster are available. 
aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) + for metric in SERVER_STATUS_METRICS: + for server in SERVERS: + tags = ['server:{}'.format(server)] + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + aggregator.assert_all_metrics_covered() service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] From 61a6c6fe277b8c99f6e6515b6e69d696e3ab006b Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 16:23:53 +0100 Subject: [PATCH 036/147] Fix timestamp on py2 --- .../rethinkdb/_default_metrics/_statuses.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py index cf98e9bc01872..44db59022e510 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -4,7 +4,9 @@ from __future__ import absolute_import +import datetime as dt import itertools +import time from typing import Iterator import rethinkdb @@ -120,7 +122,7 @@ def _collect_server_status_metrics(conn): yield { 'type': 'gauge', 'name': 'rethinkdb.server_status.network.time_connected', - 'value': network['time_connected'].timestamp(), + 'value': _to_timestamp(network['time_connected']), 'tags': tags, } @@ -141,6 +143,15 @@ def _collect_server_status_metrics(conn): yield { 'type': 'gauge', 'name': 'rethinkdb.server_status.process.time_started', - 'value': process['time_started'].timestamp(), + 'value': _to_timestamp(process['time_started']), 'tags': tags, } + + +def _to_timestamp(datetime): + # type: (dt.datetime) -> float + try: + return datetime.timestamp() # type: ignore # (Mypy is run in --py2 mode.) + except AttributeError: + # Python 2. 
+ return time.mktime(datetime.now().timetuple()) From 6a364455790ebff065fbd0c1fc151437a2f1a559 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 16:26:07 +0100 Subject: [PATCH 037/147] Add system job metrics, refactor assertions --- .../datadog_checks/dev/plugin/tox.py | 4 +- .../rethinkdb/_default_metrics/_jobs.py | 65 ++++++++- .../datadog_checks/rethinkdb/_queries.py | 9 ++ rethinkdb/datadog_checks/rethinkdb/_types.py | 48 ++++++- rethinkdb/tests/common.py | 49 +++++-- rethinkdb/tests/compose/docker-compose.yaml | 12 +- rethinkdb/tests/conftest.py | 67 +-------- rethinkdb/tests/test_rethinkdb.py | 129 ++++++++++++++---- rethinkdb/tests/utils/__init__.py | 0 rethinkdb/tests/utils/cluster.py | 113 +++++++++++++++ 10 files changed, 385 insertions(+), 111 deletions(-) create mode 100644 rethinkdb/tests/utils/__init__.py create mode 100644 rethinkdb/tests/utils/cluster.py diff --git a/datadog_checks_dev/datadog_checks/dev/plugin/tox.py b/datadog_checks_dev/datadog_checks/dev/plugin/tox.py index 75a85b0db068b..1e981ec6e152d 100644 --- a/datadog_checks_dev/datadog_checks/dev/plugin/tox.py +++ b/datadog_checks_dev/datadog_checks/dev/plugin/tox.py @@ -77,7 +77,9 @@ def add_style_checker(config, sections, make_envconfig, reader): # Allow using multiple lines for enhanced readability in case of large amount of options/files to check. mypy_args = mypy_args.replace('\n', ' ') - dependencies.append('mypy>=0.761') + # Allow using features from the latest development version (documented under the 'latest' tag). 
+ mypy_latest = 'git+https://github.com/python/mypy.git@master' + dependencies.append(mypy_latest) commands.append('mypy --config-file=../mypy.ini {}'.format(mypy_args)) sections[section] = { diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py index 812795f28f51e..4528e38bbe65f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py @@ -8,6 +8,7 @@ import rethinkdb +from .._queries import query_system_jobs from .._types import Metric @@ -18,4 +19,66 @@ def collect_jobs(conn): See: https://rethinkdb.com/docs/system-jobs/ """ - return iter(()) # TODO + for job in query_system_jobs(conn): + duration = job['duration_sec'] + servers = job['servers'] + tags = ['server:{}'.format(server) for server in servers] + + if job['type'] == 'query': + client_address = job['info']['client_address'] + + query_tags = tags + ['client_address:{}'.format(client_address)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.query.duration', + 'value': duration, + 'tags': query_tags, + } + + elif job['type'] == 'index_construction': + database = job['info']['db'] + table = job['info']['table'] + + index_construction_tags = tags + ['database:{}'.format(database), 'table:{}'.format(table)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.duration', + 'value': duration, + 'tags': index_construction_tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.progress', + 'value': job['info']['progress'], + 'tags': index_construction_tags, + } + + elif job['type'] == 'backfill': + database = job['info']['db'] + destination_server = job['info']['destination_server'] + source_server = job['info']['source_server'] + table = job['info']['table'] + + backfill_tags = tags + [ + 'database:{}'.format(database), + 'destination_server:{}'.format(destination_server), + 
'source_server:{}'.format(source_server), + 'table:{}'.format(table), + ] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.backfill.duration', + 'value': duration, + 'tags': backfill_tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.backfill.progress', + 'value': job['info']['progress'], + 'tags': backfill_tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index af97fd41f80e2..20dda52096f1e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -10,6 +10,7 @@ from ._types import ( ClusterStats, + Job, JoinRow, ReplicaStats, Server, @@ -123,3 +124,11 @@ def query_server_status(conn): Retrieve the status of each server in the cluster. """ return rethinkdb.r.table('server_status').run(conn) + + +def query_system_jobs(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Job] + """ + Retrieve all the currently running system jobs. + """ + return rethinkdb.r.table('jobs').run(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 6bc960d7e3fd6..94b744ce7ec58 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -7,7 +7,7 @@ """ import datetime as dt -from typing import Any, Dict, List, Literal, Tuple, TypedDict +from typing import Any, Dict, List, Literal, Tuple, TypedDict, Union # Lightweight shim to decouple collection functions from the check class. Metric = TypedDict( @@ -118,6 +118,52 @@ # ^^^ ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) + +# System jobs documents. 
+# See: https://rethinkdb.com/docs/system-jobs/ + +QueryInfo = TypedDict('QueryInfo', {'client_address': str, 'client_port': int, 'query': str, 'user': str}) +QueryJob = TypedDict( + 'QueryJob', + { + 'type': Literal['query'], + 'id': Tuple[Literal['query'], str], + 'duration_sec': float, + 'info': QueryInfo, + 'servers': List[str], + }, +) + +IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) +IndexConstructionJob = TypedDict( + 'IndexConstructionJob', + { + 'type': Literal['index_construction'], + 'id': Tuple[Literal['index_construction'], str], + 'duration_sec': float, + 'info': IndexConstructionInfo, + 'servers': List[str], + }, +) + +BackfillInfo = TypedDict( + 'BackfillInfo', {'db': str, 'destination_server': str, 'source_server': str, 'table': str, 'progress': int} +) +BackfillJob = TypedDict( + 'BackfillJob', + { + 'type': Literal['backfill'], + 'id': Tuple[Literal['backfill'], str], + 'duration_sec': float, + 'info': BackfillInfo, + 'servers': List[str], + }, +) + +# NOTE: this is a union type tagged by the 'type' key. +# See: https://mypy.readthedocs.io/en/latest/literal_types.html#intelligent-indexing +Job = Union[QueryJob, IndexConstructionJob, BackfillJob] + # ReQL command results. 
# See: https://rethinkdb.com/api/python/ diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index c4bdf2031e36e..4d60c23ca914b 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -15,7 +15,12 @@ HOST = get_docker_hostname() SERVERS = {'server0', 'server1', 'server2'} -SERVER_TAGS = {'server0': ['default', 'us'], 'server1': ['default', 'primary', 'us'], 'server2': ['default', 'eu']} +SERVER_TAGS = { + 'server0': ['default', 'us', 'initial'], + 'server1': ['default', 'us', 'primary'], + 'server2': ['default', 'eu'], + 'server3': ['default', 'eu'], +} CONNECT_SERVER_NAME = 'server0' CONNECT_SERVER_PORT = 28015 @@ -25,15 +30,23 @@ DATABASE = 'doghouse' HEROES_TABLE = 'heroes' -HEROES_TABLE_NUM_SHARDS = 1 -HEROES_TABLE_OPTIONS = { - 'shards': HEROES_TABLE_NUM_SHARDS, - 'replicas': {'primary': 1, 'eu': 1}, - 'primary_replica_tag': 'primary', + +HEROES_TABLE_INITIAL_CONFIG = {'shards': 1, 'replicas': {'initial': 1}, 'primary_replica_tag': 'initial'} +HEROES_TABLE_SERVER_INITIAL = 'server0' + +HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG = 'primary' +HEROES_TABLE_REPLICATED_CONFIG = { + 'shards': 1, + 'replicas': {'primary': 1, 'eu': 2}, + 'primary_replica_tag': HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG, } -HEROES_TABLE_REPLICAS = {'server1', 'server2'} -HEROES_TABLE_SHARD_REPLICAS = {0: {'server1', 'server2'}} -HEROES_INITIAL_DOCUMENTS = [ +HEROES_TABLE_SERVERS_REPLICATED = {'server1', 'server2', 'server3'} +HEROES_TABLE_REPLICAS_FOR_SHARDS = {0: HEROES_TABLE_SERVERS_REPLICATED} + +# This should be big enough so that a backfill job lasts long enough for us to see it during a check. 
+HEROES_NUM_DOCUMENTS = 90000 + +_HEROES_TEMPLATE_DOCUMENTS = [ { "hero": "Magneto", "name": "Max Eisenhardt", @@ -54,7 +67,10 @@ "appearances_count": 72, }, ] -NUM_FAMOUS_HEROES = 2 + +assert HEROES_NUM_DOCUMENTS % len(_HEROES_TEMPLATE_DOCUMENTS) == 0 +HEROES_DOCUMENTS = _HEROES_TEMPLATE_DOCUMENTS * (HEROES_NUM_DOCUMENTS // 3) +NUM_FAMOUS_HEROES = len(HEROES_DOCUMENTS) * 2 / 3 CLUSTER_STATISTICS_METRICS = ( 'rethinkdb.stats.cluster.queries_per_sec', @@ -102,12 +118,12 @@ 'rethinkdb.table_status.shards.total', ) -TABLE_STATUS_REPLICA_COUNT_METRICS = ( +TABLE_STATUS_SHARDS_METRICS = ( 'rethinkdb.table_status.shards.replicas.total', 'rethinkdb.table_status.shards.replicas.primary.total', ) -TABLE_STATUS_REPLICA_STATE_METRICS = ( +TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS = ( 'rethinkdb.table_status.shards.replicas.state.ready', 'rethinkdb.table_status.shards.replicas.state.transitioning', 'rethinkdb.table_status.shards.replicas.state.backfilling', @@ -123,10 +139,15 @@ 'rethinkdb.server_status.process.time_started', ) -JOBS_METRICS = ( - 'rethinkdb.jobs.query.duration', +QUERY_JOBS_METRICS = ('rethinkdb.jobs.query.duration',) + +# TODO: trigger index construction +INDEX_CONSTRUCTION_JOBS_METRICS = ( 'rethinkdb.jobs.index_construction.duration', 'rethinkdb.jobs.index_construction.progress', +) + +BACKFILL_JOBS_METRICS = ( 'rethinkdb.jobs.backfill.duration', 'rethinkdb.jobs.backfill.progress', ) diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index fe4bcb086e7d7..0f5a4a8906494 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -7,7 +7,7 @@ services: tty: true # Required otherwise RethinkDB won't output any logs. 
image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server0 - command: rethinkdb --bind all --server-name server0 --server-tag us + command: rethinkdb --bind all --server-name server0 --server-tag us --server-tag initial ports: - ${RETHINKDB_CONNECT_SERVER_PORT}:28015 # Client driver port. - 8080:8080 # Port for the web UI. Debugging only (not used by tests). @@ -32,6 +32,16 @@ services: depends_on: - rethinkdb-server0 + rethinkdb-server3: + tty: true + image: ${RETHINKDB_IMAGE} + container_name: rethinkdb-server3 + command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server3 --server-tag eu + links: + - rethinkdb-server0 + depends_on: + - rethinkdb-server0 + rethinkdb-proxy0: tty: true image: ${RETHINKDB_IMAGE} diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 2314a8b895444..ce2f9521dc010 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -8,23 +8,11 @@ from typing import Dict, Iterator, List import pytest -import rethinkdb -from datadog_checks.dev import WaitFor, docker_run +from datadog_checks.dev import docker_run from datadog_checks.rethinkdb._types import Instance -from .common import ( - CONNECT_SERVER_PORT, - DATABASE, - HERE, - HEROES_INITIAL_DOCUMENTS, - HEROES_TABLE, - HEROES_TABLE_OPTIONS, - HOST, - IMAGE, - NUM_FAMOUS_HEROES, - PROXY_PORT, -) +from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} @@ -38,49 +26,6 @@ def instance(): } -def create_tables(): - # type: () -> None - with rethinkdb.r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # See: https://rethinkdb.com/api/python/db_create - response = rethinkdb.r.db_create(DATABASE).run(conn) - assert response['dbs_created'] == 1 - - table = HEROES_TABLE - options = HEROES_TABLE_OPTIONS - - # See: https://rethinkdb.com/api/python/table_create/ - response = rethinkdb.r.db(DATABASE).table_create(table, **options).run(conn) - assert 
response['tables_created'] == 1 - - -def simulate_client_writes(): - # type: () -> None - """ - Simulate a client application that inserts rows by connecting via the proxy node. - """ - with rethinkdb.r.connect(host=HOST, port=PROXY_PORT) as conn: - table = HEROES_TABLE - documents = HEROES_INITIAL_DOCUMENTS - - # See: https://rethinkdb.com/api/python/insert - response = rethinkdb.r.db(DATABASE).table(table).insert(documents).run(conn) - assert response['errors'] == 0 - assert response['inserted'] == len(documents) - - -def simulate_client_reads(): - # type: () -> None - """ - Simulate a client application that reads rows by connecting via the proxy node. - """ - with rethinkdb.r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: - all_heroes = list(rethinkdb.r.table('heroes').run(conn)) - assert len(all_heroes) == len(HEROES_INITIAL_DOCUMENTS) - - famous_heroes = list(rethinkdb.r.table('heroes').filter(rethinkdb.r.row['appearances_count'] >= 50).run(conn)) - assert len(famous_heroes) == NUM_FAMOUS_HEROES - - @pytest.fixture(scope='session') def dd_environment(instance): # type: (Instance) -> Iterator @@ -92,12 +37,6 @@ def dd_environment(instance): 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), } # type: Dict[str, str] - conditions = [ - WaitFor(create_tables, attempts=1), - WaitFor(simulate_client_writes, attempts=1), - WaitFor(simulate_client_reads, attempts=1), - ] - log_patterns = [ r'Server ready, "server0".*', r'Connected to server "server1".*', @@ -105,6 +44,6 @@ def dd_environment(instance): r'Connected to proxy.*', ] # type: List[str] - with docker_run(compose_file, env_vars=env_vars, conditions=conditions, log_patterns=log_patterns): + with docker_run(compose_file, env_vars=env_vars, log_patterns=log_patterns): config = {'instances': [instance]} yield config, E2E_METADATA diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 7e80166902c2a..6d271d819e80a 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ 
b/rethinkdb/tests/test_rethinkdb.py @@ -4,7 +4,7 @@ from __future__ import absolute_import import copy -from typing import Iterator +from typing import Iterator, List, TypedDict import pytest import rethinkdb @@ -14,13 +14,15 @@ from datadog_checks.rethinkdb._types import Instance, Metric from .common import ( + BACKFILL_JOBS_METRICS, CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, - HEROES_TABLE_NUM_SHARDS, - HEROES_TABLE_REPLICAS, - HEROES_TABLE_SHARD_REPLICAS, + HEROES_TABLE_REPLICAS_FOR_SHARDS, + HEROES_TABLE_SERVER_INITIAL, + HEROES_TABLE_SERVERS_REPLICATED, + QUERY_JOBS_METRICS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, @@ -28,9 +30,12 @@ SERVERS, TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, - TABLE_STATUS_REPLICA_COUNT_METRICS, - TABLE_STATUS_REPLICA_STATE_METRICS, + TABLE_STATUS_SHARDS_METRICS, + TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS, ) +from .utils.cluster import setup_cluster_ensuring_all_default_metrics_are_defined + +Context = TypedDict('Context', {'backfilling_servers': List[str]}) @pytest.mark.integration @@ -38,64 +43,130 @@ def test_check(aggregator, instance): # type: (AggregatorStub, Instance) -> None check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) + with setup_cluster_ensuring_all_default_metrics_are_defined(): + check.check(instance) + + context = {'backfilling_servers': []} # type: Context + + _assert_statistics_metrics(aggregator) + _assert_table_status_metrics(aggregator, context=context) + + assert context['backfilling_servers'], ( + 'Expected backfilling to be ongoing for at least one replica. ' + 'Aborting, as otherwise backfill metrics would not be covered.' 
+ ) + + _assert_server_status_metrics(aggregator) + _assert_system_jobs_metrics(aggregator, context=context) + + aggregator.assert_all_metrics_covered() + + service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) + + +def _assert_statistics_metrics(aggregator): + # type: (AggregatorStub) -> None + + # Cluster. for metric in CLUSTER_STATISTICS_METRICS: aggregator.assert_metric(metric, count=1, tags=[]) - for metric in SERVER_STATISTICS_METRICS: - for server in SERVERS: + # Servers. + for server in SERVERS: + for metric in SERVER_STATISTICS_METRICS: tags = ['server:{}'.format(server)] + SERVER_TAGS[server] aggregator.assert_metric(metric, count=1, tags=tags) + # Tables. for metric in TABLE_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, count=1, tags=tags) - assert len(HEROES_TABLE_REPLICAS) > 0 - NON_REPLICA_SERVERS = SERVERS - HEROES_TABLE_REPLICAS - assert len(NON_REPLICA_SERVERS) > 0 - - for metric in REPLICA_STATISTICS_METRICS: - for server in HEROES_TABLE_REPLICAS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] - tags.extend(SERVER_TAGS[server]) + # Replicas (table/server pairs). + for replica_server in HEROES_TABLE_SERVERS_REPLICATED: + for metric in REPLICA_STATISTICS_METRICS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(replica_server)] + tags.extend(SERVER_TAGS[replica_server]) aggregator.assert_metric(metric, count=1, tags=tags) - for server in NON_REPLICA_SERVERS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(server)] - tags.extend(SERVER_TAGS[server]) + # Ensure non-replica servers haven't yielded replica statistics. 
+ for non_replica_server in SERVERS - HEROES_TABLE_SERVERS_REPLICATED: + for metric in REPLICA_STATISTICS_METRICS: + tags = [ + 'table:{}'.format(HEROES_TABLE), + 'database:{}'.format(DATABASE), + 'server:{}'.format(non_replica_server), + ] + tags.extend(SERVER_TAGS[non_replica_server]) aggregator.assert_metric(metric, count=0, tags=tags) + +def _assert_table_status_metrics(aggregator, context): + # type: (AggregatorStub, Context) -> None + + # Status of tables. for metric in TABLE_STATUS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - for shard in range(HEROES_TABLE_NUM_SHARDS): + # Status of shards. + for shard, servers in HEROES_TABLE_REPLICAS_FOR_SHARDS.items(): tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] - for metric in TABLE_STATUS_REPLICA_COUNT_METRICS: + + for metric in TABLE_STATUS_SHARDS_METRICS: aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - for server in HEROES_TABLE_SHARD_REPLICAS[shard]: + for server in servers: tags = [ 'table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard), 'server:{}'.format(server), ] - for metric in TABLE_STATUS_REPLICA_STATE_METRICS: - value = 1 if metric.endswith('.ready') else 0 # All servers in our test cluster are available. - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) + for metric in TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS: + # Due to 'setup_cluster()', RethinkDB should currently be backfilling data from + # the initial server to the new replicas. + value = 1 if metric.endswith('.state.backfilling') else 0 + try: + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) + except AssertionError: # pragma: no cover + # Depending on timing, the server may already be ready. Fine! 
Re-assert to limit flakiness. + value = 1 if metric.endswith('.state.ready') else 0 + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + else: + context['backfilling_servers'].append(server) + + +def _assert_server_status_metrics(aggregator): + # type: (AggregatorStub) -> None for metric in SERVER_STATUS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - aggregator.assert_all_metrics_covered() - service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) +def _assert_system_jobs_metrics(aggregator, context): + # type: (AggregatorStub, Context) -> None + + # Query jobs. + for metric in QUERY_JOBS_METRICS: + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE) + + # Backfill jobs. + for metric in BACKFILL_JOBS_METRICS: + for server in context['backfilling_servers']: + tags = [ + 'database:{}'.format(DATABASE), + 'table:{}'.format(HEROES_TABLE), + 'destination_server:{}'.format(server), + 'source_server:{}'.format(HEROES_TABLE_SERVER_INITIAL), + 'server:{}'.format(server), + 'server:{}'.format(HEROES_TABLE_SERVER_INITIAL), + ] + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) @pytest.mark.integration diff --git a/rethinkdb/tests/utils/__init__.py b/rethinkdb/tests/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/rethinkdb/tests/utils/cluster.py b/rethinkdb/tests/utils/cluster.py new file mode 100644 index 0000000000000..68fa268ef0b41 --- /dev/null +++ b/rethinkdb/tests/utils/cluster.py @@ -0,0 +1,113 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from contextlib import contextmanager +from typing import Iterator + +import rethinkdb +from rethinkdb import r + +from ..common import ( + CONNECT_SERVER_PORT, + DATABASE, + HEROES_DOCUMENTS, + HEROES_TABLE, + HEROES_TABLE_INITIAL_CONFIG, + HEROES_TABLE_REPLICATED_CONFIG, + HOST, + NUM_FAMOUS_HEROES, + PROXY_PORT, +) + + +@contextmanager +def setup_cluster_ensuring_all_default_metrics_are_defined(): + # type: () -> Iterator[None] + """ + Configure a cluster for integration testing purposes. + + This helper should make it so that all default metrics are defined within the context block, + including ones for transient activity such as system jobs. + """ + with _setup_database(): + _create_test_table() + _simulate_client_writes() + _simulate_client_reads() + _setup_test_table_replication() + yield + + +@contextmanager +def _setup_database(): + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + # See: https://rethinkdb.com/api/python/db_create + response = r.db_create(DATABASE).run(conn) + assert response['dbs_created'] == 1 + + yield + + response = r.db_drop(DATABASE).run(conn) + assert response['dbs_dropped'] == 1 + + +def _create_test_table(): + # type: () -> None + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + # See: https://rethinkdb.com/api/python/table_create/ + response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_INITIAL_CONFIG).run(conn) + assert response['tables_created'] == 1 + + +def _setup_test_table_replication(): + # type: () -> None + def _wait_backfill_started(conn): + # type: (rethinkdb.net.Connection) -> None + for change in r.db('rethinkdb').table('jobs').filter({'type': 'backfill'}).changes().run(conn): + assert change is not None + # Stop on the first backfill job event. 
+ break + + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + # Existing data in tables will be rebalanced, triggering a backfill job on the RethinkDB server side. + # We do all of this instead of setting up replication initially so that metrics associated to this job + # are collected during tests. + r.db(DATABASE).table(HEROES_TABLE).reconfigure(**HEROES_TABLE_REPLICATED_CONFIG).run(conn) + _wait_backfill_started(conn) + + +def _simulate_client_writes(): + # type: () -> None + """ + Simulate a client application that inserts rows by connecting via the proxy node. + + Calling this ensures that 'written_docs_*' metrics will have a non-zero value. + """ + + with r.connect(host=HOST, port=PROXY_PORT) as conn: + table = HEROES_TABLE + documents = HEROES_DOCUMENTS + + # See: https://rethinkdb.com/api/python/insert + # NOTE: 'durability="soft"' speeds up the write by not waiting for data to be committed to disk. + response = ( + r.db(DATABASE).table(table).insert(documents).run(conn, durability="soft", array_limit=len(documents)) + ) + assert response['errors'] == 0 + assert response['inserted'] == len(documents) + + +def _simulate_client_reads(): + # type: () -> None + """ + Simulate a client application that reads rows by connecting via the proxy node. + + Calling this ensures that 'read_docs_*' metrics will have a non-zero value. 
+ """ + + with r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: + all_heroes = list(r.table('heroes').run(conn)) + assert len(all_heroes) == len(HEROES_DOCUMENTS) + + famous_heroes = list(r.table('heroes').filter(r.row['appearances_count'] >= 50).run(conn)) + assert len(famous_heroes) == NUM_FAMOUS_HEROES From f63d2d17513499494c5df860eb8a65d1bba8aeb8 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 16:57:32 +0100 Subject: [PATCH 038/147] Add comments about cluster setup --- rethinkdb/tests/{utils => }/cluster.py | 29 ++++--------- rethinkdb/tests/common.py | 58 ++++++++++++++------------ rethinkdb/tests/test_rethinkdb.py | 8 ++-- rethinkdb/tests/utils/__init__.py | 0 4 files changed, 44 insertions(+), 51 deletions(-) rename rethinkdb/tests/{utils => }/cluster.py (70%) delete mode 100644 rethinkdb/tests/utils/__init__.py diff --git a/rethinkdb/tests/utils/cluster.py b/rethinkdb/tests/cluster.py similarity index 70% rename from rethinkdb/tests/utils/cluster.py rename to rethinkdb/tests/cluster.py index 68fa268ef0b41..ee9b77c8010d0 100644 --- a/rethinkdb/tests/utils/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -8,27 +8,23 @@ import rethinkdb from rethinkdb import r -from ..common import ( +from .common import ( CONNECT_SERVER_PORT, DATABASE, HEROES_DOCUMENTS, HEROES_TABLE, - HEROES_TABLE_INITIAL_CONFIG, HEROES_TABLE_REPLICATED_CONFIG, + HEROES_TABLE_SINGLE_SERVER_CONFIG, HOST, - NUM_FAMOUS_HEROES, PROXY_PORT, ) @contextmanager -def setup_cluster_ensuring_all_default_metrics_are_defined(): +def setup_cluster_ensuring_all_default_metrics_are_emitted(): # type: () -> Iterator[None] """ - Configure a cluster for integration testing purposes. - - This helper should make it so that all default metrics are defined within the context block, - including ones for transient activity such as system jobs. + Configure a cluster so that all default metrics will be emitted if running a check within this context. 
""" with _setup_database(): _create_test_table() @@ -55,7 +51,7 @@ def _create_test_table(): # type: () -> None with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: # See: https://rethinkdb.com/api/python/table_create/ - response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_INITIAL_CONFIG).run(conn) + response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_SINGLE_SERVER_CONFIG).run(conn) assert response['tables_created'] == 1 @@ -63,15 +59,11 @@ def _setup_test_table_replication(): # type: () -> None def _wait_backfill_started(conn): # type: (rethinkdb.net.Connection) -> None - for change in r.db('rethinkdb').table('jobs').filter({'type': 'backfill'}).changes().run(conn): - assert change is not None - # Stop on the first backfill job event. - break + changes = r.db('rethinkdb').table('jobs').filter({'type': 'backfill'}).changes().run(conn) # type: Iterator + next(changes) with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # Existing data in tables will be rebalanced, triggering a backfill job on the RethinkDB server side. - # We do all of this instead of setting up replication initially so that metrics associated to this job - # are collected during tests. + # See: https://rethinkdb.com/api/python/reconfigure r.db(DATABASE).table(HEROES_TABLE).reconfigure(**HEROES_TABLE_REPLICATED_CONFIG).run(conn) _wait_backfill_started(conn) @@ -89,7 +81,7 @@ def _simulate_client_writes(): documents = HEROES_DOCUMENTS # See: https://rethinkdb.com/api/python/insert - # NOTE: 'durability="soft"' speeds up the write by not waiting for data to be committed to disk. + # NOTE: 'durability="soft"' speeds up writes by not waiting for data to be committed to disk. 
response = ( r.db(DATABASE).table(table).insert(documents).run(conn, durability="soft", array_limit=len(documents)) ) @@ -108,6 +100,3 @@ def _simulate_client_reads(): with r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: all_heroes = list(r.table('heroes').run(conn)) assert len(all_heroes) == len(HEROES_DOCUMENTS) - - famous_heroes = list(r.table('heroes').filter(r.row['appearances_count'] >= 50).run(conn)) - assert len(famous_heroes) == NUM_FAMOUS_HEROES diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 4d60c23ca914b..df7f06a52e43d 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -14,13 +14,17 @@ HOST = get_docker_hostname() -SERVERS = {'server0', 'server1', 'server2'} + +# Cluster configuration. +# NOTE: server information used below is tightly coupled to the Docker Compose setup. + SERVER_TAGS = { 'server0': ['default', 'us', 'initial'], 'server1': ['default', 'us', 'primary'], 'server2': ['default', 'eu'], 'server3': ['default', 'eu'], } +SERVERS = set(SERVER_TAGS) CONNECT_SERVER_NAME = 'server0' CONNECT_SERVER_PORT = 28015 @@ -28,12 +32,23 @@ PROXY_PORT = 28018 DATABASE = 'doghouse' - HEROES_TABLE = 'heroes' -HEROES_TABLE_INITIAL_CONFIG = {'shards': 1, 'replicas': {'initial': 1}, 'primary_replica_tag': 'initial'} -HEROES_TABLE_SERVER_INITIAL = 'server0' - +# NOTE: Hello, reader! It may not be immediately obvious what's going on below, so let me explain. +# +# ARethinkDB cluster is dynamic: as nodes, shards or replicas are added or removed, the distribution of +# data may change -- also known as "rebalancing". +# Most of the default metrics we collect are emitted regardless of the state of the cluster. +# But some metrics are only emitted when the cluster is evolving. +# (For example, this includes metrics about backfill jobs.) 
+# +# So, in order for all default metrics to be emitted during tests, we have one "initial" configuration +# for a single-server cluster, which is used as a starting point: +HEROES_TABLE_SINGLE_SERVER_CONFIG = {'shards': 1, 'replicas': {'initial': 1}, 'primary_replica_tag': 'initial'} +HEROES_TABLE_SERVER_INITIAL = 'server0' # (Because it's the only server tagged as 'initial'.) +# We'll then create a table there and fill it with data. +# Then, we'll switch to a "replicated" configuration, which changes the primary replica, and replicates the +# table across more servers: HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG = 'primary' HEROES_TABLE_REPLICATED_CONFIG = { 'shards': 1, @@ -41,36 +56,25 @@ 'primary_replica_tag': HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG, } HEROES_TABLE_SERVERS_REPLICATED = {'server1', 'server2', 'server3'} -HEROES_TABLE_REPLICAS_FOR_SHARDS = {0: HEROES_TABLE_SERVERS_REPLICATED} - -# This should be big enough so that a backfill job lasts long enough for us to see it during a check. +HEROES_TABLE_REPLICAS_BY_SHARD = {0: HEROES_TABLE_SERVERS_REPLICATED} +# RethinkDB will then start moving data from server0 to those new replicas, emitting the transient metrics we'd +# like to test. +# The number of inserted documents should be large enough that any backfill job lasts long enough that its metrics +# are emitted during a check. Empirically, >80k documents seems to be enough: HEROES_NUM_DOCUMENTS = 90000 -_HEROES_TEMPLATE_DOCUMENTS = [ +HEROES_DOCUMENTS = [ { "hero": "Magneto", "name": "Max Eisenhardt", "aka": ["Magnus", "Erik Lehnsherr", "Lehnsherr"], "magazine_titles": ["Alpha Flight", "Avengers", "Avengers West Coast"], "appearances_count": 42, - }, - { - "hero": "Professor Xavier", - "name": "Charles Francis Xavier", - "magazine_titles": ["Alpha Flight", "Avengers", "Bishop", "Defenders"], - "appearances_count": 72, - }, - { - "hero": "Storm", - "name": "Ororo Monroe", - "magazine_titles": ["Amazing Spider-Man vs. 
Wolverine", "Excalibur", "Fantastic Four", "Iron Fist"], - "appearances_count": 72, - }, -] - -assert HEROES_NUM_DOCUMENTS % len(_HEROES_TEMPLATE_DOCUMENTS) == 0 -HEROES_DOCUMENTS = _HEROES_TEMPLATE_DOCUMENTS * (HEROES_NUM_DOCUMENTS // 3) -NUM_FAMOUS_HEROES = len(HEROES_DOCUMENTS) * 2 / 3 + } +] * HEROES_NUM_DOCUMENTS + + +# Metrics lists. CLUSTER_STATISTICS_METRICS = ( 'rethinkdb.stats.cluster.queries_per_sec', diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 6d271d819e80a..0611366fc369e 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -19,7 +19,7 @@ CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, - HEROES_TABLE_REPLICAS_FOR_SHARDS, + HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVER_INITIAL, HEROES_TABLE_SERVERS_REPLICATED, QUERY_JOBS_METRICS, @@ -33,7 +33,7 @@ TABLE_STATUS_SHARDS_METRICS, TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS, ) -from .utils.cluster import setup_cluster_ensuring_all_default_metrics_are_defined +from .cluster import setup_cluster_ensuring_all_default_metrics_are_emitted Context = TypedDict('Context', {'backfilling_servers': List[str]}) @@ -44,7 +44,7 @@ def test_check(aggregator, instance): # type: (AggregatorStub, Instance) -> None check = RethinkDBCheck('rethinkdb', {}, [instance]) - with setup_cluster_ensuring_all_default_metrics_are_defined(): + with setup_cluster_ensuring_all_default_metrics_are_emitted(): check.check(instance) context = {'backfilling_servers': []} # type: Context @@ -112,7 +112,7 @@ def _assert_table_status_metrics(aggregator, context): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) # Status of shards. 
- for shard, servers in HEROES_TABLE_REPLICAS_FOR_SHARDS.items(): + for shard, servers in HEROES_TABLE_REPLICAS_BY_SHARD.items(): tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] for metric in TABLE_STATUS_SHARDS_METRICS: diff --git a/rethinkdb/tests/utils/__init__.py b/rethinkdb/tests/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 9912022204d40ee17c8ea00c3c8395b397260f7d Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 18:12:29 +0100 Subject: [PATCH 039/147] Drop complicated integration setup in favor of jobs metrics unit tests --- .../rethinkdb/_default_metrics/_jobs.py | 20 +++- .../rethinkdb/_default_metrics/_statuses.py | 2 +- rethinkdb/tests/cluster.py | 63 +++------- rethinkdb/tests/common.py | 68 ++++------- rethinkdb/tests/compose/docker-compose.yaml | 12 +- rethinkdb/tests/conftest.py | 7 +- rethinkdb/tests/test_rethinkdb.py | 70 +++-------- .../tests/unit/test_transient_metrics.py | 113 ++++++++++++++++++ rethinkdb/tests/unit/utils.py | 68 +++++++++++ 9 files changed, 261 insertions(+), 162 deletions(-) create mode 100644 rethinkdb/tests/unit/test_transient_metrics.py create mode 100644 rethinkdb/tests/unit/utils.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py index 4528e38bbe65f..dceaaa6f149d7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py @@ -22,12 +22,14 @@ def collect_jobs(conn): for job in query_system_jobs(conn): duration = job['duration_sec'] servers = job['servers'] + tags = ['server:{}'.format(server) for server in servers] if job['type'] == 'query': client_address = job['info']['client_address'] + client_port = job['info']['client_port'] - query_tags = tags + ['client_address:{}'.format(client_address)] + query_tags = tags + 
['client_address:{}'.format(client_address), 'client_port:{}'.format(client_port)] yield { 'type': 'gauge', @@ -39,8 +41,14 @@ def collect_jobs(conn): elif job['type'] == 'index_construction': database = job['info']['db'] table = job['info']['table'] + index = job['info']['index'] + progress = job['info']['progress'] - index_construction_tags = tags + ['database:{}'.format(database), 'table:{}'.format(table)] + index_construction_tags = tags + [ + 'database:{}'.format(database), + 'table:{}'.format(table), + 'index:{}'.format(index), + ] yield { 'type': 'gauge', @@ -52,7 +60,7 @@ def collect_jobs(conn): yield { 'type': 'gauge', 'name': 'rethinkdb.jobs.index_construction.progress', - 'value': job['info']['progress'], + 'value': progress, 'tags': index_construction_tags, } @@ -61,6 +69,7 @@ def collect_jobs(conn): destination_server = job['info']['destination_server'] source_server = job['info']['source_server'] table = job['info']['table'] + progress = job['info']['progress'] backfill_tags = tags + [ 'database:{}'.format(database), @@ -79,6 +88,9 @@ def collect_jobs(conn): yield { 'type': 'gauge', 'name': 'rethinkdb.jobs.backfill.progress', - 'value': job['info']['progress'], + 'value': progress, 'tags': backfill_tags, } + + else: + continue # pragma: no cover diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py index 44db59022e510..5883fe78d2fb7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -152,6 +152,6 @@ def _to_timestamp(datetime): # type: (dt.datetime) -> float try: return datetime.timestamp() # type: ignore # (Mypy is run in --py2 mode.) - except AttributeError: + except AttributeError: # pragma: no cover # Python 2. 
return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index ee9b77c8010d0..01201af866b3b 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -2,72 +2,45 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from contextlib import contextmanager -from typing import Iterator - -import rethinkdb from rethinkdb import r from .common import ( CONNECT_SERVER_PORT, DATABASE, - HEROES_DOCUMENTS, HEROES_TABLE, - HEROES_TABLE_REPLICATED_CONFIG, - HEROES_TABLE_SINGLE_SERVER_CONFIG, + HEROES_TABLE_CONFIG, + HEROES_TABLE_DOCUMENTS, HOST, PROXY_PORT, ) -@contextmanager -def setup_cluster_ensuring_all_default_metrics_are_emitted(): - # type: () -> Iterator[None] +def setup_cluster(): + # type: () -> None """ - Configure a cluster so that all default metrics will be emitted if running a check within this context. + Configure the test cluster. """ - with _setup_database(): - _create_test_table() - _simulate_client_writes() - _simulate_client_reads() - _setup_test_table_replication() - yield + _create_database() + _create_test_table() + _simulate_client_writes() + _simulate_client_reads() -@contextmanager -def _setup_database(): +def _create_database(): with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: # See: https://rethinkdb.com/api/python/db_create response = r.db_create(DATABASE).run(conn) assert response['dbs_created'] == 1 - yield - - response = r.db_drop(DATABASE).run(conn) - assert response['dbs_dropped'] == 1 - def _create_test_table(): # type: () -> None with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: # See: https://rethinkdb.com/api/python/table_create/ - response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_SINGLE_SERVER_CONFIG).run(conn) + response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) assert response['tables_created'] == 1 -def _setup_test_table_replication(): - # type: () -> 
None - def _wait_backfill_started(conn): - # type: (rethinkdb.net.Connection) -> None - changes = r.db('rethinkdb').table('jobs').filter({'type': 'backfill'}).changes().run(conn) # type: Iterator - next(changes) - - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # See: https://rethinkdb.com/api/python/reconfigure - r.db(DATABASE).table(HEROES_TABLE).reconfigure(**HEROES_TABLE_REPLICATED_CONFIG).run(conn) - _wait_backfill_started(conn) - - def _simulate_client_writes(): # type: () -> None """ @@ -77,16 +50,10 @@ def _simulate_client_writes(): """ with r.connect(host=HOST, port=PROXY_PORT) as conn: - table = HEROES_TABLE - documents = HEROES_DOCUMENTS - # See: https://rethinkdb.com/api/python/insert - # NOTE: 'durability="soft"' speeds up writes by not waiting for data to be committed to disk. - response = ( - r.db(DATABASE).table(table).insert(documents).run(conn, durability="soft", array_limit=len(documents)) - ) + response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) assert response['errors'] == 0 - assert response['inserted'] == len(documents) + assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) def _simulate_client_reads(): @@ -98,5 +65,5 @@ def _simulate_client_reads(): """ with r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: - all_heroes = list(r.table('heroes').run(conn)) - assert len(all_heroes) == len(HEROES_DOCUMENTS) + all_heroes = list(r.table(HEROES_TABLE).run(conn)) + assert len(all_heroes) == len(HEROES_TABLE_DOCUMENTS) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index df7f06a52e43d..b6c40020950fe 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -19,10 +19,9 @@ # NOTE: server information used below is tightly coupled to the Docker Compose setup. 
SERVER_TAGS = { - 'server0': ['default', 'us', 'initial'], + 'server0': ['default', 'us'], 'server1': ['default', 'us', 'primary'], 'server2': ['default', 'eu'], - 'server3': ['default', 'eu'], } SERVERS = set(SERVER_TAGS) @@ -32,47 +31,36 @@ PROXY_PORT = 28018 DATABASE = 'doghouse' -HEROES_TABLE = 'heroes' -# NOTE: Hello, reader! It may not be immediately obvious what's going on below, so let me explain. -# -# ARethinkDB cluster is dynamic: as nodes, shards or replicas are added or removed, the distribution of -# data may change -- also known as "rebalancing". -# Most of the default metrics we collect are emitted regardless of the state of the cluster. -# But some metrics are only emitted when the cluster is evolving. -# (For example, this includes metrics about backfill jobs.) -# -# So, in order for all default metrics to be emitted during tests, we have one "initial" configuration -# for a single-server cluster, which is used as a starting point: -HEROES_TABLE_SINGLE_SERVER_CONFIG = {'shards': 1, 'replicas': {'initial': 1}, 'primary_replica_tag': 'initial'} -HEROES_TABLE_SERVER_INITIAL = 'server0' # (Because it's the only server tagged as 'initial'.) -# We'll then create a table there and fill it with data. -# Then, we'll switch to a "replicated" configuration, which changes the primary replica, and replicates the -# table across more servers: -HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG = 'primary' -HEROES_TABLE_REPLICATED_CONFIG = { +HEROES_TABLE = 'heroes' +HEROES_TABLE_CONFIG = { 'shards': 1, - 'replicas': {'primary': 1, 'eu': 2}, - 'primary_replica_tag': HEROES_TABLE_REPLICATED_PRIMARY_REPLICA_TAG, + 'replicas': {'primary': 1, 'eu': 1}, + 'primary_replica_tag': 'primary', } -HEROES_TABLE_SERVERS_REPLICATED = {'server1', 'server2', 'server3'} -HEROES_TABLE_REPLICAS_BY_SHARD = {0: HEROES_TABLE_SERVERS_REPLICATED} -# RethinkDB will then start moving data from server0 to those new replicas, emitting the transient metrics we'd -# like to test. 
-# The number of inserted documents should be large enough that any backfill job lasts long enough that its metrics -# are emitted during a check. Empirically, >80k documents seems to be enough: -HEROES_NUM_DOCUMENTS = 90000 - -HEROES_DOCUMENTS = [ +HEROES_TABLE_SERVERS = {'server1', 'server2'} +HEROES_TABLE_REPLICAS_BY_SHARD = {0: HEROES_TABLE_SERVERS} +HEROES_TABLE_DOCUMENTS = [ { "hero": "Magneto", "name": "Max Eisenhardt", "aka": ["Magnus", "Erik Lehnsherr", "Lehnsherr"], "magazine_titles": ["Alpha Flight", "Avengers", "Avengers West Coast"], "appearances_count": 42, - } -] * HEROES_NUM_DOCUMENTS - + }, + { + "hero": "Professor Xavier", + "name": "Charles Francis Xavier", + "magazine_titles": ["Alpha Flight", "Avengers", "Bishop", "Defenders"], + "appearances_count": 72, + }, + { + "hero": "Storm", + "name": "Ororo Monroe", + "magazine_titles": ["Amazing Spider-Man vs. Wolverine", "Excalibur", "Fantastic Four", "Iron Fist"], + "appearances_count": 72, + }, +] # Metrics lists. @@ -144,17 +132,7 @@ ) QUERY_JOBS_METRICS = ('rethinkdb.jobs.query.duration',) - -# TODO: trigger index construction -INDEX_CONSTRUCTION_JOBS_METRICS = ( - 'rethinkdb.jobs.index_construction.duration', - 'rethinkdb.jobs.index_construction.progress', -) - -BACKFILL_JOBS_METRICS = ( - 'rethinkdb.jobs.backfill.duration', - 'rethinkdb.jobs.backfill.progress', -) +# NOTE: other jobs metrics are not listed here as they are covered by unit tests instead of integration tests. CURRENT_ISSUES_METRICS = ( 'rethinkdb.current_issues.log_write_error.total', diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 0f5a4a8906494..fe4bcb086e7d7 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -7,7 +7,7 @@ services: tty: true # Required otherwise RethinkDB won't output any logs. 
image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server0 - command: rethinkdb --bind all --server-name server0 --server-tag us --server-tag initial + command: rethinkdb --bind all --server-name server0 --server-tag us ports: - ${RETHINKDB_CONNECT_SERVER_PORT}:28015 # Client driver port. - 8080:8080 # Port for the web UI. Debugging only (not used by tests). @@ -32,16 +32,6 @@ services: depends_on: - rethinkdb-server0 - rethinkdb-server3: - tty: true - image: ${RETHINKDB_IMAGE} - container_name: rethinkdb-server3 - command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server3 --server-tag eu - links: - - rethinkdb-server0 - depends_on: - - rethinkdb-server0 - rethinkdb-proxy0: tty: true image: ${RETHINKDB_IMAGE} diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index ce2f9521dc010..0c58eaa90f0c9 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -9,9 +9,10 @@ import pytest -from datadog_checks.dev import docker_run +from datadog_checks.dev import WaitFor, docker_run from datadog_checks.rethinkdb._types import Instance +from .cluster import setup_cluster from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} @@ -37,6 +38,8 @@ def dd_environment(instance): 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), } # type: Dict[str, str] + conditions = [WaitFor(setup_cluster)] + log_patterns = [ r'Server ready, "server0".*', r'Connected to server "server1".*', @@ -44,6 +47,6 @@ def dd_environment(instance): r'Connected to proxy.*', ] # type: List[str] - with docker_run(compose_file, env_vars=env_vars, log_patterns=log_patterns): + with docker_run(compose_file, conditions=conditions, env_vars=env_vars, log_patterns=log_patterns): config = {'instances': [instance]} yield config, E2E_METADATA diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 0611366fc369e..8609c5997796d 100644 --- 
a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -4,7 +4,7 @@ from __future__ import absolute_import import copy -from typing import Iterator, List, TypedDict +from typing import Iterator import pytest import rethinkdb @@ -14,14 +14,12 @@ from datadog_checks.rethinkdb._types import Instance, Metric from .common import ( - BACKFILL_JOBS_METRICS, CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, HEROES_TABLE_REPLICAS_BY_SHARD, - HEROES_TABLE_SERVER_INITIAL, - HEROES_TABLE_SERVERS_REPLICATED, + HEROES_TABLE_SERVERS, QUERY_JOBS_METRICS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, @@ -33,9 +31,6 @@ TABLE_STATUS_SHARDS_METRICS, TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS, ) -from .cluster import setup_cluster_ensuring_all_default_metrics_are_emitted - -Context = TypedDict('Context', {'backfilling_servers': List[str]}) @pytest.mark.integration @@ -43,22 +38,12 @@ def test_check(aggregator, instance): # type: (AggregatorStub, Instance) -> None check = RethinkDBCheck('rethinkdb', {}, [instance]) - - with setup_cluster_ensuring_all_default_metrics_are_emitted(): - check.check(instance) - - context = {'backfilling_servers': []} # type: Context + check.check(instance) _assert_statistics_metrics(aggregator) - _assert_table_status_metrics(aggregator, context=context) - - assert context['backfilling_servers'], ( - 'Expected backfilling to be ongoing for at least one replica. ' - 'Aborting, as otherwise backfill metrics would not be covered.' - ) - + _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator) - _assert_system_jobs_metrics(aggregator, context=context) + _assert_system_jobs_metrics(aggregator) aggregator.assert_all_metrics_covered() @@ -85,14 +70,14 @@ def _assert_statistics_metrics(aggregator): aggregator.assert_metric(metric, count=1, tags=tags) # Replicas (table/server pairs). 
- for replica_server in HEROES_TABLE_SERVERS_REPLICATED: + for replica_server in HEROES_TABLE_SERVERS: for metric in REPLICA_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(replica_server)] tags.extend(SERVER_TAGS[replica_server]) aggregator.assert_metric(metric, count=1, tags=tags) # Ensure non-replica servers haven't yielded replica statistics. - for non_replica_server in SERVERS - HEROES_TABLE_SERVERS_REPLICATED: + for non_replica_server in SERVERS - HEROES_TABLE_SERVERS: for metric in REPLICA_STATISTICS_METRICS: tags = [ 'table:{}'.format(HEROES_TABLE), @@ -103,8 +88,8 @@ def _assert_statistics_metrics(aggregator): aggregator.assert_metric(metric, count=0, tags=tags) -def _assert_table_status_metrics(aggregator, context): - # type: (AggregatorStub, Context) -> None +def _assert_table_status_metrics(aggregator): + # type: (AggregatorStub) -> None # Status of tables. for metric in TABLE_STATUS_METRICS: @@ -127,17 +112,9 @@ def _assert_table_status_metrics(aggregator, context): ] for metric in TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS: - # Due to 'setup_cluster()', RethinkDB should currently be backfilling data from - # the initial server to the new replicas. - value = 1 if metric.endswith('.state.backfilling') else 0 - try: - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) - except AssertionError: # pragma: no cover - # Depending on timing, the server may already be ready. Fine! Re-assert to limit flakiness. - value = 1 if metric.endswith('.state.ready') else 0 - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - else: - context['backfilling_servers'].append(server) + # Assumption: all replicas in the cluster are ready, i.e. no rebalancing is in progress. 
+ value = 1 if metric.endswith('.state.ready') else 0 + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) def _assert_server_status_metrics(aggregator): @@ -148,25 +125,16 @@ def _assert_server_status_metrics(aggregator): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) -def _assert_system_jobs_metrics(aggregator, context): - # type: (AggregatorStub, Context) -> None - - # Query jobs. +def _assert_system_jobs_metrics(aggregator): + # type: (AggregatorStub) -> None for metric in QUERY_JOBS_METRICS: + # NOTE: these metrics are emitted because the query issued to collect system jobs metrics is + # included in system jobs themselves. aggregator.assert_metric(metric, metric_type=aggregator.GAUGE) - # Backfill jobs. - for metric in BACKFILL_JOBS_METRICS: - for server in context['backfilling_servers']: - tags = [ - 'database:{}'.format(DATABASE), - 'table:{}'.format(HEROES_TABLE), - 'destination_server:{}'.format(server), - 'source_server:{}'.format(HEROES_TABLE_SERVER_INITIAL), - 'server:{}'.format(server), - 'server:{}'.format(HEROES_TABLE_SERVER_INITIAL), - ] - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + # NOTE: other system jobs metrics are not covered here because they are only emitted when the cluster is + # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to + # test without introducing flakiness. @pytest.mark.integration diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py new file mode 100644 index 0000000000000..159e22d7e14cc --- /dev/null +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -0,0 +1,113 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +""" +Unit tests for metrics that are hard to test using integration tests, eg. 
because they depend on cluster dynamics. +""" + +from rethinkdb import r + +from datadog_checks.rethinkdb._default_metrics._jobs import collect_jobs +from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob, QueryJob + +from .utils import MockConnection, patch_connection_type + + +def test_jobs_metrics(): + # type: () -> None + """ + Verify jobs metrics submitted by RethinkDB are processed correctly. + + We provide unit tests for these metrics because testing them in a live environment is tricky. + """ + + mock_query_job_row = { + 'type': 'query', + 'id': ('query', 'abcd1234'), + 'duration_sec': 0.21, + 'info': { + 'client_address': 'localhost', + 'client_port': 8080, + 'query': "r.table('heroes').run(conn)", + 'user': 'johndoe', + }, + 'servers': ['server0'], + } # type: QueryJob + + mock_backfill_job_row = { + # See: https://rethinkdb.com/docs/system-jobs/#backfill + 'type': 'backfill', + 'id': ('backfill', 'abcd1234'), + 'duration_sec': 0.42, + 'info': { + 'db': 'doghouse', + 'table': 'heroes', + 'destination_server': 'server2', + 'source_server': 'server0', + 'progress': 42, + }, + 'servers': ['server0', 'server2'], + } # type: BackfillJob + + mock_index_construction_job_row = { + # See: https://rethinkdb.com/docs/system-jobs/#index_construction + 'type': 'index_construction', + 'id': ('index_construction', 'abcd1234'), + 'duration_sec': 0.42, + 'info': {'db': 'doghouse', 'table': 'heroes', 'index': 'appearances_count', 'progress': 42}, + 'servers': ['server1'], + } # type: IndexConstructionJob + + mock_rows = [mock_query_job_row, mock_backfill_job_row, mock_index_construction_job_row] + + with patch_connection_type(MockConnection): + conn = r.connect(rows=mock_rows) + metrics = list(collect_jobs(conn)) + + assert metrics == [ + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.query.duration', + 'value': 0.21, + 'tags': ['server:server0', 'client_address:localhost', 'client_port:8080'], + }, + { + 'type': 'gauge', + 'name': 
'rethinkdb.jobs.backfill.duration', + 'value': 0.42, + 'tags': [ + 'server:server0', + 'server:server2', + 'database:doghouse', + 'destination_server:server2', + 'source_server:server0', + 'table:heroes', + ], + }, + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.backfill.progress', + 'value': 42, + 'tags': [ + 'server:server0', + 'server:server2', + 'database:doghouse', + 'destination_server:server2', + 'source_server:server0', + 'table:heroes', + ], + }, + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.duration', + 'value': 0.42, + 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], + }, + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.progress', + 'value': 42, + 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], + }, + ] diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py new file mode 100644 index 0000000000000..6b7bf8af56c05 --- /dev/null +++ b/rethinkdb/tests/unit/utils.py @@ -0,0 +1,68 @@ +from contextlib import contextmanager +from typing import Any, Dict, Iterator + +from rethinkdb import r +from rethinkdb.net import Connection + + +class MockConnectionInstance(object): + def __init__(self, parent, *args, **kwargs): + # type: (MockConnection, *Any, **Any) -> None + self._parent = parent + + # Implement the connection instance interface used by RethinkDB. 
+ + def client_address(self): + # type: () -> str + return 'testserver' + + def client_port(self): + # type: () -> int + return 28015 + + def connect(self, timeout): + # type: (float) -> MockConnection + return self._parent + + def reconnect(self, timeout): + # type: (float) -> MockConnection + return self.connect(timeout) + + def is_open(self): + # type: () -> bool + return True + + def run_query(self, query, noreply): + # type: (Any, bool) -> Iterator[Dict[str, Any]] + return self._parent.mock_rows() + + +class MockConnection(Connection): + """ + A RethinkDB connection type that mocks all queries by sending a deterministic set of rows. + + Inspired by: + https://github.com/rethinkdb/rethinkdb-python/blob/9aa68feff16dc984406ae0e276f24e87df89b334/rethinkdb/asyncio_net/net_asyncio.py + """ + + def __init__(self, *args, **kwargs): + # type: (*Any, **Any) -> None + rows = kwargs.pop('rows') + super(MockConnection, self).__init__(MockConnectionInstance, *args, **kwargs) + self.rows = rows + + def mock_rows(self): + # type: () -> Iterator[Dict[str, Any]] + for row in self.rows: + yield row + + +@contextmanager +def patch_connection_type(conn_type): + # type: (type) -> Iterator[None] + initial_conn_type = r.connection_type + r.connection_type = conn_type + try: + yield + finally: + r.connection_type = initial_conn_type From e69c6c3b595e6fd20771be8c857e3935485173e6 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 18:19:54 +0100 Subject: [PATCH 040/147] Test unknown job types, simplify MockConnectionInstance --- .../rethinkdb/_default_metrics/_jobs.py | 3 --- rethinkdb/tests/unit/test_transient_metrics.py | 7 ++++++- rethinkdb/tests/unit/utils.py | 12 ------------ 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py index dceaaa6f149d7..892d08756d471 100644 --- 
a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py @@ -91,6 +91,3 @@ def collect_jobs(conn): 'value': progress, 'tags': backfill_tags, } - - else: - continue # pragma: no cover diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index 159e22d7e14cc..2abd987af8b79 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -6,6 +6,7 @@ Unit tests for metrics that are hard to test using integration tests, eg. because they depend on cluster dynamics. """ +import pytest from rethinkdb import r from datadog_checks.rethinkdb._default_metrics._jobs import collect_jobs @@ -13,6 +14,8 @@ from .utils import MockConnection, patch_connection_type +pytestmark = pytest.mark.unit + def test_jobs_metrics(): # type: () -> None @@ -59,7 +62,9 @@ def test_jobs_metrics(): 'servers': ['server1'], } # type: IndexConstructionJob - mock_rows = [mock_query_job_row, mock_backfill_job_row, mock_index_construction_job_row] + mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} + + mock_rows = [mock_query_job_row, mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] with patch_connection_type(MockConnection): conn = r.connect(rows=mock_rows) diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index 6b7bf8af56c05..4ec2acbbe1d5c 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -12,22 +12,10 @@ def __init__(self, parent, *args, **kwargs): # Implement the connection instance interface used by RethinkDB. 
- def client_address(self): - # type: () -> str - return 'testserver' - - def client_port(self): - # type: () -> int - return 28015 - def connect(self, timeout): # type: (float) -> MockConnection return self._parent - def reconnect(self, timeout): - # type: (float) -> MockConnection - return self.connect(timeout) - def is_open(self): # type: () -> bool return True From 25801233cf6453a082c7dfe7dce696a677f43ba8 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 18:29:12 +0100 Subject: [PATCH 041/147] Cleanup, fix e2e test --- rethinkdb/tests/common.py | 13 +++++++++++++ rethinkdb/tests/conftest.py | 6 +++--- rethinkdb/tests/test_e2e.py | 15 ++++++++++++++- rethinkdb/tests/test_rethinkdb.py | 8 -------- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index b6c40020950fe..28753eded7821 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -144,3 +144,16 @@ 'rethinkdb.current_issues.memory_error.total', 'rethinkdb.current_issues.non_transitive_error.total', ) + + +METRICS = ( + CLUSTER_STATISTICS_METRICS + + SERVER_STATISTICS_METRICS + + TABLE_STATISTICS_METRICS + + REPLICA_STATISTICS_METRICS + + TABLE_STATUS_METRICS + + TABLE_STATUS_SHARDS_METRICS + + TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS + + SERVER_STATUS_METRICS + + QUERY_JOBS_METRICS +) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 0c58eaa90f0c9..c1d8aae7bf63f 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -5,7 +5,7 @@ from __future__ import absolute_import import os -from typing import Dict, Iterator, List +from typing import Iterator import pytest @@ -36,7 +36,7 @@ def dd_environment(instance): 'RETHINKDB_IMAGE': IMAGE, 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), - } # type: Dict[str, str] + } conditions = [WaitFor(setup_cluster)] @@ -45,7 +45,7 @@ def dd_environment(instance): 
r'Connected to server "server1".*', r'Connected to server "server2".*', r'Connected to proxy.*', - ] # type: List[str] + ] with docker_run(compose_file, conditions=conditions, env_vars=env_vars, log_patterns=log_patterns): config = {'instances': [instance]} diff --git a/rethinkdb/tests/test_e2e.py b/rethinkdb/tests/test_e2e.py index a979810fb7154..9f311d4aba5fe 100644 --- a/rethinkdb/tests/test_e2e.py +++ b/rethinkdb/tests/test_e2e.py @@ -2,10 +2,23 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Callable + import pytest +from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.rethinkdb import RethinkDBCheck + +from .common import METRICS + @pytest.mark.e2e def test_check_ok(dd_agent_check): - aggregator = dd_agent_check(rate=True) + # type: (Callable) -> None + aggregator = dd_agent_check(rate=True) # type: AggregatorStub + + for metric in METRICS: + aggregator.assert_metric(metric) + aggregator.assert_all_metrics_covered() + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 8609c5997796d..97caf0223278f 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -53,23 +53,18 @@ def test_check(aggregator, instance): def _assert_statistics_metrics(aggregator): # type: (AggregatorStub) -> None - - # Cluster. for metric in CLUSTER_STATISTICS_METRICS: aggregator.assert_metric(metric, count=1, tags=[]) - # Servers. for server in SERVERS: for metric in SERVER_STATISTICS_METRICS: tags = ['server:{}'.format(server)] + SERVER_TAGS[server] aggregator.assert_metric(metric, count=1, tags=tags) - # Tables. for metric in TABLE_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, count=1, tags=tags) - # Replicas (table/server pairs). 
for replica_server in HEROES_TABLE_SERVERS: for metric in REPLICA_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(replica_server)] @@ -90,13 +85,10 @@ def _assert_statistics_metrics(aggregator): def _assert_table_status_metrics(aggregator): # type: (AggregatorStub) -> None - - # Status of tables. for metric in TABLE_STATUS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - # Status of shards. for shard, servers in HEROES_TABLE_REPLICAS_BY_SHARD.items(): tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] From e0098d4714c2c99d9f302c77180becc5c6dbd3e4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 17 Feb 2020 18:57:02 +0100 Subject: [PATCH 042/147] Use a more realistic client_port --- rethinkdb/tests/unit/test_transient_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index 2abd987af8b79..3f1dd4529baf7 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -31,7 +31,7 @@ def test_jobs_metrics(): 'duration_sec': 0.21, 'info': { 'client_address': 'localhost', - 'client_port': 8080, + 'client_port': 28015, 'query': "r.table('heroes').run(conn)", 'user': 'johndoe', }, @@ -75,7 +75,7 @@ def test_jobs_metrics(): 'type': 'gauge', 'name': 'rethinkdb.jobs.query.duration', 'value': 0.21, - 'tags': ['server:server0', 'client_address:localhost', 'client_port:8080'], + 'tags': ['server:server0', 'client_address:localhost', 'client_port:28015'], }, { 'type': 'gauge', From 01018a0f30a3f12b4fa31d89bcddc2c771fc1b1b Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 18 Feb 2020 13:50:27 +0100 Subject: [PATCH 043/147] Add default metrics flags --- 
rethinkdb/datadog_checks/rethinkdb/_config.py | 37 +++++++++++++++- .../rethinkdb/_default_metrics/__init__.py | 43 ++++++++++--------- .../rethinkdb/_default_metrics/_statistics.py | 32 ++++---------- .../rethinkdb/_default_metrics/_statuses.py | 22 ++++------ .../{_jobs.py => _system_jobs.py} | 2 +- rethinkdb/datadog_checks/rethinkdb/_types.py | 16 ++++++- .../datadog_checks/rethinkdb/rethinkdb.py | 24 ++++------- rethinkdb/tests/test_rethinkdb.py | 2 +- rethinkdb/tests/unit/test_config.py | 29 +++++++++++++ .../tests/unit/test_transient_metrics.py | 4 +- 10 files changed, 132 insertions(+), 79 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/_default_metrics/{_jobs.py => _system_jobs.py} (98%) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index adc51e6402738..1c8b2d155151a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -2,9 +2,16 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import absolute_import + +from typing import Callable, Dict, Iterator, List + +import rethinkdb + from datadog_checks.base import ConfigurationError -from ._types import Instance +from ._default_metrics import DEFAULT_METRIC_GROUPS +from ._types import DefaultMetricGroup, Instance, Metric class Config: @@ -18,19 +25,45 @@ def __init__(self, instance): # type: (Instance) -> None host = instance.get('host', 'localhost') port = instance.get('port', 28015) + default_metrics = instance.get('default_metrics', True) if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) - if isinstance(port, bool) or not isinstance(port, int): + if not isinstance(port, int) or isinstance(port, bool): raise ConfigurationError('port must be an integer (got {!r})'.format(type(port))) + if isinstance(default_metrics, bool): + default_metrics = {group: default_metrics for group 
in DEFAULT_METRIC_GROUPS} + elif isinstance(default_metrics, dict): + unknown_groups = set(default_metrics) - set(DEFAULT_METRIC_GROUPS) + if unknown_groups: + raise ConfigurationError( + 'default_metrics contains unknown entries: {}'.format(', '.join(unknown_groups)) + ) + + invalid_groups = [group for group, enabled in default_metrics.items() if not isinstance(enabled, bool)] + if invalid_groups: + raise ConfigurationError( + 'default_metrics contains entries that are not booleans: {}'.format(', '.join(invalid_groups)) + ) + else: + raise ConfigurationError( + 'default_metrics must be a boolean or a mapping (got {!r})'.format(type(default_metrics)) + ) + if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) self.host = host # type: str self.port = port # type: int + self.metric_streams = _build_metric_streams(default_metrics) def __repr__(self): # type: () -> str return 'Config(host={host!r}, port={port!r})'.format(host=self.host, port=self.port) + + +def _build_metric_streams(default_metrics): + # type: (Dict[DefaultMetricGroup, bool]) -> List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] + return [stream for group, stream in DEFAULT_METRIC_GROUPS.items() if default_metrics.get(group, False)] diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py index 32d52f1cb8f58..5c83556ff98d8 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py @@ -5,27 +5,30 @@ from __future__ import absolute_import import itertools -from typing import Iterator +from typing import Callable, Dict, Iterator import rethinkdb -from .._types import Metric +from .._types import DefaultMetricGroup, Metric from ._current_issues import collect_current_issues -from ._jobs import collect_jobs -from ._statistics import collect_statistics -from ._statuses import 
collect_status_metrics - - -def collect_default_metrics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect default metrics from various system tables. - - See: https://rethinkdb.com/docs/system-tables/ - """ - metrics = itertools.chain( - collect_statistics(conn), collect_status_metrics(conn), collect_jobs(conn), collect_current_issues(conn) - ) - - for metric in metrics: - yield metric +from ._statistics import ( + collect_cluster_statistics, + collect_replica_statistics, + collect_server_statistics, + collect_table_statistics, +) +from ._statuses import collect_server_status, collect_table_status +from ._system_jobs import collect_system_jobs + +DEFAULT_METRIC_GROUPS = { + 'cluster_statistics': collect_cluster_statistics, + 'server_statistics': collect_server_statistics, + 'table_statistics': collect_table_statistics, + 'replica_statistics': collect_replica_statistics, + 'server_status': collect_server_status, + 'table_status': collect_table_status, + 'system_jobs': collect_system_jobs, + 'current_issues': collect_current_issues, +} # type: Dict[DefaultMetricGroup, Callable[[rethinkdb.net.Connection], Iterator[Metric]]] + +__all__ = ['DEFAULT_METRIC_GROUPS'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index 22336788d09ea..6ea5badd91e7f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -1,10 +1,14 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +""" +Collect metrics about system statistics. 
+ +See: https://rethinkdb.com/docs/system-stats/ +""" from __future__ import absolute_import -import itertools import logging from typing import Iterator @@ -16,25 +20,7 @@ logger = logging.getLogger(__name__) -def collect_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about system statistics. - - See: https://rethinkdb.com/docs/system-stats/ - """ - metrics = itertools.chain( - _collect_cluster_statistics(conn), - _collect_servers_statistics(conn), - _collect_table_statistics(conn), - _collect_replicas_statistics(conn), - ) - - for metric in metrics: - yield metric - - -def _collect_cluster_statistics(conn): +def collect_cluster_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] stats = query_cluster_stats(conn) logger.debug('cluster_statistics stats=%r', stats) @@ -63,7 +49,7 @@ def _collect_cluster_statistics(conn): } -def _collect_servers_statistics(conn): +def collect_server_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] for server, stats in query_servers_with_stats(conn): logger.debug('server_statistics server=%r, stats=%r', server, stats) @@ -131,7 +117,7 @@ def _collect_servers_statistics(conn): } -def _collect_table_statistics(conn): +def collect_table_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] for table, stats in query_tables_with_stats(conn): logger.debug('table_statistics table=%r, stats=%r', table, stats) @@ -157,7 +143,7 @@ def _collect_table_statistics(conn): } -def _collect_replicas_statistics(conn): +def collect_replica_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] for table, server, stats in query_replica_stats(conn): logger.debug('replica_statistics table=%r server=%r stats=%r', table, server, stats) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py index 5883fe78d2fb7..e5f98fee527c9 100644 --- 
a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py @@ -5,7 +5,6 @@ from __future__ import absolute_import import datetime as dt -import itertools import time from typing import Iterator @@ -15,21 +14,13 @@ from .._types import Metric, ReplicaState -def collect_status_metrics(conn): +def collect_table_status(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ - Collect metrics about server and table statuses. + Collect metrics about server statuses. - See: https://rethinkdb.com/docs/system-tables/#status-tables + See: https://rethinkdb.com/docs/system-tables/#server_status """ - metrics = itertools.chain(_collect_table_status_metrics(conn), _collect_server_status_metrics(conn)) - - for metric in metrics: - yield metric - - -def _collect_table_status_metrics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] for table_status in query_table_status(conn): table = table_status['name'] database = table_status['db'] @@ -110,8 +101,13 @@ def _replica_state(state): yield _replica_state('waiting_for_quorum') -def _collect_server_status_metrics(conn): +def collect_server_status(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics about table statuses. 
+ + See: https://rethinkdb.com/docs/system-tables/#table_status + """ for server in query_server_status(conn): name = server['name'] network = server['network'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_system_jobs.py similarity index 98% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py rename to rethinkdb/datadog_checks/rethinkdb/_default_metrics/_system_jobs.py index 892d08756d471..523d90a343633 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_system_jobs.py @@ -12,7 +12,7 @@ from .._types import Metric -def collect_jobs(conn): +def collect_system_jobs(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about system jobs. diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 94b744ce7ec58..39165b13cbb92 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -14,8 +14,20 @@ 'Metric', {'type': Literal['gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} ) -# Expected shape of an `instance` dictionary. -Instance = TypedDict('Instance', {'host': str, 'port': int}, total=False) +DefaultMetricGroup = Literal[ + 'cluster_statistics', + 'server_statistics', + 'table_statistics', + 'replica_statistics', + 'table_status', + 'server_status', + 'system_jobs', + 'current_issues', +] + +Instance = TypedDict( + 'Instance', {'host': str, 'port': int, 'default_metrics': Union[bool, Dict[DefaultMetricGroup, bool]]}, total=False +) # Configuration documents. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index aa31cffc62585..c8948c6b92f2f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -13,7 +13,6 @@ from datadog_checks.base import AgentCheck from ._config import Config -from ._default_metrics import collect_default_metrics from ._types import ConnectionServer, Instance, Metric @@ -24,31 +23,26 @@ class RethinkDBCheck(AgentCheck): A set of default metrics is collected from system tables. """ - # NOTE: use of private names (double underscores, e.g. '__member') prevents name clashes with the base class. - def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) - - self.__config = Config(self.instance) - - # NOTE: this list is exposed for testing purposes. - self._metric_collectors = [] # type: List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] - self._metric_collectors.append(collect_default_metrics) + self.config = Config(self.instance) def check(self, instance): # type: (Instance) -> None - config = self.__config + config = self.config self.log.debug('check config=%r', config) host = config.host port = config.port + metric_streams = config.metric_streams - with self.__submit_service_check() as on_connection_established: + with self.submit_service_check() as on_connection_established: with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: on_connection_established(conn) - for metric in self.__collect_metrics(conn): - self.__submit_metric(metric) + for metric_stream in metric_streams: + for metric in metric_stream(conn): + self.submit_metric(metric) def __collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] @@ -57,7 +51,7 @@ def __collect_metrics(self, conn): yield metric @contextmanager - def __submit_service_check(self): + def submit_service_check(self): # type: () 
-> Iterator[Callable[[rethinkdb.net.Connection], None]] tags = [] # type: List[str] @@ -80,7 +74,7 @@ def on_connection_established(conn): self.log.debug('service_check OK') self.service_check('rethinkdb.can_connect', self.OK, tags=tags) - def __submit_metric(self, metric): + def submit_metric(self, metric): # type: (Metric) -> None self.log.debug('submit_metric metric=%r', metric) submit = getattr(self, metric['type']) # type: Callable diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 97caf0223278f..76674d9728e59 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -152,7 +152,7 @@ def collect_and_fail(conn): raise RuntimeError('Oops!') check = RethinkDBCheck('rethinkdb', {}, [instance]) - check._metric_collectors.append(collect_and_fail) + check.config.metric_streams.append(collect_and_fail) check.check(instance) service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 05e6d23ccc625..85f096b6f8fc6 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -8,6 +8,7 @@ from datadog_checks.base import ConfigurationError from datadog_checks.rethinkdb._config import Config +from datadog_checks.rethinkdb._default_metrics import DEFAULT_METRIC_GROUPS pytestmark = pytest.mark.unit @@ -44,3 +45,31 @@ def test_invalid_port(port): # type: (Any) -> None with pytest.raises(ConfigurationError): Config(instance={'port': port}) + + +def test_default_metrics(): + # type: () -> None + config = Config(instance={}) + default_metric_streams = config.metric_streams + assert default_metric_streams == list(DEFAULT_METRIC_GROUPS.values()) + + config = Config(instance={'default_metrics': True}) + assert config.metric_streams == default_metric_streams + + config = Config(instance={'default_metrics': False}) + assert config.metric_streams == [] + + config = 
Config(instance={'default_metrics': {}}) + assert config.metric_streams == [] + + config = Config(instance={'default_metrics': {'table_statistics': True, 'server_statistics': False}}) + assert config.metric_streams == [DEFAULT_METRIC_GROUPS['table_statistics']] + + with pytest.raises(ConfigurationError): + Config(instance={'default_metrics': 'not a dict nor a bool'}) # type: ignore + + with pytest.raises(ConfigurationError): + Config(instance={'default_metrics': {'unknown_key': True}}) # type: ignore + + with pytest.raises(ConfigurationError): + Config(instance={'default_metrics': {'table_statistics': 'not a bool'}}) # type: ignore diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index 3f1dd4529baf7..f3e47487a1c6e 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -9,7 +9,7 @@ import pytest from rethinkdb import r -from datadog_checks.rethinkdb._default_metrics._jobs import collect_jobs +from datadog_checks.rethinkdb._default_metrics import collect_system_jobs from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob, QueryJob from .utils import MockConnection, patch_connection_type @@ -68,7 +68,7 @@ def test_jobs_metrics(): with patch_connection_type(MockConnection): conn = r.connect(rows=mock_rows) - metrics = list(collect_jobs(conn)) + metrics = list(collect_system_jobs(conn)) assert metrics == [ { From 4debc732817f533c1bb190e41e31753465160011 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 18 Feb 2020 13:54:47 +0100 Subject: [PATCH 044/147] Refactor service check submission, re-raise check errors for Agent-side logging --- .../datadog_checks/rethinkdb/rethinkdb.py | 63 ++++++++----------- rethinkdb/tests/test_rethinkdb.py | 15 +++-- 2 files changed, 38 insertions(+), 40 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 
c8948c6b92f2f..1faf12c363f8a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -28,51 +28,28 @@ def __init__(self, *args, **kwargs): super(RethinkDBCheck, self).__init__(*args, **kwargs) self.config = Config(self.instance) - def check(self, instance): - # type: (Instance) -> None - config = self.config - self.log.debug('check config=%r', config) - - host = config.host - port = config.port - metric_streams = config.metric_streams - - with self.submit_service_check() as on_connection_established: - with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: - on_connection_established(conn) - for metric_stream in metric_streams: - for metric in metric_stream(conn): - self.submit_metric(metric) - - def __collect_metrics(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - for collect in self._metric_collectors: - for metric in collect(conn): - yield metric - @contextmanager - def submit_service_check(self): - # type: () -> Iterator[Callable[[rethinkdb.net.Connection], None]] - tags = [] # type: List[str] - - def on_connection_established(conn): - # type: (rethinkdb.net.Connection) -> None - server = conn.server() # type: ConnectionServer - self.log.debug('connected server=%r', server) - tags.append('server:{}'.format(server['name'])) - # TODO: add a 'proxy' tag if server is a proxy? 
+ def connect(self, host, port): + # type: (str, int) -> Iterator[rethinkdb.net.Connection] + service_check_tags = [] # type: List[str] try: - yield on_connection_established + with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: + server = conn.server() # type: ConnectionServer + self.log.debug('connected server=%r', server) + service_check_tags.append('server:{}'.format(server['name'])) + yield conn except rethinkdb.errors.ReqlDriverError as exc: self.log.error('Could not connect to RethinkDB server: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=service_check_tags) + raise except Exception as exc: self.log.error('Unexpected error while executing RethinkDB check: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=service_check_tags) + raise else: self.log.debug('service_check OK') - self.service_check('rethinkdb.can_connect', self.OK, tags=tags) + self.service_check('rethinkdb.can_connect', self.OK, tags=service_check_tags) def submit_metric(self, metric): # type: (Metric) -> None @@ -80,6 +57,20 @@ def submit_metric(self, metric): submit = getattr(self, metric['type']) # type: Callable submit(metric['name'], value=metric['value'], tags=metric['tags']) + def check(self, instance): + # type: (Instance) -> None + config = self.config + self.log.debug('check config=%r', config) + + host = config.host + port = config.port + metric_streams = config.metric_streams + + with self.connect(host, port) as conn: + for metric_stream in metric_streams: + for metric in metric_stream(conn): + self.submit_metric(metric) + # TODO: version metadata. # TODO: custom queries. (Hint: look at `QueryManager`.) # TODO: allow not sending default metrics. 
diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 76674d9728e59..1cacb92d45688 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -137,7 +137,9 @@ def test_cannot_connect_unknown_host(aggregator, instance): instance['host'] = 'doesnotexist' check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) + + with pytest.raises(rethinkdb.errors.ReqlDriverError): + check.check(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=[]) @@ -146,14 +148,19 @@ def test_cannot_connect_unknown_host(aggregator, instance): @pytest.mark.usefixtures('dd_environment') def test_connected_but_check_failed(aggregator, instance): # type: (AggregatorStub, Instance) -> None + class Failure(Exception): + pass + def collect_and_fail(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} - raise RuntimeError('Oops!') + raise Failure check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.config.metric_streams.append(collect_and_fail) - check.check(instance) + check.config.metric_streams = [collect_and_fail] + + with pytest.raises(Failure): + check.check(instance) service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) From 64b7238a8933932ffba93db70cb8f775def41751 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 18 Feb 2020 14:03:58 +0100 Subject: [PATCH 045/147] Switch from rethinkdb.r.[...] to r.[...] 
--- .../rethinkdb/_default_metrics/_statistics.py | 4 +- .../datadog_checks/rethinkdb/_queries.py | 80 +++++++++---------- .../datadog_checks/rethinkdb/rethinkdb.py | 3 +- 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py index 6ea5badd91e7f..f9defdc33ce7d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py @@ -14,7 +14,7 @@ import rethinkdb -from .._queries import query_cluster_stats, query_replica_stats, query_servers_with_stats, query_tables_with_stats +from .._queries import query_cluster_stats, query_replicas_with_stats, query_servers_with_stats, query_tables_with_stats from .._types import Metric logger = logging.getLogger(__name__) @@ -145,7 +145,7 @@ def collect_table_statistics(conn): def collect_replica_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - for table, server, stats in query_replica_stats(conn): + for table, server, stats in query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r stats=%r', table, server, stats) database = stats['db'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 20dda52096f1e..50409bb30371f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -1,12 +1,20 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +""" +Definition of RethinkDB queries used by the RethinkDB integration. 
+ +Useful reference documentation: +- Python ReQL command reference: https://rethinkdb.com/api/python/ +- Usage of `eq_join`: https://rethinkdb.com/api/python/eq_join/ +""" from __future__ import absolute_import from typing import Iterator, Tuple import rethinkdb +from rethinkdb import r from ._types import ( ClusterStats, @@ -27,7 +35,7 @@ def query_cluster_stats(conn): """ Retrieve statistics about the cluster. """ - return rethinkdb.r.table('stats').get(['cluster']).run(conn) + return r.db('rethinkdb').table('stats').get(['cluster']).run(conn) def query_servers_with_stats(conn): @@ -35,24 +43,19 @@ def query_servers_with_stats(conn): """ Retrieve each server in the cluster along with its statistics. """ - - # See: https://rethinkdb.com/api/python/eq_join/ - # For servers: stats['id'] = ['server', ''] - is_server_stats_row = rethinkdb.r.row['id'].nth(0) == 'server' - server_id = rethinkdb.r.row['id'].nth(1) + is_server_stats_row = r.row['id'].nth(0) == 'server' + server_id = r.row['id'].nth(1) - rows = ( - rethinkdb.r.table('stats') - .filter(is_server_stats_row) - .eq_join(server_id, rethinkdb.r.table('server_config')) - .run(conn) - ) # type: Iterator[JoinRow] + stats = r.db('rethinkdb').table('stats') + server_config = r.db('rethinkdb').table('server_config') + + rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] for row in rows: - stats = row['left'] # type: ServerStats + server_stats = row['left'] # type: ServerStats server = row['right'] # type: Server - yield server, stats + yield server, server_stats def query_tables_with_stats(conn): @@ -60,54 +63,49 @@ def query_tables_with_stats(conn): """ Retrieve each table in the cluster along with its statistics. 
""" - - # See: https://rethinkdb.com/api/python/eq_join/ - # For tables: stats['id'] = ['table', ''] + is_table_stats_row = r.row['id'].nth(0) == 'table' + table_id = r.row['id'].nth(1) - is_table_stats_row = rethinkdb.r.row['id'].nth(0) == 'table' - table_id = rethinkdb.r.row['id'].nth(1) + stats = r.db('rethinkdb').table('stats') + table_config = r.db('rethinkdb').table('table_config') - rows = ( - rethinkdb.r.table('stats') - .filter(is_table_stats_row) - .eq_join(table_id, rethinkdb.r.table('table_config')) - .run(conn) - ) # type: Iterator[JoinRow] + rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] for row in rows: - stats = row['left'] # type: TableStats + table_stats = row['left'] # type: TableStats table = row['right'] # type: Table - yield table, stats + yield table, table_stats -def query_replica_stats(conn): +def query_replicas_with_stats(conn): # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ReplicaStats]] """ Retrieve each replica (table/server pair) in the cluster along with its statistics. 
""" # For replicas: stats['id'] = ['table_server', '', 'SERVER_ID'] + is_table_server_stats_row = r.row['id'].nth(0) == 'table_server' + table_id = r.row['id'].nth(1) + server_id = r.row['left']['id'].nth(2) - is_table_server_stats_row = rethinkdb.r.row['id'].nth(0) == 'table_server' - table_id = rethinkdb.r.row['id'].nth(1) - server_id = rethinkdb.r.row['left']['id'].nth(2) + stats = r.db('rethinkdb').table('stats') + server_config = r.db('rethinkdb').table('server_config') + table_config = r.db('rethinkdb').table('table_config') rows = ( - rethinkdb.r.table('stats') - .filter(is_table_server_stats_row) - .eq_join(table_id, rethinkdb.r.table('table_config')) - .eq_join(server_id, rethinkdb.r.table('server_config')) - # TODO: filter entries where + stats.filter(is_table_server_stats_row) + .eq_join(table_id, table_config) + .eq_join(server_id, server_config) .run(conn) ) # type: Iterator[JoinRow] for row in rows: join_row = row['left'] # type: JoinRow - stats = join_row['left'] # type: ReplicaStats + replica_stats = join_row['left'] # type: ReplicaStats table = join_row['right'] # type: Table server = row['right'] # type: Server - yield table, server, stats + yield table, server, replica_stats def query_table_status(conn): @@ -115,7 +113,7 @@ def query_table_status(conn): """ Retrieve the status of each table in the cluster. """ - return rethinkdb.r.table('table_status').run(conn) + return r.db('rethinkdb').table('table_status').run(conn) def query_server_status(conn): @@ -123,7 +121,7 @@ def query_server_status(conn): """ Retrieve the status of each server in the cluster. """ - return rethinkdb.r.table('server_status').run(conn) + return r.db('rethinkdb').table('server_status').run(conn) def query_system_jobs(conn): @@ -131,4 +129,4 @@ def query_system_jobs(conn): """ Retrieve all the currently running system jobs. 
""" - return rethinkdb.r.table('jobs').run(conn) + return r.db('rethinkdb').table('jobs').run(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 1faf12c363f8a..ba112355437ae 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -9,6 +9,7 @@ from typing import Any, Callable, Iterator, List import rethinkdb +from rethinkdb import r from datadog_checks.base import AgentCheck @@ -34,7 +35,7 @@ def connect(self, host, port): service_check_tags = [] # type: List[str] try: - with rethinkdb.r.connect(db='rethinkdb', host=host, port=port) as conn: + with r.connect(host=host, port=port) as conn: server = conn.server() # type: ConnectionServer self.log.debug('connected server=%r', server) service_check_tags.append('server:{}'.format(server['name'])) From 3d480ddb51ba98d846ab64c2bd451f2aa98991b4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 20 Feb 2020 10:28:06 +0100 Subject: [PATCH 046/147] Drop 'default_metrics' option --- rethinkdb/datadog_checks/rethinkdb/_config.py | 43 ++++++------------- .../rethinkdb/_default_metrics/__init__.py | 32 ++++++++------ rethinkdb/datadog_checks/rethinkdb/_types.py | 15 +------ .../datadog_checks/rethinkdb/rethinkdb.py | 6 +-- rethinkdb/tests/unit/test_config.py | 29 ------------- 5 files changed, 36 insertions(+), 89 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 1c8b2d155151a..4083df8712127 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -4,14 +4,14 @@ from __future__ import absolute_import -from typing import Callable, Dict, Iterator, List +from typing import Callable, Iterator, List import rethinkdb from datadog_checks.base import ConfigurationError -from ._default_metrics import DEFAULT_METRIC_GROUPS -from ._types import DefaultMetricGroup, 
Instance, Metric +from ._default_metrics import collect_default_metrics +from ._types import Instance, Metric class Config: @@ -25,7 +25,6 @@ def __init__(self, instance): # type: (Instance) -> None host = instance.get('host', 'localhost') port = instance.get('port', 28015) - default_metrics = instance.get('default_metrics', True) if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) @@ -33,37 +32,23 @@ def __init__(self, instance): if not isinstance(port, int) or isinstance(port, bool): raise ConfigurationError('port must be an integer (got {!r})'.format(type(port))) - if isinstance(default_metrics, bool): - default_metrics = {group: default_metrics for group in DEFAULT_METRIC_GROUPS} - elif isinstance(default_metrics, dict): - unknown_groups = set(default_metrics) - set(DEFAULT_METRIC_GROUPS) - if unknown_groups: - raise ConfigurationError( - 'default_metrics contains unknown entries: {}'.format(', '.join(unknown_groups)) - ) - - invalid_groups = [group for group, enabled in default_metrics.items() if not isinstance(enabled, bool)] - if invalid_groups: - raise ConfigurationError( - 'default_metrics contains entries that are not booleans: {}'.format(', '.join(invalid_groups)) - ) - else: - raise ConfigurationError( - 'default_metrics must be a boolean or a mapping (got {!r})'.format(type(default_metrics)) - ) - if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) self.host = host # type: str self.port = port # type: int - self.metric_streams = _build_metric_streams(default_metrics) + + # NOTE: this attribute exists for encapsulation and testing purposes. 
+ self.metric_streams = [ + collect_default_metrics + ] # type: List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] + + def collect_metrics(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] + for stream in self.metric_streams: + for metric in stream(conn): + yield metric def __repr__(self): # type: () -> str return 'Config(host={host!r}, port={port!r})'.format(host=self.host, port=self.port) - - -def _build_metric_streams(default_metrics): - # type: (Dict[DefaultMetricGroup, bool]) -> List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] - return [stream for group, stream in DEFAULT_METRIC_GROUPS.items() if default_metrics.get(group, False)] diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py index 5c83556ff98d8..ee7f3807e5e41 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py @@ -9,7 +9,7 @@ import rethinkdb -from .._types import DefaultMetricGroup, Metric +from .._types import Metric from ._current_issues import collect_current_issues from ._statistics import ( collect_cluster_statistics, @@ -20,15 +20,21 @@ from ._statuses import collect_server_status, collect_table_status from ._system_jobs import collect_system_jobs -DEFAULT_METRIC_GROUPS = { - 'cluster_statistics': collect_cluster_statistics, - 'server_statistics': collect_server_statistics, - 'table_statistics': collect_table_statistics, - 'replica_statistics': collect_replica_statistics, - 'server_status': collect_server_status, - 'table_status': collect_table_status, - 'system_jobs': collect_system_jobs, - 'current_issues': collect_current_issues, -} # type: Dict[DefaultMetricGroup, Callable[[rethinkdb.net.Connection], Iterator[Metric]]] - -__all__ = ['DEFAULT_METRIC_GROUPS'] +__all__ = ['collect_default_metrics'] + + +def collect_default_metrics(conn): + # type: 
(rethinkdb.net.Connection) -> Iterator[Metric] + metrics = itertools.chain( + collect_cluster_statistics(conn), + collect_server_statistics(conn), + collect_table_statistics(conn), + collect_replica_statistics(conn), + collect_server_status(conn), + collect_table_status(conn), + collect_system_jobs(conn), + collect_current_issues(conn), + ) + + for metric in metrics: + yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 39165b13cbb92..a0d93da34b65e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -14,20 +14,7 @@ 'Metric', {'type': Literal['gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} ) -DefaultMetricGroup = Literal[ - 'cluster_statistics', - 'server_statistics', - 'table_statistics', - 'replica_statistics', - 'table_status', - 'server_status', - 'system_jobs', - 'current_issues', -] - -Instance = TypedDict( - 'Instance', {'host': str, 'port': int, 'default_metrics': Union[bool, Dict[DefaultMetricGroup, bool]]}, total=False -) +Instance = TypedDict('Instance', {'host': str, 'port': int}, total=False) # Configuration documents. diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index ba112355437ae..adf6fdbe3e9ae 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -65,12 +65,10 @@ def check(self, instance): host = config.host port = config.port - metric_streams = config.metric_streams with self.connect(host, port) as conn: - for metric_stream in metric_streams: - for metric in metric_stream(conn): - self.submit_metric(metric) + for metric in config.collect_metrics(conn): + self.submit_metric(metric) # TODO: version metadata. # TODO: custom queries. (Hint: look at `QueryManager`.) 
diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 85f096b6f8fc6..05e6d23ccc625 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -8,7 +8,6 @@ from datadog_checks.base import ConfigurationError from datadog_checks.rethinkdb._config import Config -from datadog_checks.rethinkdb._default_metrics import DEFAULT_METRIC_GROUPS pytestmark = pytest.mark.unit @@ -45,31 +44,3 @@ def test_invalid_port(port): # type: (Any) -> None with pytest.raises(ConfigurationError): Config(instance={'port': port}) - - -def test_default_metrics(): - # type: () -> None - config = Config(instance={}) - default_metric_streams = config.metric_streams - assert default_metric_streams == list(DEFAULT_METRIC_GROUPS.values()) - - config = Config(instance={'default_metrics': True}) - assert config.metric_streams == default_metric_streams - - config = Config(instance={'default_metrics': False}) - assert config.metric_streams == [] - - config = Config(instance={'default_metrics': {}}) - assert config.metric_streams == [] - - config = Config(instance={'default_metrics': {'table_statistics': True, 'server_statistics': False}}) - assert config.metric_streams == [DEFAULT_METRIC_GROUPS['table_statistics']] - - with pytest.raises(ConfigurationError): - Config(instance={'default_metrics': 'not a dict nor a bool'}) # type: ignore - - with pytest.raises(ConfigurationError): - Config(instance={'default_metrics': {'unknown_key': True}}) # type: ignore - - with pytest.raises(ConfigurationError): - Config(instance={'default_metrics': {'table_statistics': 'not a bool'}}) # type: ignore From 0f2b93a5e122476ed1021d5285d10ee79523b881 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 20 Feb 2020 14:33:26 +0100 Subject: [PATCH 047/147] Rename _default_metrics module to _metrics --- rethinkdb/datadog_checks/rethinkdb/_config.py | 2 +- .../rethinkdb/{_default_metrics => _metrics}/__init__.py | 0 .../rethinkdb/{_default_metrics => 
_metrics}/_current_issues.py | 0 .../rethinkdb/{_default_metrics => _metrics}/_statistics.py | 0 .../rethinkdb/{_default_metrics => _metrics}/_statuses.py | 0 .../rethinkdb/{_default_metrics => _metrics}/_system_jobs.py | 0 rethinkdb/tests/unit/test_transient_metrics.py | 2 +- 7 files changed, 2 insertions(+), 2 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/{_default_metrics => _metrics}/__init__.py (100%) rename rethinkdb/datadog_checks/rethinkdb/{_default_metrics => _metrics}/_current_issues.py (100%) rename rethinkdb/datadog_checks/rethinkdb/{_default_metrics => _metrics}/_statistics.py (100%) rename rethinkdb/datadog_checks/rethinkdb/{_default_metrics => _metrics}/_statuses.py (100%) rename rethinkdb/datadog_checks/rethinkdb/{_default_metrics => _metrics}/_system_jobs.py (100%) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 4083df8712127..285ddefffb63f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -10,7 +10,7 @@ from datadog_checks.base import ConfigurationError -from ._default_metrics import collect_default_metrics +from ._metrics import collect_default_metrics from ._types import Instance, Metric diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/__init__.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/_current_issues.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py 
b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statistics.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/_statuses.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_default_metrics/_system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_default_metrics/_system_jobs.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index f3e47487a1c6e..0fe28033cbff0 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -9,7 +9,7 @@ import pytest from rethinkdb import r -from datadog_checks.rethinkdb._default_metrics import collect_system_jobs +from datadog_checks.rethinkdb._metrics import collect_system_jobs from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob, QueryJob from .utils import MockConnection, patch_connection_type From dc558dbe1ccb556d1d9b269d562fa093388c91e2 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 24 Feb 2020 11:26:27 +0100 Subject: [PATCH 048/147] Drop query jobs metrics --- .../rethinkdb/_metrics/_system_jobs.py | 15 +----------- rethinkdb/datadog_checks/rethinkdb/_types.py | 14 +---------- rethinkdb/tests/common.py | 4 +--- rethinkdb/tests/test_rethinkdb.py | 18 ++++----------- .../tests/unit/test_transient_metrics.py | 23 ++----------------- 5 files changed, 9 insertions(+), 65 deletions(-) diff --git 
a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py index 523d90a343633..bf9b884d50f0f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py @@ -25,20 +25,7 @@ def collect_system_jobs(conn): tags = ['server:{}'.format(server) for server in servers] - if job['type'] == 'query': - client_address = job['info']['client_address'] - client_port = job['info']['client_port'] - - query_tags = tags + ['client_address:{}'.format(client_address), 'client_port:{}'.format(client_port)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.query.duration', - 'value': duration, - 'tags': query_tags, - } - - elif job['type'] == 'index_construction': + if job['type'] == 'index_construction': database = job['info']['db'] table = job['info']['table'] index = job['info']['index'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index a0d93da34b65e..6faecc056e2a6 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -121,18 +121,6 @@ # System jobs documents. # See: https://rethinkdb.com/docs/system-jobs/ -QueryInfo = TypedDict('QueryInfo', {'client_address': str, 'client_port': int, 'query': str, 'user': str}) -QueryJob = TypedDict( - 'QueryJob', - { - 'type': Literal['query'], - 'id': Tuple[Literal['query'], str], - 'duration_sec': float, - 'info': QueryInfo, - 'servers': List[str], - }, -) - IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) IndexConstructionJob = TypedDict( 'IndexConstructionJob', @@ -161,7 +149,7 @@ # NOTE: this is a union type tagged by the 'type' key. 
# See: https://mypy.readthedocs.io/en/latest/literal_types.html#intelligent-indexing -Job = Union[QueryJob, IndexConstructionJob, BackfillJob] +Job = Union[IndexConstructionJob, BackfillJob] # ReQL command results. # See: https://rethinkdb.com/api/python/ diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 28753eded7821..920862a670658 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -131,8 +131,7 @@ 'rethinkdb.server_status.process.time_started', ) -QUERY_JOBS_METRICS = ('rethinkdb.jobs.query.duration',) -# NOTE: other jobs metrics are not listed here as they are covered by unit tests instead of integration tests. +# NOTE: jobs metrics are not listed here as they are covered by unit tests instead of integration tests. CURRENT_ISSUES_METRICS = ( 'rethinkdb.current_issues.log_write_error.total', @@ -155,5 +154,4 @@ + TABLE_STATUS_SHARDS_METRICS + TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS + SERVER_STATUS_METRICS - + QUERY_JOBS_METRICS ) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 1cacb92d45688..d50922fba1749 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -20,7 +20,6 @@ HEROES_TABLE, HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, - QUERY_JOBS_METRICS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, @@ -43,7 +42,10 @@ def test_check(aggregator, instance): _assert_statistics_metrics(aggregator) _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator) - _assert_system_jobs_metrics(aggregator) + + # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is + # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to + # test without introducing flakiness. 
aggregator.assert_all_metrics_covered() @@ -117,18 +119,6 @@ def _assert_server_status_metrics(aggregator): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) -def _assert_system_jobs_metrics(aggregator): - # type: (AggregatorStub) -> None - for metric in QUERY_JOBS_METRICS: - # NOTE: these metrics are emitted because the query issued to collect system jobs metrics is - # included in system jobs themselves. - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE) - - # NOTE: other system jobs metrics are not covered here because they are only emitted when the cluster is - # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to - # test without introducing flakiness. - - @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_cannot_connect_unknown_host(aggregator, instance): diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index 0fe28033cbff0..1d59707df945b 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -10,7 +10,7 @@ from rethinkdb import r from datadog_checks.rethinkdb._metrics import collect_system_jobs -from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob, QueryJob +from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob from .utils import MockConnection, patch_connection_type @@ -25,19 +25,6 @@ def test_jobs_metrics(): We provide unit tests for these metrics because testing them in a live environment is tricky. 
""" - mock_query_job_row = { - 'type': 'query', - 'id': ('query', 'abcd1234'), - 'duration_sec': 0.21, - 'info': { - 'client_address': 'localhost', - 'client_port': 28015, - 'query': "r.table('heroes').run(conn)", - 'user': 'johndoe', - }, - 'servers': ['server0'], - } # type: QueryJob - mock_backfill_job_row = { # See: https://rethinkdb.com/docs/system-jobs/#backfill 'type': 'backfill', @@ -64,19 +51,13 @@ def test_jobs_metrics(): mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} - mock_rows = [mock_query_job_row, mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] + mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] with patch_connection_type(MockConnection): conn = r.connect(rows=mock_rows) metrics = list(collect_system_jobs(conn)) assert metrics == [ - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.query.duration', - 'value': 0.21, - 'tags': ['server:server0', 'client_address:localhost', 'client_port:28015'], - }, { 'type': 'gauge', 'name': 'rethinkdb.jobs.backfill.duration', From f45660eac40211105340996bd6dd8d79186ce366 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 24 Feb 2020 11:44:26 +0100 Subject: [PATCH 049/147] Submit table status gauges as service checks --- .../rethinkdb/_metrics/_statuses.py | 18 +++++++++------- rethinkdb/datadog_checks/rethinkdb/_types.py | 3 ++- .../datadog_checks/rethinkdb/rethinkdb.py | 21 +++++++++++-------- rethinkdb/tests/common.py | 5 +++-- rethinkdb/tests/test_rethinkdb.py | 5 +++++ 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py index e5f98fee527c9..cf1b8b66dd5bb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py @@ -10,6 +10,8 @@ import rethinkdb +from datadog_checks.base 
import AgentCheck + from .._queries import query_server_status, query_table_status from .._types import Metric, ReplicaState @@ -28,30 +30,30 @@ def collect_table_status(conn): tags = ['table:{}'.format(table), 'database:{}'.format(database)] yield { - 'type': 'gauge', + 'type': 'service_check', 'name': 'rethinkdb.table_status.ready_for_outdated_reads', - 'value': 1 if table_status['status']['ready_for_outdated_reads'] else 0, + 'value': AgentCheck.OK if table_status['status']['ready_for_outdated_reads'] else AgentCheck.WARNING, 'tags': tags, } yield { - 'type': 'gauge', + 'type': 'service_check', 'name': 'rethinkdb.table_status.ready_for_reads', - 'value': 1 if table_status['status']['ready_for_reads'] else 0, + 'value': AgentCheck.OK if table_status['status']['ready_for_reads'] else AgentCheck.WARNING, 'tags': tags, } yield { - 'type': 'gauge', + 'type': 'service_check', 'name': 'rethinkdb.table_status.ready_for_writes', - 'value': 1 if table_status['status']['ready_for_writes'] else 0, + 'value': AgentCheck.OK if table_status['status']['ready_for_writes'] else AgentCheck.WARNING, 'tags': tags, } yield { - 'type': 'gauge', + 'type': 'service_check', 'name': 'rethinkdb.table_status.all_replicas_ready', - 'value': 1 if table_status['status']['all_replicas_ready'] else 0, + 'value': AgentCheck.OK if table_status['status']['all_replicas_ready'] else AgentCheck.WARNING, 'tags': tags, } diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 6faecc056e2a6..836b85ce1e583 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -11,7 +11,8 @@ # Lightweight shim to decouple collection functions from the check class. 
Metric = TypedDict( - 'Metric', {'type': Literal['gauge', 'monotonic_count'], 'name': str, 'value': float, 'tags': List[str]} + 'Metric', + {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, ) Instance = TypedDict('Instance', {'host': str, 'port': int}, total=False) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index adf6fdbe3e9ae..9d4e09344814f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -30,33 +30,36 @@ def __init__(self, *args, **kwargs): self.config = Config(self.instance) @contextmanager - def connect(self, host, port): + def connect_submitting_service_check(self, host, port): # type: (str, int) -> Iterator[rethinkdb.net.Connection] - service_check_tags = [] # type: List[str] + tags = [] # type: List[str] try: with r.connect(host=host, port=port) as conn: server = conn.server() # type: ConnectionServer self.log.debug('connected server=%r', server) - service_check_tags.append('server:{}'.format(server['name'])) + tags.append('server:{}'.format(server['name'])) yield conn except rethinkdb.errors.ReqlDriverError as exc: self.log.error('Could not connect to RethinkDB server: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=service_check_tags) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) raise except Exception as exc: self.log.error('Unexpected error while executing RethinkDB check: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=service_check_tags) + self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) raise else: self.log.debug('service_check OK') - self.service_check('rethinkdb.can_connect', self.OK, tags=service_check_tags) + self.service_check('rethinkdb.can_connect', self.OK, tags=tags) def submit_metric(self, metric): # type: (Metric) -> None 
self.log.debug('submit_metric metric=%r', metric) - submit = getattr(self, metric['type']) # type: Callable - submit(metric['name'], value=metric['value'], tags=metric['tags']) + if metric['type'] == 'service_check': + self.service_check(metric['name'], metric['value'], tags=metric['tags']) + else: + submit = getattr(self, metric['type']) # type: Callable + submit(metric['name'], value=metric['value'], tags=metric['tags']) def check(self, instance): # type: (Instance) -> None @@ -66,7 +69,7 @@ def check(self, instance): host = config.host port = config.port - with self.connect(host, port) as conn: + with self.connect_submitting_service_check(host, port) as conn: for metric in config.collect_metrics(conn): self.submit_metric(metric) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 920862a670658..33fb2d03e62f8 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -102,14 +102,15 @@ 'rethinkdb.stats.table_server.disk.preallocated_bytes', ) -TABLE_STATUS_METRICS = ( +TABLE_STATUS_SERVICE_CHECKS = ( 'rethinkdb.table_status.ready_for_outdated_reads', 'rethinkdb.table_status.ready_for_reads', 'rethinkdb.table_status.ready_for_writes', 'rethinkdb.table_status.all_replicas_ready', - 'rethinkdb.table_status.shards.total', ) +TABLE_STATUS_METRICS = ('rethinkdb.table_status.shards.total',) + TABLE_STATUS_SHARDS_METRICS = ( 'rethinkdb.table_status.shards.replicas.total', 'rethinkdb.table_status.shards.replicas.primary.total', diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index d50922fba1749..a17256992dab2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -27,6 +27,7 @@ SERVERS, TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, + TABLE_STATUS_SERVICE_CHECKS, TABLE_STATUS_SHARDS_METRICS, TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS, ) @@ -52,6 +53,10 @@ def test_check(aggregator, instance): service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] 
aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) + for service_check in TABLE_STATUS_SERVICE_CHECKS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + aggregator.assert_service_check(service_check, RethinkDBCheck.OK, count=1, tags=tags) + def _assert_statistics_metrics(aggregator): # type: (AggregatorStub) -> None From 1599f67ca88d0dcb9e279a8a808ef6c12dc99091 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 24 Feb 2020 15:58:59 +0100 Subject: [PATCH 050/147] Submit replica state as a tag instead of metrics --- .../rethinkdb/_metrics/_statistics.py | 8 ++- .../rethinkdb/_metrics/_statuses.py | 23 +------ .../datadog_checks/rethinkdb/_queries.py | 66 ++++++++++++++----- rethinkdb/datadog_checks/rethinkdb/_types.py | 5 +- rethinkdb/tests/common.py | 10 --- rethinkdb/tests/test_rethinkdb.py | 23 ++----- 6 files changed, 63 insertions(+), 72 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py index f9defdc33ce7d..182bd208f6f9d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py @@ -145,20 +145,22 @@ def collect_table_statistics(conn): def collect_replica_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - for table, server, stats in query_replicas_with_stats(conn): - logger.debug('replica_statistics table=%r server=%r stats=%r', table, server, stats) + for table, server, replica, stats in query_replicas_with_stats(conn): + logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) - database = stats['db'] + database = table['db'] server_name = server['name'] table_name = table['name'] server_tags = server['tags'] query_engine = stats['query_engine'] storage_engine = stats['storage_engine'] + state = replica['state'] tags = [ 
'table:{}'.format(table_name), 'database:{}'.format(database), 'server:{}'.format(server_name), + 'state:{}'.format(state), ] + server_tags yield { diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py index cf1b8b66dd5bb..37fc058f8dc6c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py @@ -13,7 +13,7 @@ from datadog_checks.base import AgentCheck from .._queries import query_server_status, query_table_status -from .._types import Metric, ReplicaState +from .._types import Metric def collect_table_status(conn): @@ -81,27 +81,6 @@ def collect_table_status(conn): 'tags': shard_tags, } - for replica in shard['replicas']: - server = replica['server'] - replica_tags = shard_tags + ['server:{}'.format(server)] - - # Helper function to benefit from type checking on 'ReplicaState' literals. - def _replica_state(state): - # type: (ReplicaState) -> Metric - return { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas.state.{}'.format(state), - 'value': 1 if replica['state'] == state else 0, - 'tags': replica_tags, - } - - yield _replica_state('ready') - yield _replica_state('transitioning') - yield _replica_state('backfilling') - yield _replica_state('disconnected') - yield _replica_state('waiting_for_primary') - yield _replica_state('waiting_for_quorum') - def collect_server_status(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 50409bb30371f..a0135530ecd93 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -24,6 +24,7 @@ Server, ServerStats, ServerStatus, + ShardReplica, Table, TableStats, TableStatus, @@ -79,33 +80,64 @@ def query_tables_with_stats(conn): def query_replicas_with_stats(conn): - # type: 
(rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ReplicaStats]] + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] """ Retrieve each replica (table/server pair) in the cluster along with its statistics. """ - # For replicas: stats['id'] = ['table_server', '', 'SERVER_ID'] - is_table_server_stats_row = r.row['id'].nth(0) == 'table_server' - table_id = r.row['id'].nth(1) - server_id = r.row['left']['id'].nth(2) + # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation (i.e. ask RethinkDB for a specific + # set of fields, instead of sending entire objects, which can be expensive when joining data as we do here.) + # See: https://rethinkdb.com/api/python/pluck/ stats = r.db('rethinkdb').table('stats') server_config = r.db('rethinkdb').table('server_config') table_config = r.db('rethinkdb').table('table_config') - - rows = ( - stats.filter(is_table_server_stats_row) - .eq_join(table_id, table_config) - .eq_join(server_id, server_config) - .run(conn) - ) # type: Iterator[JoinRow] + table_status = r.db('rethinkdb').table( + 'table_status', + # Required so that 'server' fields in 'replicas' entries refer contain UUIDs instead of names. + # This way, we can join server information more efficiently, as we don't have to lookup UUIDs from names. + # See: https://rethinkdb.com/api/python/table/#description + identifier_format='uuid', + ) + + query = ( + # Start from table statuses, as they contain the list of replicas for each shard of the table. + # See: https://rethinkdb.com/docs/system-tables/#table_status + table_status.pluck('id', {'shards': ['replicas']}) + .merge({'table': r.row['id']}) + .without('id') + # Flatten each table status entry into one entry per shard and replica. 
+ .concat_map(lambda row: row['shards'].map(lambda shard: row.merge(shard.pluck('replicas')))) + .without('shards') + .concat_map( + lambda row: row['replicas'].map(lambda replica: row.merge({'replica': replica.pluck('server', 'state')})) + ) + .without('replicas') + # Grab table information for each replica. + # See: https://rethinkdb.com/docs/system-tables#table_config + .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) + # Grab relevant server information for each replica. + # See: https://rethinkdb.com/docs/system-tables#server_config + .merge({'server': server_config.get(r.row['replica']['server']).pluck('id', 'name', 'tags')}) + # Grab statistics for each replica. + # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair + .merge( + { + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( + 'query_engine', 'storage_engine' + ) + } + ) + ) + + rows = query.run(conn) # type: Iterator[dict] for row in rows: - join_row = row['left'] # type: JoinRow - replica_stats = join_row['left'] # type: ReplicaStats - table = join_row['right'] # type: Table - server = row['right'] # type: Server - yield table, server, replica_stats + table = row['table'] # type: Table + server = row['server'] # type: Server + replica = row['replica'] # type: ShardReplica + replica_stats = row['stats'] # type: ReplicaStats + yield table, server, replica, replica_stats def query_table_status(conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 836b85ce1e583..43c71c5aedb0a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -99,10 +99,7 @@ # Status documents. 
# See: https://rethinkdb.com/docs/system-tables/#status-tables -ReplicaState = Literal[ - 'ready', 'transitioning', 'backfilling', 'disconnected', 'waiting_for_primary', 'waiting_for_quorum' -] -ShardReplica = TypedDict('ShardReplica', {'server': str, 'state': ReplicaState}) +ShardReplica = TypedDict('ShardReplica', {'server': str, 'state': str}) Shard = TypedDict('Shard', {'primary_replicas': List[str], 'replicas': List[ShardReplica]}) TableStatusFlags = TypedDict( 'TableStatusFlags', diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 33fb2d03e62f8..b21e56cf2f5a1 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -116,15 +116,6 @@ 'rethinkdb.table_status.shards.replicas.primary.total', ) -TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS = ( - 'rethinkdb.table_status.shards.replicas.state.ready', - 'rethinkdb.table_status.shards.replicas.state.transitioning', - 'rethinkdb.table_status.shards.replicas.state.backfilling', - 'rethinkdb.table_status.shards.replicas.state.disconnected', - 'rethinkdb.table_status.shards.replicas.state.waiting_for_primary', - 'rethinkdb.table_status.shards.replicas.state.waiting_for_quorum', -) - SERVER_STATUS_METRICS = ( 'rethinkdb.server_status.network.time_connected', 'rethinkdb.server_status.network.connected_to.total', @@ -153,6 +144,5 @@ + REPLICA_STATISTICS_METRICS + TABLE_STATUS_METRICS + TABLE_STATUS_SHARDS_METRICS - + TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS + SERVER_STATUS_METRICS ) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index a17256992dab2..b004b027eb5e2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -29,7 +29,6 @@ TABLE_STATUS_METRICS, TABLE_STATUS_SERVICE_CHECKS, TABLE_STATUS_SHARDS_METRICS, - TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS, ) @@ -74,7 +73,12 @@ def _assert_statistics_metrics(aggregator): for replica_server in HEROES_TABLE_SERVERS: for metric in REPLICA_STATISTICS_METRICS: - tags = 
['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'server:{}'.format(replica_server)] + tags = [ + 'table:{}'.format(HEROES_TABLE), + 'database:{}'.format(DATABASE), + 'server:{}'.format(replica_server), + 'state:ready', # Assumption: cluster is stable (not currently rebalancing). + ] tags.extend(SERVER_TAGS[replica_server]) aggregator.assert_metric(metric, count=1, tags=tags) @@ -96,25 +100,12 @@ def _assert_table_status_metrics(aggregator): tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - for shard, servers in HEROES_TABLE_REPLICAS_BY_SHARD.items(): + for shard in HEROES_TABLE_REPLICAS_BY_SHARD: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] for metric in TABLE_STATUS_SHARDS_METRICS: aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - for server in servers: - tags = [ - 'table:{}'.format(HEROES_TABLE), - 'database:{}'.format(DATABASE), - 'shard:{}'.format(shard), - 'server:{}'.format(server), - ] - - for metric in TABLE_STATUS_SHARDS_REPLICA_STATE_METRICS: - # Assumption: all replicas in the cluster are ready, i.e. no rebalancing is in progress. 
- value = 1 if metric.endswith('.state.ready') else 0 - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, value=value, count=1, tags=tags) - def _assert_server_status_metrics(aggregator): # type: (AggregatorStub) -> None From d172f7bd2758046247f1cbc9bbb765ae4429674c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 24 Feb 2020 17:11:23 +0100 Subject: [PATCH 051/147] Handle disconnected replica server -- needs unit tests --- .../rethinkdb/_metrics/_statistics.py | 6 ++++++ rethinkdb/datadog_checks/rethinkdb/_queries.py | 14 +++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py index 182bd208f6f9d..5c2c72dee499b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py @@ -148,6 +148,12 @@ def collect_replica_statistics(conn): for table, server, replica, stats in query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) + state = replica['state'] + + if state == 'disconnected': + # TODO: submit service checks? + continue + database = table['db'] server_name = server['name'] table_name = table['name'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index a0135530ecd93..96a59d4bfbc6e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -118,14 +118,22 @@ def query_replicas_with_stats(conn): .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) # Grab relevant server information for each replica. 
# See: https://rethinkdb.com/docs/system-tables#server_config - .merge({'server': server_config.get(r.row['replica']['server']).pluck('id', 'name', 'tags')}) + .merge( + { + 'server': ( + server_config.get(r.row['replica']['server']) + .default({'id': None}) # Disconnected servers aren't present in the 'server_config' table. + .pluck('id', 'name', 'tags') + ) + } + ) # Grab statistics for each replica. # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair .merge( { - 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).default({}).pluck( 'query_engine', 'storage_engine' - ) + ), } ) ) From 56479b3d4244679e99077ed228de35eeb4bf91ad Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 13:56:12 +0100 Subject: [PATCH 052/147] Refactor submission of service check --- .../datadog_checks/rethinkdb/rethinkdb.py | 37 +++++++++++-------- rethinkdb/tests/test_rethinkdb.py | 2 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 9d4e09344814f..3aa0e43972b16 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -17,6 +17,9 @@ from ._types import ConnectionServer, Instance, Metric +SC_CONNECT = 'rethinkdb.can_connect' + + class RethinkDBCheck(AgentCheck): """ Collect metrics from a RethinkDB cluster. 
@@ -30,27 +33,29 @@ def __init__(self, *args, **kwargs): self.config = Config(self.instance) @contextmanager - def connect_submitting_service_check(self, host, port): + def connect_submitting_service_checks(self, host, port): # type: (str, int) -> Iterator[rethinkdb.net.Connection] - tags = [] # type: List[str] - try: - with r.connect(host=host, port=port) as conn: - server = conn.server() # type: ConnectionServer - self.log.debug('connected server=%r', server) - tags.append('server:{}'.format(server['name'])) - yield conn + conn = r.connect(host=host, port=port) except rethinkdb.errors.ReqlDriverError as exc: - self.log.error('Could not connect to RethinkDB server: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) + message = 'Could not connect to RethinkDB server: {!r}'.format(exc) + self.log.error(message) + self.service_check(SC_CONNECT, self.CRITICAL, message=message) raise + + server = conn.server() # type: ConnectionServer + self.log.debug('connected server=%r', server) + tags = ['server:{}'.format(server['name'])] + + try: + yield conn except Exception as exc: - self.log.error('Unexpected error while executing RethinkDB check: %r', exc) - self.service_check('rethinkdb.can_connect', self.CRITICAL, tags=tags) + message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) + self.log.error(message) + self.service_check(SC_CONNECT, self.CRITICAL, tags=tags, message=message) raise - else: - self.log.debug('service_check OK') - self.service_check('rethinkdb.can_connect', self.OK, tags=tags) + + self.service_check(SC_CONNECT, self.OK, tags=tags) def submit_metric(self, metric): # type: (Metric) -> None @@ -69,7 +74,7 @@ def check(self, instance): host = config.host port = config.port - with self.connect_submitting_service_check(host, port) as conn: + with self.connect_submitting_service_checks(host, port) as conn: for metric in config.collect_metrics(conn): self.submit_metric(metric) diff --git 
a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index b004b027eb5e2..7616de1dc40c7 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -132,7 +132,7 @@ def test_cannot_connect_unknown_host(aggregator, instance): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_connected_but_check_failed(aggregator, instance): +def test_connected_but_check_failed_unexpectedly(aggregator, instance): # type: (AggregatorStub, Instance) -> None class Failure(Exception): pass From cd6a1071666dad386220bf44ec34375eb0f0a1a9 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 13:57:19 +0100 Subject: [PATCH 053/147] Lint --- rethinkdb/datadog_checks/rethinkdb/_queries.py | 6 +++--- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 96a59d4bfbc6e..1d157392a52fb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -131,9 +131,9 @@ def query_replicas_with_stats(conn): # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair .merge( { - 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).default({}).pluck( - 'query_engine', 'storage_engine' - ), + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]) + .default({}) + .pluck('query_engine', 'storage_engine'), } ) ) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 3aa0e43972b16..62342a4529a63 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Callable, Iterator, List +from typing import Any, 
Callable, Iterator import rethinkdb from rethinkdb import r @@ -16,7 +16,6 @@ from ._config import Config from ._types import ConnectionServer, Instance, Metric - SC_CONNECT = 'rethinkdb.can_connect' From e34aee9c610b76c69c2bcd41e48b30f7506b9468 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 14:49:21 +0100 Subject: [PATCH 054/147] Add more debug logs --- .../datadog_checks/rethinkdb/_metrics/_statistics.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py index 5c2c72dee499b..247b56087bc8e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py @@ -22,6 +22,8 @@ def collect_cluster_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] + logger.debug('collect_cluster_statistics') + stats = query_cluster_stats(conn) logger.debug('cluster_statistics stats=%r', stats) @@ -51,8 +53,10 @@ def collect_cluster_statistics(conn): def collect_server_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] + logger.debug('collect_server_statistics') + for server, stats in query_servers_with_stats(conn): - logger.debug('server_statistics server=%r, stats=%r', server, stats) + logger.debug('server_statistics server=%r stats=%r', server, stats) name = server['name'] server_tags = server['tags'] @@ -119,8 +123,10 @@ def collect_server_statistics(conn): def collect_table_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] + logger.debug('collect_table_statistics') + for table, stats in query_tables_with_stats(conn): - logger.debug('table_statistics table=%r, stats=%r', table, stats) + logger.debug('table_statistics table=%r stats=%r', table, stats) name = table['name'] database = table['db'] @@ -145,6 +151,8 @@ def collect_table_statistics(conn): def 
collect_replica_statistics(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] + logger.debug('collect_replica_statistics') + for table, server, replica, stats in query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) From 31cdbe5dcc3536a7263b669642c6acdce3bc437c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 16:15:14 +0100 Subject: [PATCH 055/147] Improve robustness to disconnected servers --- .../datadog_checks/dev/docker.py | 13 ++- .../datadog_checks/rethinkdb/_queries.py | 20 ++--- rethinkdb/tests/cluster.py | 29 ++++++ rethinkdb/tests/common.py | 10 +++ rethinkdb/tests/conftest.py | 7 +- rethinkdb/tests/test_rethinkdb.py | 89 ++++++++++++++----- 6 files changed, 127 insertions(+), 41 deletions(-) diff --git a/datadog_checks_dev/datadog_checks/dev/docker.py b/datadog_checks_dev/datadog_checks/dev/docker.py index 22f261d2ae280..c1b8989aec791 100644 --- a/datadog_checks_dev/datadog_checks/dev/docker.py +++ b/datadog_checks_dev/datadog_checks/dev/docker.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os from contextlib import contextmanager +from typing import Iterator from six import string_types from six.moves.urllib.parse import urlparse @@ -17,7 +18,7 @@ try: from contextlib import ExitStack except ImportError: - from contextlib2 import ExitStack + from contextlib2 import ExitStack # type: ignore def get_docker_hostname(): @@ -246,3 +247,13 @@ def _read_example_logs_config(check_root): return option['example'] raise ValueError('No logs example found') + + +@contextmanager +def temporarily_pause_service(service, compose_file, check=True): + # type: (str, str, bool) -> Iterator[None] + run_command(['docker-compose', '-f', compose_file, 'pause', service], capture=False, check=check) + try: + yield + finally: + run_command(['docker-compose', '-f', compose_file, 'unpause', service], capture=False, check=check) 
diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 1d157392a52fb..a374eacac8ac3 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -116,24 +116,18 @@ def query_replicas_with_stats(conn): # Grab table information for each replica. # See: https://rethinkdb.com/docs/system-tables#table_config .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) - # Grab relevant server information for each replica. + # Grab server information for each replica. # See: https://rethinkdb.com/docs/system-tables#server_config - .merge( - { - 'server': ( - server_config.get(r.row['replica']['server']) - .default({'id': None}) # Disconnected servers aren't present in the 'server_config' table. - .pluck('id', 'name', 'tags') - ) - } - ) + .merge({'server': server_config.get(r.row['replica']['server'])}) + .filter(r.row['server']) # Skip replicas stored on disconnected servers. + .merge({'server': r.row['server'].pluck('id', 'name', 'tags')}) # Grab statistics for each replica. 
# See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair .merge( { - 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]) - .default({}) - .pluck('query_engine', 'storage_engine'), + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( + 'query_engine', 'storage_engine' + ), } ) ) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 01201af866b3b..8bc9c873ae45c 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -2,9 +2,18 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from contextlib import contextmanager + +import rethinkdb from rethinkdb import r +from datadog_checks.dev.conditions import WaitFor +from datadog_checks.dev.docker import temporarily_pause_service +from datadog_checks.dev.structures import EnvVars + from .common import ( + COMPOSE_ENV_VARS, + COMPOSE_FILE, CONNECT_SERVER_PORT, DATABASE, HEROES_TABLE, @@ -12,6 +21,7 @@ HEROES_TABLE_DOCUMENTS, HOST, PROXY_PORT, + SERVERS, ) @@ -67,3 +77,22 @@ def _simulate_client_reads(): with r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: all_heroes = list(r.table(HEROES_TABLE).run(conn)) assert len(all_heroes) == len(HEROES_TABLE_DOCUMENTS) + + +@contextmanager +def temporarily_disconnect_server(server): + service = 'rethinkdb-{}'.format(server) + + def _servers_have_rebalanced(conn): + # type: (rethinkdb.net.Connection) -> bool + # RethinkDB will rebalance data across tables and remove the server from 'server_status' afterwards. 
+ servers = list(r.db('rethinkdb').table('server_status').run(conn)) + return len(servers) == len(SERVERS) - 1 + + with EnvVars(COMPOSE_ENV_VARS): + with temporarily_pause_service(service, compose_file=COMPOSE_FILE): + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + wait_until_rebalanced = WaitFor(lambda: _servers_have_rebalanced(conn)) + wait_until_rebalanced() + + yield diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index b21e56cf2f5a1..d701cd39f08a9 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -146,3 +146,13 @@ + TABLE_STATUS_SHARDS_METRICS + SERVER_STATUS_METRICS ) + + +# Docker Compose configuration. + +COMPOSE_FILE = os.path.join(HERE, 'compose', 'docker-compose.yaml') +COMPOSE_ENV_VARS = env_vars = { + 'RETHINKDB_IMAGE': IMAGE, + 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), + 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), +} diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index c1d8aae7bf63f..57859a07a9d7b 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -4,7 +4,6 @@ from __future__ import absolute_import -import os from typing import Iterator import pytest @@ -13,7 +12,7 @@ from datadog_checks.rethinkdb._types import Instance from .cluster import setup_cluster -from .common import CONNECT_SERVER_PORT, HERE, HOST, IMAGE, PROXY_PORT +from .common import COMPOSE_FILE, CONNECT_SERVER_PORT, HOST, IMAGE, PROXY_PORT E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} @@ -30,8 +29,6 @@ def instance(): @pytest.fixture(scope='session') def dd_environment(instance): # type: (Instance) -> Iterator - compose_file = os.path.join(HERE, 'compose', 'docker-compose.yaml') - env_vars = { 'RETHINKDB_IMAGE': IMAGE, 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), @@ -47,6 +44,6 @@ def dd_environment(instance): r'Connected to proxy.*', ] - with docker_run(compose_file, conditions=conditions, env_vars=env_vars, 
log_patterns=log_patterns): + with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=env_vars, log_patterns=log_patterns): config = {'instances': [instance]} yield config, E2E_METADATA diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 7616de1dc40c7..652e33ccab082 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -4,7 +4,7 @@ from __future__ import absolute_import import copy -from typing import Iterator +from typing import Iterator, Set import pytest import rethinkdb @@ -13,6 +13,7 @@ from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb._types import Instance, Metric +from .cluster import temporarily_disconnect_server from .common import ( CLUSTER_STATISTICS_METRICS, CONNECT_SERVER_NAME, @@ -57,41 +58,81 @@ def test_check(aggregator, instance): aggregator.assert_service_check(service_check, RethinkDBCheck.OK, count=1, tags=tags) -def _assert_statistics_metrics(aggregator): - # type: (AggregatorStub) -> None +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_check_with_disconnected_server(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + """ + Verify that the check still runs to completion and sends appropriate service checks if one of the + servers that holds data is disconnected. 
+ """ + check = RethinkDBCheck('rethinkdb', {}, [instance]) + + server_with_data = 'server2' + with temporarily_disconnect_server(server_with_data): + check.check(instance) + + disconnected_servers = {server_with_data} + + _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_table_status_metrics(aggregator) + _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) + + aggregator.assert_all_metrics_covered() + + service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) + + table_status_tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_outdated_reads', RethinkDBCheck.OK, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_reads', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_writes', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.all_replicas_ready', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) + + +def _assert_statistics_metrics(aggregator, disconnected_servers=None): + # type: (AggregatorStub, Set[str]) -> None + if disconnected_servers is None: + disconnected_servers = set() + for metric in CLUSTER_STATISTICS_METRICS: aggregator.assert_metric(metric, count=1, tags=[]) for server in SERVERS: for metric in SERVER_STATISTICS_METRICS: tags = ['server:{}'.format(server)] + SERVER_TAGS[server] - aggregator.assert_metric(metric, count=1, tags=tags) + count = 0 if server in disconnected_servers else 1 + aggregator.assert_metric(metric, count=count, tags=tags) for metric in TABLE_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] 
aggregator.assert_metric(metric, count=1, tags=tags) - for replica_server in HEROES_TABLE_SERVERS: + for server in HEROES_TABLE_SERVERS: for metric in REPLICA_STATISTICS_METRICS: tags = [ 'table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), - 'server:{}'.format(replica_server), - 'state:ready', # Assumption: cluster is stable (not currently rebalancing). + 'server:{}'.format(server), ] - tags.extend(SERVER_TAGS[replica_server]) - aggregator.assert_metric(metric, count=1, tags=tags) + tags.extend(SERVER_TAGS[server]) - # Ensure non-replica servers haven't yielded replica statistics. - for non_replica_server in SERVERS - HEROES_TABLE_SERVERS: - for metric in REPLICA_STATISTICS_METRICS: - tags = [ - 'table:{}'.format(HEROES_TABLE), - 'database:{}'.format(DATABASE), - 'server:{}'.format(non_replica_server), - ] - tags.extend(SERVER_TAGS[non_replica_server]) - aggregator.assert_metric(metric, count=0, tags=tags) + if server in disconnected_servers: + count = 0 + else: + tags.append('state:ready') # Assumption: cluster is stable (not currently rebalancing). 
+ count = 1 + + aggregator.assert_metric(metric, count=count, tags=tags) def _assert_table_status_metrics(aggregator): @@ -107,12 +148,16 @@ def _assert_table_status_metrics(aggregator): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) -def _assert_server_status_metrics(aggregator): - # type: (AggregatorStub) -> None +def _assert_server_status_metrics(aggregator, disconnected_servers=None): + # type: (AggregatorStub, Set[str]) -> None + if disconnected_servers is None: + disconnected_servers = set() + for metric in SERVER_STATUS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + count = 0 if server in disconnected_servers else 1 + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=tags) @pytest.mark.integration From 1b0916eb1b7c2a5aecaf5fce3113b0b17cbab354 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 17:17:18 +0100 Subject: [PATCH 056/147] Improve robustness of disconnect tests --- .../datadog_checks/dev/docker.py | 10 ++-- rethinkdb/tests/cluster.py | 47 +++++++++++++++---- rethinkdb/tests/common.py | 1 + rethinkdb/tests/test_rethinkdb.py | 33 ++++++------- 4 files changed, 61 insertions(+), 30 deletions(-) diff --git a/datadog_checks_dev/datadog_checks/dev/docker.py b/datadog_checks_dev/datadog_checks/dev/docker.py index c1b8989aec791..12102f5ade622 100644 --- a/datadog_checks_dev/datadog_checks/dev/docker.py +++ b/datadog_checks_dev/datadog_checks/dev/docker.py @@ -250,10 +250,12 @@ def _read_example_logs_config(check_root): @contextmanager -def temporarily_pause_service(service, compose_file, check=True): +def temporarily_stop_service(service, compose_file, check=True): # type: (str, str, bool) -> Iterator[None] - run_command(['docker-compose', '-f', compose_file, 'pause', service], capture=False, check=check) + run_command(['docker-compose', '-f', compose_file, 'stop', 
service], capture=False, check=check) try: yield - finally: - run_command(['docker-compose', '-f', compose_file, 'unpause', service], capture=False, check=check) + except: + raise + else: + run_command(['docker-compose', '-f', compose_file, 'start', service], capture=False, check=check) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 8bc9c873ae45c..43c8e690987c7 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -3,12 +3,13 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from contextlib import contextmanager +from typing import Iterator import rethinkdb from rethinkdb import r from datadog_checks.dev.conditions import WaitFor -from datadog_checks.dev.docker import temporarily_pause_service +from datadog_checks.dev.docker import temporarily_stop_service from datadog_checks.dev.structures import EnvVars from .common import ( @@ -21,7 +22,6 @@ HEROES_TABLE_DOCUMENTS, HOST, PROXY_PORT, - SERVERS, ) @@ -81,18 +81,45 @@ def temporarily_disconnect_server(server): + """ + Gracefully disconnect a server from the cluster. + Ensures that the cluster is left in a stable state inside and after exiting the context. 
+ """ service = 'rethinkdb-{}'.format(server) - def _servers_have_rebalanced(conn): + def _server_exists(conn): + # type: (rethinkdb.net.Connection) -> bool + return r.db('rethinkdb').table('server_status').map(r.row['name']).contains(server).run(conn) + + def _leader_election_done(conn): + # type: (rethinkdb.net.Connection) -> bool + STABLE_REPLICA_STATES = {'ready', 'waiting_for_primary', 'disconnected'} + + replica_states = ( + r.db('rethinkdb') + .table('table_status') + .concat_map(r.row['shards']) + .concat_map(r.row['replicas']) + .map(r.row['state']) + .run(conn) + ) # type: Iterator[str] + + return all(state in STABLE_REPLICA_STATES for state in replica_states) + + def _server_disconnected(conn): + # type: (rethinkdb.net.Connection) -> bool + return not _server_exists(conn) and _leader_election_done(conn) + + def _server_reconnected(conn): # type: (rethinkdb.net.Connection) -> bool - # RethinkDB will rebalance data across tables and remove the server from 'server_status' afterwards. 
- servers = list(r.db('rethinkdb').table('server_status').run(conn)) - return len(servers) == len(SERVERS) - 1 + return _server_exists(conn) and _leader_election_done(conn) - with EnvVars(COMPOSE_ENV_VARS): - with temporarily_pause_service(service, compose_file=COMPOSE_FILE): + with temporarily_stop_service(service, compose_file=COMPOSE_FILE): + with EnvVars(COMPOSE_ENV_VARS): with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - wait_until_rebalanced = WaitFor(lambda: _servers_have_rebalanced(conn)) - wait_until_rebalanced() + WaitFor(lambda: _server_disconnected(conn))() yield + + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index d701cd39f08a9..cc1d3411b0c10 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -39,6 +39,7 @@ 'primary_replica_tag': 'primary', } HEROES_TABLE_SERVERS = {'server1', 'server2'} +HEROES_TABLE_PRIMARY_REPLICA = 'server1' HEROES_TABLE_REPLICAS_BY_SHARD = {0: HEROES_TABLE_SERVERS} HEROES_TABLE_DOCUMENTS = [ { diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 652e33ccab082..58c1b58140c49 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -19,6 +19,7 @@ CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, + HEROES_TABLE_PRIMARY_REPLICA, HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, REPLICA_STATISTICS_METRICS, @@ -59,16 +60,16 @@ def test_check(aggregator, instance): @pytest.mark.integration +@pytest.mark.parametrize('server_with_data', list(HEROES_TABLE_SERVERS)) @pytest.mark.usefixtures('dd_environment') -def test_check_with_disconnected_server(aggregator, instance): - # type: (AggregatorStub, Instance) -> None +def test_check_with_disconnected_server(aggregator, instance, server_with_data): + # type: (AggregatorStub, Instance, str) -> None """ Verify that the check still runs to completion and sends 
appropriate service checks if one of the servers that holds data is disconnected. """ check = RethinkDBCheck('rethinkdb', {}, [instance]) - server_with_data = 'server2' with temporarily_disconnect_server(server_with_data): check.check(instance) @@ -108,8 +109,8 @@ def _assert_statistics_metrics(aggregator, disconnected_servers=None): aggregator.assert_metric(metric, count=1, tags=[]) for server in SERVERS: + tags = ['server:{}'.format(server)] + SERVER_TAGS[server] for metric in SERVER_STATISTICS_METRICS: - tags = ['server:{}'.format(server)] + SERVER_TAGS[server] count = 0 if server in disconnected_servers else 1 aggregator.assert_metric(metric, count=count, tags=tags) @@ -118,21 +119,21 @@ def _assert_statistics_metrics(aggregator, disconnected_servers=None): aggregator.assert_metric(metric, count=1, tags=tags) for server in HEROES_TABLE_SERVERS: - for metric in REPLICA_STATISTICS_METRICS: - tags = [ - 'table:{}'.format(HEROES_TABLE), - 'database:{}'.format(DATABASE), - 'server:{}'.format(server), - ] - tags.extend(SERVER_TAGS[server]) + tags = [ + 'table:{}'.format(HEROES_TABLE), + 'database:{}'.format(DATABASE), + 'server:{}'.format(server), + ] + SERVER_TAGS[server] + for metric in REPLICA_STATISTICS_METRICS: if server in disconnected_servers: - count = 0 - else: - tags.append('state:ready') # Assumption: cluster is stable (not currently rebalancing). - count = 1 + aggregator.assert_metric(metric, count=0, tags=tags) + continue - aggregator.assert_metric(metric, count=count, tags=tags) + # Assumption: cluster is stable (not currently rebalancing), so only these two states can exist. 
+ state = 'waiting_for_primary' if HEROES_TABLE_PRIMARY_REPLICA in disconnected_servers else 'ready' + state_tag = 'state:{}'.format(state) + aggregator.assert_metric(metric, count=1, tags=tags + [state_tag]) def _assert_table_status_metrics(aggregator): From 80de5312457c43436cc9cd543c6668d74ba27fad Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 17:22:18 +0100 Subject: [PATCH 057/147] Nit: standardize imports --- rethinkdb/datadog_checks/rethinkdb/_config.py | 1 - rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py | 1 - rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py | 1 - rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py | 1 - rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py | 1 - rethinkdb/datadog_checks/rethinkdb/_queries.py | 1 - rethinkdb/datadog_checks/rethinkdb/_types.py | 2 -- rethinkdb/tests/cluster.py | 1 - rethinkdb/tests/conftest.py | 3 --- rethinkdb/tests/test_e2e.py | 1 - rethinkdb/tests/test_rethinkdb.py | 2 -- rethinkdb/tests/unit/test_config.py | 1 - rethinkdb/tests/unit/test_transient_metrics.py | 2 -- 13 files changed, 18 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 285ddefffb63f..45946208006be 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from __future__ import absolute_import from typing import Callable, Iterator, List diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py index 12f668f75a719..d54837e7c7948 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from __future__ import absolute_import from typing import Iterator diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py index 247b56087bc8e..8e1a9b73ad6bb 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py @@ -6,7 +6,6 @@ See: https://rethinkdb.com/docs/system-stats/ """ - from __future__ import absolute_import import logging diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py index 37fc058f8dc6c..34cc3c50524df 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from __future__ import absolute_import import datetime as dt diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py index bf9b884d50f0f..604cc0c3c9169 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from __future__ import absolute_import from typing import Iterator diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index a374eacac8ac3..ba9dc2ea9ff40 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -8,7 +8,6 @@ - Python ReQL command reference: https://rethinkdb.com/api/python/ - Usage of `eq_join`: https://rethinkdb.com/api/python/eq_join/ """ - from __future__ import absolute_import from typing import Iterator, Tuple diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 43c71c5aedb0a..233e939203953 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -1,11 +1,9 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - """ Declarations used for type checking our code, including our manipulation of JSON documents returned by RethinkDB. """ - import datetime as dt from typing import Any, Dict, List, Literal, Tuple, TypedDict, Union diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 43c8e690987c7..d9038ade78e06 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from contextlib import contextmanager from typing import Iterator diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 57859a07a9d7b..e48b8a8cbb7a0 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -1,9 +1,6 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - -from __future__ import absolute_import - from typing import Iterator import pytest diff --git a/rethinkdb/tests/test_e2e.py b/rethinkdb/tests/test_e2e.py index 9f311d4aba5fe..1f644920a148a 100644 --- a/rethinkdb/tests/test_e2e.py +++ b/rethinkdb/tests/test_e2e.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from typing import Callable import pytest diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 58c1b58140c49..2aea7f12b597b 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -1,8 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - import copy from typing import Iterator, Set diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 05e6d23ccc625..639082600142a 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - from typing import Any import pytest diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index 1d59707df945b..b274f59c0dbe3 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -1,11 +1,9 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - """ Unit tests for metrics that are hard to test using integration tests, eg. because they depend on cluster dynamics. 
""" - import pytest from rethinkdb import r From 86f342c3b2638160a12bcb854bd7d5570550871d Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 17:27:49 +0100 Subject: [PATCH 058/147] Add more debug logs --- .../rethinkdb/_metrics/_statuses.py | 21 ++++++++++++++----- .../rethinkdb/_metrics/_system_jobs.py | 7 +++++++ .../datadog_checks/rethinkdb/_queries.py | 4 ++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py index 34cc3c50524df..4ec7b75906184 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import datetime as dt +import logging import time from typing import Iterator @@ -14,6 +15,8 @@ from .._queries import query_server_status, query_table_status from .._types import Metric +logger = logging.getLogger(__name__) + def collect_table_status(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] @@ -22,7 +25,11 @@ def collect_table_status(conn): See: https://rethinkdb.com/docs/system-tables/#server_status """ + logger.debug('collect_table_status') + for table_status in query_table_status(conn): + logger.debug('table_status %r', table_status) + table = table_status['name'] database = table_status['db'] @@ -88,12 +95,16 @@ def collect_server_status(conn): See: https://rethinkdb.com/docs/system-tables/#table_status """ - for server in query_server_status(conn): - name = server['name'] - network = server['network'] - process = server['process'] + logger.debug('collect_server_status') + + for server_status in query_server_status(conn): + logger.debug('server_status %r', server_status) + + server = server_status['name'] + network = server_status['network'] + process = server_status['process'] - tags = ['server:{}'.format(name)] + tags = ['server:{}'.format(server)] yield { 'type': 
'gauge', diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py index 604cc0c3c9169..03bbe76239463 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import absolute_import +import logging from typing import Iterator import rethinkdb @@ -10,6 +11,8 @@ from .._queries import query_system_jobs from .._types import Metric +logger = logging.getLogger(__name__) + def collect_system_jobs(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] @@ -18,7 +21,11 @@ def collect_system_jobs(conn): See: https://rethinkdb.com/docs/system-jobs/ """ + logger.debug('collect_system_jobs') + for job in query_system_jobs(conn): + logger.debug('job %r', job) + duration = job['duration_sec'] servers = job['servers'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index ba9dc2ea9ff40..221a6efbee00b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -10,7 +10,7 @@ """ from __future__ import absolute_import -from typing import Iterator, Tuple +from typing import Any, Iterator, Mapping, Tuple import rethinkdb from rethinkdb import r @@ -131,7 +131,7 @@ def query_replicas_with_stats(conn): ) ) - rows = query.run(conn) # type: Iterator[dict] + rows = query.run(conn) # type: Iterator[Mapping[str, Any]] for row in rows: table = row['table'] # type: Table From e1b01f49b78fcb2dec3463e00091e6f782cbbc17 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 18:24:54 +0100 Subject: [PATCH 059/147] Ensure connection is closed after check --- .../datadog_checks/rethinkdb/rethinkdb.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git 
a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 62342a4529a63..cf3d72b0ccda4 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -35,27 +35,27 @@ def __init__(self, *args, **kwargs): def connect_submitting_service_checks(self, host, port): # type: (str, int) -> Iterator[rethinkdb.net.Connection] try: - conn = r.connect(host=host, port=port) + with r.connect(host=host, port=port) as conn: + server = conn.server() # type: ConnectionServer + self.log.debug('connected server=%r', server) + tags = ['server:{}'.format(server['name'])] + + try: + yield conn + except Exception as exc: + message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) + self.log.error(message) + self.service_check(SC_CONNECT, self.CRITICAL, tags=tags, message=message) + raise + else: + self.service_check(SC_CONNECT, self.OK, tags=tags) + except rethinkdb.errors.ReqlDriverError as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) self.service_check(SC_CONNECT, self.CRITICAL, message=message) raise - server = conn.server() # type: ConnectionServer - self.log.debug('connected server=%r', server) - tags = ['server:{}'.format(server['name'])] - - try: - yield conn - except Exception as exc: - message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) - self.log.error(message) - self.service_check(SC_CONNECT, self.CRITICAL, tags=tags, message=message) - raise - - self.service_check(SC_CONNECT, self.OK, tags=tags) - def submit_metric(self, metric): # type: (Metric) -> None self.log.debug('submit_metric metric=%r', metric) From 5504f48363d228e01f76993ce49031c3af1fa598 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 18:39:40 +0100 Subject: [PATCH 060/147] Get rid of 'r' imports, improve encapsulation of Config --- rethinkdb/datadog_checks/rethinkdb/_config.py | 52 +++- 
.../rethinkdb/_metrics/__init__.py | 37 --- .../{_current_issues.py => current_issues.py} | 5 +- .../{_statistics.py => statistics.py} | 26 +- .../_metrics/{_statuses.py => statuses.py} | 14 +- .../{_system_jobs.py => system_jobs.py} | 8 +- .../datadog_checks/rethinkdb/_queries.py | 272 +++++++++--------- .../datadog_checks/rethinkdb/rethinkdb.py | 12 +- rethinkdb/tests/test_rethinkdb.py | 6 +- rethinkdb/tests/unit/test_config.py | 10 +- .../tests/unit/test_transient_metrics.py | 12 +- rethinkdb/tests/unit/utils.py | 24 +- 12 files changed, 239 insertions(+), 239 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/_metrics/{_current_issues.py => current_issues.py} (71%) rename rethinkdb/datadog_checks/rethinkdb/_metrics/{_statistics.py => statistics.py} (90%) rename rethinkdb/datadog_checks/rethinkdb/_metrics/{_statuses.py => statuses.py} (91%) rename rethinkdb/datadog_checks/rethinkdb/_metrics/{_system_jobs.py => system_jobs.py} (92%) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 45946208006be..602783e1ec2c0 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -9,7 +9,16 @@ from datadog_checks.base import ConfigurationError -from ._metrics import collect_default_metrics +from ._metrics.current_issues import collect_current_issues +from ._metrics.statistics import ( + collect_cluster_statistics, + collect_replica_statistics, + collect_server_statistics, + collect_table_statistics, +) +from ._metrics.statuses import collect_server_status, collect_table_status +from ._metrics.system_jobs import collect_system_jobs +from ._queries import QueryEngine from ._types import Instance, Metric @@ -34,20 +43,43 @@ def __init__(self, instance): if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) - self.host = host # type: str - self.port = port # type: int + self._host = host # type: str + self._port = port # type: 
int + self._query_engine = QueryEngine(r=rethinkdb.r) - # NOTE: this attribute exists for encapsulation and testing purposes. - self.metric_streams = [ - collect_default_metrics - ] # type: List[Callable[[rethinkdb.net.Connection], Iterator[Metric]]] + self._collect_funcs = [ + collect_cluster_statistics, + collect_server_statistics, + collect_table_statistics, + collect_replica_statistics, + collect_server_status, + collect_table_status, + collect_system_jobs, + collect_current_issues, + ] # type: List[Callable[[QueryEngine, rethinkdb.net.Connection], Iterator[Metric]]] + + @property + def host(self): + # type: () -> str + return self._host + + @property + def port(self): + # type: () -> int + return self._port + + def connect(self): + # type: () -> rethinkdb.net.Connection + host = self._host + port = self._port + return self._query_engine.connect(host, port) def collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] - for stream in self.metric_streams: - for metric in stream(conn): + for collect in self._collect_funcs: + for metric in collect(self._query_engine, conn): yield metric def __repr__(self): # type: () -> str - return 'Config(host={host!r}, port={port!r})'.format(host=self.host, port=self.port) + return 'Config(host={host!r}, port={port!r})'.format(host=self._host, port=self._port) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py index ee7f3807e5e41..46dd167dcde48 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py @@ -1,40 +1,3 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - -from __future__ import absolute_import - -import itertools -from typing import Callable, Dict, Iterator - -import rethinkdb - -from .._types import Metric -from ._current_issues import collect_current_issues -from ._statistics import ( - collect_cluster_statistics, - collect_replica_statistics, - collect_server_statistics, - collect_table_statistics, -) -from ._statuses import collect_server_status, collect_table_status -from ._system_jobs import collect_system_jobs - -__all__ = ['collect_default_metrics'] - - -def collect_default_metrics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - metrics = itertools.chain( - collect_cluster_statistics(conn), - collect_server_statistics(conn), - collect_table_statistics(conn), - collect_replica_statistics(conn), - collect_server_status(conn), - collect_table_status(conn), - collect_system_jobs(conn), - collect_current_issues(conn), - ) - - for metric in metrics: - yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py similarity index 71% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py index d54837e7c7948..4671ebaaf2925 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py @@ -7,11 +7,12 @@ import rethinkdb +from .._queries import QueryEngine from .._types import Metric -def collect_current_issues(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_current_issues(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about current system issues. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py similarity index 90% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py index 8e1a9b73ad6bb..cf6f089d8c07c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py @@ -13,17 +13,17 @@ import rethinkdb -from .._queries import query_cluster_stats, query_replicas_with_stats, query_servers_with_stats, query_tables_with_stats +from .._queries import QueryEngine from .._types import Metric logger = logging.getLogger(__name__) -def collect_cluster_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_cluster_statistics(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] logger.debug('collect_cluster_statistics') - stats = query_cluster_stats(conn) + stats = engine.query_cluster_stats(conn) logger.debug('cluster_statistics stats=%r', stats) query_engine = stats['query_engine'] @@ -50,11 +50,11 @@ def collect_cluster_statistics(conn): } -def collect_server_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_server_statistics(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] logger.debug('collect_server_statistics') - for server, stats in query_servers_with_stats(conn): + for server, stats in engine.query_servers_with_stats(conn): logger.debug('server_statistics server=%r stats=%r', server, stats) name = server['name'] @@ -120,11 +120,11 @@ def collect_server_statistics(conn): } -def collect_table_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_table_statistics(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] logger.debug('collect_table_statistics') - for table, stats in 
query_tables_with_stats(conn): + for table, stats in engine.query_tables_with_stats(conn): logger.debug('table_statistics table=%r stats=%r', table, stats) name = table['name'] @@ -148,11 +148,11 @@ def collect_table_statistics(conn): } -def collect_replica_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_replica_statistics(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] logger.debug('collect_replica_statistics') - for table, server, replica, stats in query_replicas_with_stats(conn): + for table, server, replica, stats in engine.query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) state = replica['state'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py similarity index 91% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py index 4ec7b75906184..dcde64f2c629f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py @@ -12,14 +12,14 @@ from datadog_checks.base import AgentCheck -from .._queries import query_server_status, query_table_status +from .._queries import QueryEngine from .._types import Metric logger = logging.getLogger(__name__) -def collect_table_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_table_status(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server statuses. 
@@ -27,7 +27,7 @@ def collect_table_status(conn): """ logger.debug('collect_table_status') - for table_status in query_table_status(conn): + for table_status in engine.query_table_status(conn): logger.debug('table_status %r', table_status) table = table_status['name'] @@ -88,8 +88,8 @@ def collect_table_status(conn): } -def collect_server_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_server_status(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about table statuses. @@ -97,7 +97,7 @@ def collect_server_status(conn): """ logger.debug('collect_server_status') - for server_status in query_server_status(conn): + for server_status in engine.query_server_status(conn): logger.debug('server_status %r', server_status) server = server_status['name'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py similarity index 92% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py rename to rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py index 03bbe76239463..65ef0744322f7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/_system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py @@ -8,14 +8,14 @@ import rethinkdb -from .._queries import query_system_jobs +from .._queries import QueryEngine from .._types import Metric logger = logging.getLogger(__name__) -def collect_system_jobs(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] +def collect_system_jobs(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about system jobs. 
@@ -23,7 +23,7 @@ def collect_system_jobs(conn): """ logger.debug('collect_system_jobs') - for job in query_system_jobs(conn): + for job in engine.query_system_jobs(conn): logger.debug('job %r', job) duration = job['duration_sec'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 221a6efbee00b..1102cf691eb1c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -13,7 +13,6 @@ from typing import Any, Iterator, Mapping, Tuple import rethinkdb -from rethinkdb import r from ._types import ( ClusterStats, @@ -30,136 +29,147 @@ ) -def query_cluster_stats(conn): - # type: (rethinkdb.net.Connection) -> ClusterStats - """ - Retrieve statistics about the cluster. - """ - return r.db('rethinkdb').table('stats').get(['cluster']).run(conn) - - -def query_servers_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] - """ - Retrieve each server in the cluster along with its statistics. - """ - # For servers: stats['id'] = ['server', ''] - is_server_stats_row = r.row['id'].nth(0) == 'server' - server_id = r.row['id'].nth(1) - - stats = r.db('rethinkdb').table('stats') - server_config = r.db('rethinkdb').table('server_config') - - rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] - - for row in rows: - server_stats = row['left'] # type: ServerStats - server = row['right'] # type: Server - yield server, server_stats - - -def query_tables_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] - """ - Retrieve each table in the cluster along with its statistics. 
- """ - # For tables: stats['id'] = ['table', ''] - is_table_stats_row = r.row['id'].nth(0) == 'table' - table_id = r.row['id'].nth(1) - - stats = r.db('rethinkdb').table('stats') - table_config = r.db('rethinkdb').table('table_config') - - rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] - - for row in rows: - table_stats = row['left'] # type: TableStats - table = row['right'] # type: Table - yield table, table_stats - - -def query_replicas_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] - """ - Retrieve each replica (table/server pair) in the cluster along with its statistics. - """ - - # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation (i.e. ask RethinkDB for a specific - # set of fields, instead of sending entire objects, which can be expensive when joining data as we do here.) - # See: https://rethinkdb.com/api/python/pluck/ - - stats = r.db('rethinkdb').table('stats') - server_config = r.db('rethinkdb').table('server_config') - table_config = r.db('rethinkdb').table('table_config') - table_status = r.db('rethinkdb').table( - 'table_status', - # Required so that 'server' fields in 'replicas' entries refer contain UUIDs instead of names. - # This way, we can join server information more efficiently, as we don't have to lookup UUIDs from names. - # See: https://rethinkdb.com/api/python/table/#description - identifier_format='uuid', - ) - - query = ( - # Start from table statuses, as they contain the list of replicas for each shard of the table. - # See: https://rethinkdb.com/docs/system-tables/#table_status - table_status.pluck('id', {'shards': ['replicas']}) - .merge({'table': r.row['id']}) - .without('id') - # Flatten each table status entry into one entry per shard and replica. 
- .concat_map(lambda row: row['shards'].map(lambda shard: row.merge(shard.pluck('replicas')))) - .without('shards') - .concat_map( - lambda row: row['replicas'].map(lambda replica: row.merge({'replica': replica.pluck('server', 'state')})) +class QueryEngine: + def __init__(self, r): + # type: (rethinkdb.RethinkDB) -> None + self._r = r + + def connect(self, host, port, **kwargs): + # type: (str, int, **Any) -> rethinkdb.net.Connection + return self._r.connect(host, port, **kwargs) + + def query_cluster_stats(self, conn): + # type: (rethinkdb.net.Connection) -> ClusterStats + """ + Retrieve statistics about the cluster. + """ + return self._r.db('rethinkdb').table('stats').get(['cluster']).run(conn) + + def query_servers_with_stats(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] + """ + Retrieve each server in the cluster along with its statistics. + """ + r = self._r + + # For servers: stats['id'] = ['server', ''] + is_server_stats_row = r.row['id'].nth(0) == 'server' + server_id = r.row['id'].nth(1) + + stats = r.db('rethinkdb').table('stats') + server_config = r.db('rethinkdb').table('server_config') + + rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] + + for row in rows: + server_stats = row['left'] # type: ServerStats + server = row['right'] # type: Server + yield server, server_stats + + def query_tables_with_stats(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] + """ + Retrieve each table in the cluster along with its statistics. 
+ """ + r = self._r + + # For tables: stats['id'] = ['table', ''] + is_table_stats_row = r.row['id'].nth(0) == 'table' + table_id = r.row['id'].nth(1) + + stats = r.db('rethinkdb').table('stats') + table_config = r.db('rethinkdb').table('table_config') + + rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] + + for row in rows: + table_stats = row['left'] # type: TableStats + table = row['right'] # type: Table + yield table, table_stats + + def query_replicas_with_stats(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] + """ + Retrieve each replica (table/server pair) in the cluster along with its statistics. + """ + r = self._r + + # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation (i.e. ask RethinkDB + # for a specific set of fields, instead of sending entire objects, which can be expensive when joining + # data as we do here.) + # See: https://rethinkdb.com/api/python/pluck/ + + stats = r.db('rethinkdb').table('stats') + server_config = r.db('rethinkdb').table('server_config') + table_config = r.db('rethinkdb').table('table_config') + table_status = r.db('rethinkdb').table( + 'table_status', + # Required so that 'server' fields in 'replicas' entries refer contain UUIDs instead of names. + # This way, we can join server information more efficiently, as we don't have to lookup UUIDs from names. + # See: https://rethinkdb.com/api/python/table/#description + identifier_format='uuid', ) - .without('replicas') - # Grab table information for each replica. - # See: https://rethinkdb.com/docs/system-tables#table_config - .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) - # Grab server information for each replica. 
- # See: https://rethinkdb.com/docs/system-tables#server_config - .merge({'server': server_config.get(r.row['replica']['server'])}) - .filter(r.row['server']) # Skip replicas stored on disconnected servers. - .merge({'server': r.row['server'].pluck('id', 'name', 'tags')}) - # Grab statistics for each replica. - # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair - .merge( - { - 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( - 'query_engine', 'storage_engine' - ), - } - ) - ) - - rows = query.run(conn) # type: Iterator[Mapping[str, Any]] - - for row in rows: - table = row['table'] # type: Table - server = row['server'] # type: Server - replica = row['replica'] # type: ShardReplica - replica_stats = row['stats'] # type: ReplicaStats - yield table, server, replica, replica_stats - - -def query_table_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] - """ - Retrieve the status of each table in the cluster. - """ - return r.db('rethinkdb').table('table_status').run(conn) - - -def query_server_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] - """ - Retrieve the status of each server in the cluster. - """ - return r.db('rethinkdb').table('server_status').run(conn) + query = ( + # Start from table statuses, as they contain the list of replicas for each shard of the table. + # See: https://rethinkdb.com/docs/system-tables/#table_status + table_status.pluck('id', {'shards': ['replicas']}) + .merge({'table': r.row['id']}) + .without('id') + # Flatten each table status entry into one entry per shard and replica. + .concat_map(lambda row: row['shards'].map(lambda shard: row.merge(shard.pluck('replicas')))) + .without('shards') + .concat_map( + lambda row: ( + row['replicas'].map(lambda replica: row.merge({'replica': replica.pluck('server', 'state')})) + ) + ) + .without('replicas') + # Grab table information for each replica. 
+ # See: https://rethinkdb.com/docs/system-tables#table_config + .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) + # Grab server information for each replica. + # See: https://rethinkdb.com/docs/system-tables#server_config + .merge({'server': server_config.get(r.row['replica']['server'])}) + .filter(r.row['server']) # Skip replicas stored on disconnected servers. + .merge({'server': r.row['server'].pluck('id', 'name', 'tags')}) + # Grab statistics for each replica. + # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair + .merge( + { + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( + 'query_engine', 'storage_engine' + ), + } + ) + ) -def query_system_jobs(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Job] - """ - Retrieve all the currently running system jobs. - """ - return r.db('rethinkdb').table('jobs').run(conn) + rows = query.run(conn) # type: Iterator[Mapping[str, Any]] + + for row in rows: + table = row['table'] # type: Table + server = row['server'] # type: Server + replica = row['replica'] # type: ShardReplica + replica_stats = row['stats'] # type: ReplicaStats + yield table, server, replica, replica_stats + + def query_table_status(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] + """ + Retrieve the status of each table in the cluster. + """ + return self._r.db('rethinkdb').table('table_status').run(conn) + + def query_server_status(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] + """ + Retrieve the status of each server in the cluster. + """ + return self._r.db('rethinkdb').table('server_status').run(conn) + + def query_system_jobs(self, conn): + # type: (rethinkdb.net.Connection) -> Iterator[Job] + """ + Retrieve all the currently running system jobs. 
+ """ + return self._r.db('rethinkdb').table('jobs').run(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index cf3d72b0ccda4..a302610a42a46 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -9,7 +9,6 @@ from typing import Any, Callable, Iterator import rethinkdb -from rethinkdb import r from datadog_checks.base import AgentCheck @@ -32,10 +31,10 @@ def __init__(self, *args, **kwargs): self.config = Config(self.instance) @contextmanager - def connect_submitting_service_checks(self, host, port): - # type: (str, int) -> Iterator[rethinkdb.net.Connection] + def connect_submitting_service_checks(self, config): + # type: (Config) -> Iterator[rethinkdb.net.Connection] try: - with r.connect(host=host, port=port) as conn: + with config.connect() as conn: server = conn.server() # type: ConnectionServer self.log.debug('connected server=%r', server) tags = ['server:{}'.format(server['name'])] @@ -70,10 +69,7 @@ def check(self, instance): config = self.config self.log.debug('check config=%r', config) - host = config.host - port = config.port - - with self.connect_submitting_service_checks(host, port) as conn: + with self.connect_submitting_service_checks(config) as conn: for metric in config.collect_metrics(conn): self.submit_metric(metric) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 2aea7f12b597b..a38f5ae09686e 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -181,13 +181,13 @@ def test_connected_but_check_failed_unexpectedly(aggregator, instance): class Failure(Exception): pass - def collect_and_fail(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] + def collect_and_fail(): + # type: () -> Iterator[Metric] yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} raise Failure check = RethinkDBCheck('rethinkdb', {}, 
[instance]) - check.config.metric_streams = [collect_and_fail] + check.config._collect_funcs = [lambda engine, conn: collect_and_fail()] with pytest.raises(Failure): check.check(instance) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 639082600142a..e1840a44d1ea2 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -7,27 +7,31 @@ from datadog_checks.base import ConfigurationError from datadog_checks.rethinkdb._config import Config +from datadog_checks.rethinkdb._types import Instance pytestmark = pytest.mark.unit def test_default_config(): # type: () -> None - config = Config(instance={}) + instance = {} # type: Instance + config = Config(instance) assert config.host == 'localhost' assert config.port == 28015 def test_config(): # type: () -> None - config = Config(instance={'host': '192.168.121.1', 'port': 28016}) + instance = {'host': '192.168.121.1', 'port': 28016} # type: Instance + config = Config(instance) assert config.host == '192.168.121.1' assert config.port == 28016 def test_config_repr(): # type: () -> None - config = Config(instance={}) + instance = {} # type: Instance + config = Config(instance) assert repr(config) == "Config(host='localhost', port=28015)" diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_transient_metrics.py index b274f59c0dbe3..58b389dfc39d4 100644 --- a/rethinkdb/tests/unit/test_transient_metrics.py +++ b/rethinkdb/tests/unit/test_transient_metrics.py @@ -5,12 +5,12 @@ Unit tests for metrics that are hard to test using integration tests, eg. because they depend on cluster dynamics. 
""" import pytest -from rethinkdb import r -from datadog_checks.rethinkdb._metrics import collect_system_jobs +from datadog_checks.rethinkdb._metrics.system_jobs import collect_system_jobs +from datadog_checks.rethinkdb._queries import QueryEngine from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob -from .utils import MockConnection, patch_connection_type +from .utils import MockConnection, MockRethinkDB pytestmark = pytest.mark.unit @@ -51,9 +51,9 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] - with patch_connection_type(MockConnection): - conn = r.connect(rows=mock_rows) - metrics = list(collect_system_jobs(conn)) + mock_engine = QueryEngine(r=MockRethinkDB(connection_type=MockConnection)) + conn = mock_engine.connect(host='testserver', port=28015, rows=mock_rows) + metrics = list(collect_system_jobs(mock_engine, conn)) assert metrics == [ { diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index 4ec2acbbe1d5c..3d1ecee66ad1b 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -1,8 +1,6 @@ -from contextlib import contextmanager -from typing import Any, Dict, Iterator +from typing import Any, Dict, Iterator, List, Type -from rethinkdb import r -from rethinkdb.net import Connection +import rethinkdb class MockConnectionInstance(object): @@ -25,7 +23,7 @@ def run_query(self, query, noreply): return self._parent.mock_rows() -class MockConnection(Connection): +class MockConnection(rethinkdb.net.Connection): """ A RethinkDB connection type that mocks all queries by sending a deterministic set of rows. 
@@ -37,7 +35,7 @@ def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None rows = kwargs.pop('rows') super(MockConnection, self).__init__(MockConnectionInstance, *args, **kwargs) - self.rows = rows + self.rows = rows # type: List[Dict[str, Any]] def mock_rows(self): # type: () -> Iterator[Dict[str, Any]] @@ -45,12 +43,8 @@ def mock_rows(self): yield row -@contextmanager -def patch_connection_type(conn_type): - # type: (type) -> Iterator[None] - initial_conn_type = r.connection_type - r.connection_type = conn_type - try: - yield - finally: - r.connection_type = initial_conn_type +class MockRethinkDB(rethinkdb.RethinkDB): + def __init__(self, connection_type): + # type: (Type[rethinkdb.net.Connection]) -> None + super(MockRethinkDB, self).__init__() + self.connection_type = connection_type From fc7643511e24d38178069ef820043c0240ba6e7c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 18:42:00 +0100 Subject: [PATCH 061/147] Rename test file, add missing copyright notice --- .../tests/unit/{test_transient_metrics.py => test_metrics.py} | 0 rethinkdb/tests/unit/utils.py | 3 +++ 2 files changed, 3 insertions(+) rename rethinkdb/tests/unit/{test_transient_metrics.py => test_metrics.py} (100%) diff --git a/rethinkdb/tests/unit/test_transient_metrics.py b/rethinkdb/tests/unit/test_metrics.py similarity index 100% rename from rethinkdb/tests/unit/test_transient_metrics.py rename to rethinkdb/tests/unit/test_metrics.py diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index 3d1ecee66ad1b..d3ba88967cd5a 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -1,3 +1,6 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) from typing import Any, Dict, Iterator, List, Type import rethinkdb From 9e7525c918cf750eeb0a57793c6659975c1d3128 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 27 Feb 2020 18:43:04 +0100 Subject: [PATCH 062/147] Cleanup rethinkdb.py --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index a302610a42a46..733b0574f3bdc 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -15,7 +15,7 @@ from ._config import Config from ._types import ConnectionServer, Instance, Metric -SC_CONNECT = 'rethinkdb.can_connect' +SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' class RethinkDBCheck(AgentCheck): @@ -44,15 +44,15 @@ def connect_submitting_service_checks(self, config): except Exception as exc: message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) self.log.error(message) - self.service_check(SC_CONNECT, self.CRITICAL, tags=tags, message=message) + self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise else: - self.service_check(SC_CONNECT, self.OK, tags=tags) + self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) except rethinkdb.errors.ReqlDriverError as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) - self.service_check(SC_CONNECT, self.CRITICAL, message=message) + self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, message=message) raise def submit_metric(self, metric): @@ -74,6 +74,3 @@ def check(self, instance): self.submit_metric(metric) # TODO: version metadata. - # TODO: custom queries. (Hint: look at `QueryManager`.) - # TODO: allow not sending default metrics. 
- # TODO: decide if and how to deal with `identifier_format`: https://rethinkdb.com/api/python/table/#description From 0da808f401550b4f29d78786053b5e334092af57 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 10:16:04 +0100 Subject: [PATCH 063/147] Lint --- datadog_checks_dev/datadog_checks/dev/docker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog_checks_dev/datadog_checks/dev/docker.py b/datadog_checks_dev/datadog_checks/dev/docker.py index 12102f5ade622..334cdb8228a34 100644 --- a/datadog_checks_dev/datadog_checks/dev/docker.py +++ b/datadog_checks_dev/datadog_checks/dev/docker.py @@ -255,7 +255,7 @@ def temporarily_stop_service(service, compose_file, check=True): run_command(['docker-compose', '-f', compose_file, 'stop', service], capture=False, check=check) try: yield - except: + except Exception: raise else: run_command(['docker-compose', '-f', compose_file, 'start', service], capture=False, check=check) From 1181b750ebd1ea89df05f2145f3677b460e5165c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 10:38:05 +0100 Subject: [PATCH 064/147] Fix docstrings --- rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py index dcde64f2c629f..0a3f6e535656b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py @@ -21,9 +21,9 @@ def collect_table_status(engine, conn): # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ - Collect metrics about server statuses. + Collect metrics about table statuses. 
- See: https://rethinkdb.com/docs/system-tables/#server_status + See: https://rethinkdb.com/docs/system-tables/#table_status """ logger.debug('collect_table_status') @@ -91,9 +91,9 @@ def collect_table_status(engine, conn): def collect_server_status(engine, conn): # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ - Collect metrics about table statuses. + Collect metrics about server statuses. - See: https://rethinkdb.com/docs/system-tables/#table_status + See: https://rethinkdb.com/docs/system-tables/#server_status """ logger.debug('collect_server_status') From 45ca24ac9824a528791786355acde0a84140f1bb Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 12:00:51 +0100 Subject: [PATCH 065/147] Add config totals metrics --- rethinkdb/datadog_checks/rethinkdb/_config.py | 2 + .../rethinkdb/_metrics/config.py | 61 +++++++++++++++++++ .../datadog_checks/rethinkdb/_queries.py | 36 ++++++++++- rethinkdb/datadog_checks/rethinkdb/_types.py | 14 ++++- rethinkdb/tests/cluster.py | 15 +++++ rethinkdb/tests/common.py | 1 + rethinkdb/tests/test_rethinkdb.py | 41 ++++++++----- 7 files changed, 152 insertions(+), 18 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/_metrics/config.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 602783e1ec2c0..3ec5c56a7a8c4 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -9,6 +9,7 @@ from datadog_checks.base import ConfigurationError +from ._metrics.config import collect_config_totals from ._metrics.current_issues import collect_current_issues from ._metrics.statistics import ( collect_cluster_statistics, @@ -48,6 +49,7 @@ def __init__(self, instance): self._query_engine = QueryEngine(r=rethinkdb.r) self._collect_funcs = [ + collect_config_totals, collect_cluster_statistics, collect_server_statistics, collect_table_statistics, diff --git 
a/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py new file mode 100644 index 0000000000000..033cac77a7807 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py @@ -0,0 +1,61 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import absolute_import + +import logging +from typing import Iterator + +import rethinkdb + +from .._queries import QueryEngine +from .._types import Metric + +logger = logging.getLogger(__name__) + + +def collect_config_totals(engine, conn): + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect aggregated metrics about cluster configuration. + + See: https://rethinkdb.com/docs/system-tables/#configuration-tables + """ + logger.debug('collect_config_totals') + + totals = engine.query_config_totals(conn) + logger.debug('config_totals totals=%r', totals) + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server.total', + 'value': totals['servers'], + 'tags': [], + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.database.total', + 'value': totals['databases'], + 'tags': [], + } + + for database, total in totals['tables_per_database'].items(): + tags = ['database:{}'.format(database)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.database.table.total', + 'value': total, + 'tags': tags, + } + + for table, total in totals['secondary_indexes_per_table'].items(): + tags = ['table:{}'.format(table)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table.secondary_index.total', + 'value': total, + 'tags': tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 1102cf691eb1c..fdb0d4aba7998 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -16,6 +16,7 @@ from ._types import ( ClusterStats, + ConfigTotals, Job, 
JoinRow, ReplicaStats, @@ -34,10 +35,43 @@ def __init__(self, r): # type: (rethinkdb.RethinkDB) -> None self._r = r - def connect(self, host, port, **kwargs): + def connect(self, host='localhost', port=28015, **kwargs): # type: (str, int, **Any) -> rethinkdb.net.Connection return self._r.connect(host, port, **kwargs) + def query_config_totals(self, conn): + # type: (rethinkdb.net.Connection) -> ConfigTotals + r = self._r + + table_config = r.db('rethinkdb').table('table_config') + server_config = r.db('rethinkdb').table('server_config') + db_config = r.db('rethinkdb').table('db_config') + + # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. + # See: https://github.com/rethinkdb/rethinkdb/issues/2067 + + tables_per_database = table_config.group('db').count().run(conn) # type: Mapping[str, int] + + secondary_indexes_per_table = ( + # NOTE: this is an example of a map-reduce query. + # See: https://rethinkdb.com/docs/map-reduce/#a-more-complex-example + table_config.pluck('name', 'indexes') + .concat_map(lambda row: row['indexes'].map(lambda _: {'table': row['name']})) + .group('table') + .count() + ).run( + conn + ) # type: Mapping[str, int] + + totals = { + 'servers': server_config.count(), + 'databases': db_config.count(), + 'tables_per_database': tables_per_database, + 'secondary_indexes_per_table': secondary_indexes_per_table, + } # type: ConfigTotals # Enforce keys to match. (Values are `Any`, so ignored by type checker.) 
+ + return r.expr(totals).run(conn) + def query_cluster_stats(self, conn): # type: (rethinkdb.net.Connection) -> ClusterStats """ diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 233e939203953..4ae39586e0b3f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -5,7 +5,7 @@ Declarations used for type checking our code, including our manipulation of JSON documents returned by RethinkDB. """ import datetime as dt -from typing import Any, Dict, List, Literal, Tuple, TypedDict, Union +from typing import Any, List, Literal, Mapping, Tuple, TypedDict, Union # Lightweight shim to decouple collection functions from the check class. Metric = TypedDict( @@ -33,6 +33,16 @@ ClusterStats = TypedDict('ClusterStats', {'id': Tuple[Literal['cluster']], 'query_engine': ClusterQueryEngine}) +ConfigTotals = TypedDict( + 'ConfigTotals', + { + 'servers': int, + 'databases': int, + 'tables_per_database': Mapping[str, int], + 'secondary_indexes_per_table': Mapping[str, int], + }, +) + ServerQueryEngine = TypedDict( 'ServerQueryEngine', { @@ -108,7 +118,7 @@ ) # vvv NOTE: only fields of interest are listed here. 
-ServerNetwork = TypedDict('ServerNetwork', {'time_connected': dt.datetime, 'connected_to': Dict[str, bool]}) +ServerNetwork = TypedDict('ServerNetwork', {'time_connected': dt.datetime, 'connected_to': Mapping[str, bool]}) ServerProcess = TypedDict('ServerProcess', {'time_started': dt.datetime, 'version': str}) # ^^^ ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index d9038ade78e06..49b0bf28d4a50 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -19,6 +19,7 @@ HEROES_TABLE, HEROES_TABLE_CONFIG, HEROES_TABLE_DOCUMENTS, + HEROES_TABLE_INDEX_FIELD, HOST, PROXY_PORT, ) @@ -29,13 +30,23 @@ def setup_cluster(): """ Configure the test cluster. """ + _drop_test_database() # Automatically created by RethinkDB, but we don't use it and it would skew our metrics. _create_database() _create_test_table() _simulate_client_writes() _simulate_client_reads() +def _drop_test_database(): + # type: () -> None + with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + # See: https://rethinkdb.com/api/python/db_drop + response = r.db_drop('test').run(conn) + assert response['dbs_dropped'] == 1 + + def _create_database(): + # type: () -> None with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: # See: https://rethinkdb.com/api/python/db_create response = r.db_create(DATABASE).run(conn) @@ -49,6 +60,10 @@ def _create_test_table(): response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) assert response['tables_created'] == 1 + # See: https://rethinkdb.com/api/python/index_create/ + response = r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD).run(conn) + assert response['created'] == 1 + def _simulate_client_writes(): # type: () -> None diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index cc1d3411b0c10..d056082cbf6f9 100644 --- 
a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -62,6 +62,7 @@ "appearances_count": 72, }, ] +HEROES_TABLE_INDEX_FIELD = 'appearances_count' # Metrics lists. diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index a38f5ae09686e..de9a686185539 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -39,13 +39,7 @@ def test_check(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) - _assert_statistics_metrics(aggregator) - _assert_table_status_metrics(aggregator) - _assert_server_status_metrics(aggregator) - - # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is - # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to - # test without introducing flakiness. + _assert_metrics(aggregator) aggregator.assert_all_metrics_covered() @@ -73,9 +67,7 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): disconnected_servers = {server_with_data} - _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) - _assert_table_status_metrics(aggregator) - _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_metrics(aggregator, disconnected_servers=disconnected_servers) aggregator.assert_all_metrics_covered() @@ -98,11 +90,33 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): ) -def _assert_statistics_metrics(aggregator, disconnected_servers=None): +def _assert_metrics(aggregator, disconnected_servers=None): # type: (AggregatorStub, Set[str]) -> None if disconnected_servers is None: disconnected_servers = set() + _assert_config_totals_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_table_status_metrics(aggregator) + 
_assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) + + # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is + # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to + # test without introducing flakiness. + + +def _assert_config_totals_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[str]) -> None + aggregator.assert_metric('rethinkdb.server.total', count=1, value=len(SERVERS) - len(disconnected_servers)) + aggregator.assert_metric('rethinkdb.database.total', count=1, value=1) + aggregator.assert_metric('rethinkdb.database.table.total', count=1, value=1, tags=['database:{}'.format(DATABASE)]) + aggregator.assert_metric( + 'rethinkdb.table.secondary_index.total', count=1, value=1, tags=['table:{}'.format(HEROES_TABLE)] + ) + + +def _assert_statistics_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[str]) -> None for metric in CLUSTER_STATISTICS_METRICS: aggregator.assert_metric(metric, count=1, tags=[]) @@ -147,11 +161,8 @@ def _assert_table_status_metrics(aggregator): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) -def _assert_server_status_metrics(aggregator, disconnected_servers=None): +def _assert_server_status_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[str]) -> None - if disconnected_servers is None: - disconnected_servers = set() - for metric in SERVER_STATUS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] From 3d07c6cde9ae5f85f98c875a6997adf4c5879456 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 14:46:11 +0100 Subject: [PATCH 066/147] Add version metadata --- rethinkdb/datadog_checks/rethinkdb/_config.py | 14 ++++++++++ .../datadog_checks/rethinkdb/_queries.py | 12 ++++++++ rethinkdb/datadog_checks/rethinkdb/_types.py | 15 +++++++--- .../datadog_checks/rethinkdb/_version.py 
| 28 +++++++++++++++++++ .../datadog_checks/rethinkdb/rethinkdb.py | 3 +- rethinkdb/tests/common.py | 1 + rethinkdb/tests/test_rethinkdb.py | 27 ++++++++++++++++++ rethinkdb/tests/unit/test_version.py | 19 +++++++++++++ 8 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/_version.py create mode 100644 rethinkdb/tests/unit/test_version.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 3ec5c56a7a8c4..1ecbf4315340e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -21,6 +21,7 @@ from ._metrics.system_jobs import collect_system_jobs from ._queries import QueryEngine from ._types import Instance, Metric +from ._version import parse_version class Config: @@ -82,6 +83,19 @@ def collect_metrics(self, conn): for metric in collect(self._query_engine, conn): yield metric + def get_connected_server_version(self, conn): + # type: (rethinkdb.net.Connection) -> str + """ + Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
+ + Example: + + >>> config.get_version(conn) + '2.4.0~0bionic' + """ + version_string = self._query_engine.get_connected_server_version_string(conn) + return parse_version(version_string) + def __repr__(self): # type: () -> str return 'Config(host={host!r}, port={port!r})'.format(host=self._host, port=self._port) diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index fdb0d4aba7998..45df736101c10 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -17,6 +17,7 @@ from ._types import ( ClusterStats, ConfigTotals, + ConnectionServer, Job, JoinRow, ReplicaStats, @@ -39,6 +40,17 @@ def connect(self, host='localhost', port=28015, **kwargs): # type: (str, int, **Any) -> rethinkdb.net.Connection return self._r.connect(host, port, **kwargs) + def get_connected_server_version_string(self, conn): + # type: (rethinkdb.net.Connection) -> str + """ + Return the raw string of the RethinkDB version used by the server at the other end of the connection. + """ + r = self._r + # See: https://rethinkdb.com/docs/system-tables/#server_status + server = conn.server() # type: ConnectionServer + server_status = r.db('rethinkdb').table('server_status').get(server['id']).run(conn) # type: ServerStatus + return server_status['process']['version'] + def query_config_totals(self, conn): # type: (rethinkdb.net.Connection) -> ConfigTotals r = self._r diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 4ae39586e0b3f..dde44d963e368 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -117,10 +117,17 @@ 'TableStatus', {'id': str, 'name': str, 'db': str, 'status': TableStatusFlags, 'shards': List[Shard]} ) -# vvv NOTE: only fields of interest are listed here. 
-ServerNetwork = TypedDict('ServerNetwork', {'time_connected': dt.datetime, 'connected_to': Mapping[str, bool]}) -ServerProcess = TypedDict('ServerProcess', {'time_started': dt.datetime, 'version': str}) -# ^^^ +ServerNetwork = TypedDict( + 'ServerNetwork', + { + # NOTE: only fields of interest are listed here. + 'time_connected': dt.datetime, + 'connected_to': Mapping[str, bool], + }, +) +ServerProcess = TypedDict( + 'ServerProcess', {'argv': List[str], 'cache_size_mb': int, 'pid': int, 'time_started': dt.datetime, 'version': str} +) ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) diff --git a/rethinkdb/datadog_checks/rethinkdb/_version.py b/rethinkdb/datadog_checks/rethinkdb/_version.py new file mode 100644 index 0000000000000..ea2d0d82ff00d --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_version.py @@ -0,0 +1,28 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import re + +# See: https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 +_RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P<rethinkdb_version>(?:\S+|[^\(]+))') + + +def parse_version(rethinkdb_version_string): + # type: (str) -> str + """ + Given a RethinkDB version string, extract the SemVer version.
+ https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 + + Example: + >>> parse_version('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 (tags/RELEASE_600/final))') + '2.4.0~0bionic' + """ + match = _RETHINKDB_VERSION_STR_REGEX.match(rethinkdb_version_string) + + if match is None: + message = 'Version string did not match pattern (version_string={!r} pattern={!r})'.format( + rethinkdb_version_string, _RETHINKDB_VERSION_STR_REGEX + ) + raise ValueError(message) + + return match.group('rethinkdb_version') diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 733b0574f3bdc..2c362dc4b8a49 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -73,4 +73,5 @@ def check(self, instance): for metric in config.collect_metrics(conn): self.submit_metric(metric) - # TODO: version metadata. + version = config.get_connected_server_version(conn) + self.set_metadata('version', version) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index d056082cbf6f9..b0eccedc1e3b9 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -11,6 +11,7 @@ CHECK_NAME = 'rethinkdb' IMAGE = 'rethinkdb:2.4.0' +RETHINKDB_VERSION = '2.4.0~0bionic' HOST = get_docker_hostname() diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index de9a686185539..b14d8412be816 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -8,6 +8,7 @@ import rethinkdb from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb._types import Instance, Metric @@ -21,6 +22,7 @@ HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, REPLICA_STATISTICS_METRICS, + RETHINKDB_VERSION, SERVER_STATISTICS_METRICS, 
SERVER_STATUS_METRICS, SERVER_TAGS, @@ -205,3 +207,28 @@ def collect_and_fail(): service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_version_metadata(aggregator, instance, datadog_agent): + # type: (AggregatorStub, Instance, DatadogAgentStub) -> None + check_id = 'test' + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check_id = check_id + + check.check(instance) + + raw_version = RETHINKDB_VERSION + version, _, build = raw_version.partition('~') + major, minor, patch = version.split('.') + version_metadata = { + 'version.scheme': 'semver', + 'version.major': major, + 'version.minor': minor, + 'version.patch': patch, + 'version.raw': raw_version, + } + + datadog_agent.assert_metadata(check_id, version_metadata) diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py new file mode 100644 index 0000000000000..558d8b05f2a21 --- /dev/null +++ b/rethinkdb/tests/unit/test_version.py @@ -0,0 +1,19 @@ +import pytest + +from datadog_checks.rethinkdb._version import parse_version + + +@pytest.mark.unit +@pytest.mark.parametrize( + 'version_string, expected_version', + [ + pytest.param('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 (tags/RELEASE_600/final))', '2.4.0~0bionic', id='2.4'), + pytest.param('rethinkdb 2.4.0-beta~0bionic (debug)', '2.4.0-beta~0bionic', id='2.4-beta'), + pytest.param('rethinkdb 2.4.0~0bionic (debug)', '2.4.0~0bionic', id='2.4-debug'), + pytest.param('rethinkdb 2.3.3~0jessie (GCC 4.9.2)', '2.3.3~0jessie', id='2.3'), + pytest.param('rethinkdb 2.3.3 (GCC 4.9.2)', '2.3.3', id='2.3-no-build'), + ], +) +def test_parse_version(version_string, expected_version): + # type: (str, str) -> None + assert parse_version(version_string) == expected_version From 412e7437d3078a0638a24bb221c13dfbef0ac043 Mon Sep 17 00:00:00 
2001 From: Florimond Manca Date: Fri, 28 Feb 2020 16:10:33 +0100 Subject: [PATCH 067/147] Add basic auth support, simplify cluster setup --- rethinkdb/datadog_checks/rethinkdb/_config.py | 11 ++- .../datadog_checks/rethinkdb/_queries.py | 6 +- rethinkdb/datadog_checks/rethinkdb/_types.py | 2 +- rethinkdb/tests/cluster.py | 88 +++++++------------ rethinkdb/tests/common.py | 4 + rethinkdb/tests/conftest.py | 26 ++++-- 6 files changed, 69 insertions(+), 68 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 1ecbf4315340e..278de278d3850 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import absolute_import -from typing import Callable, Iterator, List +from typing import Callable, Iterator, List, Optional import rethinkdb @@ -35,6 +35,8 @@ def __init__(self, instance): # type: (Instance) -> None host = instance.get('host', 'localhost') port = instance.get('port', 28015) + user = instance.get('user') + password = instance.get('password') if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) @@ -47,6 +49,8 @@ def __init__(self, instance): self._host = host # type: str self._port = port # type: int + self._user = user # type: Optional[str] + self._password = password # type: Optional[str] self._query_engine = QueryEngine(r=rethinkdb.r) self._collect_funcs = [ @@ -75,7 +79,10 @@ def connect(self): # type: () -> rethinkdb.net.Connection host = self._host port = self._port - return self._query_engine.connect(host, port) + user = self._user + password = self._password + + return self._query_engine.connect(host, port, user=user, password=password) def collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py 
b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 45df736101c10..660515a35a1ea 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -36,9 +36,9 @@ def __init__(self, r): # type: (rethinkdb.RethinkDB) -> None self._r = r - def connect(self, host='localhost', port=28015, **kwargs): - # type: (str, int, **Any) -> rethinkdb.net.Connection - return self._r.connect(host, port, **kwargs) + def connect(self, host='localhost', port=28015, user=None, password=None, **kwargs): + # type: (str, int, str, str, **Any) -> rethinkdb.net.Connection + return self._r.connect(host, port, user=user, password=password, **kwargs) def get_connected_server_version_string(self, conn): # type: (rethinkdb.net.Connection) -> str diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index dde44d963e368..9e380dc2343f8 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -13,7 +13,7 @@ {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, ) -Instance = TypedDict('Instance', {'host': str, 'port': int}, total=False) +Instance = TypedDict('Instance', {'host': str, 'port': int, 'user': str, 'password': str}, total=False) # Configuration documents. diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 49b0bf28d4a50..85e6e00b0b398 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -1,8 +1,9 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import logging from contextlib import contextmanager -from typing import Iterator +from typing import Iterator, List import rethinkdb from rethinkdb import r @@ -12,6 +13,9 @@ from datadog_checks.dev.structures import EnvVars from .common import ( + AGENT_PASSWORD, + AGENT_USER, + CLIENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, CONNECT_SERVER_PORT, @@ -24,99 +28,73 @@ PROXY_PORT, ) +logger = logging.getLogger(__name__) + def setup_cluster(): # type: () -> None """ Configure the test cluster. """ - _drop_test_database() # Automatically created by RethinkDB, but we don't use it and it would skew our metrics. - _create_database() - _create_test_table() - _simulate_client_writes() - _simulate_client_reads() - + logger.debug('setup_cluster') -def _drop_test_database(): - # type: () -> None with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # See: https://rethinkdb.com/api/python/db_drop - response = r.db_drop('test').run(conn) - assert response['dbs_dropped'] == 1 + r.db_drop('test').run(conn) # Automatically created, but we don't use it and it would skew our metrics. + # Cluster content. + r.db_create(DATABASE).run(conn) + r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) + r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD).run(conn) -def _create_database(): - # type: () -> None - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # See: https://rethinkdb.com/api/python/db_create - response = r.db_create(DATABASE).run(conn) - assert response['dbs_created'] == 1 + # Users. 
+ # See: https://rethinkdb.com/docs/permissions-and-accounts/ + r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD}).run(conn) + r.db('rethinkdb').grant(AGENT_USER, {'read': True}).run(conn) + r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False}).run(conn) + r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True}).run(conn) -def _create_test_table(): - # type: () -> None - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: - # See: https://rethinkdb.com/api/python/table_create/ - response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) - assert response['tables_created'] == 1 - - # See: https://rethinkdb.com/api/python/index_create/ - response = r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD).run(conn) - assert response['created'] == 1 - + # Simulate client activity. + # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. -def _simulate_client_writes(): - # type: () -> None - """ - Simulate a client application that inserts rows by connecting via the proxy node. - - Calling this ensures that 'written_docs_*' metrics will have a non-zero value. - """ - - with r.connect(host=HOST, port=PROXY_PORT) as conn: - # See: https://rethinkdb.com/api/python/insert + with r.connect(host=HOST, port=PROXY_PORT, user=CLIENT_USER) as conn: response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) - assert response['errors'] == 0 assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) - -def _simulate_client_reads(): - # type: () -> None - """ - Simulate a client application that reads rows by connecting via the proxy node. - - Calling this ensures that 'read_docs_*' metrics will have a non-zero value. 
- """ - - with r.connect(db=DATABASE, host=HOST, port=PROXY_PORT) as conn: - all_heroes = list(r.table(HEROES_TABLE).run(conn)) - assert len(all_heroes) == len(HEROES_TABLE_DOCUMENTS) + documents = list(r.db(DATABASE).table(HEROES_TABLE).run(conn)) + assert len(documents) == len(HEROES_TABLE_DOCUMENTS) @contextmanager def temporarily_disconnect_server(server): + # type: (str) -> Iterator[None] """ Gracefully disconnect a server from the cluster. Ensures that the stable is left in a stable state inside and after exiting the context. """ service = 'rethinkdb-{}'.format(server) + logger.debug('temporarily_disconnect_server server=%r service=%r', server, service) def _server_exists(conn): # type: (rethinkdb.net.Connection) -> bool - return r.db('rethinkdb').table('server_status').map(r.row['name']).contains(server).run(conn) + servers = r.db('rethinkdb').table('server_status').map(r.row['name']).run(conn) # type: List[str] + logger.debug('server_exists server=%r servers=%r', server, servers) + return server in servers def _leader_election_done(conn): # type: (rethinkdb.net.Connection) -> bool STABLE_REPLICA_STATES = {'ready', 'waiting_for_primary', 'disconnected'} - replica_states = ( + replica_states = list( r.db('rethinkdb') .table('table_status') .concat_map(r.row['shards']) .concat_map(r.row['replicas']) .map(r.row['state']) .run(conn) - ) # type: Iterator[str] + ) # type: List[str] + + logger.debug('replica_states %r', replica_states) return all(state in STABLE_REPLICA_STATES for state in replica_states) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index b0eccedc1e3b9..006f490e77419 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -31,6 +31,10 @@ PROXY_PORT = 28018 +AGENT_USER = 'datadog-agent' +AGENT_PASSWORD = 'r3th1nK' +CLIENT_USER = 'doggo' + DATABASE = 'doghouse' HEROES_TABLE = 'heroes' diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index e48b8a8cbb7a0..4f2ce7b532875 100644 --- 
a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -1,25 +1,37 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Iterator +from typing import Any, Iterator import pytest -from datadog_checks.dev import WaitFor, docker_run +from datadog_checks.dev import docker_run from datadog_checks.rethinkdb._types import Instance from .cluster import setup_cluster -from .common import COMPOSE_FILE, CONNECT_SERVER_PORT, HOST, IMAGE, PROXY_PORT +from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_FILE, CONNECT_SERVER_PORT, HOST, IMAGE, PROXY_PORT E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} -@pytest.fixture(scope='session') -def instance(): - # type: () -> Instance +@pytest.fixture( + scope='session', + params=[ + # By default, only the admin user has access to system tables (which this integration fetches metrics from). + # We must make sure that users can setup the integration with either the admin user, or a specific user + # set up for the sole purpose of the Agent. 
+ pytest.param((None, None), id='unauthenticated_admin'), + pytest.param((AGENT_USER, AGENT_PASSWORD), id='authenticated'), + ], +) +def instance(request): + # type: (Any) -> Instance + user, password = request.param return { 'host': HOST, 'port': CONNECT_SERVER_PORT, + 'user': user, + 'password': password, } @@ -32,7 +44,7 @@ def dd_environment(instance): 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), } - conditions = [WaitFor(setup_cluster)] + conditions = [setup_cluster] log_patterns = [ r'Server ready, "server0".*', From 9e3c65469ad10eb321f4126c1e0427acd5479415 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 16:11:13 +0100 Subject: [PATCH 068/147] Add --disallow-untyped-defs mypy option --- rethinkdb/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index d3fcb0fed7b8f..b45647b7ab827 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -8,7 +8,7 @@ envlist = [testenv] dd_check_style = true dd_check_types = true -dd_mypy_args = --py2 datadog_checks/ tests/ +dd_mypy_args = --py2 --disallow-untyped-defs datadog_checks/ tests/ usedevelop = true platform = linux|darwin|win32 deps = From 157efd46c9f469b61ab5581684b5c1c68e96dcf4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 16:54:54 +0100 Subject: [PATCH 069/147] Inject port for all 3 servers in Compose file --- rethinkdb/tests/_types.py | 6 ++++++ rethinkdb/tests/cluster.py | 11 +++++----- rethinkdb/tests/common.py | 23 ++++++++++++--------- rethinkdb/tests/compose/docker-compose.yaml | 8 +++++-- rethinkdb/tests/conftest.py | 12 +++-------- rethinkdb/tests/test_rethinkdb.py | 18 ++++++++-------- 6 files changed, 42 insertions(+), 36 deletions(-) create mode 100644 rethinkdb/tests/_types.py diff --git a/rethinkdb/tests/_types.py b/rethinkdb/tests/_types.py new file mode 100644 index 0000000000000..ea61bd73ab931 --- /dev/null +++ b/rethinkdb/tests/_types.py @@ -0,0 +1,6 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Literal + +ServerName = Literal['server0', 'server1', 'server2', 'proxy'] diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 85e6e00b0b398..cc51b31e452b3 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -18,14 +18,13 @@ CLIENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, - CONNECT_SERVER_PORT, DATABASE, HEROES_TABLE, HEROES_TABLE_CONFIG, HEROES_TABLE_DOCUMENTS, HEROES_TABLE_INDEX_FIELD, HOST, - PROXY_PORT, + SERVER_PORTS, ) logger = logging.getLogger(__name__) @@ -38,7 +37,7 @@ def setup_cluster(): """ logger.debug('setup_cluster') - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: r.db_drop('test').run(conn) # Automatically created, but we don't use it and it would skew our metrics. # Cluster content. @@ -57,7 +56,7 @@ def setup_cluster(): # Simulate client activity. # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. 
- with r.connect(host=HOST, port=PROXY_PORT, user=CLIENT_USER) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER) as conn: response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) @@ -108,10 +107,10 @@ def _server_reconnected(conn): with temporarily_stop_service(service, compose_file=COMPOSE_FILE): with EnvVars(COMPOSE_ENV_VARS): - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: WaitFor(lambda: _server_disconnected(conn))() yield - with r.connect(host=HOST, port=CONNECT_SERVER_PORT) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 006f490e77419..3c0e872b7b16a 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -2,9 +2,12 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import os +from typing import Dict, List, Set from datadog_checks.utils.common import get_docker_hostname +from ._types import ServerName + HERE = os.path.dirname(os.path.abspath(__file__)) ROOT = os.path.dirname(os.path.dirname(HERE)) @@ -23,13 +26,10 @@ 'server0': ['default', 'us'], 'server1': ['default', 'us', 'primary'], 'server2': ['default', 'eu'], -} -SERVERS = set(SERVER_TAGS) +} # type: Dict[ServerName, List[str]] +SERVERS = {'server0', 'server1', 'server2'} # type: Set[ServerName] -CONNECT_SERVER_NAME = 'server0' -CONNECT_SERVER_PORT = 28015 - -PROXY_PORT = 28018 +SERVER_PORTS = {'server0': 28015, 'server1': 28016, 'server2': 28017, 'proxy': 28018} # type: Dict[ServerName, int] AGENT_USER = 'datadog-agent' AGENT_PASSWORD = 'r3th1nK' @@ -43,8 +43,8 @@ 'replicas': {'primary': 1, 'eu': 1}, 'primary_replica_tag': 'primary', } -HEROES_TABLE_SERVERS = {'server1', 'server2'} 
-HEROES_TABLE_PRIMARY_REPLICA = 'server1' +HEROES_TABLE_SERVERS = {'server1', 'server2'} # type: Set[ServerName] +HEROES_TABLE_PRIMARY_REPLICA = 'server1' # type: ServerName HEROES_TABLE_REPLICAS_BY_SHARD = {0: HEROES_TABLE_SERVERS} HEROES_TABLE_DOCUMENTS = [ { @@ -158,8 +158,11 @@ # Docker Compose configuration. COMPOSE_FILE = os.path.join(HERE, 'compose', 'docker-compose.yaml') + COMPOSE_ENV_VARS = env_vars = { 'RETHINKDB_IMAGE': IMAGE, - 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), - 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), + 'RETHINKDB_PORT_SERVER0': str(SERVER_PORTS['server0']), + 'RETHINKDB_PORT_SERVER1': str(SERVER_PORTS['server1']), + 'RETHINKDB_PORT_SERVER2': str(SERVER_PORTS['server2']), + 'RETHINKDB_PORT_PROXY': str(SERVER_PORTS['proxy']), } diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index fe4bcb086e7d7..f8075467ce800 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -9,7 +9,7 @@ services: container_name: rethinkdb-server0 command: rethinkdb --bind all --server-name server0 --server-tag us ports: - - ${RETHINKDB_CONNECT_SERVER_PORT}:28015 # Client driver port. + - ${RETHINKDB_PORT_SERVER0}:28015 # Client driver port. - 8080:8080 # Port for the web UI. Debugging only (not used by tests). 
rethinkdb-server1: @@ -21,6 +21,8 @@ services: - rethinkdb-server0 depends_on: - rethinkdb-server0 + ports: + - ${RETHINKDB_PORT_SERVER1}:2015 rethinkdb-server2: tty: true @@ -31,6 +33,8 @@ services: - rethinkdb-server0 depends_on: - rethinkdb-server0 + ports: + - ${RETHINKDB_PORT_SERVER2}:28015 rethinkdb-proxy0: tty: true @@ -42,4 +46,4 @@ services: depends_on: - rethinkdb-server0 ports: - - ${RETHINKDB_PROXY_PORT}:28015 + - ${RETHINKDB_PORT_PROXY}:28015 diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 4f2ce7b532875..2d7144fe3dae7 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -9,7 +9,7 @@ from datadog_checks.rethinkdb._types import Instance from .cluster import setup_cluster -from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_FILE, CONNECT_SERVER_PORT, HOST, IMAGE, PROXY_PORT +from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, HOST, SERVER_PORTS E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} @@ -29,7 +29,7 @@ def instance(request): user, password = request.param return { 'host': HOST, - 'port': CONNECT_SERVER_PORT, + 'port': SERVER_PORTS['server0'], 'user': user, 'password': password, } @@ -38,12 +38,6 @@ def instance(request): @pytest.fixture(scope='session') def dd_environment(instance): # type: (Instance) -> Iterator - env_vars = { - 'RETHINKDB_IMAGE': IMAGE, - 'RETHINKDB_CONNECT_SERVER_PORT': str(CONNECT_SERVER_PORT), - 'RETHINKDB_PROXY_PORT': str(PROXY_PORT), - } - conditions = [setup_cluster] log_patterns = [ @@ -53,6 +47,6 @@ def dd_environment(instance): r'Connected to proxy.*', ] - with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=env_vars, log_patterns=log_patterns): + with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS, log_patterns=log_patterns): config = {'instances': [instance]} yield config, E2E_METADATA diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 
b14d8412be816..d13f595f307dc 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -12,10 +12,10 @@ from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb._types import Instance, Metric +from ._types import ServerName from .cluster import temporarily_disconnect_server from .common import ( CLUSTER_STATISTICS_METRICS, - CONNECT_SERVER_NAME, DATABASE, HEROES_TABLE, HEROES_TABLE_PRIMARY_REPLICA, @@ -45,7 +45,7 @@ def test_check(aggregator, instance): aggregator.assert_all_metrics_covered() - service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + service_check_tags = ['server:server0'] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) for service_check in TABLE_STATUS_SERVICE_CHECKS: @@ -57,7 +57,7 @@ def test_check(aggregator, instance): @pytest.mark.parametrize('server_with_data', list(HEROES_TABLE_SERVERS)) @pytest.mark.usefixtures('dd_environment') def test_check_with_disconnected_server(aggregator, instance, server_with_data): - # type: (AggregatorStub, Instance, str) -> None + # type: (AggregatorStub, Instance, ServerName) -> None """ Verify that the check still runs to completion and sends appropriate service checks if one of the servers that holds data is disconnected. 
@@ -73,7 +73,7 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): aggregator.assert_all_metrics_covered() - service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + service_check_tags = ['server:server0'] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) table_status_tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] @@ -93,7 +93,7 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): def _assert_metrics(aggregator, disconnected_servers=None): - # type: (AggregatorStub, Set[str]) -> None + # type: (AggregatorStub, Set[ServerName]) -> None if disconnected_servers is None: disconnected_servers = set() @@ -108,7 +108,7 @@ def _assert_metrics(aggregator, disconnected_servers=None): def _assert_config_totals_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[str]) -> None + # type: (AggregatorStub, Set[ServerName]) -> None aggregator.assert_metric('rethinkdb.server.total', count=1, value=len(SERVERS) - len(disconnected_servers)) aggregator.assert_metric('rethinkdb.database.total', count=1, value=1) aggregator.assert_metric('rethinkdb.database.table.total', count=1, value=1, tags=['database:{}'.format(DATABASE)]) @@ -118,7 +118,7 @@ def _assert_config_totals_metrics(aggregator, disconnected_servers): def _assert_statistics_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[str]) -> None + # type: (AggregatorStub, Set[ServerName]) -> None for metric in CLUSTER_STATISTICS_METRICS: aggregator.assert_metric(metric, count=1, tags=[]) @@ -164,7 +164,7 @@ def _assert_table_status_metrics(aggregator): def _assert_server_status_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[str]) -> None + # type: (AggregatorStub, Set[ServerName]) -> None for metric in SERVER_STATUS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] @@ -205,7 +205,7 @@ def 
collect_and_fail(): with pytest.raises(Failure): check.check(instance) - service_check_tags = ['server:{}'.format(CONNECT_SERVER_NAME)] + service_check_tags = ['server:server0'] aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) From c9a00852f4267cb732542008726f3689f40ade53 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 28 Feb 2020 19:00:46 +0100 Subject: [PATCH 070/147] Setup driver TLS on server1, prepare test --- rethinkdb/tests/common.py | 5 ++++ rethinkdb/tests/compose/docker-compose.yaml | 17 +++++++++-- rethinkdb/tests/conftest.py | 28 +++++------------ rethinkdb/tests/data/tls/README.md | 21 +++++++++++++ rethinkdb/tests/data/tls/client.pem | 21 +++++++++++++ rethinkdb/tests/data/tls/server.key | 27 +++++++++++++++++ rethinkdb/tests/data/tls/server.pem | 22 ++++++++++++++ rethinkdb/tests/test_rethinkdb.py | 33 +++++++++++++++++++++ 8 files changed, 152 insertions(+), 22 deletions(-) create mode 100644 rethinkdb/tests/data/tls/README.md create mode 100644 rethinkdb/tests/data/tls/client.pem create mode 100644 rethinkdb/tests/data/tls/server.key create mode 100644 rethinkdb/tests/data/tls/server.pem diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 3c0e872b7b16a..3810185350af8 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -159,10 +159,15 @@ COMPOSE_FILE = os.path.join(HERE, 'compose', 'docker-compose.yaml') +DRIVER_TLS_KEY = os.path.join(HERE, 'data', 'tls', 'server.key') +DRIVER_TLS_CERT = os.path.join(HERE, 'data', 'tls', 'server.pem') + COMPOSE_ENV_VARS = env_vars = { 'RETHINKDB_IMAGE': IMAGE, 'RETHINKDB_PORT_SERVER0': str(SERVER_PORTS['server0']), 'RETHINKDB_PORT_SERVER1': str(SERVER_PORTS['server1']), 'RETHINKDB_PORT_SERVER2': str(SERVER_PORTS['server2']), 'RETHINKDB_PORT_PROXY': str(SERVER_PORTS['proxy']), + 'RETHINKDB_DRIVER_TLS_KEY': DRIVER_TLS_KEY, + 'RETHINKDB_DRIVER_TLS_CERT': DRIVER_TLS_CERT, } diff --git 
a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index f8075467ce800..9673ed6045621 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -16,13 +16,26 @@ services: tty: true image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server1 - command: rethinkdb --join rethinkdb-server0:29015 --bind all --server-name server1 --server-tag us --server-tag primary + command: | + rethinkdb + --join rethinkdb-server0:29015 + --bind all + --server-name server1 + --server-tag us + --server-tag primary + --http-tls-key /opt/server.key + --http-tls-cert /opt/server.pem + --driver-tls-key /opt/server.key + --driver-tls-cert /opt/server.pem + volumes: + - ${RETHINKDB_DRIVER_TLS_KEY}:/opt/server.key + - ${RETHINKDB_DRIVER_TLS_CERT}:/opt/server.pem links: - rethinkdb-server0 depends_on: - rethinkdb-server0 ports: - - ${RETHINKDB_PORT_SERVER1}:2015 + - ${RETHINKDB_PORT_SERVER1}:28015 rethinkdb-server2: tty: true diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 2d7144fe3dae7..fdc8e89f502b1 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -1,7 +1,7 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Iterator +from typing import Iterator import pytest @@ -11,27 +11,15 @@ from .cluster import setup_cluster from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, HOST, SERVER_PORTS -E2E_METADATA = {'start_commands': ['pip install rethinkdb==2.4.4']} - - -@pytest.fixture( - scope='session', - params=[ - # By default, only the admin user has access to system tables (which this integration fetches metrics from). - # We must make sure that users can setup the integration with either the admin user, or a specific user - # set up for the sole purpose of the Agent. 
- pytest.param((None, None), id='unauthenticated_admin'), - pytest.param((AGENT_USER, AGENT_PASSWORD), id='authenticated'), - ], -) -def instance(request): - # type: (Any) -> Instance - user, password = request.param + +@pytest.fixture(scope='session') +def instance(): + # type: () -> Instance return { 'host': HOST, 'port': SERVER_PORTS['server0'], - 'user': user, - 'password': password, + 'user': AGENT_USER, + 'password': AGENT_PASSWORD, } @@ -49,4 +37,4 @@ def dd_environment(instance): with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS, log_patterns=log_patterns): config = {'instances': [instance]} - yield config, E2E_METADATA + yield config diff --git a/rethinkdb/tests/data/tls/README.md b/rethinkdb/tests/data/tls/README.md new file mode 100644 index 0000000000000..dde1c51089364 --- /dev/null +++ b/rethinkdb/tests/data/tls/README.md @@ -0,0 +1,21 @@ +# TLS certificates + +TLS certificates were generated using [`trustme-cli`](https://github.com/sethmlarson/trustme-cli): + +```bash +trustme-cli --common-name localhost +mv server.key server.pem client.pem rethinkdb/tests/data/tls/ +``` + +To connect to a server configured with these certificates, use: + +```python +import os +from rethinkdb import r + +ca_certs = os.path.join('rethinkdb', 'tests', 'data', 'tls', 'client.pem') +port = 28016 # TODO: adjust to the server you want to connect to. 
+conn = r.connect(port=port, ssl={'ca_certs': ca_certs}) +``` + +See also: https://rethinkdb.com/docs/security/#securing-the-driver-port diff --git a/rethinkdb/tests/data/tls/client.pem b/rethinkdb/tests/data/tls/client.pem new file mode 100644 index 0000000000000..9db7292eaa46c --- /dev/null +++ b/rethinkdb/tests/data/tls/client.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDfzCCAmegAwIBAgIUQSxcZpbgiAtbD0MiJ9/W4I1bANAwDQYJKoZIhvcNAQEL +BQAwQDEXMBUGA1UECgwOdHJ1c3RtZSB2MC42LjAxJTAjBgNVBAsMHFRlc3Rpbmcg +Q0EgI1huaHFmcFdJWWp5Rzc2MmwwHhcNMDAwMTAxMDAwMDAwWhcNMzgwMTAxMDAw +MDAwWjBAMRcwFQYDVQQKDA50cnVzdG1lIHYwLjYuMDElMCMGA1UECwwcVGVzdGlu +ZyBDQSAjWG5ocWZwV0lZanlHNzYybDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC +AQoCggEBAKKxnEEXL749nMdsvvBWIvUvwBx7gpKopxFOU1LhwlCATDESwCP6GPDw +/cgYIxbINDATLk8JMxduWKkC4asfVutUCdlZoNG7iCXU2CxeZPmxvh0Or6m0yCO7 +XYDeihXSsFvmvZw2JYMdzCFZ7ltWcqtzSvFuKZQrBoEPW1o2+fiRwNB2AFWR0Ez1 +1Y/d7///Od6CNoanT9fRY6NFO1k4NH+Netj2igoJX1aRnOxmzVEPwYYkPk/11cJt +6nteYblPbQqluBD/8bEfDB9rUbWi4TFG7ilqGgRmestaIV467I0iNc42qmVVz870 +Kuanpw4i9O4CmfSy/NDtHWloNdaXuR0CAwEAAaNxMG8wHQYDVR0OBBYEFI8n9Qo4 +cCsyQyihaEN6dOv5lKLfMBIGA1UdEwEB/wQIMAYBAf8CAQkwDgYDVR0PAQH/BAQD +AgEGMCoGA1UdJQEB/wQgMB4GCCsGAQUFBwMCBggrBgEFBQcDAQYIKwYBBQUHAwMw +DQYJKoZIhvcNAQELBQADggEBAFJ2dhOSK+8vuUGWjL+7uSLwzb79EpTga9xa3rg0 +wfA18LZCapRSacvfG2lhnTgqfKp8JsecHMZfhqXFoqhMXZ3Bo9YTKmT7UlasPnbb +09/YcYXCUanBqi0BA+2Onl4hi5PsWdvqF9/AypQrjY7XsrbuAfnihJvmtbqCIMv7 +47f+qvFBKHGsXwcpCICB6woLRSN9JMQ2t0eJ9gQANG2smZQ/aHKzfhdn+qUu1f5u +3/QpZ3huvKcdw04McCDtOeqbx3RbQ4aczd1ZkUC396fVYqQw0Dtg/AEKMz0fZLvH +2ZZbuVCpKA8i652anYhVRsZG8xWYnP/VhTfBlT9AbiyT9AI= +-----END CERTIFICATE----- diff --git a/rethinkdb/tests/data/tls/server.key b/rethinkdb/tests/data/tls/server.key new file mode 100644 index 0000000000000..c12d805cd61e0 --- /dev/null +++ b/rethinkdb/tests/data/tls/server.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAy4ZuvuprIyUAm8esD6QM2TKf0dwlpgvNRL+O7QhY9+XVq4+S 
+ww7pIVcVjqzoVHRs58HyD5ekHlT6Uc+d9Ql+KC5JlfAT1M7klslcBQfFCzYMCB7N +JDc9cjTzls1hUtrpekRqonn6FSreZHD8uHGrNPicxxLp1KsGmWuSQP+a9Lkfq8k1 +/O3hZb+4ypVbigqxmLU9hrJoHpvVIqRUdhiPYjtovfHk94cJThWi/chtdJxxChCS +fXuCwmMsbCz24RBsZStuCLqIkLtycc4kAuahOFUZDMNAejIG01bRtmvCHjYsQq0B +92DB0H6TUGIa9wt5kcSSS2gf8XAEtQ3pC8MsQwIDAQABAoIBAHSvoHLo54AAyPaH +ZgiZn5wvQB4Lv7IRdiCCSylu7cNDDFwONrCKrfKlgQCJ9bHmBkJhmayC3l9djR62 +j/Na+++BikLioBwUek5RJ+bia4Bf/knxjt/CpAIEhdvh6HieddnZu+FnWZAlQdgI +R3xf/y6hkxE3sMRzQZdYa0PEyEyhiLgl7iFePaHOQ24waSU2twGq4rlR2i3NVYA6 +kAMiS7CWNP7EGL/v47qZLEqDIbK75mHCFCspz4oTvS8gTtojd0YOg1JNlwft4OUU +wz+9StLgY2wOH6OFfhakig1y2hdt0JnqyEyvCGTVEzaHYIAoDk/bIHZAJZ35cqzQ ++ht6W5ECgYEA7EYnkaxDYuDWb+lhbCi3ZV9+srse1N1rArUtixZk9CTcoCTWpnmF +hjtamMwUcgI37XrqVSiBxOBLNN1//0pXK7Kj2VSN4v4zU/2Vm0QSyhH1CdFnDoAs +Dlqvn2hQtxYBOWoa+2J2rjwAdA8/zRVWTqlf5JpAnlb8h+OYqglxiycCgYEA3IRX +S0jh/Od1WynOPtap5G2ifPoC7xXqnBMeyQ//mHC3I+p2ZQqLcO7cE/BvstwVAnbu +mnJTudm7XeLSiQ/HX6xtKdb03VlEZ07cBaDb+LlsbiaRvFCSHwiA0df1TQzeImP1 +oi+JxWR2i6RRHh/JNXuFlYzZbXnq5ESy/rBrt4UCgYAHGFQ326RqY7YMxkVWqiJX +uPZlB5l1avC7tBUaCCIf8rttU63ecefAkCe4TuiOQ+LKDtbal8zNzK51d/FQCTt0 +5HhnHlLkbh28d/D2KKUnzEI/eTpPBSpNhAuiDc/er5p5zuXghqRptwOMeKjBz9I3 +qn5mrvshxhrvObZr5Ly0IQKBgAfHg2zcfVx2reUxbF07JXMxivHtv/y/QS+QXR2q +utGd6FgUBWk7HgA17P2nFcmxiew0VeaM8fc0fy+ouNStAdOWOaOoOas1YvxiA9EP +A7OalwCry6hhD0aY/jJo8/Zr5cpAzIGUO8AC80up2FK/vFFush+8gE8kZez3n3js +sGAhAoGAPHYw2QpAkF37xJJEBPelGcPvbTOpgkQt6EVsjEnWMU+ljfGr2dqOqBHn +++yH9b5yZzYQhh0o6rB6cBe+tUJ/oNxckT2uu5ovXtAmIj/NLNNU+HAjlTUPWGTW +p0VLlKYwg76yy2N4dbJy9I3a0I9gKXZH1s4K2i+FGJQhWX5/e/M= +-----END RSA PRIVATE KEY----- diff --git a/rethinkdb/tests/data/tls/server.pem b/rethinkdb/tests/data/tls/server.pem new file mode 100644 index 0000000000000..aee254d2c5d38 --- /dev/null +++ b/rethinkdb/tests/data/tls/server.pem @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDpjCCAo6gAwIBAgIUYJxtZnXFrD4YsEygUZDBe0DTzmIwDQYJKoZIhvcNAQEL +BQAwQDEXMBUGA1UECgwOdHJ1c3RtZSB2MC42LjAxJTAjBgNVBAsMHFRlc3Rpbmcg 
+Q0EgI1huaHFmcFdJWWp5Rzc2MmwwHhcNMDAwMTAxMDAwMDAwWhcNMzgwMTAxMDAw +MDAwWjBWMRcwFQYDVQQKDA50cnVzdG1lIHYwLjYuMDEnMCUGA1UECwweVGVzdGlu +ZyBjZXJ0ICNNRkF3SVQ3RU5Zd3NnVTZ5MRIwEAYDVQQDDAlsb2NhbGhvc3QwggEi +MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDLhm6+6msjJQCbx6wPpAzZMp/R +3CWmC81Ev47tCFj35dWrj5LDDukhVxWOrOhUdGznwfIPl6QeVPpRz531CX4oLkmV +8BPUzuSWyVwFB8ULNgwIHs0kNz1yNPOWzWFS2ul6RGqiefoVKt5kcPy4cas0+JzH +EunUqwaZa5JA/5r0uR+ryTX87eFlv7jKlVuKCrGYtT2Gsmgem9UipFR2GI9iO2i9 +8eT3hwlOFaL9yG10nHEKEJJ9e4LCYyxsLPbhEGxlK24IuoiQu3JxziQC5qE4VRkM +w0B6MgbTVtG2a8IeNixCrQH3YMHQfpNQYhr3C3mRxJJLaB/xcAS1DekLwyxDAgMB +AAGjgYEwfzAdBgNVHQ4EFgQUIo0NrhsVWeSddfnvrXACJLVGVxEwDAYDVR0TAQH/ +BAIwADAfBgNVHSMEGDAWgBSPJ/UKOHArMkMooWhDenTr+ZSi3zAvBgNVHREBAf8E +JTAjgglsb2NhbGhvc3SHBH8AAAGHEAAAAAAAAAAAAAAAAAAAAAEwDQYJKoZIhvcN +AQELBQADggEBACdg5dhTitGlWEiV2qTDApUBhGOp0ZyLA6Gq2YCXdSc+nwgkq8j6 +mAVeQXu14zri+aXLUZJK7wsbCVErIb7RWbDAoiQYw9gKd8soNE5vVCKDueCXUN24 +5oPosNBOZI4CkSS/dQrOJPXGvmyn40e46fZI5AWwKzQ49+nMQpiI6hD4H01fW1pa +sVC2pJ8G+TWxG6CmGsGoBNlsoilzGwC6u4zZlue1CWdcet7mS4TMCi5L9TVQXh66 +CS5FQX/BXspjHnsOvCbZ/U+zLq9pi24FamG/t574Ym6mbgOwt2/lW3jZpLd6LZfk +5T+9KTr2DcPcb5VnwYrXVunv6s7eBrNPP+8= +-----END CERTIFICATE----- diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index d13f595f307dc..9813ed62b87fe 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -23,6 +23,7 @@ HEROES_TABLE_SERVERS, REPLICA_STATISTICS_METRICS, RETHINKDB_VERSION, + SERVER_PORTS, SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, SERVER_TAGS, @@ -53,6 +54,38 @@ def test_check(aggregator, instance): aggregator.assert_service_check(service_check, RethinkDBCheck.OK, count=1, tags=tags) +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_check_as_admin(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = instance.copy() + instance.pop('user') + instance.pop('password') + + check = RethinkDBCheck('rethinkdb', {}, [instance]) 
+ check.check(instance) + + _assert_metrics(aggregator) + aggregator.assert_all_metrics_covered() + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1) + + +@pytest.mark.xfail(reason="TODO") +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_check_connect_to_server_with_tls(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = instance.copy() + instance['port'] = SERVER_PORTS['server1'] + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check(instance) + + _assert_metrics(aggregator) + aggregator.assert_all_metrics_covered() + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1) + + @pytest.mark.integration @pytest.mark.parametrize('server_with_data', list(HEROES_TABLE_SERVERS)) @pytest.mark.usefixtures('dd_environment') From 9912ed16fb0e30fe78d7546bcbfdcfb699de7ea3 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 10:30:03 +0100 Subject: [PATCH 071/147] Implement TLS client cert support --- rethinkdb/datadog_checks/rethinkdb/_config.py | 6 ++++- .../datadog_checks/rethinkdb/_queries.py | 13 ++++++++--- rethinkdb/datadog_checks/rethinkdb/_types.py | 4 +++- .../datadog_checks/rethinkdb/_version.py | 3 ++- rethinkdb/tests/common.py | 22 +++++++++++++------ rethinkdb/tests/compose/docker-compose.yaml | 4 ++-- rethinkdb/tests/test_rethinkdb.py | 6 +++-- 7 files changed, 41 insertions(+), 17 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 278de278d3850..33da5abad7f5c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -37,6 +37,7 @@ def __init__(self, instance): port = instance.get('port', 28015) user = instance.get('user') password = instance.get('password') + tls_ca_cert = instance.get('tls_ca_cert') if not isinstance(host, str): raise ConfigurationError('host must be a string 
(got {!r})'.format(type(host))) @@ -51,6 +52,8 @@ def __init__(self, instance): self._port = port # type: int self._user = user # type: Optional[str] self._password = password # type: Optional[str] + self._tls_ca_cert = tls_ca_cert # type: Optional[str] + self._query_engine = QueryEngine(r=rethinkdb.r) self._collect_funcs = [ @@ -81,8 +84,9 @@ def connect(self): port = self._port user = self._user password = self._password + tls_ca_cert = self._tls_ca_cert - return self._query_engine.connect(host, port, user=user, password=password) + return self._query_engine.connect(host, port, user=user, password=password, tls_ca_cert=tls_ca_cert) def collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 660515a35a1ea..524a9153ee759 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -36,9 +36,16 @@ def __init__(self, r): # type: (rethinkdb.RethinkDB) -> None self._r = r - def connect(self, host='localhost', port=28015, user=None, password=None, **kwargs): - # type: (str, int, str, str, **Any) -> rethinkdb.net.Connection - return self._r.connect(host, port, user=user, password=password, **kwargs) + def connect(self, host='localhost', port=28015, user=None, password=None, tls_ca_cert=None, **kwargs): + # type: (str, int, str, str, str, **Any) -> rethinkdb.net.Connection + """ + Establish a connection to a RethinkDB server. 
+ """ + ssl = ( + {'ca_certs': tls_ca_cert} if tls_ca_cert is not None else None + ) # See: https://rethinkdb.com/docs/security/#telling-rethinkdb-to-use-your-certificate + + return self._r.connect(host, port, user=user, password=password, ssl=ssl, **kwargs) def get_connected_server_version_string(self, conn): # type: (rethinkdb.net.Connection) -> str diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 9e380dc2343f8..888d7431eab38 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -13,7 +13,9 @@ {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, ) -Instance = TypedDict('Instance', {'host': str, 'port': int, 'user': str, 'password': str}, total=False) +Instance = TypedDict( + 'Instance', {'host': str, 'port': int, 'user': str, 'password': str, 'tls_ca_cert': str}, total=False +) # Configuration documents. diff --git a/rethinkdb/datadog_checks/rethinkdb/_version.py b/rethinkdb/datadog_checks/rethinkdb/_version.py index ea2d0d82ff00d..6a64b541282e5 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_version.py +++ b/rethinkdb/datadog_checks/rethinkdb/_version.py @@ -13,7 +13,8 @@ def parse_version(rethinkdb_version_string): Given a RethinkDB version string, extract the SemVer version. https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 - Example: + Example + ------- >>> parse_version('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 (tags/RELEASE_600/final))') '2.4.0~0bionic' """ diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 3810185350af8..f6f369618c9e0 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -19,8 +19,8 @@ HOST = get_docker_hostname() -# Cluster configuration. -# NOTE: server information used below is tightly coupled to the Docker Compose setup. +# Servers. 
+# NOTE: server information is tightly coupled to the Docker Compose setup. SERVER_TAGS = { 'server0': ['default', 'us'], @@ -31,10 +31,21 @@ SERVER_PORTS = {'server0': 28015, 'server1': 28016, 'server2': 28017, 'proxy': 28018} # type: Dict[ServerName, int] +# Users. + AGENT_USER = 'datadog-agent' AGENT_PASSWORD = 'r3th1nK' CLIENT_USER = 'doggo' +# TLS. + +TLS_SERVER = 'server1' # type: ServerName +TLS_DRIVER_KEY = os.path.join(HERE, 'data', 'tls', 'server.key') +TLS_DRIVER_CERT = os.path.join(HERE, 'data', 'tls', 'server.pem') +TLS_CLIENT_CERT = os.path.join(HERE, 'data', 'tls', 'client.pem') + +# Database content. + DATABASE = 'doghouse' HEROES_TABLE = 'heroes' @@ -159,15 +170,12 @@ COMPOSE_FILE = os.path.join(HERE, 'compose', 'docker-compose.yaml') -DRIVER_TLS_KEY = os.path.join(HERE, 'data', 'tls', 'server.key') -DRIVER_TLS_CERT = os.path.join(HERE, 'data', 'tls', 'server.pem') - COMPOSE_ENV_VARS = env_vars = { 'RETHINKDB_IMAGE': IMAGE, 'RETHINKDB_PORT_SERVER0': str(SERVER_PORTS['server0']), 'RETHINKDB_PORT_SERVER1': str(SERVER_PORTS['server1']), 'RETHINKDB_PORT_SERVER2': str(SERVER_PORTS['server2']), 'RETHINKDB_PORT_PROXY': str(SERVER_PORTS['proxy']), - 'RETHINKDB_DRIVER_TLS_KEY': DRIVER_TLS_KEY, - 'RETHINKDB_DRIVER_TLS_CERT': DRIVER_TLS_CERT, + 'RETHINKDB_TLS_DRIVER_KEY': TLS_DRIVER_KEY, + 'RETHINKDB_TLS_DRIVER_CERT': TLS_DRIVER_CERT, } diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 9673ed6045621..73c2fd595f2ab 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -28,8 +28,8 @@ services: --driver-tls-key /opt/server.key --driver-tls-cert /opt/server.pem volumes: - - ${RETHINKDB_DRIVER_TLS_KEY}:/opt/server.key - - ${RETHINKDB_DRIVER_TLS_CERT}:/opt/server.pem + - ${RETHINKDB_TLS_DRIVER_KEY}:/opt/server.key + - ${RETHINKDB_TLS_DRIVER_CERT}:/opt/server.pem links: - rethinkdb-server0 depends_on: diff --git a/rethinkdb/tests/test_rethinkdb.py 
b/rethinkdb/tests/test_rethinkdb.py index 9813ed62b87fe..e1d6f9e784384 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -27,6 +27,8 @@ SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, SERVER_TAGS, + TLS_SERVER, + TLS_CLIENT_CERT, SERVERS, TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, @@ -70,13 +72,13 @@ def test_check_as_admin(aggregator, instance): aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1) -@pytest.mark.xfail(reason="TODO") @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_check_connect_to_server_with_tls(aggregator, instance): # type: (AggregatorStub, Instance) -> None instance = instance.copy() - instance['port'] = SERVER_PORTS['server1'] + instance['port'] = SERVER_PORTS[TLS_SERVER] + instance['tls_ca_cert'] = TLS_CLIENT_CERT check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) From b900c48ef55410e6b659d45fc8d7d355ae4bc48a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 12:17:08 +0100 Subject: [PATCH 072/147] Introduce Connection interface --- rethinkdb/datadog_checks/rethinkdb/_config.py | 22 ++++--- .../datadog_checks/rethinkdb/_connections.py | 63 +++++++++++++++++++ .../datadog_checks/rethinkdb/_exceptions.py | 11 ++++ .../rethinkdb/_metrics/config.py | 7 +-- .../rethinkdb/_metrics/current_issues.py | 7 +-- .../rethinkdb/_metrics/statistics.py | 13 ++-- .../rethinkdb/_metrics/statuses.py | 9 +-- .../rethinkdb/_metrics/system_jobs.py | 7 +-- .../datadog_checks/rethinkdb/_queries.py | 62 ++++++++---------- rethinkdb/datadog_checks/rethinkdb/_types.py | 3 - .../datadog_checks/rethinkdb/rethinkdb.py | 10 +-- rethinkdb/tests/cluster.py | 53 ++++++++-------- rethinkdb/tests/test_rethinkdb.py | 8 +-- rethinkdb/tests/unit/test_metrics.py | 8 +-- rethinkdb/tests/unit/utils.py | 54 ++++------------ 15 files changed, 180 insertions(+), 157 deletions(-) create mode 100644 
rethinkdb/datadog_checks/rethinkdb/_connections.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/_exceptions.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 33da5abad7f5c..121825a135d62 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -9,6 +9,8 @@ from datadog_checks.base import ConfigurationError +from ._connections import Connection, RethinkDBConnection +from ._exceptions import CouldNotConnect from ._metrics.config import collect_config_totals from ._metrics.current_issues import collect_current_issues from ._metrics.statistics import ( @@ -54,7 +56,8 @@ def __init__(self, instance): self._password = password # type: Optional[str] self._tls_ca_cert = tls_ca_cert # type: Optional[str] - self._query_engine = QueryEngine(r=rethinkdb.r) + self._r = rethinkdb.r + self._query_engine = QueryEngine(r=self._r) self._collect_funcs = [ collect_config_totals, @@ -66,7 +69,7 @@ def __init__(self, instance): collect_table_status, collect_system_jobs, collect_current_issues, - ] # type: List[Callable[[QueryEngine, rethinkdb.net.Connection], Iterator[Metric]]] + ] # type: List[Callable[[QueryEngine, Connection], Iterator[Metric]]] @property def host(self): @@ -79,23 +82,28 @@ def port(self): return self._port def connect(self): - # type: () -> rethinkdb.net.Connection + # type: () -> Connection host = self._host port = self._port user = self._user password = self._password - tls_ca_cert = self._tls_ca_cert + ssl = {'ca_certs': self._tls_ca_cert} if self._tls_ca_cert is not None else None - return self._query_engine.connect(host, port, user=user, password=password, tls_ca_cert=tls_ca_cert) + try: + conn = self._r.connect(host=host, port=port, user=user, password=password, ssl=ssl) + except rethinkdb.errors.ReqlDriverError as exc: + raise CouldNotConnect(exc) + + return RethinkDBConnection(conn) def collect_metrics(self, conn): - # type: 
(rethinkdb.net.Connection) -> Iterator[Metric] + # type: (Connection) -> Iterator[Metric] for collect in self._collect_funcs: for metric in collect(self._query_engine, conn): yield metric def get_connected_server_version(self, conn): - # type: (rethinkdb.net.Connection) -> str + # type: (Connection) -> str """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. diff --git a/rethinkdb/datadog_checks/rethinkdb/_connections.py b/rethinkdb/datadog_checks/rethinkdb/_connections.py new file mode 100644 index 0000000000000..f2519cac81309 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_connections.py @@ -0,0 +1,63 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +RethinkDB connection interface and implementations. +""" +from __future__ import absolute_import + +from typing import Any, TypedDict + +import rethinkdb + +# See: https://rethinkdb.com/api/python/server +ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) + + +class Connection: + """ + Base class and interface for connection objects. + """ + + def __enter__(self): + # type: () -> Connection + return self + + def __exit__(self, *args): + # type: (*Any) -> None + pass + + def server(self): + # type: () -> ConnectionServer + raise NotImplementedError + + def run(self, query): + # type: (rethinkdb.RqlQuery) -> Any + raise NotImplementedError + + +class RethinkDBConnection(Connection): + """ + A connection backed by an actual RethinkDB connection. 
+ """ + + def __init__(self, conn): + # type: (rethinkdb.net.Connection) -> None + self._conn = conn + + def __enter__(self): + # type: () -> RethinkDBConnection + self._conn.__enter__() + return self + + def __exit__(self, *args): + # type: (*Any) -> Any + return self._conn.__exit__(*args) + + def server(self): + # type: () -> ConnectionServer + return self._conn.server() + + def run(self, query): + # type: (rethinkdb.RqlQuery) -> Any + return query.run(self._conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/_exceptions.py b/rethinkdb/datadog_checks/rethinkdb/_exceptions.py new file mode 100644 index 0000000000000..8a48351725fcc --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/_exceptions.py @@ -0,0 +1,11 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + + +class RethinkDBError(Exception): + """Base class for exceptions raised by the RethinkDB check.""" + + +class CouldNotConnect(RethinkDBError): + """Failed to connect to a RethinkDB server.""" diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py index 033cac77a7807..7bb1662a44abf 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py @@ -1,13 +1,10 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - import logging from typing import Iterator -import rethinkdb - +from .._connections import Connection from .._queries import QueryEngine from .._types import Metric @@ -15,7 +12,7 @@ def collect_config_totals(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect aggregated metrics about cluster configuration. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py index 4671ebaaf2925..e4f7133baf214 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py @@ -1,18 +1,15 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - from typing import Iterator -import rethinkdb - +from .._connections import Connection from .._queries import QueryEngine from .._types import Metric def collect_current_issues(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about current system issues. diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py index cf6f089d8c07c..af7dd0b1b8448 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py @@ -6,13 +6,10 @@ See: https://rethinkdb.com/docs/system-stats/ """ -from __future__ import absolute_import - import logging from typing import Iterator -import rethinkdb - +from .._connections import Connection from .._queries import QueryEngine from .._types import Metric @@ -20,7 +17,7 @@ def collect_cluster_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] logger.debug('collect_cluster_statistics') stats = engine.query_cluster_stats(conn) @@ -51,7 +48,7 @@ def collect_cluster_statistics(engine, conn): def collect_server_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] logger.debug('collect_server_statistics') for server, 
stats in engine.query_servers_with_stats(conn): @@ -121,7 +118,7 @@ def collect_server_statistics(engine, conn): def collect_table_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] logger.debug('collect_table_statistics') for table, stats in engine.query_tables_with_stats(conn): @@ -149,7 +146,7 @@ def collect_table_statistics(engine, conn): def collect_replica_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] logger.debug('collect_replica_statistics') for table, server, replica, stats in engine.query_replicas_with_stats(conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py index 0a3f6e535656b..acc584d18522f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py @@ -1,17 +1,14 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - import datetime as dt import logging import time from typing import Iterator -import rethinkdb - from datadog_checks.base import AgentCheck +from .._connections import Connection from .._queries import QueryEngine from .._types import Metric @@ -19,7 +16,7 @@ def collect_table_status(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about table statuses. @@ -89,7 +86,7 @@ def collect_table_status(engine, conn): def collect_server_status(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about server statuses. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py index 65ef0744322f7..c5ca55d2f898b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py @@ -1,13 +1,10 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - import logging from typing import Iterator -import rethinkdb - +from .._connections import Connection from .._queries import QueryEngine from .._types import Metric @@ -15,7 +12,7 @@ def collect_system_jobs(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about system jobs. diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 524a9153ee759..0e1a588bf614f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -14,10 +14,10 @@ import rethinkdb +from ._connections import Connection, ConnectionServer from ._types import ( ClusterStats, ConfigTotals, - ConnectionServer, Job, JoinRow, ReplicaStats, @@ -32,34 +32,26 @@ class QueryEngine: - def __init__(self, r): + def __init__(self, r=None): # type: (rethinkdb.RethinkDB) -> None - self._r = r + if r is None: + r = rethinkdb.r - def connect(self, host='localhost', port=28015, user=None, password=None, tls_ca_cert=None, **kwargs): - # type: (str, int, str, str, str, **Any) -> rethinkdb.net.Connection - """ - Establish a connection to a RethinkDB server. 
- """ - ssl = ( - {'ca_certs': tls_ca_cert} if tls_ca_cert is not None else None - ) # See: https://rethinkdb.com/docs/security/#telling-rethinkdb-to-use-your-certificate - - return self._r.connect(host, port, user=user, password=password, ssl=ssl, **kwargs) + self._r = r def get_connected_server_version_string(self, conn): - # type: (rethinkdb.net.Connection) -> str + # type: (Connection) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. """ r = self._r # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer - server_status = r.db('rethinkdb').table('server_status').get(server['id']).run(conn) # type: ServerStatus + server_status = conn.run(r.db('rethinkdb').table('server_status').get(server['id'])) # type: ServerStatus return server_status['process']['version'] def query_config_totals(self, conn): - # type: (rethinkdb.net.Connection) -> ConfigTotals + # type: (Connection) -> ConfigTotals r = self._r table_config = r.db('rethinkdb').table('table_config') @@ -69,17 +61,15 @@ def query_config_totals(self, conn): # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. # See: https://github.com/rethinkdb/rethinkdb/issues/2067 - tables_per_database = table_config.group('db').count().run(conn) # type: Mapping[str, int] + tables_per_database = conn.run(table_config.group('db').count()) # type: Mapping[str, int] - secondary_indexes_per_table = ( + secondary_indexes_per_table = conn.run( # NOTE: this is an example of a map-reduce query. 
# See: https://rethinkdb.com/docs/map-reduce/#a-more-complex-example table_config.pluck('name', 'indexes') .concat_map(lambda row: row['indexes'].map(lambda _: {'table': row['name']})) .group('table') .count() - ).run( - conn ) # type: Mapping[str, int] totals = { @@ -87,19 +77,19 @@ def query_config_totals(self, conn): 'databases': db_config.count(), 'tables_per_database': tables_per_database, 'secondary_indexes_per_table': secondary_indexes_per_table, - } # type: ConfigTotals # Enforce keys to match. (Values are `Any`, so ignored by type checker.) + } # type: dict - return r.expr(totals).run(conn) + return conn.run(r.expr(totals)) def query_cluster_stats(self, conn): - # type: (rethinkdb.net.Connection) -> ClusterStats + # type: (Connection) -> ClusterStats """ Retrieve statistics about the cluster. """ - return self._r.db('rethinkdb').table('stats').get(['cluster']).run(conn) + return conn.run(self._r.db('rethinkdb').table('stats').get(['cluster'])) def query_servers_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] + # type: (Connection) -> Iterator[Tuple[Server, ServerStats]] """ Retrieve each server in the cluster along with its statistics. 
""" @@ -112,7 +102,7 @@ def query_servers_with_stats(self, conn): stats = r.db('rethinkdb').table('stats') server_config = r.db('rethinkdb').table('server_config') - rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] + rows = conn.run(stats.filter(is_server_stats_row).eq_join(server_id, server_config)) # type: Iterator[JoinRow] for row in rows: server_stats = row['left'] # type: ServerStats @@ -120,7 +110,7 @@ def query_servers_with_stats(self, conn): yield server, server_stats def query_tables_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] + # type: (Connection) -> Iterator[Tuple[Table, TableStats]] """ Retrieve each table in the cluster along with its statistics. """ @@ -133,7 +123,7 @@ def query_tables_with_stats(self, conn): stats = r.db('rethinkdb').table('stats') table_config = r.db('rethinkdb').table('table_config') - rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] + rows = conn.run(stats.filter(is_table_stats_row).eq_join(table_id, table_config)) # type: Iterator[JoinRow] for row in rows: table_stats = row['left'] # type: TableStats @@ -141,7 +131,7 @@ def query_tables_with_stats(self, conn): yield table, table_stats def query_replicas_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] + # type: (Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] """ Retrieve each replica (table/server pair) in the cluster along with its statistics. 
""" @@ -197,7 +187,7 @@ def query_replicas_with_stats(self, conn): ) ) - rows = query.run(conn) # type: Iterator[Mapping[str, Any]] + rows = conn.run(query) # type: Iterator[Mapping[str, Any]] for row in rows: table = row['table'] # type: Table @@ -207,22 +197,22 @@ def query_replicas_with_stats(self, conn): yield table, server, replica, replica_stats def query_table_status(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] + # type: (Connection) -> Iterator[TableStatus] """ Retrieve the status of each table in the cluster. """ - return self._r.db('rethinkdb').table('table_status').run(conn) + return conn.run(self._r.db('rethinkdb').table('table_status')) def query_server_status(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] + # type: (Connection) -> Iterator[ServerStatus] """ Retrieve the status of each server in the cluster. """ - return self._r.db('rethinkdb').table('server_status').run(conn) + return conn.run(self._r.db('rethinkdb').table('server_status')) def query_system_jobs(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Job] + # type: (Connection) -> Iterator[Job] """ Retrieve all the currently running system jobs. """ - return self._r.db('rethinkdb').table('jobs').run(conn) + return conn.run(self._r.db('rethinkdb').table('jobs')) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 888d7431eab38..1bbe78de26aa1 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -172,6 +172,3 @@ # NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. 
# See: https://github.com/python/mypy/issues/3863 JoinRow = TypedDict('JoinRow', {'left': Any, 'right': Any}) - -# See: https://rethinkdb.com/api/python/server -ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 2c362dc4b8a49..a00c143979abd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -8,12 +8,12 @@ from contextlib import contextmanager from typing import Any, Callable, Iterator -import rethinkdb - from datadog_checks.base import AgentCheck from ._config import Config -from ._types import ConnectionServer, Instance, Metric +from ._connections import Connection, ConnectionServer +from ._exceptions import CouldNotConnect +from ._types import Instance, Metric SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' @@ -32,7 +32,7 @@ def __init__(self, *args, **kwargs): @contextmanager def connect_submitting_service_checks(self, config): - # type: (Config) -> Iterator[rethinkdb.net.Connection] + # type: (Config) -> Iterator[Connection] try: with config.connect() as conn: server = conn.server() # type: ConnectionServer @@ -49,7 +49,7 @@ def connect_submitting_service_checks(self, config): else: self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) - except rethinkdb.errors.ReqlDriverError as exc: + except CouldNotConnect as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, message=message) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index cc51b31e452b3..2599c4cc0852e 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -5,12 +5,12 @@ from contextlib import contextmanager from typing import Iterator, List -import rethinkdb from rethinkdb import r from datadog_checks.dev.conditions import WaitFor 
from datadog_checks.dev.docker import temporarily_stop_service from datadog_checks.dev.structures import EnvVars +from datadog_checks.rethinkdb._connections import Connection, RethinkDBConnection from .common import ( AGENT_PASSWORD, @@ -37,30 +37,30 @@ def setup_cluster(): """ logger.debug('setup_cluster') - with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: - r.db_drop('test').run(conn) # Automatically created, but we don't use it and it would skew our metrics. + with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + conn.run(r.db_drop('test')) # Automatically created, but we don't use it and it would skew our metrics. # Cluster content. - r.db_create(DATABASE).run(conn) - r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) - r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD).run(conn) + conn.run(r.db_create(DATABASE)) + conn.run(r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG)) + conn.run(r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD)) # Users. # See: https://rethinkdb.com/docs/permissions-and-accounts/ - r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD}).run(conn) - r.db('rethinkdb').grant(AGENT_USER, {'read': True}).run(conn) + conn.run(r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD})) + conn.run(r.db('rethinkdb').grant(AGENT_USER, {'read': True})) - r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False}).run(conn) - r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True}).run(conn) + conn.run(r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False})) + conn.run(r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True})) # Simulate client activity. # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. 
- with r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER) as conn: - response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) + with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER)) as conn: + response = conn.run(r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS)) assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) - documents = list(r.db(DATABASE).table(HEROES_TABLE).run(conn)) + documents = list(conn.run(r.db(DATABASE).table(HEROES_TABLE))) assert len(documents) == len(HEROES_TABLE_DOCUMENTS) @@ -75,22 +75,23 @@ def temporarily_disconnect_server(server): logger.debug('temporarily_disconnect_server server=%r service=%r', server, service) def _server_exists(conn): - # type: (rethinkdb.net.Connection) -> bool - servers = r.db('rethinkdb').table('server_status').map(r.row['name']).run(conn) # type: List[str] + # type: (Connection) -> bool + servers = conn.run(r.db('rethinkdb').table('server_status').map(r.row['name'])) # type: List[str] logger.debug('server_exists server=%r servers=%r', server, servers) return server in servers def _leader_election_done(conn): - # type: (rethinkdb.net.Connection) -> bool + # type: (Connection) -> bool STABLE_REPLICA_STATES = {'ready', 'waiting_for_primary', 'disconnected'} replica_states = list( - r.db('rethinkdb') - .table('table_status') - .concat_map(r.row['shards']) - .concat_map(r.row['replicas']) - .map(r.row['state']) - .run(conn) + conn.run( + r.db('rethinkdb') + .table('table_status') + .concat_map(r.row['shards']) + .concat_map(r.row['replicas']) + .map(r.row['state']) + ) ) # type: List[str] logger.debug('replica_states %r', replica_states) @@ -98,19 +99,19 @@ def _leader_election_done(conn): return all(state in STABLE_REPLICA_STATES for state in replica_states) def _server_disconnected(conn): - # type: (rethinkdb.net.Connection) -> bool + # type: (Connection) -> bool return not _server_exists(conn) and 
_leader_election_done(conn) def _server_reconnected(conn): - # type: (rethinkdb.net.Connection) -> bool + # type: (Connection) -> bool return _server_exists(conn) and _leader_election_done(conn) with temporarily_stop_service(service, compose_file=COMPOSE_FILE): with EnvVars(COMPOSE_ENV_VARS): - with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: + with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: WaitFor(lambda: _server_disconnected(conn))() yield - with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: + with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index e1d6f9e784384..e1654d579768c 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -5,11 +5,11 @@ from typing import Iterator, Set import pytest -import rethinkdb from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck +from datadog_checks.rethinkdb._exceptions import CouldNotConnect from datadog_checks.rethinkdb._types import Instance, Metric from ._types import ServerName @@ -27,13 +27,13 @@ SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, SERVER_TAGS, - TLS_SERVER, - TLS_CLIENT_CERT, SERVERS, TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, TABLE_STATUS_SERVICE_CHECKS, TABLE_STATUS_SHARDS_METRICS, + TLS_CLIENT_CERT, + TLS_SERVER, ) @@ -216,7 +216,7 @@ def test_cannot_connect_unknown_host(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) - with pytest.raises(rethinkdb.errors.ReqlDriverError): + with pytest.raises(CouldNotConnect): check.check(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=[]) diff --git a/rethinkdb/tests/unit/test_metrics.py 
b/rethinkdb/tests/unit/test_metrics.py index 58b389dfc39d4..41c913a7911bf 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -10,7 +10,7 @@ from datadog_checks.rethinkdb._queries import QueryEngine from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob -from .utils import MockConnection, MockRethinkDB +from .utils import MockConnection pytestmark = pytest.mark.unit @@ -51,9 +51,9 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] - mock_engine = QueryEngine(r=MockRethinkDB(connection_type=MockConnection)) - conn = mock_engine.connect(host='testserver', port=28015, rows=mock_rows) - metrics = list(collect_system_jobs(mock_engine, conn)) + engine = QueryEngine() + conn = MockConnection(rows=mock_rows) + metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ { diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index d3ba88967cd5a..525622f87a13a 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -1,53 +1,21 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Dict, Iterator, List, Type +from typing import Any, Iterator, List, Mapping -import rethinkdb +from datadog_checks.rethinkdb._connections import Connection, ConnectionServer -class MockConnectionInstance(object): - def __init__(self, parent, *args, **kwargs): - # type: (MockConnection, *Any, **Any) -> None - self._parent = parent +class MockConnection(Connection): + def __init__(self, rows): + # type: (List[Mapping[str, Any]]) -> None + self.rows = rows - # Implement the connection instance interface used by RethinkDB. 
+ def server(self): + # type: () -> ConnectionServer + return {'id': 'test', 'name': 'testserver', 'proxy': False} - def connect(self, timeout): - # type: (float) -> MockConnection - return self._parent - - def is_open(self): - # type: () -> bool - return True - - def run_query(self, query, noreply): - # type: (Any, bool) -> Iterator[Dict[str, Any]] - return self._parent.mock_rows() - - -class MockConnection(rethinkdb.net.Connection): - """ - A RethinkDB connection type that mocks all queries by sending a deterministic set of rows. - - Inspired by: - https://github.com/rethinkdb/rethinkdb-python/blob/9aa68feff16dc984406ae0e276f24e87df89b334/rethinkdb/asyncio_net/net_asyncio.py - """ - - def __init__(self, *args, **kwargs): - # type: (*Any, **Any) -> None - rows = kwargs.pop('rows') - super(MockConnection, self).__init__(MockConnectionInstance, *args, **kwargs) - self.rows = rows # type: List[Dict[str, Any]] - - def mock_rows(self): - # type: () -> Iterator[Dict[str, Any]] + def run(self, query): + # type: (Any) -> Iterator for row in self.rows: yield row - - -class MockRethinkDB(rethinkdb.RethinkDB): - def __init__(self, connection_type): - # type: (Type[rethinkdb.net.Connection]) -> None - super(MockRethinkDB, self).__init__() - self.connection_type = connection_type From f792bc7becd092ba28ce48e2af6c9fce9952dc1c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 14:26:23 +0100 Subject: [PATCH 073/147] Collect and submit current issues metrics --- .../rethinkdb/_metrics/current_issues.py | 32 ++++++++++++++++++- .../datadog_checks/rethinkdb/_queries.py | 28 +++++++++++++++- rethinkdb/datadog_checks/rethinkdb/_types.py | 12 +++++++ rethinkdb/tests/common.py | 23 +++++++++++++ rethinkdb/tests/test_rethinkdb.py | 17 ++++++++++ rethinkdb/tests/unit/utils.py | 4 +-- 6 files changed, 112 insertions(+), 4 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py 
index e4f7133baf214..c09dffd518540 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py @@ -15,4 +15,34 @@ def collect_current_issues(engine, conn): See: https://rethinkdb.com/docs/system-issues/ """ - return iter(()) # TODO + totals = engine.query_current_issues_totals(conn) + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.total', + 'value': totals['issues'], + 'tags': [], + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.critical.total', + 'value': totals['critical_issues'], + 'tags': [], + } + + for issue_type, total in totals['issues_by_type'].items(): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.{issue_type}.total'.format(issue_type=issue_type), + 'value': total, + 'tags': [], + } + + for issue_type, total in totals['critical_issues_by_type'].items(): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.{issue_type}.critical.total'.format(issue_type=issue_type), + 'value': total, + 'tags': [], + } diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 0e1a588bf614f..9b83a0d2230a4 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -18,6 +18,7 @@ from ._types import ( ClusterStats, ConfigTotals, + CurrentIssuesTotals, Job, JoinRow, ReplicaStats, @@ -77,7 +78,7 @@ def query_config_totals(self, conn): 'databases': db_config.count(), 'tables_per_database': tables_per_database, 'secondary_indexes_per_table': secondary_indexes_per_table, - } # type: dict + } # type: ConfigTotals # Enforce keys to match. return conn.run(r.expr(totals)) @@ -216,3 +217,28 @@ def query_system_jobs(self, conn): Retrieve all the currently running system jobs. 
""" return conn.run(self._r.db('rethinkdb').table('jobs')) + + def query_current_issues_totals(self, conn): + # type: (Connection) -> CurrentIssuesTotals + """ + Retrieve all the problems detected with the cluster. + """ + r = self._r + + current_issues = r.db('rethinkdb').table('current_issues').pluck('type', 'critical') + critical_current_issues = current_issues.filter(r.row['critical']) + + # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw + # expressions yet. See: https://github.com/rethinkdb/rethinkdb/issues/2067 + + issues_by_type = conn.run(current_issues.group('type').count()) # type: Mapping[str, int] + critical_issues_by_type = conn.run(critical_current_issues.group('type').count()) # type: Mapping[str, int] + + totals = { + 'issues': current_issues.count(), + 'critical_issues': critical_current_issues.count(), + 'issues_by_type': issues_by_type, + 'critical_issues_by_type': critical_issues_by_type, + } # type: CurrentIssuesTotals # Enforce keys to match. + + return conn.run(r.expr(totals)) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 1bbe78de26aa1..48287bf871a99 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -162,6 +162,18 @@ }, ) +# System current issues. + +CurrentIssuesTotals = TypedDict( + 'CurrentIssuesTotals', + { + 'issues': int, + 'critical_issues': int, + 'issues_by_type': Mapping[str, int], + 'critical_issues_by_type': Mapping[str, int], + }, +) + # NOTE: this is a union type tagged by the 'type' key. 
# See: https://mypy.readthedocs.io/en/latest/literal_types.html#intelligent-indexing Job = Union[IndexConstructionJob, BackfillJob] diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index f6f369618c9e0..e4fff197f64d0 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -144,16 +144,39 @@ # NOTE: jobs metrics are not listed here as they are covered by unit tests instead of integration tests. CURRENT_ISSUES_METRICS = ( + 'rethinkdb.current_issues.total', + 'rethinkdb.current_issues.critical.total', 'rethinkdb.current_issues.log_write_error.total', + 'rethinkdb.current_issues.log_write_error.critical.total', 'rethinkdb.current_issues.server_name_collision.total', + 'rethinkdb.current_issues.server_name_collision.critical.total', 'rethinkdb.current_issues.db_name_collision.total', + 'rethinkdb.current_issues.db_name_collision.critical.total', 'rethinkdb.current_issues.table_name_collision.total', + 'rethinkdb.current_issues.table_name_collision.critical.total', 'rethinkdb.current_issues.outdated_index.total', + 'rethinkdb.current_issues.outdated_index.critical.total', 'rethinkdb.current_issues.table_availability.total', + 'rethinkdb.current_issues.table_availability.critical.total', 'rethinkdb.current_issues.memory_error.total', + 'rethinkdb.current_issues.memory_error.critical.total', 'rethinkdb.current_issues.non_transitive_error.total', + 'rethinkdb.current_issues.non_transitive_error.critical.total', ) +CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS = ( + 'rethinkdb.current_issues.total', + 'rethinkdb.current_issues.critical.total', +) + +CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS = ( + 'rethinkdb.current_issues.table_availability.total', + 'rethinkdb.current_issues.table_availability.critical.total', +) + +assert set(CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS) +assert set(CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS) + METRICS = ( 
CLUSTER_STATISTICS_METRICS diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index e1654d579768c..ec519e26a54cf 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -16,6 +16,9 @@ from .cluster import temporarily_disconnect_server from .common import ( CLUSTER_STATISTICS_METRICS, + CURRENT_ISSUES_METRICS, + CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS, + CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS, DATABASE, HEROES_TABLE, HEROES_TABLE_PRIMARY_REPLICA, @@ -136,6 +139,7 @@ def _assert_metrics(aggregator, disconnected_servers=None): _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is # changing (eg. 
an index is being created, or data is being rebalanced across servers), which is hard to @@ -207,6 +211,19 @@ def _assert_server_status_metrics(aggregator, disconnected_servers): aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=tags) +def _assert_current_issues_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[ServerName]) -> None + for metric in CURRENT_ISSUES_METRICS: + if metric in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS: + count = 1 + elif disconnected_servers and metric in CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS: + count = 1 + else: + count = 0 + + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=[]) + + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_cannot_connect_unknown_host(aggregator, instance): diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index 525622f87a13a..ad5612840f8cc 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -1,14 +1,14 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Iterator, List, Mapping +from typing import Any, Iterator, Mapping, Sequence from datadog_checks.rethinkdb._connections import Connection, ConnectionServer class MockConnection(Connection): def __init__(self, rows): - # type: (List[Mapping[str, Any]]) -> None + # type: (Sequence[Mapping[str, Any]]) -> None self.rows = rows def server(self): From 03e82b9990445062b5d44dba2e375cbf238b1b91 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:02:20 +0100 Subject: [PATCH 074/147] Polish --- .../rethinkdb/_metrics/current_issues.py | 6 +++ .../rethinkdb/_metrics/statistics.py | 25 ++++++++--- .../datadog_checks/rethinkdb/_queries.py | 25 +++++------ rethinkdb/datadog_checks/rethinkdb/_types.py | 42 ++++++++++++------- .../datadog_checks/rethinkdb/rethinkdb.py | 30 +++++++------ 5 files changed, 76 insertions(+), 52 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py index c09dffd518540..3f8b7bb08420a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py @@ -1,12 +1,15 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import logging from typing import Iterator from .._connections import Connection from .._queries import QueryEngine from .._types import Metric +logger = logging.getLogger(__name__) + def collect_current_issues(engine, conn): # type: (QueryEngine, Connection) -> Iterator[Metric] @@ -15,7 +18,10 @@ def collect_current_issues(engine, conn): See: https://rethinkdb.com/docs/system-issues/ """ + logger.debug('collect_current_issues') + totals = engine.query_current_issues_totals(conn) + logger.debug('current_issues totals=%r', totals) yield { 'type': 'gauge', diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py index af7dd0b1b8448..74a6454e69d67 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py @@ -1,11 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -""" -Collect metrics about system statistics. - -See: https://rethinkdb.com/docs/system-stats/ -""" import logging from typing import Iterator @@ -18,6 +13,11 @@ def collect_cluster_statistics(engine, conn): # type: (QueryEngine, Connection) -> Iterator[Metric] + """ + Collect metrics about cluster statistics. + + See: https://rethinkdb.com/docs/system-stats#cluster + """ logger.debug('collect_cluster_statistics') stats = engine.query_cluster_stats(conn) @@ -49,6 +49,11 @@ def collect_cluster_statistics(engine, conn): def collect_server_statistics(engine, conn): # type: (QueryEngine, Connection) -> Iterator[Metric] + """ + Collect metrics about server statistics. 
+ + See: https://rethinkdb.com/docs/system-stats#server + """ logger.debug('collect_server_statistics') for server, stats in engine.query_servers_with_stats(conn): @@ -119,6 +124,11 @@ def collect_server_statistics(engine, conn): def collect_table_statistics(engine, conn): # type: (QueryEngine, Connection) -> Iterator[Metric] + """ + Collect metrics about table statistics. + + See: https://rethinkdb.com/docs/system-stats#table + """ logger.debug('collect_table_statistics') for table, stats in engine.query_tables_with_stats(conn): @@ -147,6 +157,11 @@ def collect_table_statistics(engine, conn): def collect_replica_statistics(engine, conn): # type: (QueryEngine, Connection) -> Iterator[Metric] + """ + Collect metrics about replicas (table/server pairs) statistics. + + See: https://rethinkdb.com/docs/system-stats#replica + """ logger.debug('collect_replica_statistics') for table, server, replica, stats in engine.query_replicas_with_stats(conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 9b83a0d2230a4..2873c044b7819 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -1,13 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -""" -Definition of RethinkDB queries used by the RethinkDB integration. - -Useful reference documentation: -- Python ReQL command reference: https://rethinkdb.com/api/python/ -- Usage of `eq_join`: https://rethinkdb.com/api/python/eq_join/ -""" from __future__ import absolute_import from typing import Any, Iterator, Mapping, Tuple @@ -33,12 +26,15 @@ class QueryEngine: + """ + Definition of RethinkDB queries used by the RethinkDB check. 
+ + Python ReQL reference documentation: https://rethinkdb.com/api/python/ + """ + def __init__(self, r=None): # type: (rethinkdb.RethinkDB) -> None - if r is None: - r = rethinkdb.r - - self._r = r + self._r = rethinkdb.r if r is None else r def get_connected_server_version_string(self, conn): # type: (Connection) -> str @@ -138,9 +134,9 @@ def query_replicas_with_stats(self, conn): """ r = self._r - # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation (i.e. ask RethinkDB + # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation, i.e. ask RethinkDB # for a specific set of fields, instead of sending entire objects, which can be expensive when joining - # data as we do here.) + # data as we do here. # See: https://rethinkdb.com/api/python/pluck/ stats = r.db('rethinkdb').table('stats') @@ -148,8 +144,7 @@ def query_replicas_with_stats(self, conn): table_config = r.db('rethinkdb').table('table_config') table_status = r.db('rethinkdb').table( 'table_status', - # Required so that 'server' fields in 'replicas' entries refer contain UUIDs instead of names. - # This way, we can join server information more efficiently, as we don't have to lookup UUIDs from names. + # Required so that we can join on 'server_config' below without having to look up UUIDs from names. # See: https://rethinkdb.com/api/python/table/#description identifier_format='uuid', ) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 48287bf871a99..6b047c9a31f01 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -2,12 +2,15 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) """ -Declarations used for type checking our code, including our manipulation of JSON documents returned by RethinkDB. +Declarations used for type checking our code (e.g. manipulation of JSON documents returned by RethinkDB). 
""" import datetime as dt from typing import Any, List, Literal, Mapping, Tuple, TypedDict, Union -# Lightweight shim to decouple collection functions from the check class. + +# Check interfaces. + +# A lightweight shim to decouple metric collection from metric submission. Metric = TypedDict( 'Metric', {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, @@ -23,17 +26,7 @@ Server = TypedDict('Server', {'id': str, 'name': str, 'cache_size_mb': str, 'tags': List[str]}) -Table = TypedDict('Table', {'id': str, 'name': str, 'db': str}) # TODO: more fields - - -# System statistics documents. -# See: https://rethinkdb.com/docs/system-stats/ - -ClusterQueryEngine = TypedDict( - 'ClusterQueryEngine', {'queries_per_sec': int, 'read_docs_per_sec': int, 'written_docs_per_sec': int}, -) - -ClusterStats = TypedDict('ClusterStats', {'id': Tuple[Literal['cluster']], 'query_engine': ClusterQueryEngine}) +Table = TypedDict('Table', {'id': str, 'name': str, 'db': str}) ConfigTotals = TypedDict( 'ConfigTotals', @@ -45,6 +38,16 @@ }, ) + +# System statistics documents. 
+# See: https://rethinkdb.com/docs/system-stats/ + +ClusterQueryEngine = TypedDict( + 'ClusterQueryEngine', {'queries_per_sec': int, 'read_docs_per_sec': int, 'written_docs_per_sec': int}, +) + +ClusterStats = TypedDict('ClusterStats', {'id': Tuple[Literal['cluster']], 'query_engine': ClusterQueryEngine}) + ServerQueryEngine = TypedDict( 'ServerQueryEngine', { @@ -110,11 +113,14 @@ # See: https://rethinkdb.com/docs/system-tables/#status-tables ShardReplica = TypedDict('ShardReplica', {'server': str, 'state': str}) + Shard = TypedDict('Shard', {'primary_replicas': List[str], 'replicas': List[ShardReplica]}) + TableStatusFlags = TypedDict( 'TableStatusFlags', {'ready_for_outdated_reads': bool, 'ready_for_reads': bool, 'ready_for_writes': bool, 'all_replicas_ready': bool}, ) + TableStatus = TypedDict( 'TableStatus', {'id': str, 'name': str, 'db': str, 'status': TableStatusFlags, 'shards': List[Shard]} ) @@ -127,9 +133,11 @@ 'connected_to': Mapping[str, bool], }, ) + ServerProcess = TypedDict( 'ServerProcess', {'argv': List[str], 'cache_size_mb': int, 'pid': int, 'time_started': dt.datetime, 'version': str} ) + ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) @@ -137,6 +145,7 @@ # See: https://rethinkdb.com/docs/system-jobs/ IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) + IndexConstructionJob = TypedDict( 'IndexConstructionJob', { @@ -151,6 +160,7 @@ BackfillInfo = TypedDict( 'BackfillInfo', {'db': str, 'destination_server': str, 'source_server': str, 'table': str, 'progress': int} ) + BackfillJob = TypedDict( 'BackfillJob', { @@ -162,6 +172,9 @@ }, ) +Job = Union[IndexConstructionJob, BackfillJob] + + # System current issues. CurrentIssuesTotals = TypedDict( @@ -174,9 +187,6 @@ }, ) -# NOTE: this is a union type tagged by the 'type' key. 
-# See: https://mypy.readthedocs.io/en/latest/literal_types.html#intelligent-indexing -Job = Union[IndexConstructionJob, BackfillJob] # ReQL command results. # See: https://rethinkdb.com/api/python/ diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index a00c143979abd..0d71acc1ae401 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Callable, Iterator +from typing import Any, Callable, Iterator, List from datadog_checks.base import AgentCheck @@ -21,8 +21,6 @@ class RethinkDBCheck(AgentCheck): """ Collect metrics from a RethinkDB cluster. - - A set of default metrics is collected from system tables. """ def __init__(self, *args, **kwargs): @@ -33,31 +31,31 @@ def __init__(self, *args, **kwargs): @contextmanager def connect_submitting_service_checks(self, config): # type: (Config) -> Iterator[Connection] + tags = [] # type: List[str] + try: with config.connect() as conn: server = conn.server() # type: ConnectionServer self.log.debug('connected server=%r', server) - tags = ['server:{}'.format(server['name'])] - - try: - yield conn - except Exception as exc: - message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) - self.log.error(message) - self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) - raise - else: - self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) - + tags.append('server:{}'.format(server['name'])) + yield conn except CouldNotConnect as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) - self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, message=message) + self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise + except Exception as exc: + message = 'Unexpected 
error while executing RethinkDB check: {!r}'.format(exc) + self.log.error(message) + self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) + raise + else: + self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) def submit_metric(self, metric): # type: (Metric) -> None self.log.debug('submit_metric metric=%r', metric) + if metric['type'] == 'service_check': self.service_check(metric['name'], metric['value'], tags=metric['tags']) else: From b89c4edffb1de8a5455dabcf0799b51905c537cf Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:20:14 +0100 Subject: [PATCH 075/147] Write up README --- rethinkdb/README.md | 56 +++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/rethinkdb/README.md b/rethinkdb/README.md index 1b27f43411b8d..42bad0826a103 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -2,7 +2,9 @@ ## Overview -This check monitors [RethinkDB][1] through the Datadog Agent. +This check monitors [RethinkDB][1] through the Datadog Agent and collects key performance, status and system infrastructure metrics. + +RethinkDB is a distributed documented-oriented NoSQL database, with first class support for realtime change feeds. ## Setup @@ -10,28 +12,51 @@ Follow the instructions below to install and configure this check for an Agent r ### Installation -The RethinkDB check is included in the [Datadog Agent][2] package. +The RethinkDB check is included in the [Datadog Agent][3] package. No additional installation is needed on your server. ### Configuration -1. Edit the `rethinkdb.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory to start collecting your rethinkdb performance data. See the [sample rethinkdb.d/conf.yaml][3] for all available configuration options. +1. Add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. 
+ + This can be done using the following ReQL commands (see [Permissions and user accounts][4] for details): + + ```python + r.db('rethinkdb').table('users').insert({'id': 'datadog-agent', 'password': ''}) + r.db('rethinkdb').grant('datadog-agent', {'read': True}) + ``` + +2. Edit the `rethinkdb.d/conf.yaml` file, in the `conf.d/` folder at the root of your [Agent's configuration directory][5]: + + ```yaml + init_config: + + instances: + - user: datadog-agent + password: + ``` + + See the [sample rethinkdb.d/conf.yaml][6] for all available configuration options. -2. [Restart the Agent][4]. +3. [Restart the Agent][7]. ### Validation -[Run the Agent's status subcommand][5] and look for `rethinkdb` under the Checks section. +[Run the Agent's status subcommand][8] and look for `rethinkdb` under the Checks section. ## Data Collected ### Metrics -See [metadata.csv][6] for a list of metrics provided by this check. +See [metadata.csv][9] for a list of metrics provided by this check. ### Service Checks -RethinkDB does not include any service checks. +- `rethinkdb.can_connect`: Returns `CRITICAL` if the Agent cannot reach the configured RethinkDB server, `OK` otherwise. +- `rethinkdb.table_status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` if some are not ready yet. +- `rethinkdb.table_status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` if some are not ready yet. +- `rethinkdb.table_status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` if some are not ready yet. +- `rethinkdb.table_status.all_replicas_ready`: Returns `WARNING` if some replicas aren't ready for reads of writes (e.g. if backfills are in progress), `OK` otherwise. ### Events @@ -39,12 +64,15 @@ RethinkDB does not include any events. ## Troubleshooting -Need help? Contact [Datadog support][7]. +Need help? 
Contact [Datadog support][10]. -[1]: **LINK_TO_INTEGRATION_SITE** +[1]: https://rethinkdb.com/ [2]: https://docs.datadoghq.com/agent/autodiscovery/integrations -[3]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example -[4]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent -[5]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information -[6]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/metadata.csv -[7]: https://docs.datadoghq.com/help +[3]: https://docs.datadoghq.com/agent +[4]: https://rethinkdb.com/docs/permissions-and-accounts/ +[5]: https://docs.datadoghq.com/agent/guide/agent-configuration-files/#agent-configuration-directory +[6]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +[7]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent +[8]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information +[9]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/metadata.csv +[10]: https://docs.datadoghq.com/help From 3734e97778962b0b98ede5a709b7876ec8920f8f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:20:35 +0100 Subject: [PATCH 076/147] Lint --- rethinkdb/datadog_checks/rethinkdb/_types.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/_types.py index 6b047c9a31f01..c4d2e9fee01c3 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_types.py +++ b/rethinkdb/datadog_checks/rethinkdb/_types.py @@ -7,7 +7,6 @@ import datetime as dt from typing import Any, List, Literal, Mapping, Tuple, TypedDict, Union - # Check interfaces. # A lightweight shim to decouple metric collection from metric submission. 
From 083eb95b25b2e9e3f841eec1ab1e45f19692a599 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:40:12 +0100 Subject: [PATCH 077/147] Add config spec --- rethinkdb/assets/configuration/spec.yaml | 51 ++++++++++++++++--- .../rethinkdb/data/conf.yaml.example | 40 +++++++++++++++ 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml index d06d5e70c68c4..1865a7a52c1a1 100644 --- a/rethinkdb/assets/configuration/spec.yaml +++ b/rethinkdb/assets/configuration/spec.yaml @@ -1,10 +1,47 @@ name: RethinkDB + files: -- name: rethinkdb.yaml - options: - - template: init_config - options: [] - - template: instances + - name: rethinkdb.yaml + options: - - template: instances/tags - - template: instances/global + - template: init_config + options: + - template: init_config/default + + - template: instances + options: + - template: instances/default + + - name: host + required: false + description: Host of the RethinkDB server. + value: + example: localhost + type: string + + - name: port + required: false + description: Driver port of the RethinkDB server. + value: + example: 28015 + type: integer + + - name: user + required: false + description: The user account to connect as. + value: + type: string + + - name: password + required: false + description: The password for the user account to connect as. + value: + type: string + + - name: tls_ca_cert + required: false + description: | + Path to a TLS/SSL client certificate to use when connecting to the RethinkDB server. 
+ See also: https://rethinkdb.com/docs/security/#using-tls + value: + type: string diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example index 745b4afccafd9..bce20d2188170 100644 --- a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -2,6 +2,13 @@ # init_config: + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Additionally, this sets the default `service` for every log source. + # + # service: + ## Every instance is scheduled independent of the others. # instances: @@ -16,6 +23,13 @@ instances: # - : # - : + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Overrides any `service` defined in the `init_config` section. + # + # service: + ## @param min_collection_interval - number - optional - default: 15 ## This changes the collection interval of the check. For more information, see: ## https://docs.datadoghq.com/developers/write_agent_check/#collection-interval @@ -28,3 +42,29 @@ instances: ## This is useful for cluster-level checks. # # empty_default_hostname: false + + ## @param host - string - optional - default: localhost + ## Host of the RethinkDB server. + # + # host: localhost + + ## @param port - integer - optional - default: 28015 + ## Driver port of the RethinkDB server. + # + # port: 28015 + + ## @param user - string - optional + ## The user account to connect as. + # + # user: + + ## @param password - string - optional + ## The password for the user account to connect as. + # + # password: + + ## @param tls_ca_cert - string - optional + ## Path to a TLS/SSL client certificate to use when connecting to the RethinkDB server. 
+ ## See also: https://rethinkdb.com/docs/security/#using-tls + # + # tls_ca_cert: From d2cf6a134c83d554bd382e31312dde1f3bbab26a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:49:00 +0100 Subject: [PATCH 078/147] Fill service_checks.json --- rethinkdb/README.md | 8 +-- rethinkdb/assets/service_checks.json | 78 +++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 5 deletions(-) diff --git a/rethinkdb/README.md b/rethinkdb/README.md index 42bad0826a103..ffa86daab674b 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -53,10 +53,10 @@ See [metadata.csv][9] for a list of metrics provided by this check. ### Service Checks - `rethinkdb.can_connect`: Returns `CRITICAL` if the Agent cannot reach the configured RethinkDB server, `OK` otherwise. -- `rethinkdb.table_status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` if some are not ready yet. -- `rethinkdb.table_status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` if some are not ready yet. -- `rethinkdb.table_status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` if some are not ready yet. -- `rethinkdb.table_status.all_replicas_ready`: Returns `WARNING` if some replicas aren't ready for reads of writes (e.g. if backfills are in progress), `OK` otherwise. +- `rethinkdb.table_status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` otherwise. +- `rethinkdb.table_status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` otherwise. +- `rethinkdb.table_status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` otherwise. 
+- `rethinkdb.table_status.all_replicas_ready`: Returns `WARNING` if some replicas aren't ready for reads of writes yet (e.g. if backfills are in progress), `OK` otherwise. ### Events diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json index fe51488c7066f..b6e97ca83f810 100644 --- a/rethinkdb/assets/service_checks.json +++ b/rethinkdb/assets/service_checks.json @@ -1 +1,77 @@ -[] +[ + { + "agent_version": "6.19.0", + "integration": "RethinkDB", + "groups": [ + "host", + "endpoint" + ], + "check": "rethinkdb.can_connect", + "statuses": [ + "ok", + "critical" + ], + "name": "Can Connect", + "description": "Returns `CRITICAL` if the Agent is unable to reach the configured RethinkDB server, `OK` otherwise." + }, + { + "agent_version": "6.19.0", + "integration": "RethinkDB", + "groups": [ + "host", + "endpoint" + ], + "check": "rethinkdb.table_status.ready_for_outdated_reads", + "statuses": [ + "ok", + "warning" + ], + "name": "Table Ready For Outdated Reads", + "description": "Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` otherwise." + }, + { + "agent_version": "6.19.0", + "integration": "RethinkDB", + "groups": [ + "host", + "endpoint" + ], + "check": "rethinkdb.table_status.ready_for_reads", + "statuses": [ + "ok", + "warning" + ], + "name": "Table Ready For Reads", + "description": "Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` otherwise." + }, + { + "agent_version": "6.19.0", + "integration": "RethinkDB", + "groups": [ + "host", + "endpoint" + ], + "check": "rethinkdb.table_status.ready_for_writes", + "statuses": [ + "ok", + "warning" + ], + "name": "Table Ready For Writes", + "description": "Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` otherwise." 
+ }, + { + "agent_version": "6.19.0", + "integration": "RethinkDB", + "groups": [ + "host", + "endpoint" + ], + "check": "rethinkdb.table_status.all_replicas_ready", + "statuses": [ + "ok", + "warning" + ], + "name": "Table All Replicas Ready", + "description": "Returns `WARNING` if some replicas aren't ready for reads of writes yet (e.g. if backfills are in progress), `OK` otherwise." + } +] From e4f6d70340e5d93f31fd81fd0f7eae2420359f75 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 2 Mar 2020 15:51:55 +0100 Subject: [PATCH 079/147] Polish manifest.json --- rethinkdb/manifest.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rethinkdb/manifest.json b/rethinkdb/manifest.json index c1def93c6f792..199a7c2ed9551 100644 --- a/rethinkdb/manifest.json +++ b/rethinkdb/manifest.json @@ -4,7 +4,7 @@ "manifest_version": "1.0.0", "name": "rethinkdb", "metric_prefix": "rethinkdb.", - "metric_to_check": "", + "metric_to_check": "rethinkdb.server.total", "creates_events": false, "short_description": "Collect status, performance and other metrics from a RethinkDB cluster.", "guid": "a09f3ed3-c947-413c-a9c6-0dcb641ea890", @@ -16,7 +16,7 @@ ], "public_title": "Datadog-RethinkDB Integration", "categories": [ - "" + "data store" ], "type": "check", "is_public": false, From 10c014e3891a2c82bfa9a1ef1e1f64465e2ce9a5 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 3 Mar 2020 13:26:45 +0100 Subject: [PATCH 080/147] Update service check tags and service_checks.json --- rethinkdb/assets/service_checks.json | 20 ++++++----- .../datadog_checks/rethinkdb/_connections.py | 26 +++++++++++++-- .../datadog_checks/rethinkdb/rethinkdb.py | 18 +++++++--- rethinkdb/tests/test_rethinkdb.py | 33 ++++++++++++++----- 4 files changed, 73 insertions(+), 24 deletions(-) diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json index b6e97ca83f810..91d8a226ae756 100644 --- a/rethinkdb/assets/service_checks.json +++ 
b/rethinkdb/assets/service_checks.json @@ -4,7 +4,9 @@ "integration": "RethinkDB", "groups": [ "host", - "endpoint" + "port", + "server", + "proxy" ], "check": "rethinkdb.can_connect", "statuses": [ @@ -18,8 +20,8 @@ "agent_version": "6.19.0", "integration": "RethinkDB", "groups": [ - "host", - "endpoint" + "database", + "table" ], "check": "rethinkdb.table_status.ready_for_outdated_reads", "statuses": [ @@ -33,8 +35,8 @@ "agent_version": "6.19.0", "integration": "RethinkDB", "groups": [ - "host", - "endpoint" + "database", + "table" ], "check": "rethinkdb.table_status.ready_for_reads", "statuses": [ @@ -48,8 +50,8 @@ "agent_version": "6.19.0", "integration": "RethinkDB", "groups": [ - "host", - "endpoint" + "database", + "table" ], "check": "rethinkdb.table_status.ready_for_writes", "statuses": [ @@ -63,8 +65,8 @@ "agent_version": "6.19.0", "integration": "RethinkDB", "groups": [ - "host", - "endpoint" + "database", + "table" ], "check": "rethinkdb.table_status.all_replicas_ready", "statuses": [ diff --git a/rethinkdb/datadog_checks/rethinkdb/_connections.py b/rethinkdb/datadog_checks/rethinkdb/_connections.py index f2519cac81309..352fc55589c01 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_connections.py +++ b/rethinkdb/datadog_checks/rethinkdb/_connections.py @@ -13,6 +13,8 @@ # See: https://rethinkdb.com/api/python/server ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) +ConnectionTags = TypedDict('ConnectionTags', {'server': str, 'host': str, 'port': int, 'proxy': bool}) + class Connection: """ @@ -27,13 +29,23 @@ def __exit__(self, *args): # type: (*Any) -> None pass + @property + def host(self): + # type: () -> str + raise NotImplementedError # pragma: no cover + + @property + def port(self): + # type: () -> int + raise NotImplementedError # pragma: no cover + def server(self): # type: () -> ConnectionServer - raise NotImplementedError + raise NotImplementedError # pragma: no cover def run(self, query): # type: 
(rethinkdb.RqlQuery) -> Any - raise NotImplementedError + raise NotImplementedError # pragma: no cover class RethinkDBConnection(Connection): @@ -54,6 +66,16 @@ def __exit__(self, *args): # type: (*Any) -> Any return self._conn.__exit__(*args) + @property + def host(self): + # type: () -> str + return self._conn.host + + @property + def port(self): + # type: () -> int + return self._conn.port + def server(self): # type: () -> ConnectionServer return self._conn.server() diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 0d71acc1ae401..9ac665cecf321 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -11,7 +11,7 @@ from datadog_checks.base import AgentCheck from ._config import Config -from ._connections import Connection, ConnectionServer +from ._connections import Connection from ._exceptions import CouldNotConnect from ._types import Instance, Metric @@ -35,10 +35,20 @@ def connect_submitting_service_checks(self, config): try: with config.connect() as conn: - server = conn.server() # type: ConnectionServer - self.log.debug('connected server=%r', server) - tags.append('server:{}'.format(server['name'])) + server = conn.server() + + connection_tags = [ + 'host:{}'.format(conn.host), + 'port:{}'.format(conn.port), + 'server:{}'.format(server['name']), + 'proxy:{}'.format('true' if server['proxy'] else 'false'), + ] + + self.log.debug('connected connection_tags=%r', connection_tags) + tags.extend(connection_tags) + yield conn + except CouldNotConnect as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index ec519e26a54cf..bfa6e83d9755d 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see 
LICENSE) import copy -from typing import Iterator, Set +from typing import Iterator, List, Set import pytest @@ -24,6 +24,7 @@ HEROES_TABLE_PRIMARY_REPLICA, HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, + HOST, REPLICA_STATISTICS_METRICS, RETHINKDB_VERSION, SERVER_PORTS, @@ -40,6 +41,16 @@ ) +def _get_connect_service_check_tags(server='server0'): + # type: (ServerName) -> List[str] + return [ + 'host:{}'.format(HOST), + 'port:{}'.format(SERVER_PORTS[server]), + 'server:{}'.format(server), + 'proxy:false', + ] + + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_check(aggregator, instance): @@ -48,10 +59,9 @@ def test_check(aggregator, instance): check.check(instance) _assert_metrics(aggregator) - aggregator.assert_all_metrics_covered() - service_check_tags = ['server:server0'] + service_check_tags = _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) for service_check in TABLE_STATUS_SERVICE_CHECKS: @@ -72,15 +82,19 @@ def test_check_as_admin(aggregator, instance): _assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1) + + service_check_tags = _get_connect_service_check_tags() + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_check_connect_to_server_with_tls(aggregator, instance): # type: (AggregatorStub, Instance) -> None + server = TLS_SERVER + instance = instance.copy() - instance['port'] = SERVER_PORTS[TLS_SERVER] + instance['port'] = SERVER_PORTS[server] instance['tls_ca_cert'] = TLS_CLIENT_CERT check = RethinkDBCheck('rethinkdb', {}, [instance]) @@ -88,7 +102,9 @@ def test_check_connect_to_server_with_tls(aggregator, instance): _assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - 
aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1) + + service_check_tags = _get_connect_service_check_tags(server=server) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @pytest.mark.integration @@ -108,10 +124,9 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): disconnected_servers = {server_with_data} _assert_metrics(aggregator, disconnected_servers=disconnected_servers) - aggregator.assert_all_metrics_covered() - service_check_tags = ['server:server0'] + service_check_tags = _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) table_status_tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] @@ -257,7 +272,7 @@ def collect_and_fail(): with pytest.raises(Failure): check.check(instance) - service_check_tags = ['server:server0'] + service_check_tags = _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) From 542531f1f230ef46e9d789ea3c6b57b6a47e8561 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 3 Mar 2020 14:20:55 +0100 Subject: [PATCH 081/147] Handle malformed version strings, improve test coverage --- rethinkdb/datadog_checks/rethinkdb/_config.py | 11 ++--- .../datadog_checks/rethinkdb/_connections.py | 2 +- .../datadog_checks/rethinkdb/_exceptions.py | 4 ++ .../rethinkdb/_metrics/statistics.py | 6 --- .../datadog_checks/rethinkdb/_queries.py | 4 +- .../datadog_checks/rethinkdb/_version.py | 6 ++- .../datadog_checks/rethinkdb/rethinkdb.py | 10 +++-- rethinkdb/tests/test_rethinkdb.py | 40 +++++++++++++++++-- rethinkdb/tests/unit/common.py | 11 +++++ rethinkdb/tests/unit/test_metrics.py | 4 +- rethinkdb/tests/unit/test_version.py | 14 +++++++ rethinkdb/tests/unit/utils.py | 19 ++++++--- 12 files changed, 99 
insertions(+), 32 deletions(-) create mode 100644 rethinkdb/tests/unit/common.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/_config.py index 121825a135d62..f6bb066e2e267 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/_config.py @@ -26,7 +26,7 @@ from ._version import parse_version -class Config: +class Config(object): """ Hold instance configuration for a RethinkDB check. @@ -102,17 +102,12 @@ def collect_metrics(self, conn): for metric in collect(self._query_engine, conn): yield metric - def get_connected_server_version(self, conn): + def collect_connected_server_version(self, conn): # type: (Connection) -> str """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. - - Example: - - >>> config.get_version(conn) - '2.4.0~0bionic' """ - version_string = self._query_engine.get_connected_server_version_string(conn) + version_string = self._query_engine.query_connected_server_version_string(conn) return parse_version(version_string) def __repr__(self): diff --git a/rethinkdb/datadog_checks/rethinkdb/_connections.py b/rethinkdb/datadog_checks/rethinkdb/_connections.py index 352fc55589c01..41a2a40956e37 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_connections.py +++ b/rethinkdb/datadog_checks/rethinkdb/_connections.py @@ -16,7 +16,7 @@ ConnectionTags = TypedDict('ConnectionTags', {'server': str, 'host': str, 'port': int, 'proxy': bool}) -class Connection: +class Connection(object): """ Base class and interface for connection objects. 
""" diff --git a/rethinkdb/datadog_checks/rethinkdb/_exceptions.py b/rethinkdb/datadog_checks/rethinkdb/_exceptions.py index 8a48351725fcc..60a7485aa882a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_exceptions.py +++ b/rethinkdb/datadog_checks/rethinkdb/_exceptions.py @@ -9,3 +9,7 @@ class RethinkDBError(Exception): class CouldNotConnect(RethinkDBError): """Failed to connect to a RethinkDB server.""" + + +class VersionCollectionFailed(RethinkDBError): + """Failed to collect or parse the RethinkDB version from a server.""" diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py index 74a6454e69d67..9880342866bf5 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py @@ -167,12 +167,6 @@ def collect_replica_statistics(engine, conn): for table, server, replica, stats in engine.query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) - state = replica['state'] - - if state == 'disconnected': - # TODO: submit service checks? - continue - database = table['db'] server_name = server['name'] table_name = table['name'] diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/_queries.py index 2873c044b7819..90348ea776d55 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/_queries.py @@ -25,7 +25,7 @@ ) -class QueryEngine: +class QueryEngine(object): """ Definition of RethinkDB queries used by the RethinkDB check. 
@@ -36,7 +36,7 @@ def __init__(self, r=None): # type: (rethinkdb.RethinkDB) -> None self._r = rethinkdb.r if r is None else r - def get_connected_server_version_string(self, conn): + def query_connected_server_version_string(self, conn): # type: (Connection) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. diff --git a/rethinkdb/datadog_checks/rethinkdb/_version.py b/rethinkdb/datadog_checks/rethinkdb/_version.py index 6a64b541282e5..349326a633dad 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_version.py +++ b/rethinkdb/datadog_checks/rethinkdb/_version.py @@ -3,8 +3,10 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import re +from ._exceptions import VersionCollectionFailed + # See: https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 -_RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P(?:\S+|[^\(]+))') +_RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P\S+)\s\(.*') def parse_version(rethinkdb_version_string): @@ -24,6 +26,6 @@ def parse_version(rethinkdb_version_string): message = 'Version string did not match pattern (version_string={!r} pattern={!r})'.format( rethinkdb_version_string, _RETHINKDB_VERSION_STR_REGEX ) - raise ValueError(message) + raise VersionCollectionFailed(message) return match.group('rethinkdb_version') diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 9ac665cecf321..d240c86920bf4 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -12,7 +12,7 @@ from ._config import Config from ._connections import Connection -from ._exceptions import CouldNotConnect +from ._exceptions import CouldNotConnect, VersionCollectionFailed from ._types import Instance, Metric SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' @@ -81,5 +81,9 @@ def check(self, instance): for metric in 
config.collect_metrics(conn): self.submit_metric(metric) - version = config.get_connected_server_version(conn) - self.set_metadata('version', version) + try: + version = config.collect_connected_server_version(conn) + except VersionCollectionFailed as exc: + self.log.error(exc) + else: + self.set_metadata('version', version) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index bfa6e83d9755d..a0f3c53306876 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -2,13 +2,15 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import copy -from typing import Iterator, List, Set +from typing import Any, Iterator, List, Set import pytest from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck +from datadog_checks.rethinkdb._config import Config +from datadog_checks.rethinkdb._connections import Connection from datadog_checks.rethinkdb._exceptions import CouldNotConnect from datadog_checks.rethinkdb._types import Instance, Metric @@ -39,6 +41,8 @@ TLS_CLIENT_CERT, TLS_SERVER, ) +from .unit.common import MALFORMED_VERSION_STRING_PARAMS +from .unit.utils import MockConnection def _get_connect_service_check_tags(server='server0'): @@ -278,8 +282,8 @@ def collect_and_fail(): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_version_metadata(aggregator, instance, datadog_agent): - # type: (AggregatorStub, Instance, DatadogAgentStub) -> None +def test_version_metadata(instance, datadog_agent): + # type: (Instance, DatadogAgentStub) -> None check_id = 'test' check = RethinkDBCheck('rethinkdb', {}, [instance]) @@ -299,3 +303,33 @@ def test_version_metadata(aggregator, instance, datadog_agent): } datadog_agent.assert_metadata(check_id, version_metadata) + + +@pytest.mark.unit 
+@pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) +def test_version_metadata_failure(monkeypatch, instance, datadog_agent, malformed_version_string): + # type: (Any, Instance, DatadogAgentStub, str) -> None + """ + Verify that check still runs to completion if version provided by RethinkDB is malformed. + """ + + class FakeConfig(Config): + def __init__(self, *args, **kwargs): + # type: (*Any, **Any) -> None + super(FakeConfig, self).__init__(*args, **kwargs) + self._collect_funcs = [] # Skip metrics as we only provide a row for server version. + + def connect(self): + # type: () -> Connection + server_status = {'process': {'version': malformed_version_string}} + return MockConnection(rows=lambda: server_status) + + check_id = 'test' + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check_id = check_id + check.config = FakeConfig(instance) + + check.check(instance) + + datadog_agent.assert_metadata(check_id, {}) diff --git a/rethinkdb/tests/unit/common.py b/rethinkdb/tests/unit/common.py new file mode 100644 index 0000000000000..b2e207c87da8c --- /dev/null +++ b/rethinkdb/tests/unit/common.py @@ -0,0 +1,11 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import pytest + +MALFORMED_VERSION_STRING_PARAMS = [ + pytest.param('rethinkdb 2.3.3', id='no-compilation-string'), + pytest.param('rethinkdb (GCC 4.9.2)', id='no-version'), + pytest.param('rethinkdb', id='prefix-only'), + pytest.param('abc 2.4.0~0bionic (GCC 4.9.2)', id='wrong-prefix'), +] diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 41c913a7911bf..20e35abb0e657 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -52,8 +52,8 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] engine = QueryEngine() - conn = MockConnection(rows=mock_rows) - metrics = list(collect_system_jobs(engine, conn)) + with MockConnection(rows=lambda: mock_rows) as conn: + metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ { diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py index 558d8b05f2a21..cb9e339f7e053 100644 --- a/rethinkdb/tests/unit/test_version.py +++ b/rethinkdb/tests/unit/test_version.py @@ -1,7 +1,13 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) import pytest +from datadog_checks.rethinkdb._exceptions import VersionCollectionFailed from datadog_checks.rethinkdb._version import parse_version +from .common import MALFORMED_VERSION_STRING_PARAMS + @pytest.mark.unit @pytest.mark.parametrize( @@ -17,3 +23,11 @@ def test_parse_version(version_string, expected_version): # type: (str, str) -> None assert parse_version(version_string) == expected_version + + +@pytest.mark.unit +@pytest.mark.parametrize('version_string', MALFORMED_VERSION_STRING_PARAMS) +def test_parse_malformed_version(version_string): + # type: (str) -> None + with pytest.raises(VersionCollectionFailed): + parse_version(version_string) diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index ad5612840f8cc..8d9cbe894fae3 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -1,21 +1,30 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Iterator, Mapping, Sequence +from typing import Any, Callable from datadog_checks.rethinkdb._connections import Connection, ConnectionServer class MockConnection(Connection): def __init__(self, rows): - # type: (Sequence[Mapping[str, Any]]) -> None + # type: (Callable[[], Any]) -> None self.rows = rows + @property + def host(self): + # type: () -> str + return 'mock.local' + + @property + def port(self): + # type: () -> int + return 28015 + def server(self): # type: () -> ConnectionServer return {'id': 'test', 'name': 'testserver', 'proxy': False} def run(self, query): - # type: (Any) -> Iterator - for row in self.rows: - yield row + # type: (Any) -> Any + return self.rows() From 0f8b997e85089eaaca7f3b96b42a4a032a1b3360 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 3 Mar 2020 14:29:44 +0100 Subject: [PATCH 082/147] Add py.typed --- 
rethinkdb/datadog_checks/rethinkdb/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/py.typed diff --git a/rethinkdb/datadog_checks/rethinkdb/py.typed b/rethinkdb/datadog_checks/rethinkdb/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d From c2db56ab0d8786ac36f17f07cfeb42c41c425ced Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 4 Mar 2020 18:00:08 +0100 Subject: [PATCH 083/147] Cleanup --- .../datadog_checks/dev/docker.py | 2 +- rethinkdb/README.md | 23 +++++++++---------- rethinkdb/assets/configuration/spec.yaml | 2 +- rethinkdb/assets/service_checks.json | 2 +- .../datadog_checks/rethinkdb/_version.py | 3 +-- .../rethinkdb/data/conf.yaml.example | 2 +- 6 files changed, 16 insertions(+), 18 deletions(-) diff --git a/datadog_checks_dev/datadog_checks/dev/docker.py b/datadog_checks_dev/datadog_checks/dev/docker.py index 334cdb8228a34..04990ba410699 100644 --- a/datadog_checks_dev/datadog_checks/dev/docker.py +++ b/datadog_checks_dev/datadog_checks/dev/docker.py @@ -18,7 +18,7 @@ try: from contextlib import ExitStack except ImportError: - from contextlib2 import ExitStack # type: ignore + from contextlib2 import ExitStack def get_docker_hostname(): diff --git a/rethinkdb/README.md b/rethinkdb/README.md index ffa86daab674b..a65d7badcefe5 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -2,9 +2,9 @@ ## Overview -This check monitors [RethinkDB][1] through the Datadog Agent and collects key performance, status and system infrastructure metrics. +[RethinkDB][1] is a distributed document-oriented NoSQL database, with first class support for realtime change feeds. -RethinkDB is a distributed documented-oriented NoSQL database, with first class support for realtime change feeds. +This check monitors a RethinkDB cluster through the Datadog Agent and collects metrics about performance, data availability, cluster configuration, and more.
## Setup @@ -12,34 +12,33 @@ Follow the instructions below to install and configure this check for an Agent r ### Installation -The RethinkDB check is included in the [Datadog Agent][3] package. -No additional installation is needed on your server. +The RethinkDB check is included in the [Datadog Agent][3] package. No additional installation is needed on your server. ### Configuration -1. Add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. - - This can be done using the following ReQL commands (see [Permissions and user accounts][4] for details): +1. Recommended: add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. Use the following ReQL commands, referring to [Permissions and user accounts][4] for details: ```python r.db('rethinkdb').table('users').insert({'id': 'datadog-agent', 'password': ''}) r.db('rethinkdb').grant('datadog-agent', {'read': True}) ``` -2. Edit the `rethinkdb.d/conf.yaml` file, in the `conf.d/` folder at the root of your [Agent's configuration directory][5]: +2. Edit the `rethinkdb.d/conf.yaml` file in the `conf.d/` folder at the root of your [Agent's configuration directory][5]. See the [sample rethinkdb.d/conf.yaml][6] for all available configuration options. ```yaml init_config: instances: - - user: datadog-agent + - host: localhost + port: 28015 + user: datadog-agent password: ``` - See the [sample rethinkdb.d/conf.yaml][6] for all available configuration options. - 3. [Restart the Agent][7]. +**Note**: this integration collects metrics from all servers in the cluster, so you only need a single Agent. + ### Validation [Run the Agent's status subcommand][8] and look for `rethinkdb` under the Checks section. @@ -56,7 +55,7 @@ See [metadata.csv][9] for a list of metrics provided by this check. - `rethinkdb.table_status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` otherwise. 
- `rethinkdb.table_status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` otherwise. - `rethinkdb.table_status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` otherwise. -- `rethinkdb.table_status.all_replicas_ready`: Returns `WARNING` if some replicas aren't ready for reads of writes yet (e.g. if backfills are in progress), `OK` otherwise. +- `rethinkdb.table_status.all_replicas_ready`: Returns `OK` if all replicas are ready for reads and writes, `WARNING` otherwise (e.g. if backfills are in progress). ### Events diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml index 1865a7a52c1a1..2ee3f10cdae68 100644 --- a/rethinkdb/assets/configuration/spec.yaml +++ b/rethinkdb/assets/configuration/spec.yaml @@ -41,7 +41,7 @@ files: - name: tls_ca_cert required: false description: | - Path to a TLS/SSL client certificate to use when connecting to the RethinkDB server. + Path to a TLS client certificate to use when connecting to the RethinkDB server. See also: https://rethinkdb.com/docs/security/#using-tls value: type: string diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json index 91d8a226ae756..db3792907da4e 100644 --- a/rethinkdb/assets/service_checks.json +++ b/rethinkdb/assets/service_checks.json @@ -74,6 +74,6 @@ "warning" ], "name": "Table All Replicas Ready", - "description": "Returns `WARNING` if some replicas aren't ready for reads of writes yet (e.g. if backfills are in progress), `OK` otherwise." + "description": "Returns `OK` if all replicas are ready for reads and writes, `WARNING` otherwise (e.g. if backfills are in progress)." 
} ] diff --git a/rethinkdb/datadog_checks/rethinkdb/_version.py b/rethinkdb/datadog_checks/rethinkdb/_version.py index 349326a633dad..b9f11a6c9d65a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_version.py +++ b/rethinkdb/datadog_checks/rethinkdb/_version.py @@ -13,7 +13,6 @@ def parse_version(rethinkdb_version_string): # type: (str) -> str """ Given a RethinkDB version string, extract the SemVer version. - https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 Example ------- @@ -23,7 +22,7 @@ def parse_version(rethinkdb_version_string): match = _RETHINKDB_VERSION_STR_REGEX.match(rethinkdb_version_string) if match is None: - message = 'Version string did not match pattern (version_string={!r} pattern={!r})'.format( + message = 'Version string {!r} did not match pattern {!r}'.format( rethinkdb_version_string, _RETHINKDB_VERSION_STR_REGEX ) raise VersionCollectionFailed(message) diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example index bce20d2188170..accc917a0093e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -64,7 +64,7 @@ instances: # password: ## @param tls_ca_cert - string - optional - ## Path to a TLS/SSL client certificate to use when connecting to the RethinkDB server. + ## Path to a TLS client certificate to use when connecting to the RethinkDB server. 
## See also: https://rethinkdb.com/docs/security/#using-tls # # tls_ca_cert: From 5df6a5699d7e45caf63efaafac70dd63a956817c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 4 Mar 2020 18:10:01 +0100 Subject: [PATCH 084/147] Stick to non-private internal module names --- .../rethinkdb/{_config.py => config.py} | 20 +++++++++---------- .../{_connections.py => connections.py} | 0 .../{_exceptions.py => exceptions.py} | 0 .../{_metrics => metrics}/__init__.py | 0 .../rethinkdb/{_metrics => metrics}/config.py | 6 +++--- .../{_metrics => metrics}/current_issues.py | 6 +++--- .../{_metrics => metrics}/statistics.py | 6 +++--- .../{_metrics => metrics}/statuses.py | 6 +++--- .../{_metrics => metrics}/system_jobs.py | 6 +++--- .../rethinkdb/{_queries.py => queries.py} | 4 ++-- .../datadog_checks/rethinkdb/rethinkdb.py | 8 ++++---- .../rethinkdb/{_types.py => types.py} | 0 .../rethinkdb/{_version.py => version.py} | 2 +- rethinkdb/tests/cluster.py | 2 +- rethinkdb/tests/conftest.py | 2 +- rethinkdb/tests/test_rethinkdb.py | 8 ++++---- rethinkdb/tests/unit/test_config.py | 4 ++-- rethinkdb/tests/unit/test_metrics.py | 6 +++--- rethinkdb/tests/unit/test_version.py | 4 ++-- rethinkdb/tests/unit/utils.py | 2 +- 20 files changed, 46 insertions(+), 46 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/{_config.py => config.py} (87%) rename rethinkdb/datadog_checks/rethinkdb/{_connections.py => connections.py} (100%) rename rethinkdb/datadog_checks/rethinkdb/{_exceptions.py => exceptions.py} (100%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => metrics}/__init__.py (100%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => metrics}/config.py (93%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => metrics}/current_issues.py (93%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => metrics}/statistics.py (98%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => metrics}/statuses.py (97%) rename rethinkdb/datadog_checks/rethinkdb/{_metrics => 
metrics}/system_jobs.py (96%) rename rethinkdb/datadog_checks/rethinkdb/{_queries.py => queries.py} (99%) rename rethinkdb/datadog_checks/rethinkdb/{_types.py => types.py} (100%) rename rethinkdb/datadog_checks/rethinkdb/{_version.py => version.py} (95%) diff --git a/rethinkdb/datadog_checks/rethinkdb/_config.py b/rethinkdb/datadog_checks/rethinkdb/config.py similarity index 87% rename from rethinkdb/datadog_checks/rethinkdb/_config.py rename to rethinkdb/datadog_checks/rethinkdb/config.py index f6bb066e2e267..10f83b03bd9a2 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -9,21 +9,21 @@ from datadog_checks.base import ConfigurationError -from ._connections import Connection, RethinkDBConnection -from ._exceptions import CouldNotConnect -from ._metrics.config import collect_config_totals -from ._metrics.current_issues import collect_current_issues -from ._metrics.statistics import ( +from .connections import Connection, RethinkDBConnection +from .exceptions import CouldNotConnect +from .metrics.config import collect_config_totals +from .metrics.current_issues import collect_current_issues +from .metrics.statistics import ( collect_cluster_statistics, collect_replica_statistics, collect_server_statistics, collect_table_statistics, ) -from ._metrics.statuses import collect_server_status, collect_table_status -from ._metrics.system_jobs import collect_system_jobs -from ._queries import QueryEngine -from ._types import Instance, Metric -from ._version import parse_version +from .metrics.statuses import collect_server_status, collect_table_status +from .metrics.system_jobs import collect_system_jobs +from .queries import QueryEngine +from .types import Instance, Metric +from .version import parse_version class Config(object): diff --git a/rethinkdb/datadog_checks/rethinkdb/_connections.py b/rethinkdb/datadog_checks/rethinkdb/connections.py similarity index 100% rename from 
rethinkdb/datadog_checks/rethinkdb/_connections.py rename to rethinkdb/datadog_checks/rethinkdb/connections.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_exceptions.py b/rethinkdb/datadog_checks/rethinkdb/exceptions.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_exceptions.py rename to rethinkdb/datadog_checks/rethinkdb/exceptions.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/metrics/__init__.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/__init__.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/__init__.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py similarity index 93% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/config.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/config.py index 7bb1662a44abf..f2d22f50d9401 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py @@ -4,9 +4,9 @@ import logging from typing import Iterator -from .._connections import Connection -from .._queries import QueryEngine -from .._types import Metric +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric logger = logging.getLogger(__name__) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py similarity index 93% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index 3f8b7bb08420a..ec37eba1f2095 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -4,9 +4,9 @@ import logging from typing import Iterator -from .._connections import Connection -from .._queries import QueryEngine 
-from .._types import Metric +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric logger = logging.getLogger(__name__) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py similarity index 98% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 9880342866bf5..5995291d7388f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -4,9 +4,9 @@ import logging from typing import Iterator -from .._connections import Connection -from .._queries import QueryEngine -from .._types import Metric +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric logger = logging.getLogger(__name__) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py similarity index 97% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index acc584d18522f..dfe9e1a6d7411 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -8,9 +8,9 @@ from datadog_checks.base import AgentCheck -from .._connections import Connection -from .._queries import QueryEngine -from .._types import Metric +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric logger = logging.getLogger(__name__) diff --git a/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py similarity index 96% rename from rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index c5ca55d2f898b..17d87adb0e6d9 
100644 --- a/rethinkdb/datadog_checks/rethinkdb/_metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -4,9 +4,9 @@ import logging from typing import Iterator -from .._connections import Connection -from .._queries import QueryEngine -from .._types import Metric +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric logger = logging.getLogger(__name__) diff --git a/rethinkdb/datadog_checks/rethinkdb/_queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py similarity index 99% rename from rethinkdb/datadog_checks/rethinkdb/_queries.py rename to rethinkdb/datadog_checks/rethinkdb/queries.py index 90348ea776d55..5a9ac317863a1 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -7,8 +7,8 @@ import rethinkdb -from ._connections import Connection, ConnectionServer -from ._types import ( +from .connections import Connection, ConnectionServer +from .types import ( ClusterStats, ConfigTotals, CurrentIssuesTotals, diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index d240c86920bf4..9541fd3517287 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -10,10 +10,10 @@ from datadog_checks.base import AgentCheck -from ._config import Config -from ._connections import Connection -from ._exceptions import CouldNotConnect, VersionCollectionFailed -from ._types import Instance, Metric +from .config import Config +from .connections import Connection +from .exceptions import CouldNotConnect, VersionCollectionFailed +from .types import Instance, Metric SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' diff --git a/rethinkdb/datadog_checks/rethinkdb/_types.py b/rethinkdb/datadog_checks/rethinkdb/types.py similarity index 100% rename from rethinkdb/datadog_checks/rethinkdb/_types.py rename to 
rethinkdb/datadog_checks/rethinkdb/types.py diff --git a/rethinkdb/datadog_checks/rethinkdb/_version.py b/rethinkdb/datadog_checks/rethinkdb/version.py similarity index 95% rename from rethinkdb/datadog_checks/rethinkdb/_version.py rename to rethinkdb/datadog_checks/rethinkdb/version.py index b9f11a6c9d65a..9c9cd93816e10 100644 --- a/rethinkdb/datadog_checks/rethinkdb/_version.py +++ b/rethinkdb/datadog_checks/rethinkdb/version.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import re -from ._exceptions import VersionCollectionFailed +from .exceptions import VersionCollectionFailed # See: https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 _RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P\S+)\s\(.*') diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 2599c4cc0852e..ca37289125344 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -10,7 +10,7 @@ from datadog_checks.dev.conditions import WaitFor from datadog_checks.dev.docker import temporarily_stop_service from datadog_checks.dev.structures import EnvVars -from datadog_checks.rethinkdb._connections import Connection, RethinkDBConnection +from datadog_checks.rethinkdb.connections import Connection, RethinkDBConnection from .common import ( AGENT_PASSWORD, diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index fdc8e89f502b1..d145efad14842 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -6,7 +6,7 @@ import pytest from datadog_checks.dev import docker_run -from datadog_checks.rethinkdb._types import Instance +from datadog_checks.rethinkdb.types import Instance from .cluster import setup_cluster from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, HOST, SERVER_PORTS diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index a0f3c53306876..bc91ce0d7b707 100644 --- 
a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -9,10 +9,10 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb._config import Config -from datadog_checks.rethinkdb._connections import Connection -from datadog_checks.rethinkdb._exceptions import CouldNotConnect -from datadog_checks.rethinkdb._types import Instance, Metric +from datadog_checks.rethinkdb.config import Config +from datadog_checks.rethinkdb.connections import Connection +from datadog_checks.rethinkdb.exceptions import CouldNotConnect +from datadog_checks.rethinkdb.types import Instance, Metric from ._types import ServerName from .cluster import temporarily_disconnect_server diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index e1840a44d1ea2..d49a1bce61fd0 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -6,8 +6,8 @@ import pytest from datadog_checks.base import ConfigurationError -from datadog_checks.rethinkdb._config import Config -from datadog_checks.rethinkdb._types import Instance +from datadog_checks.rethinkdb.config import Config +from datadog_checks.rethinkdb.types import Instance pytestmark = pytest.mark.unit diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 20e35abb0e657..aa41f20713a36 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -6,9 +6,9 @@ """ import pytest -from datadog_checks.rethinkdb._metrics.system_jobs import collect_system_jobs -from datadog_checks.rethinkdb._queries import QueryEngine -from datadog_checks.rethinkdb._types import BackfillJob, IndexConstructionJob +from datadog_checks.rethinkdb.metrics.system_jobs import collect_system_jobs +from datadog_checks.rethinkdb.queries import QueryEngine +from 
datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob from .utils import MockConnection diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py index cb9e339f7e053..02daa0ecc998f 100644 --- a/rethinkdb/tests/unit/test_version.py +++ b/rethinkdb/tests/unit/test_version.py @@ -3,8 +3,8 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import pytest -from datadog_checks.rethinkdb._exceptions import VersionCollectionFailed -from datadog_checks.rethinkdb._version import parse_version +from datadog_checks.rethinkdb.exceptions import VersionCollectionFailed +from datadog_checks.rethinkdb.version import parse_version from .common import MALFORMED_VERSION_STRING_PARAMS diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/unit/utils.py index 8d9cbe894fae3..56dd6120edb34 100644 --- a/rethinkdb/tests/unit/utils.py +++ b/rethinkdb/tests/unit/utils.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from typing import Any, Callable -from datadog_checks.rethinkdb._connections import Connection, ConnectionServer +from datadog_checks.rethinkdb.connections import Connection, ConnectionServer class MockConnection(Connection): From 0534f21b5c136a67305521b9a11ebb08d6ecd872 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 4 Mar 2020 18:29:17 +0100 Subject: [PATCH 085/147] Move assertions to module, fix e2e test --- rethinkdb/tests/assertions.py | 121 ++++++++++++++++++++++++++++++ rethinkdb/tests/common.py | 13 +++- rethinkdb/tests/test_e2e.py | 4 +- rethinkdb/tests/test_rethinkdb.py | 119 ++--------------------------- rethinkdb/tox.ini | 2 + 5 files changed, 142 insertions(+), 117 deletions(-) create mode 100644 rethinkdb/tests/assertions.py diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py new file mode 100644 index 0000000000000..1ebc399aa6dd1 --- /dev/null +++ b/rethinkdb/tests/assertions.py @@ -0,0 +1,121 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Set + +from datadog_checks.base.stubs.aggregator import AggregatorStub + +from ._types import ServerName +from .common import ( + CLUSTER_STATISTICS_METRICS, + CURRENT_ISSUES_METRICS, + CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS, + CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS, + DATABASE, + HEROES_TABLE, + HEROES_TABLE_PRIMARY_REPLICA, + HEROES_TABLE_REPLICAS_BY_SHARD, + HEROES_TABLE_SERVERS, + REPLICA_STATISTICS_METRICS, + SERVER_STATISTICS_METRICS, + SERVER_STATUS_METRICS, + SERVER_TAGS, + SERVERS, + TABLE_STATISTICS_METRICS, + TABLE_STATUS_METRICS, + TABLE_STATUS_SHARDS_METRICS, +) + + +def assert_metrics(aggregator, disconnected_servers=None): + # type: (AggregatorStub, Set[ServerName]) -> None + if disconnected_servers is None: + disconnected_servers = set() + + _assert_config_totals_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_table_status_metrics(aggregator) + _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) + + # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is + # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to + # test without introducing flakiness. 
+ + +def _assert_config_totals_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[ServerName]) -> None + aggregator.assert_metric('rethinkdb.server.total', count=1, value=len(SERVERS) - len(disconnected_servers)) + aggregator.assert_metric('rethinkdb.database.total', count=1, value=1) + aggregator.assert_metric('rethinkdb.database.table.total', count=1, value=1, tags=['database:{}'.format(DATABASE)]) + aggregator.assert_metric( + 'rethinkdb.table.secondary_index.total', count=1, value=1, tags=['table:{}'.format(HEROES_TABLE)] + ) + + +def _assert_statistics_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[ServerName]) -> None + for metric in CLUSTER_STATISTICS_METRICS: + aggregator.assert_metric(metric, count=1, tags=[]) + + for server in SERVERS: + tags = ['server:{}'.format(server)] + SERVER_TAGS[server] + for metric in SERVER_STATISTICS_METRICS: + count = 0 if server in disconnected_servers else 1 + aggregator.assert_metric(metric, count=count, tags=tags) + + for metric in TABLE_STATISTICS_METRICS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + aggregator.assert_metric(metric, count=1, tags=tags) + + for server in HEROES_TABLE_SERVERS: + tags = [ + 'table:{}'.format(HEROES_TABLE), + 'database:{}'.format(DATABASE), + 'server:{}'.format(server), + ] + SERVER_TAGS[server] + + for metric in REPLICA_STATISTICS_METRICS: + if server in disconnected_servers: + aggregator.assert_metric(metric, count=0, tags=tags) + continue + + # Assumption: cluster is stable (not currently rebalancing), so only these two states can exist. 
+ state = 'waiting_for_primary' if HEROES_TABLE_PRIMARY_REPLICA in disconnected_servers else 'ready' + state_tag = 'state:{}'.format(state) + aggregator.assert_metric(metric, count=1, tags=tags + [state_tag]) + + +def _assert_table_status_metrics(aggregator): + # type: (AggregatorStub) -> None + for metric in TABLE_STATUS_METRICS: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + + for shard in HEROES_TABLE_REPLICAS_BY_SHARD: + tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] + + for metric in TABLE_STATUS_SHARDS_METRICS: + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + + +def _assert_server_status_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[ServerName]) -> None + for metric in SERVER_STATUS_METRICS: + for server in SERVERS: + tags = ['server:{}'.format(server)] + count = 0 if server in disconnected_servers else 1 + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=tags) + + +def _assert_current_issues_metrics(aggregator, disconnected_servers): + # type: (AggregatorStub, Set[ServerName]) -> None + for metric in CURRENT_ISSUES_METRICS: + if metric in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS: + count = 1 + elif disconnected_servers and metric in CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS: + count = 1 + else: + count = 0 + + aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=[]) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index e4fff197f64d0..f9eadc83097fd 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -82,6 +82,13 @@ # Metrics lists. 
+CONFIG_TOTALS_METRICS = ( + 'rethinkdb.server.total', + 'rethinkdb.database.total', + 'rethinkdb.database.table.total', + 'rethinkdb.table.secondary_index.total', +) + CLUSTER_STATISTICS_METRICS = ( 'rethinkdb.stats.cluster.queries_per_sec', 'rethinkdb.stats.cluster.read_docs_per_sec', @@ -178,14 +185,16 @@ assert set(CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS) -METRICS = ( - CLUSTER_STATISTICS_METRICS +E2E_METRICS = ( + CONFIG_TOTALS_METRICS + + CLUSTER_STATISTICS_METRICS + SERVER_STATISTICS_METRICS + TABLE_STATISTICS_METRICS + REPLICA_STATISTICS_METRICS + TABLE_STATUS_METRICS + TABLE_STATUS_SHARDS_METRICS + SERVER_STATUS_METRICS + + CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS ) diff --git a/rethinkdb/tests/test_e2e.py b/rethinkdb/tests/test_e2e.py index 1f644920a148a..c85980bf9111b 100644 --- a/rethinkdb/tests/test_e2e.py +++ b/rethinkdb/tests/test_e2e.py @@ -8,7 +8,7 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.rethinkdb import RethinkDBCheck -from .common import METRICS +from .common import E2E_METRICS @pytest.mark.e2e @@ -16,7 +16,7 @@ def test_check_ok(dd_agent_check): # type: (Callable) -> None aggregator = dd_agent_check(rate=True) # type: AggregatorStub - for metric in METRICS: + for metric in E2E_METRICS: aggregator.assert_metric(metric) aggregator.assert_all_metrics_covered() diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index bc91ce0d7b707..4b2cc0a08252a 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import copy -from typing import Any, Iterator, List, Set +from typing import Any, Iterator, List import pytest @@ -15,29 +15,16 @@ from datadog_checks.rethinkdb.types import Instance, Metric from ._types import ServerName +from .assertions import assert_metrics from .cluster import 
temporarily_disconnect_server from .common import ( - CLUSTER_STATISTICS_METRICS, - CURRENT_ISSUES_METRICS, - CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS, - CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS, DATABASE, HEROES_TABLE, - HEROES_TABLE_PRIMARY_REPLICA, - HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, HOST, - REPLICA_STATISTICS_METRICS, RETHINKDB_VERSION, SERVER_PORTS, - SERVER_STATISTICS_METRICS, - SERVER_STATUS_METRICS, - SERVER_TAGS, - SERVERS, - TABLE_STATISTICS_METRICS, - TABLE_STATUS_METRICS, TABLE_STATUS_SERVICE_CHECKS, - TABLE_STATUS_SHARDS_METRICS, TLS_CLIENT_CERT, TLS_SERVER, ) @@ -62,7 +49,7 @@ def test_check(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) - _assert_metrics(aggregator) + assert_metrics(aggregator) aggregator.assert_all_metrics_covered() service_check_tags = _get_connect_service_check_tags() @@ -84,7 +71,7 @@ def test_check_as_admin(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) - _assert_metrics(aggregator) + assert_metrics(aggregator) aggregator.assert_all_metrics_covered() service_check_tags = _get_connect_service_check_tags() @@ -104,7 +91,7 @@ def test_check_connect_to_server_with_tls(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check(instance) - _assert_metrics(aggregator) + assert_metrics(aggregator) aggregator.assert_all_metrics_covered() service_check_tags = _get_connect_service_check_tags(server=server) @@ -127,7 +114,7 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): disconnected_servers = {server_with_data} - _assert_metrics(aggregator, disconnected_servers=disconnected_servers) + assert_metrics(aggregator, disconnected_servers=disconnected_servers) aggregator.assert_all_metrics_covered() service_check_tags = _get_connect_service_check_tags() @@ -149,100 +136,6 @@ def test_check_with_disconnected_server(aggregator, instance, 
server_with_data): ) -def _assert_metrics(aggregator, disconnected_servers=None): - # type: (AggregatorStub, Set[ServerName]) -> None - if disconnected_servers is None: - disconnected_servers = set() - - _assert_config_totals_metrics(aggregator, disconnected_servers=disconnected_servers) - _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) - _assert_table_status_metrics(aggregator) - _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) - _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) - - # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is - # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to - # test without introducing flakiness. - - -def _assert_config_totals_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[ServerName]) -> None - aggregator.assert_metric('rethinkdb.server.total', count=1, value=len(SERVERS) - len(disconnected_servers)) - aggregator.assert_metric('rethinkdb.database.total', count=1, value=1) - aggregator.assert_metric('rethinkdb.database.table.total', count=1, value=1, tags=['database:{}'.format(DATABASE)]) - aggregator.assert_metric( - 'rethinkdb.table.secondary_index.total', count=1, value=1, tags=['table:{}'.format(HEROES_TABLE)] - ) - - -def _assert_statistics_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[ServerName]) -> None - for metric in CLUSTER_STATISTICS_METRICS: - aggregator.assert_metric(metric, count=1, tags=[]) - - for server in SERVERS: - tags = ['server:{}'.format(server)] + SERVER_TAGS[server] - for metric in SERVER_STATISTICS_METRICS: - count = 0 if server in disconnected_servers else 1 - aggregator.assert_metric(metric, count=count, tags=tags) - - for metric in TABLE_STATISTICS_METRICS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - 
aggregator.assert_metric(metric, count=1, tags=tags) - - for server in HEROES_TABLE_SERVERS: - tags = [ - 'table:{}'.format(HEROES_TABLE), - 'database:{}'.format(DATABASE), - 'server:{}'.format(server), - ] + SERVER_TAGS[server] - - for metric in REPLICA_STATISTICS_METRICS: - if server in disconnected_servers: - aggregator.assert_metric(metric, count=0, tags=tags) - continue - - # Assumption: cluster is stable (not currently rebalancing), so only these two states can exist. - state = 'waiting_for_primary' if HEROES_TABLE_PRIMARY_REPLICA in disconnected_servers else 'ready' - state_tag = 'state:{}'.format(state) - aggregator.assert_metric(metric, count=1, tags=tags + [state_tag]) - - -def _assert_table_status_metrics(aggregator): - # type: (AggregatorStub) -> None - for metric in TABLE_STATUS_METRICS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - - for shard in HEROES_TABLE_REPLICAS_BY_SHARD: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] - - for metric in TABLE_STATUS_SHARDS_METRICS: - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) - - -def _assert_server_status_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[ServerName]) -> None - for metric in SERVER_STATUS_METRICS: - for server in SERVERS: - tags = ['server:{}'.format(server)] - count = 0 if server in disconnected_servers else 1 - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=tags) - - -def _assert_current_issues_metrics(aggregator, disconnected_servers): - # type: (AggregatorStub, Set[ServerName]) -> None - for metric in CURRENT_ISSUES_METRICS: - if metric in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS: - count = 1 - elif disconnected_servers and metric in CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS: - count = 1 - else: - count = 0 - - 
aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=[]) - - @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_cannot_connect_unknown_host(aggregator, instance): diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index b45647b7ab827..7f74f4d9053d0 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -9,6 +9,8 @@ envlist = dd_check_style = true dd_check_types = true dd_mypy_args = --py2 --disallow-untyped-defs datadog_checks/ tests/ +description = + py{27,38}: e2e ready usedevelop = true platform = linux|darwin|win32 deps = From b8246a05fafb5a3017abc9d2cd06da445b513bc1 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 11:43:57 +0100 Subject: [PATCH 086/147] Rename 'user' option to 'username', clarify purpose of admin test --- rethinkdb/assets/configuration/spec.yaml | 2 +- rethinkdb/datadog_checks/rethinkdb/config.py | 2 +- .../datadog_checks/rethinkdb/data/conf.yaml.example | 4 ++-- rethinkdb/datadog_checks/rethinkdb/types.py | 2 +- rethinkdb/tests/conftest.py | 2 +- rethinkdb/tests/test_rethinkdb.py | 12 ++++++++++-- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml index 2ee3f10cdae68..35816ff656a1b 100644 --- a/rethinkdb/assets/configuration/spec.yaml +++ b/rethinkdb/assets/configuration/spec.yaml @@ -26,7 +26,7 @@ files: example: 28015 type: integer - - name: user + - name: username required: false description: The user account to connect as. 
value: diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 10f83b03bd9a2..f8d186251be24 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -37,7 +37,7 @@ def __init__(self, instance): # type: (Instance) -> None host = instance.get('host', 'localhost') port = instance.get('port', 28015) - user = instance.get('user') + user = instance.get('username') password = instance.get('password') tls_ca_cert = instance.get('tls_ca_cert') diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example index accc917a0093e..d4a623fbee20a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -53,10 +53,10 @@ instances: # # port: 28015 - ## @param user - string - optional + ## @param username - string - optional ## The user account to connect as. # - # user: + # username: ## @param password - string - optional ## The password for the user account to connect as. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index c4d2e9fee01c3..8b3d4baa19c02 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -16,7 +16,7 @@ ) Instance = TypedDict( - 'Instance', {'host': str, 'port': int, 'user': str, 'password': str, 'tls_ca_cert': str}, total=False + 'Instance', {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': str}, total=False ) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index d145efad14842..f7e7e5a800db3 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -18,7 +18,7 @@ def instance(): return { 'host': HOST, 'port': SERVER_PORTS['server0'], - 'user': AGENT_USER, + 'username': AGENT_USER, 'password': AGENT_PASSWORD, } diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 4b2cc0a08252a..0360f27242c4a 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -62,10 +62,18 @@ def test_check(aggregator, instance): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_check_as_admin(aggregator, instance): +def test_check_without_credentials_uses_admin(aggregator, instance): # type: (AggregatorStub, Instance) -> None + """ + Verify that when no credentials are configured (not recommended though), the check still runs successfully provided + the admin account doesn't have a password set. + """ instance = instance.copy() - instance.pop('user') + + # Remove any credentials so that the Python driver uses the default credentials (i.e. admin account w/o password) + # when connecting to RethinkDB. 
+ # See: https://rethinkdb.com/api/python/connect/#description + instance.pop('username') instance.pop('password') check = RethinkDBCheck('rethinkdb', {}, [instance]) From 62cee7461c223596bcba9b20aa456b9e3bfcf80c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 11:56:28 +0100 Subject: [PATCH 087/147] Honor enable_metadata_collection config option --- rethinkdb/datadog_checks/rethinkdb/rethinkdb.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 9541fd3517287..55fd67499635a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -72,6 +72,15 @@ def submit_metric(self, metric): submit = getattr(self, metric['type']) # type: Callable submit(metric['name'], value=metric['value'], tags=metric['tags']) + def submit_version_metadata(self, config, conn): + # type: (Config, Connection) -> None + try: + version = config.collect_connected_server_version(conn) + except VersionCollectionFailed as exc: + self.log.error(exc) + else: + self.set_metadata('version', version) + def check(self, instance): # type: (Instance) -> None config = self.config @@ -81,9 +90,5 @@ def check(self, instance): for metric in config.collect_metrics(conn): self.submit_metric(metric) - try: - version = config.collect_connected_server_version(conn) - except VersionCollectionFailed as exc: - self.log.error(exc) - else: - self.set_metadata('version', version) + if self.is_metadata_collection_enabled(): + self.submit_version_metadata(config, conn) From a4fb134c3caa748ed3bce332fcc0153f2d4be733 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 12:07:40 +0100 Subject: [PATCH 088/147] Rename tests/_types.py -> tests/types.py --- rethinkdb/tests/assertions.py | 2 +- rethinkdb/tests/common.py | 2 +- rethinkdb/tests/test_rethinkdb.py | 2 +- rethinkdb/tests/{_types.py => 
types.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename rethinkdb/tests/{_types.py => types.py} (100%) diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index 1ebc399aa6dd1..a274536dd985a 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -5,7 +5,6 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub -from ._types import ServerName from .common import ( CLUSTER_STATISTICS_METRICS, CURRENT_ISSUES_METRICS, @@ -25,6 +24,7 @@ TABLE_STATUS_METRICS, TABLE_STATUS_SHARDS_METRICS, ) +from .types import ServerName def assert_metrics(aggregator, disconnected_servers=None): diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index f9eadc83097fd..b78b3962490c7 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -6,7 +6,7 @@ from datadog_checks.utils.common import get_docker_hostname -from ._types import ServerName +from .types import ServerName HERE = os.path.dirname(os.path.abspath(__file__)) ROOT = os.path.dirname(os.path.dirname(HERE)) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 0360f27242c4a..081ec97b5a72b 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -14,7 +14,6 @@ from datadog_checks.rethinkdb.exceptions import CouldNotConnect from datadog_checks.rethinkdb.types import Instance, Metric -from ._types import ServerName from .assertions import assert_metrics from .cluster import temporarily_disconnect_server from .common import ( @@ -28,6 +27,7 @@ TLS_CLIENT_CERT, TLS_SERVER, ) +from .types import ServerName from .unit.common import MALFORMED_VERSION_STRING_PARAMS from .unit.utils import MockConnection diff --git a/rethinkdb/tests/_types.py b/rethinkdb/tests/types.py similarity index 100% rename from rethinkdb/tests/_types.py rename to rethinkdb/tests/types.py From 57be0566d18c3aa2a4c24ca323aaa55a8e8dcfc1 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 
2020 12:11:33 +0100 Subject: [PATCH 089/147] Reorganization test utils --- rethinkdb/tests/common.py | 12 ++++++++++++ rethinkdb/tests/test_rethinkdb.py | 4 ++-- rethinkdb/tests/unit/common.py | 11 ----------- rethinkdb/tests/unit/test_metrics.py | 2 +- rethinkdb/tests/unit/test_version.py | 2 +- rethinkdb/tests/{unit => }/utils.py | 0 6 files changed, 16 insertions(+), 15 deletions(-) delete mode 100644 rethinkdb/tests/unit/common.py rename rethinkdb/tests/{unit => }/utils.py (100%) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index b78b3962490c7..914787cf5edaa 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -4,6 +4,8 @@ import os from typing import Dict, List, Set +import pytest + from datadog_checks.utils.common import get_docker_hostname from .types import ServerName @@ -211,3 +213,13 @@ 'RETHINKDB_TLS_DRIVER_KEY': TLS_DRIVER_KEY, 'RETHINKDB_TLS_DRIVER_CERT': TLS_DRIVER_CERT, } + + +# Pytest common test data. + +MALFORMED_VERSION_STRING_PARAMS = [ + pytest.param('rethinkdb 2.3.3', id='no-compilation-string'), + pytest.param('rethinkdb (GCC 4.9.2)', id='no-version'), + pytest.param('rethinkdb', id='prefix-only'), + pytest.param('abc 2.4.0~0bionic (GCC 4.9.2)', id='wrong-prefix'), +] diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 081ec97b5a72b..976d24b048ef2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -21,6 +21,7 @@ HEROES_TABLE, HEROES_TABLE_SERVERS, HOST, + MALFORMED_VERSION_STRING_PARAMS, RETHINKDB_VERSION, SERVER_PORTS, TABLE_STATUS_SERVICE_CHECKS, @@ -28,8 +29,7 @@ TLS_SERVER, ) from .types import ServerName -from .unit.common import MALFORMED_VERSION_STRING_PARAMS -from .unit.utils import MockConnection +from .utils import MockConnection def _get_connect_service_check_tags(server='server0'): diff --git a/rethinkdb/tests/unit/common.py b/rethinkdb/tests/unit/common.py deleted file mode 100644 index 
b2e207c87da8c..0000000000000 --- a/rethinkdb/tests/unit/common.py +++ /dev/null @@ -1,11 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import pytest - -MALFORMED_VERSION_STRING_PARAMS = [ - pytest.param('rethinkdb 2.3.3', id='no-compilation-string'), - pytest.param('rethinkdb (GCC 4.9.2)', id='no-version'), - pytest.param('rethinkdb', id='prefix-only'), - pytest.param('abc 2.4.0~0bionic (GCC 4.9.2)', id='wrong-prefix'), -] diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index aa41f20713a36..5c25b75d29690 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -10,7 +10,7 @@ from datadog_checks.rethinkdb.queries import QueryEngine from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob -from .utils import MockConnection +from ..utils import MockConnection pytestmark = pytest.mark.unit diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py index 02daa0ecc998f..09002382618fc 100644 --- a/rethinkdb/tests/unit/test_version.py +++ b/rethinkdb/tests/unit/test_version.py @@ -6,7 +6,7 @@ from datadog_checks.rethinkdb.exceptions import VersionCollectionFailed from datadog_checks.rethinkdb.version import parse_version -from .common import MALFORMED_VERSION_STRING_PARAMS +from ..common import MALFORMED_VERSION_STRING_PARAMS @pytest.mark.unit diff --git a/rethinkdb/tests/unit/utils.py b/rethinkdb/tests/utils.py similarity index 100% rename from rethinkdb/tests/unit/utils.py rename to rethinkdb/tests/utils.py From e058f591d4a8a8e61f2dd78fc14e7f9b7752d970 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 14:06:54 +0100 Subject: [PATCH 090/147] Parametrize log_patterns by server constants --- rethinkdb/tests/common.py | 6 +++--- rethinkdb/tests/conftest.py | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git 
a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 914787cf5edaa..7dde29a4b7071 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -24,14 +24,14 @@ # Servers. # NOTE: server information is tightly coupled to the Docker Compose setup. +SERVERS = {'server0', 'server1', 'server2'} # type: Set[ServerName] +BOOTSTRAP_SERVER = 'server0' # type: ServerName +SERVER_PORTS = {'server0': 28015, 'server1': 28016, 'server2': 28017, 'proxy': 28018} # type: Dict[ServerName, int] SERVER_TAGS = { 'server0': ['default', 'us'], 'server1': ['default', 'us', 'primary'], 'server2': ['default', 'eu'], } # type: Dict[ServerName, List[str]] -SERVERS = {'server0', 'server1', 'server2'} # type: Set[ServerName] - -SERVER_PORTS = {'server0': 28015, 'server1': 28016, 'server2': 28017, 'proxy': 28018} # type: Dict[ServerName, int] # Users. diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index f7e7e5a800db3..0249fea729021 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -9,7 +9,16 @@ from datadog_checks.rethinkdb.types import Instance from .cluster import setup_cluster -from .common import AGENT_PASSWORD, AGENT_USER, COMPOSE_ENV_VARS, COMPOSE_FILE, HOST, SERVER_PORTS +from .common import ( + AGENT_PASSWORD, + AGENT_USER, + BOOTSTRAP_SERVER, + COMPOSE_ENV_VARS, + COMPOSE_FILE, + HOST, + SERVER_PORTS, + SERVERS, +) @pytest.fixture(scope='session') @@ -28,12 +37,9 @@ def dd_environment(instance): # type: (Instance) -> Iterator conditions = [setup_cluster] - log_patterns = [ - r'Server ready, "server0".*', - r'Connected to server "server1".*', - r'Connected to server "server2".*', - r'Connected to proxy.*', - ] + log_patterns = [r'Server ready, "{}".*'.format(BOOTSTRAP_SERVER)] + log_patterns += [r'Connected to server "{}".*'.format(server) for server in SERVERS - {BOOTSTRAP_SERVER}] + log_patterns += [r'Connected to proxy.*'] with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS, 
log_patterns=log_patterns): config = {'instances': [instance]} From 5b1b69cc9bcf5432ca0ea6c210c219c380a5ce01 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 17:10:13 +0100 Subject: [PATCH 091/147] Generate metadata.csv --- rethinkdb/metadata.csv | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv index ae0af074191ec..2eeca80ba8894 100644 --- a/rethinkdb/metadata.csv +++ b/rethinkdb/metadata.csv @@ -1 +1,60 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name +rethinkdb.server.total,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Server total +rethinkdb.database.total,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Database total +rethinkdb.database.table.total,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Database table total +rethinkdb.table.secondary_index.total,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Table secondary index total +rethinkdb.stats.cluster.queries_per_sec,gauge,,query,,Number of queries executed in a cluster per second.,0,rethinkdb,Stats cluster queries per sec +rethinkdb.stats.cluster.read_docs_per_sec,gauge,,document,,Number of documents read in a cluster per second.,0,rethinkdb,Stats cluster read docs per sec +rethinkdb.stats.cluster.written_docs_per_sec,gauge,,document,,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster written docs per sec +rethinkdb.stats.server.queries_per_sec,gauge,,query,,Number of queries executed on a server per second.,0,rethinkdb,Stats server queries per sec +rethinkdb.stats.server.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server queries total +rethinkdb.stats.server.read_docs_per_sec,gauge,,document,,Number of documents read from a server per second.,0,rethinkdb,Stats server read docs per sec 
+rethinkdb.stats.server.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server read docs total +rethinkdb.stats.server.written_docs_per_sec,gauge,,document,,Number of documents written to a server per second.,0,rethinkdb,Stats server written docs per sec +rethinkdb.stats.server.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server written docs total +rethinkdb.stats.server.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server client connections +rethinkdb.stats.server.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Stats server clients active +rethinkdb.stats.table.read_docs_per_sec,gauge,,document,,Number of documents read from a table per second.,0,rethinkdb,Stats table read docs per sec +rethinkdb.stats.table.written_docs_per_sec,gauge,,document,,Number of documents written to a table per second.,0,rethinkdb,Stats table written docs per sec +rethinkdb.stats.table_server.read_docs_per_sec,gauge,,document,,Number of documents read from a replica per second.,0,rethinkdb,Stats table server read docs per sec +rethinkdb.stats.table_server.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Stats table server read docs total +rethinkdb.stats.table_server.written_docs_per_sec,gauge,,document,,Number of documents written to a replica per second.,0,rethinkdb,Stats table server written docs per sec +rethinkdb.stats.table_server.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server written docs total +rethinkdb.stats.table_server.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server cache in use bytes +rethinkdb.stats.table_server.disk.read_bytes_per_sec,gauge,,byte,,Number of bytes read from the disk 
of a replica per second.,0,rethinkdb,Stats table server disk read bytes per sec +rethinkdb.stats.table_server.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server disk read bytes total +rethinkdb.stats.table_server.disk.written_bytes_per_sec,gauge,,byte,,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats table server disk written bytes per sec +rethinkdb.stats.table_server.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server disk written bytes total +rethinkdb.stats.table_server.disk.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server disk metadata bytes +rethinkdb.stats.table_server.disk.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server disk data bytes +rethinkdb.stats.table_server.disk.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server disk garbage bytes +rethinkdb.stats.table_server.disk.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server disk preallocated bytes +rethinkdb.table_status.shards.total,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards total +rethinkdb.table_status.shards.replicas.total,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas total +rethinkdb.table_status.shards.replicas.primary.total,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards replicas primary total +rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected +rethinkdb.server_status.network.connected_to.total,gauge,,node,,Number of other RethinkDB 
servers a server is currently connected to.,0,rethinkdb,Server status network connected to total +rethinkdb.server_status.network.connected_to.pending.total,gauge,,node,,Number of other RethinkDB servers a server knows about but is not currently connected to.,0,rethinkdb,Server status network connected to pending total +rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started +rethinkdb.jobs.index_construction.duration,gauge,,second,,Duration of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction duration +rethinkdb.jobs.index_construction.progress,gauge,,percent,,Progress of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction progress +rethinkdb.jobs.backfill.duration,gauge,,second,,Duration of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill duration +rethinkdb.jobs.backfill.progress,gauge,,percent,,Progress of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill progress +rethinkdb.current_issues.total,gauge,,,,Total number of current issues.,0,rethinkdb,Current issues total +rethinkdb.current_issues.log_write_error.total,gauge,,,,Total number of issues reporting that RethinkDB has failed to write to its log file.,0,rethinkdb,Current issues log write error total +rethinkdb.current_issues.server_name_collision.total,gauge,,,,Total number of issues reporting that multiple servers have been assigned the same name.,0,rethinkdb,Current issues server name collision total +rethinkdb.current_issues.db_name_collision.total,gauge,,,,Total number of issues reporting that multiple databases have been assigned the same name.,0,rethinkdb,Current issues db name collision total +rethinkdb.current_issues.table_name_collision.total,gauge,,,,Total number of issues reporting that multiple tables in the 
same database have been assigned the same name.,0,rethinkdb,Current issues table name collision total +rethinkdb.current_issues.outdated_index.total,gauge,,,,Total number of issues reporting that indexes built with an older version of RethinkDB needs to be rebuilt due to changes in the way ReQL handles indexing.,0,rethinkdb,Current issues outdated index total +rethinkdb.current_issues.table_availability.total,gauge,,,,Total number of issues reporting that a table on the cluster is missing at least one replica.,0,rethinkdb,Current issues table availability total +rethinkdb.current_issues.memory_error.total,gauge,,,,Total number of issues reporting that a page fault has occurred on a RethinkDB server and swap space is being used.,0,rethinkdb,Current issues memory error total +rethinkdb.current_issues.non_transitive_error.total,gauge,,,,"Total number of issues reporting that there are currently servers that cannot see every server in the cluster, which may cause table availability issues.",0,rethinkdb,Current issues non transitive error total +rethinkdb.current_issues.critical.total,gauge,,,,Total number of critical current issues.,0,rethinkdb,Current issues critical total +rethinkdb.current_issues.log_write_error.critical.total,gauge,,,,Total number of critical issues reporting that RethinkDB has failed to write to its log file.,0,rethinkdb,Current issues log write error critical total +rethinkdb.current_issues.server_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple servers have been assigned the same name.,0,rethinkdb,Current issues server name collision critical total +rethinkdb.current_issues.db_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple databases have been assigned the same name.,0,rethinkdb,Current issues db name collision critical total +rethinkdb.current_issues.table_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple 
tables in the same database have been assigned the same name.,0,rethinkdb,Current issues table name collision critical total +rethinkdb.current_issues.outdated_index.critical.total,gauge,,,,Total number of critical issues reporting that indexes built with an older version of RethinkDB needs to be rebuilt due to changes in the way ReQL handles indexing.,0,rethinkdb,Current issues outdated index critical total +rethinkdb.current_issues.table_availability.critical.total,gauge,,,,Total number of critical issues reporting that a table on the cluster is missing at least one replica.,0,rethinkdb,Current issues table availability critical total +rethinkdb.current_issues.memory_error.critical.total,gauge,,,,Total number of critical issues reporting that a page fault has occurred on a RethinkDB server and swap space is being used.,0,rethinkdb,Current issues memory error critical total +rethinkdb.current_issues.non_transitive_error.critical.total,gauge,,,,"Total number of critical issues reporting that there are currently servers that cannot see every server in the cluster, which may cause table availability issues.",0,rethinkdb,Current issues non transitive error critical total From 81ae6973ac17d8a2e5d95bbefe5fa96d7ea72c61 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 17:32:10 +0100 Subject: [PATCH 092/147] Fix wrong type of stats monotonic_count metrics --- rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 5995291d7388f..68a53dece5068 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -190,7 +190,7 @@ def collect_replica_statistics(engine, conn): } yield { - 'type': 'gauge', + 'type': 'monotonic_count', 'name': 'rethinkdb.stats.table_server.read_docs_total', 'value': 
query_engine['read_docs_total'], 'tags': tags, @@ -204,7 +204,7 @@ def collect_replica_statistics(engine, conn): } yield { - 'type': 'gauge', + 'type': 'monotonic_count', 'name': 'rethinkdb.stats.table_server.written_docs_total', 'value': query_engine['written_docs_total'], 'tags': tags, @@ -225,7 +225,7 @@ def collect_replica_statistics(engine, conn): } yield { - 'type': 'gauge', + 'type': 'monotonic_count', 'name': 'rethinkdb.stats.table_server.disk.read_bytes_total', 'value': storage_engine['disk']['read_bytes_total'], 'tags': tags, @@ -239,7 +239,7 @@ def collect_replica_statistics(engine, conn): } yield { - 'type': 'gauge', + 'type': 'monotonic_count', 'name': 'rethinkdb.stats.table_server.disk.written_bytes_total', 'value': storage_engine['disk']['written_bytes_total'], 'tags': tags, From c123be5b161851b3bd68a39504510cb604febeb2 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 17:37:04 +0100 Subject: [PATCH 093/147] Assert type of metrics --- rethinkdb/tests/assertions.py | 43 +++++----- rethinkdb/tests/common.py | 146 +++++++++++++++++++--------------- 2 files changed, 101 insertions(+), 88 deletions(-) diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index a274536dd985a..f521d4026e955 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -7,6 +7,7 @@ from .common import ( CLUSTER_STATISTICS_METRICS, + CONFIG_TOTALS_METRICS, CURRENT_ISSUES_METRICS, CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS, CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS, @@ -45,28 +46,26 @@ def assert_metrics(aggregator, disconnected_servers=None): def _assert_config_totals_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None - aggregator.assert_metric('rethinkdb.server.total', count=1, value=len(SERVERS) - len(disconnected_servers)) - aggregator.assert_metric('rethinkdb.database.total', count=1, value=1) - aggregator.assert_metric('rethinkdb.database.table.total', 
count=1, value=1, tags=['database:{}'.format(DATABASE)]) - aggregator.assert_metric( - 'rethinkdb.table.secondary_index.total', count=1, value=1, tags=['table:{}'.format(HEROES_TABLE)] - ) + for metric, typ, value, tags in CONFIG_TOTALS_METRICS: + if callable(value): + value = value(disconnected_servers) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags, value=value) def _assert_statistics_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None - for metric in CLUSTER_STATISTICS_METRICS: - aggregator.assert_metric(metric, count=1, tags=[]) + for metric, typ in CLUSTER_STATISTICS_METRICS: + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=[]) for server in SERVERS: tags = ['server:{}'.format(server)] + SERVER_TAGS[server] - for metric in SERVER_STATISTICS_METRICS: + for metric, typ in SERVER_STATISTICS_METRICS: count = 0 if server in disconnected_servers else 1 - aggregator.assert_metric(metric, count=count, tags=tags) + aggregator.assert_metric(metric, metric_type=typ, count=count, tags=tags) - for metric in TABLE_STATISTICS_METRICS: + for metric, typ in TABLE_STATISTICS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_metric(metric, count=1, tags=tags) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) for server in HEROES_TABLE_SERVERS: tags = [ @@ -75,7 +74,7 @@ def _assert_statistics_metrics(aggregator, disconnected_servers): 'server:{}'.format(server), ] + SERVER_TAGS[server] - for metric in REPLICA_STATISTICS_METRICS: + for metric, typ in REPLICA_STATISTICS_METRICS: if server in disconnected_servers: aggregator.assert_metric(metric, count=0, tags=tags) continue @@ -83,34 +82,34 @@ def _assert_statistics_metrics(aggregator, disconnected_servers): # Assumption: cluster is stable (not currently rebalancing), so only these two states can exist. 
state = 'waiting_for_primary' if HEROES_TABLE_PRIMARY_REPLICA in disconnected_servers else 'ready' state_tag = 'state:{}'.format(state) - aggregator.assert_metric(metric, count=1, tags=tags + [state_tag]) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags + [state_tag]) def _assert_table_status_metrics(aggregator): # type: (AggregatorStub) -> None - for metric in TABLE_STATUS_METRICS: + for metric, typ in TABLE_STATUS_METRICS: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) for shard in HEROES_TABLE_REPLICAS_BY_SHARD: tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] - for metric in TABLE_STATUS_SHARDS_METRICS: - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=1, tags=tags) + for metric, typ in TABLE_STATUS_SHARDS_METRICS: + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) def _assert_server_status_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None - for metric in SERVER_STATUS_METRICS: + for metric, typ in SERVER_STATUS_METRICS: for server in SERVERS: tags = ['server:{}'.format(server)] count = 0 if server in disconnected_servers else 1 - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=tags) + aggregator.assert_metric(metric, metric_type=typ, count=count, tags=tags) def _assert_current_issues_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None - for metric in CURRENT_ISSUES_METRICS: + for metric, typ in CURRENT_ISSUES_METRICS: if metric in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS: count = 1 elif disconnected_servers and metric in CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS: @@ -118,4 +117,4 @@ def _assert_current_issues_metrics(aggregator, 
disconnected_servers): else: count = 0 - aggregator.assert_metric(metric, metric_type=aggregator.GAUGE, count=count, tags=[]) + aggregator.assert_metric(metric, metric_type=typ, count=count, tags=[]) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 7dde29a4b7071..14f39815f1365 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -2,10 +2,11 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import os -from typing import Dict, List, Set +from typing import Callable, Dict, List, Set, Tuple, Union import pytest +from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.utils.common import get_docker_hostname from .types import ServerName @@ -85,49 +86,58 @@ # Metrics lists. CONFIG_TOTALS_METRICS = ( - 'rethinkdb.server.total', - 'rethinkdb.database.total', - 'rethinkdb.database.table.total', - 'rethinkdb.table.secondary_index.total', -) + ( + 'rethinkdb.server.total', + AggregatorStub.GAUGE, + lambda disconnected_servers: len(SERVERS) - len(disconnected_servers), + [], + ), + ('rethinkdb.database.total', AggregatorStub.GAUGE, 1, []), + ('rethinkdb.database.table.total', AggregatorStub.GAUGE, 1, ['database:{}'.format(DATABASE)]), + ('rethinkdb.table.secondary_index.total', AggregatorStub.GAUGE, 1, ['table:{}'.format(HEROES_TABLE)]), +) # type: Tuple[Tuple[str, int, Union[int, Callable[[set], int]], List[str]], ...] CLUSTER_STATISTICS_METRICS = ( - 'rethinkdb.stats.cluster.queries_per_sec', - 'rethinkdb.stats.cluster.read_docs_per_sec', - 'rethinkdb.stats.cluster.written_docs_per_sec', -) + ('rethinkdb.stats.cluster.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.written_docs_per_sec', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] 
SERVER_STATISTICS_METRICS = ( - 'rethinkdb.stats.server.queries_per_sec', - 'rethinkdb.stats.server.queries_total', - 'rethinkdb.stats.server.read_docs_per_sec', - 'rethinkdb.stats.server.read_docs_total', - 'rethinkdb.stats.server.written_docs_per_sec', - 'rethinkdb.stats.server.written_docs_total', - 'rethinkdb.stats.server.client_connections', - 'rethinkdb.stats.server.clients_active', # NOTE: sent, but not documented on the RethinkDB website. -) + ('rethinkdb.stats.server.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.queries_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.client_connections', AggregatorStub.GAUGE), + ( + # NOTE: submitted but not documented on the RethinkDB website. + 'rethinkdb.stats.server.clients_active', + AggregatorStub.GAUGE, + ), +) # type: Tuple[Tuple[str, int], ...] TABLE_STATISTICS_METRICS = ( - 'rethinkdb.stats.table.read_docs_per_sec', - 'rethinkdb.stats.table.written_docs_per_sec', -) + ('rethinkdb.stats.table.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.written_docs_per_sec', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] 
REPLICA_STATISTICS_METRICS = ( - 'rethinkdb.stats.table_server.read_docs_per_sec', - 'rethinkdb.stats.table_server.read_docs_total', - 'rethinkdb.stats.table_server.written_docs_per_sec', - 'rethinkdb.stats.table_server.written_docs_total', - 'rethinkdb.stats.table_server.cache.in_use_bytes', - 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', - 'rethinkdb.stats.table_server.disk.read_bytes_total', - 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', - 'rethinkdb.stats.table_server.disk.written_bytes_total', - 'rethinkdb.stats.table_server.disk.metadata_bytes', - 'rethinkdb.stats.table_server.disk.data_bytes', - 'rethinkdb.stats.table_server.disk.garbage_bytes', - 'rethinkdb.stats.table_server.disk.preallocated_bytes', -) + ('rethinkdb.stats.table_server.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.cache.in_use_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.read_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.disk.written_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.disk.metadata_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.data_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.garbage_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.preallocated_bytes', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] 
TABLE_STATUS_SERVICE_CHECKS = ( 'rethinkdb.table_status.ready_for_outdated_reads', @@ -136,42 +146,44 @@ 'rethinkdb.table_status.all_replicas_ready', ) -TABLE_STATUS_METRICS = ('rethinkdb.table_status.shards.total',) +TABLE_STATUS_METRICS = ( + ('rethinkdb.table_status.shards.total', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] TABLE_STATUS_SHARDS_METRICS = ( - 'rethinkdb.table_status.shards.replicas.total', - 'rethinkdb.table_status.shards.replicas.primary.total', -) + ('rethinkdb.table_status.shards.replicas.total', AggregatorStub.GAUGE), + ('rethinkdb.table_status.shards.replicas.primary.total', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] SERVER_STATUS_METRICS = ( - 'rethinkdb.server_status.network.time_connected', - 'rethinkdb.server_status.network.connected_to.total', - 'rethinkdb.server_status.network.connected_to.pending.total', - 'rethinkdb.server_status.process.time_started', -) + ('rethinkdb.server_status.network.time_connected', AggregatorStub.GAUGE), + ('rethinkdb.server_status.network.connected_to.total', AggregatorStub.GAUGE), + ('rethinkdb.server_status.network.connected_to.pending.total', AggregatorStub.GAUGE), + ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] # NOTE: jobs metrics are not listed here as they are covered by unit tests instead of integration tests. 
CURRENT_ISSUES_METRICS = ( - 'rethinkdb.current_issues.total', - 'rethinkdb.current_issues.critical.total', - 'rethinkdb.current_issues.log_write_error.total', - 'rethinkdb.current_issues.log_write_error.critical.total', - 'rethinkdb.current_issues.server_name_collision.total', - 'rethinkdb.current_issues.server_name_collision.critical.total', - 'rethinkdb.current_issues.db_name_collision.total', - 'rethinkdb.current_issues.db_name_collision.critical.total', - 'rethinkdb.current_issues.table_name_collision.total', - 'rethinkdb.current_issues.table_name_collision.critical.total', - 'rethinkdb.current_issues.outdated_index.total', - 'rethinkdb.current_issues.outdated_index.critical.total', - 'rethinkdb.current_issues.table_availability.total', - 'rethinkdb.current_issues.table_availability.critical.total', - 'rethinkdb.current_issues.memory_error.total', - 'rethinkdb.current_issues.memory_error.critical.total', - 'rethinkdb.current_issues.non_transitive_error.total', - 'rethinkdb.current_issues.non_transitive_error.critical.total', -) + ('rethinkdb.current_issues.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.log_write_error.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.log_write_error.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.server_name_collision.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.server_name_collision.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.db_name_collision.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.db_name_collision.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.table_name_collision.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.table_name_collision.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.outdated_index.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.outdated_index.critical.total', 
AggregatorStub.GAUGE), + ('rethinkdb.current_issues.table_availability.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.table_availability.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.memory_error.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.memory_error.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.non_transitive_error.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.non_transitive_error.critical.total', AggregatorStub.GAUGE), +) # type: Tuple[Tuple[str, int], ...] CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS = ( 'rethinkdb.current_issues.total', @@ -183,8 +195,10 @@ 'rethinkdb.current_issues.table_availability.critical.total', ) -assert set(CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS) -assert set(CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS) +assert set(name for name, typ in CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS) +assert set(name for name, typ in CURRENT_ISSUES_METRICS).issuperset( + CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS +) E2E_METRICS = ( From 6d448712685f5d4e9a2eb1db84995cce5ac0dcc4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 5 Mar 2020 17:57:16 +0100 Subject: [PATCH 094/147] Fix E2E --- rethinkdb/tests/common.py | 21 ++++++++++----------- rethinkdb/tests/test_e2e.py | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 14f39815f1365..17e0ec493a0f9 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -201,17 +201,16 @@ ) -E2E_METRICS = ( - CONFIG_TOTALS_METRICS - + CLUSTER_STATISTICS_METRICS - + SERVER_STATISTICS_METRICS - + TABLE_STATISTICS_METRICS - + REPLICA_STATISTICS_METRICS - + TABLE_STATUS_METRICS - + TABLE_STATUS_SHARDS_METRICS - + SERVER_STATUS_METRICS - + CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS -) +E2E_METRICS = [] # type: 
List[Tuple[str, int]] +E2E_METRICS += [(name, typ) for name, typ, _, _ in CONFIG_TOTALS_METRICS] +E2E_METRICS += CLUSTER_STATISTICS_METRICS +E2E_METRICS += SERVER_STATISTICS_METRICS +E2E_METRICS += TABLE_STATISTICS_METRICS +E2E_METRICS += REPLICA_STATISTICS_METRICS +E2E_METRICS += TABLE_STATUS_METRICS +E2E_METRICS += TABLE_STATUS_SHARDS_METRICS +E2E_METRICS += SERVER_STATUS_METRICS +E2E_METRICS += [(name, typ) for name, typ in CURRENT_ISSUES_METRICS if name in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS] # Docker Compose configuration. diff --git a/rethinkdb/tests/test_e2e.py b/rethinkdb/tests/test_e2e.py index c85980bf9111b..ccbc49f7b7d8a 100644 --- a/rethinkdb/tests/test_e2e.py +++ b/rethinkdb/tests/test_e2e.py @@ -16,7 +16,7 @@ def test_check_ok(dd_agent_check): # type: (Callable) -> None aggregator = dd_agent_check(rate=True) # type: AggregatorStub - for metric in E2E_METRICS: + for metric, _ in E2E_METRICS: aggregator.assert_metric(metric) aggregator.assert_all_metrics_covered() From 6e63922b842dce5086179d73c9b4c5541c2dfd34 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 6 Mar 2020 10:40:25 +0100 Subject: [PATCH 095/147] Improve robustness of test cluster setup --- rethinkdb/tests/cluster.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index ca37289125344..1e2ddec096ca7 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -38,20 +38,32 @@ def setup_cluster(): logger.debug('setup_cluster') with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: - conn.run(r.db_drop('test')) # Automatically created, but we don't use it and it would skew our metrics. + # A test DB is automatically created, but we don't use it and it would skew our metrics. + response = conn.run(r.db_drop('test')) + assert response['dbs_dropped'] == 1 # Cluster content. 
- conn.run(r.db_create(DATABASE)) - conn.run(r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG)) - conn.run(r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD)) + response = conn.run(r.db_create(DATABASE)) + assert response['dbs_created'] == 1 + response = conn.run(r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG)) + assert response['tables_created'] == 1 + response = conn.run(r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD)) + assert response['created'] == 1 + + response = conn.run(r.db(DATABASE).table(HEROES_TABLE).wait(timeout=1)) + assert response['ready'] == 1 # Users. # See: https://rethinkdb.com/docs/permissions-and-accounts/ - conn.run(r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD})) - conn.run(r.db('rethinkdb').grant(AGENT_USER, {'read': True})) - - conn.run(r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False})) - conn.run(r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True})) + response = conn.run(r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD})) + assert response['inserted'] == 1 + response = conn.run(r.db('rethinkdb').grant(AGENT_USER, {'read': True})) + assert response['granted'] == 1 + + response = conn.run(r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False})) + assert response['inserted'] == 1 + response = conn.run(r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True})) + assert response['granted'] == 1 # Simulate client activity. # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. 
From d526ec57d2aa28efd2efca9c922cb3ba17c929bc Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 6 Mar 2020 11:10:26 +0100 Subject: [PATCH 096/147] Drop dependency on mypy latest - for now --- .../datadog_checks/dev/plugin/tox.py | 4 +-- .../rethinkdb/metrics/system_jobs.py | 27 +++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/datadog_checks_dev/datadog_checks/dev/plugin/tox.py b/datadog_checks_dev/datadog_checks/dev/plugin/tox.py index 1e981ec6e152d..75a85b0db068b 100644 --- a/datadog_checks_dev/datadog_checks/dev/plugin/tox.py +++ b/datadog_checks_dev/datadog_checks/dev/plugin/tox.py @@ -77,9 +77,7 @@ def add_style_checker(config, sections, make_envconfig, reader): # Allow using multiple lines for enhanced readability in case of large amount of options/files to check. mypy_args = mypy_args.replace('\n', ' ') - # Allow using features from the latest development version (documented under the 'latest' tag). - mypy_latest = 'git+https://github.com/python/mypy.git@master' - dependencies.append(mypy_latest) + dependencies.append('mypy>=0.761') commands.append('mypy --config-file=../mypy.ini {}'.format(mypy_args)) sections[section] = { diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index 17d87adb0e6d9..cf367ce71245b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -2,11 +2,11 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import logging -from typing import Iterator +from typing import Iterator, cast from ..connections import Connection from ..queries import QueryEngine -from ..types import Metric +from ..types import BackfillInfo, IndexConstructionInfo, Metric logger = logging.getLogger(__name__) @@ -29,10 +29,14 @@ def collect_system_jobs(engine, conn): tags = ['server:{}'.format(server) for server in servers] if 
job['type'] == 'index_construction': - database = job['info']['db'] - table = job['info']['table'] - index = job['info']['index'] - progress = job['info']['progress'] + # NOTE: Using `cast()` is required until tagged unions are released in mypy stable. Until then, avoid using + # 'info' as a variable name in all cases (workaround for https://github.com/python/mypy/issues/6232). + # See: https://mypy.readthedocs.io/en/latest/literal_types.html#tagged-unions + index_construction_info = cast(IndexConstructionInfo, job['info']) + database = index_construction_info['db'] + table = index_construction_info['table'] + index = index_construction_info['index'] + progress = index_construction_info['progress'] index_construction_tags = tags + [ 'database:{}'.format(database), @@ -55,11 +59,12 @@ def collect_system_jobs(engine, conn): } elif job['type'] == 'backfill': - database = job['info']['db'] - destination_server = job['info']['destination_server'] - source_server = job['info']['source_server'] - table = job['info']['table'] - progress = job['info']['progress'] + backfill_info = cast(BackfillInfo, job['info']) + database = backfill_info['db'] + destination_server = backfill_info['destination_server'] + source_server = backfill_info['source_server'] + table = backfill_info['table'] + progress = backfill_info['progress'] backfill_tags = tags + [ 'database:{}'.format(database), From ca5c299ba875c393fcc366b73b4078ffee31738b Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 9 Mar 2020 14:13:45 +0100 Subject: [PATCH 097/147] Address docs review --- rethinkdb/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rethinkdb/README.md b/rethinkdb/README.md index a65d7badcefe5..2541ca7c562ea 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -16,7 +16,7 @@ The RethinkDB check is included in the [Datadog Agent][3] package. No additional ### Configuration -1. 
Recommended: add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. Use the following ReQL commands, referring to [Permissions and user accounts][4] for details: +1. Add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. Use the following ReQL commands, referring to [Permissions and user accounts][4] for details: ```python r.db('rethinkdb').table('users').insert({'id': 'datadog-agent', 'password': ''}) @@ -32,7 +32,7 @@ The RethinkDB check is included in the [Datadog Agent][3] package. No additional - host: localhost port: 28015 user: datadog-agent - password: + password: "" ``` 3. [Restart the Agent][7]. From 91cc30629ea6ad435d822bf855e9e001823b48f4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 13 Mar 2020 18:37:11 +0100 Subject: [PATCH 098/147] Move any `Config` smarts to a `Backend` interface, update tests --- .../datadog_checks/rethinkdb/backends.py | 95 +++++++++++++++++++ rethinkdb/datadog_checks/rethinkdb/config.py | 92 +++--------------- .../datadog_checks/rethinkdb/rethinkdb.py | 33 ++++--- rethinkdb/datadog_checks/rethinkdb/types.py | 3 +- rethinkdb/tests/test_rethinkdb.py | 37 ++++---- rethinkdb/tests/unit/test_config.py | 14 ++- 6 files changed, 156 insertions(+), 118 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/backends.py diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py new file mode 100644 index 0000000000000..f0c14a6b13417 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -0,0 +1,95 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import absolute_import + +from typing import Callable, Iterator, List + +import rethinkdb + +from .config import Config +from .connections import Connection, RethinkDBConnection +from .exceptions import CouldNotConnect +from .metrics.config import collect_config_totals +from .metrics.current_issues import collect_current_issues +from .metrics.statistics import ( + collect_cluster_statistics, + collect_replica_statistics, + collect_server_statistics, + collect_table_statistics, +) +from .metrics.statuses import collect_server_status, collect_table_status +from .metrics.system_jobs import collect_system_jobs +from .queries import QueryEngine +from .types import Metric +from .version import parse_version + + +class Backend(object): + """ + Base interface for high-level operations performed during a RethinkDB check. + """ + + def connect(self, config): + # type: (Config) -> Connection + raise NotImplementedError # pragma: no cover + + def collect_metrics(self, conn): + # type: (Connection) -> Iterator[Metric] + raise NotImplementedError # pragma: no cover + + def collect_connected_server_version(self, conn): + # type: (Connection) -> str + raise NotImplementedError # pragma: no cover + + +class DefaultBackend(Backend): + """ + A backend that uses the RethinkDB Python client library and the built-in metrics collection functions. 
+ """ + + collect_funcs = [ + collect_config_totals, + collect_cluster_statistics, + collect_server_statistics, + collect_table_statistics, + collect_replica_statistics, + collect_server_status, + collect_table_status, + collect_system_jobs, + collect_current_issues, + ] # type: List[Callable[[QueryEngine, Connection], Iterator[Metric]]] + + def __init__(self): + # type: () -> None + self._r = rethinkdb.r + self._query_engine = QueryEngine(r=self._r) + + def connect(self, config): + # type: (Config) -> Connection + host = config.host + port = config.port + user = config.user + password = config.password + ssl = {'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None + + try: + conn = self._r.connect(host=host, port=port, user=user, password=password, ssl=ssl) + except rethinkdb.errors.ReqlDriverError as exc: + raise CouldNotConnect(exc) + + return RethinkDBConnection(conn) + + def collect_metrics(self, conn): + # type: (Connection) -> Iterator[Metric] + for collect in self.collect_funcs: + for metric in collect(self._query_engine, conn): + yield metric + + def collect_connected_server_version(self, conn): + # type: (Connection) -> str + """ + Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
+ """ + version_string = self._query_engine.query_connected_server_version_string(conn) + return parse_version(version_string) diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index f8d186251be24..c391066dac992 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -3,27 +3,11 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import absolute_import -from typing import Callable, Iterator, List, Optional - -import rethinkdb +from typing import Optional from datadog_checks.base import ConfigurationError -from .connections import Connection, RethinkDBConnection -from .exceptions import CouldNotConnect -from .metrics.config import collect_config_totals -from .metrics.current_issues import collect_current_issues -from .metrics.statistics import ( - collect_cluster_statistics, - collect_replica_statistics, - collect_server_statistics, - collect_table_statistics, -) -from .metrics.statuses import collect_server_status, collect_table_status -from .metrics.system_jobs import collect_system_jobs -from .queries import QueryEngine -from .types import Instance, Metric -from .version import parse_version +from .types import Instance class Config(object): @@ -50,66 +34,18 @@ def __init__(self, instance): if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) - self._host = host # type: str - self._port = port # type: int - self._user = user # type: Optional[str] - self._password = password # type: Optional[str] - self._tls_ca_cert = tls_ca_cert # type: Optional[str] - - self._r = rethinkdb.r - self._query_engine = QueryEngine(r=self._r) - - self._collect_funcs = [ - collect_config_totals, - collect_cluster_statistics, - collect_server_statistics, - collect_table_statistics, - collect_replica_statistics, - collect_server_status, - collect_table_status, - collect_system_jobs, - collect_current_issues, - ] # 
type: List[Callable[[QueryEngine, Connection], Iterator[Metric]]] - - @property - def host(self): - # type: () -> str - return self._host - - @property - def port(self): - # type: () -> int - return self._port - - def connect(self): - # type: () -> Connection - host = self._host - port = self._port - user = self._user - password = self._password - ssl = {'ca_certs': self._tls_ca_cert} if self._tls_ca_cert is not None else None - - try: - conn = self._r.connect(host=host, port=port, user=user, password=password, ssl=ssl) - except rethinkdb.errors.ReqlDriverError as exc: - raise CouldNotConnect(exc) - - return RethinkDBConnection(conn) - - def collect_metrics(self, conn): - # type: (Connection) -> Iterator[Metric] - for collect in self._collect_funcs: - for metric in collect(self._query_engine, conn): - yield metric - - def collect_connected_server_version(self, conn): - # type: (Connection) -> str - """ - Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
- """ - version_string = self._query_engine.query_connected_server_version_string(conn) - return parse_version(version_string) + self.host = host # type: str + self.port = port # type: int + self.user = user # type: Optional[str] + self.password = password # type: Optional[str] + self.tls_ca_cert = tls_ca_cert # type: Optional[str] def __repr__(self): # type: () -> str - return 'Config(host={host!r}, port={port!r})'.format(host=self._host, port=self._port) + return ( + 'Config(host={host!r}, ' + 'port={port!r}, ' + 'user={user!r}, ' + "password='*****', " + 'tls_ca_cert={tls_ca_cert!r})' + ).format(host=self.host, port=self.port, user=self.user, tls_ca_cert=self.tls_ca_cert) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py index 55fd67499635a..ca5e1ae10bc7d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py @@ -6,10 +6,12 @@ from __future__ import absolute_import from contextlib import contextmanager -from typing import Any, Callable, Iterator, List +from typing import Any, Callable, Iterator, List, cast from datadog_checks.base import AgentCheck +from datadog_checks.base.types import ServiceCheckStatus +from .backends import Backend, DefaultBackend from .config import Config from .connections import Connection from .exceptions import CouldNotConnect, VersionCollectionFailed @@ -26,15 +28,16 @@ class RethinkDBCheck(AgentCheck): def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) - self.config = Config(self.instance) + self.config = Config(cast(Instance, self.instance)) + self.backend = DefaultBackend() # type: Backend @contextmanager - def connect_submitting_service_checks(self, config): - # type: (Config) -> Iterator[Connection] + def connect_submitting_service_checks(self): + # type: () -> Iterator[Connection] tags = [] # type: List[str] try: - with config.connect() as 
conn: + with self.backend.connect(self.config) as conn: server = conn.server() connection_tags = [ @@ -67,28 +70,28 @@ def submit_metric(self, metric): self.log.debug('submit_metric metric=%r', metric) if metric['type'] == 'service_check': - self.service_check(metric['name'], metric['value'], tags=metric['tags']) + value = cast(ServiceCheckStatus, metric['value']) + self.service_check(metric['name'], value, tags=metric['tags']) else: submit = getattr(self, metric['type']) # type: Callable submit(metric['name'], value=metric['value'], tags=metric['tags']) - def submit_version_metadata(self, config, conn): - # type: (Config, Connection) -> None + def submit_version_metadata(self, conn): + # type: (Connection) -> None try: - version = config.collect_connected_server_version(conn) + version = self.backend.collect_connected_server_version(conn) except VersionCollectionFailed as exc: self.log.error(exc) else: self.set_metadata('version', version) def check(self, instance): - # type: (Instance) -> None - config = self.config - self.log.debug('check config=%r', config) + # type: (Any) -> None + self.log.debug('check config=%r', self.config) - with self.connect_submitting_service_checks(config) as conn: - for metric in config.collect_metrics(conn): + with self.connect_submitting_service_checks() as conn: + for metric in self.backend.collect_metrics(conn): self.submit_metric(metric) if self.is_metadata_collection_enabled(): - self.submit_version_metadata(config, conn) + self.submit_version_metadata(conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 8b3d4baa19c02..5f72e95cdf994 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -9,7 +9,6 @@ # Check interfaces. -# A lightweight shim to decouple metric collection from metric submission. 
Metric = TypedDict( 'Metric', {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, @@ -134,7 +133,7 @@ ) ServerProcess = TypedDict( - 'ServerProcess', {'argv': List[str], 'cache_size_mb': int, 'pid': int, 'time_started': dt.datetime, 'version': str} + 'ServerProcess', {'argv': List[str], 'cache_size_mb': int, 'pid': int, 'time_started': dt.datetime, 'version': str}, ) ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 976d24b048ef2..975ff5fed9ded 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -2,14 +2,14 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import copy -from typing import Any, Iterator, List +from typing import Iterator, List import pytest from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb.config import Config +from datadog_checks.rethinkdb.backends import DefaultBackend from datadog_checks.rethinkdb.connections import Connection from datadog_checks.rethinkdb.exceptions import CouldNotConnect from datadog_checks.rethinkdb.types import Instance, Metric @@ -29,7 +29,6 @@ TLS_SERVER, ) from .types import ServerName -from .utils import MockConnection def _get_connect_service_check_tags(server='server0'): @@ -166,13 +165,14 @@ def test_connected_but_check_failed_unexpectedly(aggregator, instance): class Failure(Exception): pass - def collect_and_fail(): - # type: () -> Iterator[Metric] - yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} - raise Failure + class MockBackend(DefaultBackend): + def collect_metrics(self, conn): + # type: (Connection) -> Iterator[Metric] + yield 
{'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} + raise Failure check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.config._collect_funcs = [lambda engine, conn: collect_and_fail()] + check.backend = MockBackend() with pytest.raises(Failure): check.check(instance) @@ -208,29 +208,24 @@ def test_version_metadata(instance, datadog_agent): @pytest.mark.unit @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) -def test_version_metadata_failure(monkeypatch, instance, datadog_agent, malformed_version_string): - # type: (Any, Instance, DatadogAgentStub, str) -> None +def test_version_metadata_failure(instance, aggregator, datadog_agent, malformed_version_string): + # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None """ Verify that check still runs to completion if version provided by RethinkDB is malformed. """ - class FakeConfig(Config): - def __init__(self, *args, **kwargs): - # type: (*Any, **Any) -> None - super(FakeConfig, self).__init__(*args, **kwargs) - self._collect_funcs = [] # Skip metrics as we only provide a row for server version. 
- - def connect(self): - # type: () -> Connection - server_status = {'process': {'version': malformed_version_string}} - return MockConnection(rows=lambda: server_status) + class MockBackend(DefaultBackend): + def collect_connected_server_version(self, conn): + # type: (Connection) -> str + return malformed_version_string check_id = 'test' check = RethinkDBCheck('rethinkdb', {}, [instance]) check.check_id = check_id - check.config = FakeConfig(instance) + check.backend = MockBackend() check.check(instance) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) datadog_agent.assert_metadata(check_id, {}) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index d49a1bce61fd0..765d7eff339b6 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -18,21 +18,31 @@ def test_default_config(): config = Config(instance) assert config.host == 'localhost' assert config.port == 28015 + assert config.user is None + assert config.tls_ca_cert is None def test_config(): # type: () -> None - instance = {'host': '192.168.121.1', 'port': 28016} # type: Instance + instance = { + 'host': '192.168.121.1', + 'port': 28016, + 'username': 'datadog-agent', + 'password': 's3kr3t', + 'tls_ca_cert': '/path/to/client.cert', + } # type: Instance config = Config(instance) assert config.host == '192.168.121.1' assert config.port == 28016 + assert config.user == 'datadog-agent' + assert config.tls_ca_cert == '/path/to/client.cert' def test_config_repr(): # type: () -> None instance = {} # type: Instance config = Config(instance) - assert repr(config) == "Config(host='localhost', port=28015)" + assert repr(config) == "Config(host='localhost', port=28015, user=None, password='*****', tls_ca_cert=None)" @pytest.mark.parametrize('host', [42, True, object()]) From 467e12680f1b719797edb19f7c58a3bce4340e88 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 13 Mar 2020 19:16:10 +0100 Subject: [PATCH 
099/147] Wait for all server log patterns --- rethinkdb/tests/conftest.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 0249fea729021..74488158f452a 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -6,6 +6,7 @@ import pytest from datadog_checks.dev import docker_run +from datadog_checks.dev.conditions import CheckDockerLogs from datadog_checks.rethinkdb.types import Instance from .cluster import setup_cluster @@ -35,12 +36,13 @@ def instance(): @pytest.fixture(scope='session') def dd_environment(instance): # type: (Instance) -> Iterator - conditions = [setup_cluster] - log_patterns = [r'Server ready, "{}".*'.format(BOOTSTRAP_SERVER)] log_patterns += [r'Connected to server "{}".*'.format(server) for server in SERVERS - {BOOTSTRAP_SERVER}] log_patterns += [r'Connected to proxy.*'] + wait_servers_ready = CheckDockerLogs(COMPOSE_FILE, patterns=log_patterns, matches='all') + + conditions = [wait_servers_ready, setup_cluster] - with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS, log_patterns=log_patterns): + with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS): config = {'instances': [instance]} yield config From 54a50f50c75252f6570afbc4085a9c5bbf12cec4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 16 Mar 2020 11:27:36 +0100 Subject: [PATCH 100/147] Update note about `tty: true` --- rethinkdb/tests/compose/docker-compose.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rethinkdb/tests/compose/docker-compose.yaml b/rethinkdb/tests/compose/docker-compose.yaml index 73c2fd595f2ab..fcc48c8b644e4 100644 --- a/rethinkdb/tests/compose/docker-compose.yaml +++ b/rethinkdb/tests/compose/docker-compose.yaml @@ -4,7 +4,9 @@ services: # 3-node RethinkDB cluster with 1 proxy node. rethinkdb-server0: - tty: true # Required otherwise RethinkDB won't output any logs. 
+ # NOTE: `tty` is required for RethinkDB 2.4.0, otherwise `docker logs` won't see logs. + # This is due to an issue with I/O buffering on this version, see: https://github.com/rethinkdb/rethinkdb/issues/6819 + tty: true image: ${RETHINKDB_IMAGE} container_name: rethinkdb-server0 command: rethinkdb --bind all --server-name server0 --server-tag us From e23a2a42783a2214852d62392efe6ae0f95ae810 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 16 Mar 2020 11:37:48 +0100 Subject: [PATCH 101/147] Address feedback from @ofek --- rethinkdb/assets/configuration/spec.yaml | 4 +- .../datadog_checks/rethinkdb/__init__.py | 2 +- .../datadog_checks/rethinkdb/backends.py | 16 +++--- .../rethinkdb/{rethinkdb.py => check.py} | 4 -- .../datadog_checks/rethinkdb/connections.py | 2 - .../rethinkdb/data/conf.yaml.example | 52 +++++++++---------- .../rethinkdb/metrics/statistics.py | 6 ++- rethinkdb/datadog_checks/rethinkdb/queries.py | 2 - rethinkdb/tests/common.py | 5 +- rethinkdb/tests/conftest.py | 5 +- 10 files changed, 44 insertions(+), 54 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/{rethinkdb.py => check.py} (95%) diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml index 35816ff656a1b..3dc0a163b8df4 100644 --- a/rethinkdb/assets/configuration/spec.yaml +++ b/rethinkdb/assets/configuration/spec.yaml @@ -10,8 +10,6 @@ files: - template: instances options: - - template: instances/default - - name: host required: false description: Host of the RethinkDB server. 
@@ -45,3 +43,5 @@ files: See also: https://rethinkdb.com/docs/security/#using-tls value: type: string + + - template: instances/default diff --git a/rethinkdb/datadog_checks/rethinkdb/__init__.py b/rethinkdb/datadog_checks/rethinkdb/__init__.py index bb253cfad3fda..1f5edac2932e8 100644 --- a/rethinkdb/datadog_checks/rethinkdb/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/__init__.py @@ -2,6 +2,6 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from .__about__ import __version__ -from .rethinkdb import RethinkDBCheck +from .check import RethinkDBCheck __all__ = ['__version__', 'RethinkDBCheck'] diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index f0c14a6b13417..1a06790bdc4cd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -1,8 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - from typing import Callable, Iterator, List import rethinkdb @@ -67,14 +65,14 @@ def __init__(self): def connect(self, config): # type: (Config) -> Connection - host = config.host - port = config.port - user = config.user - password = config.password - ssl = {'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None - try: - conn = self._r.connect(host=host, port=port, user=user, password=password, ssl=ssl) + conn = self._r.connect( + host=config.host, + port=config.port, + user=config.user, + password=config.password, + ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None, + ) except rethinkdb.errors.ReqlDriverError as exc: raise CouldNotConnect(exc) diff --git a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py b/rethinkdb/datadog_checks/rethinkdb/check.py similarity index 95% rename from rethinkdb/datadog_checks/rethinkdb/rethinkdb.py rename to 
rethinkdb/datadog_checks/rethinkdb/check.py index ca5e1ae10bc7d..8b39c0d6c721e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/rethinkdb.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -1,10 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) - -# Required for `import rethinkdb` to correctly import the client package (instead of this package) on Python 2. -from __future__ import absolute_import - from contextlib import contextmanager from typing import Any, Callable, Iterator, List, cast diff --git a/rethinkdb/datadog_checks/rethinkdb/connections.py b/rethinkdb/datadog_checks/rethinkdb/connections.py index 41a2a40956e37..94a05c393b11e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/connections.py +++ b/rethinkdb/datadog_checks/rethinkdb/connections.py @@ -4,8 +4,6 @@ """ RethinkDB connection interface and implementations. """ -from __future__ import absolute_import - from typing import Any, TypedDict import rethinkdb diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example index d4a623fbee20a..658f54ef38152 100644 --- a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -14,6 +14,32 @@ init_config: instances: - + ## @param host - string - optional - default: localhost + ## Host of the RethinkDB server. + # + # host: localhost + + ## @param port - integer - optional - default: 28015 + ## Driver port of the RethinkDB server. + # + # port: 28015 + + ## @param username - string - optional + ## The user account to connect as. + # + # username: + + ## @param password - string - optional + ## The password for the user account to connect as. + # + # password: + + ## @param tls_ca_cert - string - optional + ## Path to a TLS client certificate to use when connecting to the RethinkDB server. 
+ ## See also: https://rethinkdb.com/docs/security/#using-tls + # + # tls_ca_cert: + ## @param tags - list of strings - optional ## A list of tags to attach to every metric and service check emitted by this instance. ## @@ -42,29 +68,3 @@ instances: ## This is useful for cluster-level checks. # # empty_default_hostname: false - - ## @param host - string - optional - default: localhost - ## Host of the RethinkDB server. - # - # host: localhost - - ## @param port - integer - optional - default: 28015 - ## Driver port of the RethinkDB server. - # - # port: 28015 - - ## @param username - string - optional - ## The user account to connect as. - # - # username: - - ## @param password - string - optional - ## The password for the user account to connect as. - # - # password: - - ## @param tls_ca_cert - string - optional - ## Path to a TLS client certificate to use when connecting to the RethinkDB server. - ## See also: https://rethinkdb.com/docs/security/#using-tls - # - # tls_ca_cert: diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 68a53dece5068..05019826cf13a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -63,7 +63,8 @@ def collect_server_statistics(engine, conn): server_tags = server['tags'] query_engine = stats['query_engine'] - tags = ['server:{}'.format(name)] + server_tags + tags = ['server:{}'.format(name)] + tags.extend(server_tags) yield { 'type': 'gauge', @@ -180,7 +181,8 @@ def collect_replica_statistics(engine, conn): 'database:{}'.format(database), 'server:{}'.format(server_name), 'state:{}'.format(state), - ] + server_tags + ] + tags.extend(server_tags) yield { 'type': 'gauge', diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index 5a9ac317863a1..04dc92a42a237 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ 
b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -1,8 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - from typing import Any, Iterator, Mapping, Tuple import rethinkdb diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 17e0ec493a0f9..ba9354625cef3 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -7,12 +7,11 @@ import pytest from datadog_checks.base.stubs.aggregator import AggregatorStub -from datadog_checks.utils.common import get_docker_hostname +from datadog_checks.dev import get_docker_hostname, get_here from .types import ServerName -HERE = os.path.dirname(os.path.abspath(__file__)) -ROOT = os.path.dirname(os.path.dirname(HERE)) +HERE = get_here() CHECK_NAME = 'rethinkdb' diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 74488158f452a..a679e60d04c33 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -36,9 +36,8 @@ def instance(): @pytest.fixture(scope='session') def dd_environment(instance): # type: (Instance) -> Iterator - log_patterns = [r'Server ready, "{}".*'.format(BOOTSTRAP_SERVER)] - log_patterns += [r'Connected to server "{}".*'.format(server) for server in SERVERS - {BOOTSTRAP_SERVER}] - log_patterns += [r'Connected to proxy.*'] + log_patterns = [r'Server ready, "{}".*'.format(BOOTSTRAP_SERVER), r'Connected to proxy.*'] + log_patterns.extend(r'Connected to server "{}".*'.format(server) for server in SERVERS - {BOOTSTRAP_SERVER}) wait_servers_ready = CheckDockerLogs(COMPOSE_FILE, patterns=log_patterns, matches='all') conditions = [wait_servers_ready, setup_cluster] From b4a1c928f96106363f5bd4e01c725693f6596d56 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Mon, 16 Mar 2020 11:53:30 +0100 Subject: [PATCH 102/147] Tag current issues metrics by `issue_type` --- .../rethinkdb/metrics/current_issues.py | 22 +--- 
rethinkdb/datadog_checks/rethinkdb/queries.py | 13 +-- rethinkdb/datadog_checks/rethinkdb/types.py | 8 +- rethinkdb/metadata.csv | 104 ++++++++---------- rethinkdb/tests/assertions.py | 15 +-- rethinkdb/tests/common.py | 55 ++------- 6 files changed, 72 insertions(+), 145 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index ec37eba1f2095..8d1cb9ae05658 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -23,32 +23,18 @@ def collect_current_issues(engine, conn): totals = engine.query_current_issues_totals(conn) logger.debug('current_issues totals=%r', totals) - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.total', - 'value': totals['issues'], - 'tags': [], - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.critical.total', - 'value': totals['critical_issues'], - 'tags': [], - } - for issue_type, total in totals['issues_by_type'].items(): yield { 'type': 'gauge', - 'name': 'rethinkdb.current_issues.{issue_type}.total'.format(issue_type=issue_type), + 'name': 'rethinkdb.current_issues.total', 'value': total, - 'tags': [], + 'tags': ['issue_type:{}'.format(issue_type)], } for issue_type, total in totals['critical_issues_by_type'].items(): yield { 'type': 'gauge', - 'name': 'rethinkdb.current_issues.{issue_type}.critical.total'.format(issue_type=issue_type), + 'name': 'rethinkdb.current_issues.critical.total', 'value': total, - 'tags': [], + 'tags': ['issue_type:{}'.format(issue_type)], } diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index 04dc92a42a237..8b8cfa9b98243 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -219,19 +219,16 @@ def query_current_issues_totals(self, conn): r = self._r current_issues = 
r.db('rethinkdb').table('current_issues').pluck('type', 'critical') - critical_current_issues = current_issues.filter(r.row['critical']) # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw # expressions yet. See: https://github.com/rethinkdb/rethinkdb/issues/2067 issues_by_type = conn.run(current_issues.group('type').count()) # type: Mapping[str, int] - critical_issues_by_type = conn.run(critical_current_issues.group('type').count()) # type: Mapping[str, int] + critical_issues_by_type = conn.run( + current_issues.filter(r.row['critical']).group('type').count() + ) # type: Mapping[str, int] - totals = { - 'issues': current_issues.count(), - 'critical_issues': critical_current_issues.count(), + return { 'issues_by_type': issues_by_type, 'critical_issues_by_type': critical_issues_by_type, - } # type: CurrentIssuesTotals # Enforce keys to match. - - return conn.run(r.expr(totals)) + } diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 5f72e95cdf994..aa6b1b3026d2d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -176,13 +176,7 @@ # System current issues. 
CurrentIssuesTotals = TypedDict( - 'CurrentIssuesTotals', - { - 'issues': int, - 'critical_issues': int, - 'issues_by_type': Mapping[str, int], - 'critical_issues_by_type': Mapping[str, int], - }, + 'CurrentIssuesTotals', {'issues_by_type': Mapping[str, int], 'critical_issues_by_type': Mapping[str, int]}, ) diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv index 2eeca80ba8894..011659d32d496 100644 --- a/rethinkdb/metadata.csv +++ b/rethinkdb/metadata.csv @@ -1,60 +1,44 @@ -metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name -rethinkdb.server.total,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Server total -rethinkdb.database.total,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Database total -rethinkdb.database.table.total,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Database table total -rethinkdb.table.secondary_index.total,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Table secondary index total -rethinkdb.stats.cluster.queries_per_sec,gauge,,query,,Number of queries executed in a cluster per second.,0,rethinkdb,Stats cluster queries per sec -rethinkdb.stats.cluster.read_docs_per_sec,gauge,,document,,Number of documents read in a cluster per second.,0,rethinkdb,Stats cluster read docs per sec -rethinkdb.stats.cluster.written_docs_per_sec,gauge,,document,,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster written docs per sec -rethinkdb.stats.server.queries_per_sec,gauge,,query,,Number of queries executed on a server per second.,0,rethinkdb,Stats server queries per sec -rethinkdb.stats.server.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server queries total -rethinkdb.stats.server.read_docs_per_sec,gauge,,document,,Number of documents read from a server per second.,0,rethinkdb,Stats server read docs per sec 
-rethinkdb.stats.server.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server read docs total -rethinkdb.stats.server.written_docs_per_sec,gauge,,document,,Number of documents written to a server per second.,0,rethinkdb,Stats server written docs per sec -rethinkdb.stats.server.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server written docs total -rethinkdb.stats.server.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server client connections -rethinkdb.stats.server.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Stats server clients active -rethinkdb.stats.table.read_docs_per_sec,gauge,,document,,Number of documents read from a table per second.,0,rethinkdb,Stats table read docs per sec -rethinkdb.stats.table.written_docs_per_sec,gauge,,document,,Number of documents written to a table per second.,0,rethinkdb,Stats table written docs per sec -rethinkdb.stats.table_server.read_docs_per_sec,gauge,,document,,Number of documents read from a replica per second.,0,rethinkdb,Stats table server read docs per sec -rethinkdb.stats.table_server.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Stats table server read docs total -rethinkdb.stats.table_server.written_docs_per_sec,gauge,,document,,Number of documents written to a replica per second.,0,rethinkdb,Stats table server written docs per sec -rethinkdb.stats.table_server.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server written docs total -rethinkdb.stats.table_server.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server cache in use bytes -rethinkdb.stats.table_server.disk.read_bytes_per_sec,gauge,,byte,,Number of bytes read from the disk 
of a replica per second.,0,rethinkdb,Stats table server disk read bytes per sec -rethinkdb.stats.table_server.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server disk read bytes total -rethinkdb.stats.table_server.disk.written_bytes_per_sec,gauge,,byte,,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats table server disk written bytes per sec -rethinkdb.stats.table_server.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server disk written bytes total -rethinkdb.stats.table_server.disk.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server disk metadata bytes -rethinkdb.stats.table_server.disk.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server disk data bytes -rethinkdb.stats.table_server.disk.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server disk garbage bytes -rethinkdb.stats.table_server.disk.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server disk preallocated bytes -rethinkdb.table_status.shards.total,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards total -rethinkdb.table_status.shards.replicas.total,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas total -rethinkdb.table_status.shards.replicas.primary.total,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards replicas primary total -rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected -rethinkdb.server_status.network.connected_to.total,gauge,,node,,Number of other RethinkDB 
servers a server is currently connected to.,0,rethinkdb,Server status network connected to total -rethinkdb.server_status.network.connected_to.pending.total,gauge,,node,,Number of other RethinkDB servers a server knows about but is not currently connected to.,0,rethinkdb,Server status network connected to pending total -rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started -rethinkdb.jobs.index_construction.duration,gauge,,second,,Duration of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction duration -rethinkdb.jobs.index_construction.progress,gauge,,percent,,Progress of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction progress -rethinkdb.jobs.backfill.duration,gauge,,second,,Duration of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill duration -rethinkdb.jobs.backfill.progress,gauge,,percent,,Progress of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill progress -rethinkdb.current_issues.total,gauge,,,,Total number of current issues.,0,rethinkdb,Current issues total -rethinkdb.current_issues.log_write_error.total,gauge,,,,Total number of issues reporting that RethinkDB has failed to write to its log file.,0,rethinkdb,Current issues log write error total -rethinkdb.current_issues.server_name_collision.total,gauge,,,,Total number of issues reporting that multiple servers have been assigned the same name.,0,rethinkdb,Current issues server name collision total -rethinkdb.current_issues.db_name_collision.total,gauge,,,,Total number of issues reporting that multiple databases have been assigned the same name.,0,rethinkdb,Current issues db name collision total -rethinkdb.current_issues.table_name_collision.total,gauge,,,,Total number of issues reporting that multiple tables in the 
same database have been assigned the same name.,0,rethinkdb,Current issues table name collision total -rethinkdb.current_issues.outdated_index.total,gauge,,,,Total number of issues reporting that indexes built with an older version of RethinkDB needs to be rebuilt due to changes in the way ReQL handles indexing.,0,rethinkdb,Current issues outdated index total -rethinkdb.current_issues.table_availability.total,gauge,,,,Total number of issues reporting that a table on the cluster is missing at least one replica.,0,rethinkdb,Current issues table availability total -rethinkdb.current_issues.memory_error.total,gauge,,,,Total number of issues reporting that a page fault has occurred on a RethinkDB server and swap space is being used.,0,rethinkdb,Current issues memory error total -rethinkdb.current_issues.non_transitive_error.total,gauge,,,,"Total number of issues reporting that there are currently servers that cannot see every server in the cluster, which may cause table availability issues.",0,rethinkdb,Current issues non transitive error total -rethinkdb.current_issues.critical.total,gauge,,,,Total number of critical current issues.,0,rethinkdb,Current issues critical total -rethinkdb.current_issues.log_write_error.critical.total,gauge,,,,Total number of critical issues reporting that RethinkDB has failed to write to its log file.,0,rethinkdb,Current issues log write error critical total -rethinkdb.current_issues.server_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple servers have been assigned the same name.,0,rethinkdb,Current issues server name collision critical total -rethinkdb.current_issues.db_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple databases have been assigned the same name.,0,rethinkdb,Current issues db name collision critical total -rethinkdb.current_issues.table_name_collision.critical.total,gauge,,,,Total number of critical issues reporting that multiple 
tables in the same database have been assigned the same name.,0,rethinkdb,Current issues table name collision critical total -rethinkdb.current_issues.outdated_index.critical.total,gauge,,,,Total number of critical issues reporting that indexes built with an older version of RethinkDB needs to be rebuilt due to changes in the way ReQL handles indexing.,0,rethinkdb,Current issues outdated index critical total -rethinkdb.current_issues.table_availability.critical.total,gauge,,,,Total number of critical issues reporting that a table on the cluster is missing at least one replica.,0,rethinkdb,Current issues table availability critical total -rethinkdb.current_issues.memory_error.critical.total,gauge,,,,Total number of critical issues reporting that a page fault has occurred on a RethinkDB server and swap space is being used.,0,rethinkdb,Current issues memory error critical total -rethinkdb.current_issues.non_transitive_error.critical.total,gauge,,,,"Total number of critical issues reporting that there are currently servers that cannot see every server in the cluster, which may cause table availability issues.",0,rethinkdb,Current issues non transitive error critical total +metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name +rethinkdb.server.total,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Server total +rethinkdb.database.total,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Database total +rethinkdb.database.table.total,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Database table total +rethinkdb.table.secondary_index.total,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Table secondary index total +rethinkdb.stats.cluster.queries_per_sec,gauge,,query,,Number of queries executed in a cluster per second.,0,rethinkdb,Stats cluster queries per sec +rethinkdb.stats.cluster.read_docs_per_sec,gauge,,document,,Number of documents read in a cluster 
per second.,0,rethinkdb,Stats cluster read docs per sec +rethinkdb.stats.cluster.written_docs_per_sec,gauge,,document,,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster written docs per sec +rethinkdb.stats.server.queries_per_sec,gauge,,query,,Number of queries executed on a server per second.,0,rethinkdb,Stats server queries per sec +rethinkdb.stats.server.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server queries total +rethinkdb.stats.server.read_docs_per_sec,gauge,,document,,Number of documents read from a server per second.,0,rethinkdb,Stats server read docs per sec +rethinkdb.stats.server.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server read docs total +rethinkdb.stats.server.written_docs_per_sec,gauge,,document,,Number of documents written to a server per second.,0,rethinkdb,Stats server written docs per sec +rethinkdb.stats.server.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server written docs total +rethinkdb.stats.server.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server client connections +rethinkdb.stats.server.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Stats server clients active +rethinkdb.stats.table.read_docs_per_sec,gauge,,document,,Number of documents read from a table per second.,0,rethinkdb,Stats table read docs per sec +rethinkdb.stats.table.written_docs_per_sec,gauge,,document,,Number of documents written to a table per second.,0,rethinkdb,Stats table written docs per sec +rethinkdb.stats.table_server.read_docs_per_sec,gauge,,document,,Number of documents read from a replica per second.,0,rethinkdb,Stats table server read docs per sec +rethinkdb.stats.table_server.read_docs_total,count,,document,,Total number of documents read from a 
replica.,0,rethinkdb,Stats table server read docs total +rethinkdb.stats.table_server.written_docs_per_sec,gauge,,document,,Number of documents written to a replica per second.,0,rethinkdb,Stats table server written docs per sec +rethinkdb.stats.table_server.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server written docs total +rethinkdb.stats.table_server.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server cache in use bytes +rethinkdb.stats.table_server.disk.read_bytes_per_sec,gauge,,byte,,Number of bytes read from the disk of a replica per second.,0,rethinkdb,Stats table server disk read bytes per sec +rethinkdb.stats.table_server.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server disk read bytes total +rethinkdb.stats.table_server.disk.written_bytes_per_sec,gauge,,byte,,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats table server disk written bytes per sec +rethinkdb.stats.table_server.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server disk written bytes total +rethinkdb.stats.table_server.disk.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server disk metadata bytes +rethinkdb.stats.table_server.disk.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server disk data bytes +rethinkdb.stats.table_server.disk.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server disk garbage bytes +rethinkdb.stats.table_server.disk.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server disk preallocated bytes 
+rethinkdb.table_status.shards.total,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards total +rethinkdb.table_status.shards.replicas.total,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas total +rethinkdb.table_status.shards.replicas.primary.total,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards replicas primary total +rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected +rethinkdb.server_status.network.connected_to.total,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server status network connected to total +rethinkdb.server_status.network.connected_to.pending.total,gauge,,node,,Number of other RethinkDB servers a server knows about but is not currently connected to.,0,rethinkdb,Server status network connected to pending total +rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started +rethinkdb.jobs.index_construction.duration,gauge,,second,,Duration of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction duration +rethinkdb.jobs.index_construction.progress,gauge,,percent,,Progress of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction progress +rethinkdb.jobs.backfill.duration,gauge,,second,,Duration of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill duration +rethinkdb.jobs.backfill.progress,gauge,,percent,,Progress of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill progress +rethinkdb.current_issues.total,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Current 
issues total +rethinkdb.current_issues.critical.total,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Current issues critical total diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index f521d4026e955..c980d9bbaecce 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -8,9 +8,8 @@ from .common import ( CLUSTER_STATISTICS_METRICS, CONFIG_TOTALS_METRICS, + CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS, CURRENT_ISSUES_METRICS, - CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS, - CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS, DATABASE, HEROES_TABLE, HEROES_TABLE_PRIMARY_REPLICA, @@ -110,11 +109,9 @@ def _assert_server_status_metrics(aggregator, disconnected_servers): def _assert_current_issues_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None for metric, typ in CURRENT_ISSUES_METRICS: - if metric in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS: - count = 1 - elif disconnected_servers and metric in CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS: - count = 1 + if disconnected_servers: + for issue_type in CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS: + tags = ['issue_type:{}'.format(issue_type)] + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) else: - count = 0 - - aggregator.assert_metric(metric, metric_type=typ, count=count, tags=[]) + aggregator.assert_metric(metric, metric_type=typ, count=0) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index ba9354625cef3..037f7508b6c00 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -83,6 +83,7 @@ HEROES_TABLE_INDEX_FIELD = 'appearances_count' # Metrics lists. +# NOTE: jobs metrics are not listed here as they're hard to trigger, so they're covered by unit tests instead. 
CONFIG_TOTALS_METRICS = ( ( @@ -161,56 +162,24 @@ ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] -# NOTE: jobs metrics are not listed here as they are covered by unit tests instead of integration tests. - CURRENT_ISSUES_METRICS = ( ('rethinkdb.current_issues.total', AggregatorStub.GAUGE), ('rethinkdb.current_issues.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.log_write_error.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.log_write_error.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.server_name_collision.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.server_name_collision.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.db_name_collision.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.db_name_collision.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.table_name_collision.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.table_name_collision.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.outdated_index.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.outdated_index.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.table_availability.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.table_availability.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.memory_error.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.memory_error.critical.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.non_transitive_error.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.non_transitive_error.critical.total', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
-CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS = ( - 'rethinkdb.current_issues.total', - 'rethinkdb.current_issues.critical.total', -) - -CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS = ( - 'rethinkdb.current_issues.table_availability.total', - 'rethinkdb.current_issues.table_availability.critical.total', -) - -assert set(name for name, typ in CURRENT_ISSUES_METRICS).issuperset(CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS) -assert set(name for name, typ in CURRENT_ISSUES_METRICS).issuperset( - CURRENT_ISSUES_METRICS_SUBMITTED_IF_DISCONNECTED_SERVERS -) - +CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS = ['table_availability'] -E2E_METRICS = [] # type: List[Tuple[str, int]] -E2E_METRICS += [(name, typ) for name, typ, _, _ in CONFIG_TOTALS_METRICS] -E2E_METRICS += CLUSTER_STATISTICS_METRICS -E2E_METRICS += SERVER_STATISTICS_METRICS -E2E_METRICS += TABLE_STATISTICS_METRICS -E2E_METRICS += REPLICA_STATISTICS_METRICS -E2E_METRICS += TABLE_STATUS_METRICS -E2E_METRICS += TABLE_STATUS_SHARDS_METRICS -E2E_METRICS += SERVER_STATUS_METRICS -E2E_METRICS += [(name, typ) for name, typ in CURRENT_ISSUES_METRICS if name in CURRENT_ISSUES_METRICS_SUBMITTED_ALWAYS] +E2E_METRICS = ( + tuple((name, typ) for name, typ, _, _ in CONFIG_TOTALS_METRICS) + + CLUSTER_STATISTICS_METRICS + + SERVER_STATISTICS_METRICS + + TABLE_STATISTICS_METRICS + + REPLICA_STATISTICS_METRICS + + TABLE_STATUS_METRICS + + TABLE_STATUS_SHARDS_METRICS + + SERVER_STATUS_METRICS +) # type: Tuple[Tuple[str, int], ...] # Docker Compose configuration. 
From 8701bca18c6cbdcd9c465c42818681f6cccbf652 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 17 Mar 2020 15:27:25 +0100 Subject: [PATCH 103/147] Add support for custom tags --- rethinkdb/datadog_checks/rethinkdb/check.py | 18 ++++++++----- rethinkdb/datadog_checks/rethinkdb/config.py | 18 ++++++++++--- rethinkdb/datadog_checks/rethinkdb/types.py | 4 ++- rethinkdb/tests/assertions.py | 27 ++++++++++---------- rethinkdb/tests/common.py | 1 + rethinkdb/tests/conftest.py | 2 ++ rethinkdb/tests/test_rethinkdb.py | 17 ++++++------ rethinkdb/tests/unit/test_config.py | 5 +++- 8 files changed, 59 insertions(+), 33 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 8b39c0d6c721e..85dd3df2bd574 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -31,6 +31,7 @@ def __init__(self, *args, **kwargs): def connect_submitting_service_checks(self): # type: () -> Iterator[Connection] tags = [] # type: List[str] + tags.extend(self.config.tags) try: with self.backend.connect(self.config) as conn: @@ -63,14 +64,19 @@ def connect_submitting_service_checks(self): def submit_metric(self, metric): # type: (Metric) -> None - self.log.debug('submit_metric metric=%r', metric) + typ = metric['type'] + name = metric['name'] + value = metric['value'] + tags = self.config.tags + metric['tags'] - if metric['type'] == 'service_check': - value = cast(ServiceCheckStatus, metric['value']) - self.service_check(metric['name'], value, tags=metric['tags']) + self.log.debug('submit_metric type=%r name=%r value=%r tags=%r', typ, name, value, tags) + + if typ == 'service_check': + value = cast(ServiceCheckStatus, value) + self.service_check(name, value, tags=tags) else: - submit = getattr(self, metric['type']) # type: Callable - submit(metric['name'], value=metric['value'], tags=metric['tags']) + submit = getattr(self, typ) # type: Callable + submit(name, value, 
tags=tags) def submit_version_metadata(self, conn): # type: (Connection) -> None diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index c391066dac992..1b41cf26b61ee 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import absolute_import -from typing import Optional +from typing import List, Optional from datadog_checks.base import ConfigurationError @@ -24,6 +24,7 @@ def __init__(self, instance): user = instance.get('username') password = instance.get('password') tls_ca_cert = instance.get('tls_ca_cert') + tags = instance.get('tags', []) if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) @@ -39,6 +40,7 @@ def __init__(self, instance): self.user = user # type: Optional[str] self.password = password # type: Optional[str] self.tls_ca_cert = tls_ca_cert # type: Optional[str] + self.tags = tags # type: List[str] def __repr__(self): # type: () -> str @@ -46,6 +48,14 @@ def __repr__(self): 'Config(host={host!r}, ' 'port={port!r}, ' 'user={user!r}, ' - "password='*****', " - 'tls_ca_cert={tls_ca_cert!r})' - ).format(host=self.host, port=self.port, user=self.user, tls_ca_cert=self.tls_ca_cert) + "password={password!r}, " + 'tls_ca_cert={tls_ca_cert!r}, ' + 'tags={tags!r})' + ).format( + host=self.host, + port=self.port, + user=self.user, + password='********' if self.password else '', + tls_ca_cert=self.tls_ca_cert, + tags=self.tags, + ) diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index aa6b1b3026d2d..3b1bb80d39f27 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -15,7 +15,9 @@ ) Instance = TypedDict( - 'Instance', {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': 
str}, total=False + 'Instance', + {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': str, 'tags': List[str]}, + total=False, ) diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index c980d9bbaecce..5be034ea15581 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -23,6 +23,7 @@ TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, TABLE_STATUS_SHARDS_METRICS, + TAGS, ) from .types import ServerName @@ -48,30 +49,30 @@ def _assert_config_totals_metrics(aggregator, disconnected_servers): for metric, typ, value, tags in CONFIG_TOTALS_METRICS: if callable(value): value = value(disconnected_servers) - aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags, value=value) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=TAGS + tags, value=value) def _assert_statistics_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None for metric, typ in CLUSTER_STATISTICS_METRICS: - aggregator.assert_metric(metric, metric_type=typ, count=1, tags=[]) + aggregator.assert_metric(metric, metric_type=typ, count=1, tags=TAGS) for server in SERVERS: - tags = ['server:{}'.format(server)] + SERVER_TAGS[server] + tags = TAGS + ['server:{}'.format(server)] + SERVER_TAGS[server] for metric, typ in SERVER_STATISTICS_METRICS: count = 0 if server in disconnected_servers else 1 aggregator.assert_metric(metric, metric_type=typ, count=count, tags=tags) for metric, typ in TABLE_STATISTICS_METRICS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) for server in HEROES_TABLE_SERVERS: - tags = [ - 'table:{}'.format(HEROES_TABLE), - 'database:{}'.format(DATABASE), - 'server:{}'.format(server), - ] + SERVER_TAGS[server] + tags = ( + TAGS + + ['table:{}'.format(HEROES_TABLE), 
'database:{}'.format(DATABASE), 'server:{}'.format(server)] + + SERVER_TAGS[server] + ) for metric, typ in REPLICA_STATISTICS_METRICS: if server in disconnected_servers: @@ -87,11 +88,11 @@ def _assert_statistics_metrics(aggregator, disconnected_servers): def _assert_table_status_metrics(aggregator): # type: (AggregatorStub) -> None for metric, typ in TABLE_STATUS_METRICS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) for shard in HEROES_TABLE_REPLICAS_BY_SHARD: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] + tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE), 'shard:{}'.format(shard)] for metric, typ in TABLE_STATUS_SHARDS_METRICS: aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) @@ -101,7 +102,7 @@ def _assert_server_status_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None for metric, typ in SERVER_STATUS_METRICS: for server in SERVERS: - tags = ['server:{}'.format(server)] + tags = TAGS + ['server:{}'.format(server)] count = 0 if server in disconnected_servers else 1 aggregator.assert_metric(metric, metric_type=typ, count=count, tags=tags) @@ -111,7 +112,7 @@ def _assert_current_issues_metrics(aggregator, disconnected_servers): for metric, typ in CURRENT_ISSUES_METRICS: if disconnected_servers: for issue_type in CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS: - tags = ['issue_type:{}'.format(issue_type)] + tags = TAGS + ['issue_type:{}'.format(issue_type)] aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) else: aggregator.assert_metric(metric, metric_type=typ, count=0) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 037f7508b6c00..ef7b2aea480c0 100644 --- a/rethinkdb/tests/common.py +++ 
b/rethinkdb/tests/common.py @@ -20,6 +20,7 @@ HOST = get_docker_hostname() +TAGS = ['env:testing'] # Servers. # NOTE: server information is tightly coupled to the Docker Compose setup. diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index a679e60d04c33..60301b0117224 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -19,6 +19,7 @@ HOST, SERVER_PORTS, SERVERS, + TAGS, ) @@ -30,6 +31,7 @@ def instance(): 'port': SERVER_PORTS['server0'], 'username': AGENT_USER, 'password': AGENT_PASSWORD, + 'tags': TAGS, } diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 975ff5fed9ded..ced9318344361 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -25,6 +25,7 @@ RETHINKDB_VERSION, SERVER_PORTS, TABLE_STATUS_SERVICE_CHECKS, + TAGS, TLS_CLIENT_CERT, TLS_SERVER, ) @@ -51,11 +52,11 @@ def test_check(aggregator, instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) for service_check in TABLE_STATUS_SERVICE_CHECKS: - tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_service_check(service_check, RethinkDBCheck.OK, count=1, tags=tags) @@ -81,7 +82,7 @@ def test_check_without_credentials_uses_admin(aggregator, instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @@ -101,7 +102,7 @@ def test_check_connect_to_server_with_tls(aggregator, 
instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = _get_connect_service_check_tags(server=server) + service_check_tags = TAGS + _get_connect_service_check_tags(server=server) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @@ -124,10 +125,10 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): assert_metrics(aggregator, disconnected_servers=disconnected_servers) aggregator.assert_all_metrics_covered() - service_check_tags = _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - table_status_tags = ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] aggregator.assert_service_check( 'rethinkdb.table_status.ready_for_outdated_reads', RethinkDBCheck.OK, count=1, tags=table_status_tags @@ -155,7 +156,7 @@ def test_cannot_connect_unknown_host(aggregator, instance): with pytest.raises(CouldNotConnect): check.check(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=[]) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=TAGS) @pytest.mark.integration @@ -177,7 +178,7 @@ def collect_metrics(self, conn): with pytest.raises(Failure): check.check(instance) - service_check_tags = _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags() aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 765d7eff339b6..55d8d2bebdd31 100644 --- a/rethinkdb/tests/unit/test_config.py +++ 
b/rethinkdb/tests/unit/test_config.py @@ -20,6 +20,7 @@ def test_default_config(): assert config.port == 28015 assert config.user is None assert config.tls_ca_cert is None + assert config.tags == [] def test_config(): @@ -30,19 +31,21 @@ def test_config(): 'username': 'datadog-agent', 'password': 's3kr3t', 'tls_ca_cert': '/path/to/client.cert', + 'tags': ['env:testing'], } # type: Instance config = Config(instance) assert config.host == '192.168.121.1' assert config.port == 28016 assert config.user == 'datadog-agent' assert config.tls_ca_cert == '/path/to/client.cert' + assert config.tags == ['env:testing'] def test_config_repr(): # type: () -> None instance = {} # type: Instance config = Config(instance) - assert repr(config) == "Config(host='localhost', port=28015, user=None, password='*****', tls_ca_cert=None)" + assert repr(config) == "Config(host='localhost', port=28015, user=None, password='', tls_ca_cert=None, tags=[])" @pytest.mark.parametrize('host', [42, True, object()]) From 865420f1271cf56368d74f53be980f6c288ac11f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 17 Mar 2020 16:21:34 +0100 Subject: [PATCH 104/147] Derive image and version from env --- rethinkdb/tests/common.py | 4 ++-- rethinkdb/tests/test_rethinkdb.py | 4 ++-- rethinkdb/tox.ini | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index ef7b2aea480c0..ab2477219b331 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -15,8 +15,8 @@ CHECK_NAME = 'rethinkdb' -IMAGE = 'rethinkdb:2.4.0' -RETHINKDB_VERSION = '2.4.0~0bionic' +IMAGE = os.environ.get('RETHINKDB_IMAGE', '') +RAW_VERSION = os.environ.get('RETHINKDB_RAW_VERSION', '') HOST = get_docker_hostname() diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index ced9318344361..85d24531981ba 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -22,7 +22,7 @@ 
HEROES_TABLE_SERVERS, HOST, MALFORMED_VERSION_STRING_PARAMS, - RETHINKDB_VERSION, + RAW_VERSION, SERVER_PORTS, TABLE_STATUS_SERVICE_CHECKS, TAGS, @@ -193,7 +193,7 @@ def test_version_metadata(instance, datadog_agent): check.check(instance) - raw_version = RETHINKDB_VERSION + raw_version = RAW_VERSION version, _, build = raw_version.partition('~') major, minor, patch = version.split('.') version_metadata = { diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index 7f74f4d9053d0..2a7c91e576a8c 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -3,7 +3,7 @@ minversion = 2.0 skip_missing_interpreters = true basepython = py38 envlist = - py{27,38} + py{27,38}-{2.4} [testenv] dd_check_style = true @@ -22,3 +22,6 @@ passenv = commands = pip install -r requirements.in pytest -v {posargs} +setenv = + 2.4: RETHINKDB_IMAGE = rethinkdb:2.4.0 + 2.4: RETHINKDB_RAW_VERSION = 2.4.0~0bionic From 43b580f6d7b32bd40f8f5fa01c4af1ea40ba24fc Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 18 Mar 2020 09:56:34 +0100 Subject: [PATCH 105/147] Tweak coverage --- rethinkdb/tests/test_rethinkdb.py | 30 +++++++++++++++++++++++++++--- rethinkdb/tests/utils.py | 16 +--------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 85d24531981ba..3ec9fe39cccf0 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -11,7 +11,7 @@ from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb.backends import DefaultBackend from datadog_checks.rethinkdb.connections import Connection -from datadog_checks.rethinkdb.exceptions import CouldNotConnect +from datadog_checks.rethinkdb.exceptions import CouldNotConnect, VersionCollectionFailed from datadog_checks.rethinkdb.types import Instance, Metric from .assertions import assert_metrics @@ -184,7 +184,7 @@ def collect_metrics(self, conn): @pytest.mark.integration 
@pytest.mark.usefixtures('dd_environment') -def test_version_metadata(instance, datadog_agent): +def test_metadata_version(instance, datadog_agent): # type: (Instance, DatadogAgentStub) -> None check_id = 'test' @@ -209,7 +209,7 @@ def test_version_metadata(instance, datadog_agent): @pytest.mark.unit @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) -def test_version_metadata_failure(instance, aggregator, datadog_agent, malformed_version_string): +def test_metadata_version_malformed(instance, aggregator, datadog_agent, malformed_version_string): # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None """ Verify that check still runs to completion if version provided by RethinkDB is malformed. @@ -230,3 +230,27 @@ def collect_connected_server_version(self, conn): aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) datadog_agent.assert_metadata(check_id, {}) + + +@pytest.mark.unit +def test_metadata_version_failure(instance, aggregator, datadog_agent): + # type: (Instance, AggregatorStub, DatadogAgentStub) -> None + """ + Verify that check still runs to completion if it fails to retrieve the RethinkDB version.
+ """ + + class MockBackend(DefaultBackend): + def collect_connected_server_version(self, conn): + # type: (Connection) -> str + raise VersionCollectionFailed('Oops!') + + check_id = 'test' + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check_id = check_id + check.backend = MockBackend() + + check.check(instance) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) + + datadog_agent.assert_metadata(check_id, {}) diff --git a/rethinkdb/tests/utils.py b/rethinkdb/tests/utils.py index 56dd6120edb34..16ea48f6487fb 100644 --- a/rethinkdb/tests/utils.py +++ b/rethinkdb/tests/utils.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from typing import Any, Callable -from datadog_checks.rethinkdb.connections import Connection, ConnectionServer +from datadog_checks.rethinkdb.connections import Connection class MockConnection(Connection): @@ -11,20 +11,6 @@ def __init__(self, rows): # type: (Callable[[], Any]) -> None self.rows = rows - @property - def host(self): - # type: () -> str - return 'mock.local' - - @property - def port(self): - # type: () -> int - return 28015 - - def server(self): - # type: () -> ConnectionServer - return {'id': 'test', 'name': 'testserver', 'proxy': False} - def run(self, query): # type: (Any) -> Any return self.rows() From 0fb3f791c20b6020a191935e858654b8c6ecec2a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 18 Mar 2020 11:01:21 +0100 Subject: [PATCH 106/147] Cleanup, address feedback, more motivation docs --- rethinkdb/datadog_checks/rethinkdb/backends.py | 17 ++++++++++++++--- rethinkdb/datadog_checks/rethinkdb/check.py | 8 ++++---- .../datadog_checks/rethinkdb/connections.py | 3 +++ rethinkdb/setup.py | 4 ++-- rethinkdb/tests/unit/test_metrics.py | 2 +- rethinkdb/tests/utils.py | 10 +++++++--- 6 files changed, 31 insertions(+), 13 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index 
1a06790bdc4cd..6db8cab4a8f68 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -26,18 +26,30 @@ class Backend(object): """ Base interface for high-level operations performed during a RethinkDB check. + + Abstracts any interfaces specific to the `rethinkdb` client library or our default metrics collection strategy + to facilitate swapping for alternative implementations (e.g. for testing purposes). """ def connect(self, config): # type: (Config) -> Connection + """ + Establish a connection with the configured RethinkDB server. + """ raise NotImplementedError # pragma: no cover def collect_metrics(self, conn): # type: (Connection) -> Iterator[Metric] + """ + Collect metrics from the RethinkDB cluster we are connected to. + """ raise NotImplementedError # pragma: no cover def collect_connected_server_version(self, conn): # type: (Connection) -> str + """ + Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. + """ raise NotImplementedError # pragma: no cover @@ -60,6 +72,8 @@ class DefaultBackend(Backend): def __init__(self): # type: () -> None + # NOTE: the name 'r' may look off-putting at first, but it was chosen for consistency with the officially + # advertised ReQL usage. For example, see: https://rethinkdb.com/docs/guide/python/ self._r = rethinkdb.r self._query_engine = QueryEngine(r=self._r) @@ -86,8 +100,5 @@ def collect_metrics(self, conn): def collect_connected_server_version(self, conn): # type: (Connection) -> str - """ - Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
- """ version_string = self._query_engine.query_connected_server_version_string(conn) return parse_version(version_string) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 85dd3df2bd574..af5689f61a895 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -64,18 +64,18 @@ def connect_submitting_service_checks(self): def submit_metric(self, metric): # type: (Metric) -> None - typ = metric['type'] + metric_type = metric['type'] name = metric['name'] value = metric['value'] tags = self.config.tags + metric['tags'] - self.log.debug('submit_metric type=%r name=%r value=%r tags=%r', typ, name, value, tags) + self.log.debug('submit_metric type=%r name=%r value=%r tags=%r', metric_type, name, value, tags) - if typ == 'service_check': + if metric_type == 'service_check': value = cast(ServiceCheckStatus, value) self.service_check(name, value, tags=tags) else: - submit = getattr(self, typ) # type: Callable + submit = getattr(self, metric_type) # type: Callable submit(name, value, tags=tags) def submit_version_metadata(self, conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/connections.py b/rethinkdb/datadog_checks/rethinkdb/connections.py index 94a05c393b11e..d1b203fa419da 100644 --- a/rethinkdb/datadog_checks/rethinkdb/connections.py +++ b/rethinkdb/datadog_checks/rethinkdb/connections.py @@ -17,6 +17,9 @@ class Connection(object): """ Base class and interface for connection objects. + + Abstracts any interfaces specific to the `rethinkdb` client library to facilitate swapping for alternative + implementations (e.g. for testing purposes). 
""" def __enter__(self): diff --git a/rethinkdb/setup.py b/rethinkdb/setup.py index 114b8ac239325..f95b97d83b18e 100644 --- a/rethinkdb/setup.py +++ b/rethinkdb/setup.py @@ -18,7 +18,7 @@ long_description = f.read() -CHECKS_BASE_REQ = 'datadog-checks-base>=4.2.0' +CHECKS_BASE_REQ = 'datadog-checks-base>=11.0.0' setup( @@ -43,7 +43,7 @@ 'Topic :: System :: Monitoring', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], # The package we're going to ship packages=['datadog_checks.rethinkdb'], diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 5c25b75d29690..a1cfeff60eb22 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -52,7 +52,7 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] engine = QueryEngine() - with MockConnection(rows=lambda: mock_rows) as conn: + with MockConnection(rows=mock_rows) as conn: metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ diff --git a/rethinkdb/tests/utils.py b/rethinkdb/tests/utils.py index 16ea48f6487fb..2b7c7d3c2660d 100644 --- a/rethinkdb/tests/utils.py +++ b/rethinkdb/tests/utils.py @@ -1,16 +1,20 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Callable +from typing import Any from datadog_checks.rethinkdb.connections import Connection class MockConnection(Connection): + """ + A connection class that returns a fixed set of rows regardless of the query. 
+ """ + def __init__(self, rows): - # type: (Callable[[], Any]) -> None + # type: (Any) -> None self.rows = rows def run(self, query): # type: (Any) -> Any - return self.rows() + return self.rows From e60c27dd904496e5b080cc42049f25794e549f19 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 18 Mar 2020 19:08:47 +0100 Subject: [PATCH 107/147] TEMP: refactor metric collection --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- .../datadog_checks/rethinkdb/metrics/_base.py | 146 ++++++++ .../rethinkdb/metrics/current_issues.py | 42 ++- .../rethinkdb/metrics/statistics.py | 322 +++++------------- .../rethinkdb/metrics/statuses.py | 176 ++++------ .../rethinkdb/metrics/system_jobs.py | 123 +++---- rethinkdb/datadog_checks/rethinkdb/types.py | 33 +- rethinkdb/datadog_checks/rethinkdb/utils.py | 66 ++++ rethinkdb/tests/common.py | 64 ++-- rethinkdb/tests/test_rethinkdb.py | 15 +- rethinkdb/tests/unit/test_metrics.py | 65 ++-- 11 files changed, 541 insertions(+), 513 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/_base.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/utils.py diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index af5689f61a895..409fc2a2e07bf 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -56,7 +56,7 @@ def connect_submitting_service_checks(self): raise except Exception as exc: message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) - self.log.error(message) + self.log.exception(message) self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise else: diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py b/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py new file mode 100644 index 0000000000000..cfd27aab6d7c2 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py @@ -0,0 +1,146 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import logging +from typing import ( + Any, + Callable, + ClassVar, + Generic, + Iterator, + List, + Literal, + Mapping, + Optional, + Sequence, + Tuple, + TypedDict, + TypeVar, + Union, +) + +from datadog_checks.base import AgentCheck + +from ..connections import Connection +from ..queries import QueryEngine +from ..types import Metric, MetricType +from ..utils import dotted_join, lookup_dotted, to_timestamp + +logger = logging.getLogger(__name__) + +T = TypeVar("T") +DocumentT = TypeVar("DocumentT", bound=Mapping) + +ModifierName = Literal['total', 'ok_warning', 'timestamp'] +TotalModifier = TypedDict('TotalModifier', {'name': Literal['total'], 'map': Callable[[Any], Sequence[T]]}) +Modifier = Union[ModifierName, TotalModifier] + +MetricDefinition = TypedDict( + 'MetricDefinition', + { + 'type': MetricType, + 'path': str, # Used as the default name. + 'name': str, # An explicit name for the metric. + 'modifier': Optional[Modifier], + }, + total=False, +) + +Enumeration = TypedDict('Enumeration', {'path': str, 'index_tag': str, 'metrics': List[MetricDefinition]}) + + +class DocumentMetricCollector(Generic[DocumentT]): + """ + TODO(before-merging): Explain how to use this. 
+ """ + + name = '' # type: ClassVar[str] + group = '' # type: ClassVar[str] + + metrics = [] # type: ClassVar[List[MetricDefinition]] + enumerations = [] # type: ClassVar[List[Enumeration]] + + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[DocumentT, List[str]]] + raise NotImplementedError # pragma: no cover + + def _make_metric(self, type, name, value, tags=None): + # type: (MetricType, str, float, List[str]) -> Metric + name = dotted_join(('rethinkdb', self.group, name)) + tags = [] if tags is None else tags + return {'type': type, 'name': name, 'value': value, 'tags': tags} + + def _make_metric_from_definition(self, document, definition, tags): + # type: (DocumentT, MetricDefinition, List[str]) -> Metric + logger.debug('make_metric_from_definition definition=%r', definition) + + path = definition['path'] + name = definition.get('name', path) + value = lookup_dotted(document, path=path) + + if 'modifier' in definition and definition['modifier'] is not None: + value, suffix = self._modify(value, modifier=definition['modifier']) + name = dotted_join((name, suffix), drop_empty=True) + + if not isinstance(value, (int, float)): + raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) + + return self._make_metric(type=definition['type'], name=name, value=value, tags=tags) + + def _make_metrics_from_enumeration(self, document, enumeration, tags): + # type: (DocumentT, Enumeration, List[str]) -> Iterator[Metric] + logger.debug('make_metrics_from_enumeration enumeration=%r', enumeration) + + values = lookup_dotted(document, path=enumeration['path']) # type: Sequence + index_tag = enumeration['index_tag'] + + for index, value in enumerate(values): + item_tags = tags + ['{}:{}'.format(index_tag, index)] + for definition in enumeration['metrics']: + definition = { + 'type': definition['type'], + 'name': dotted_join((enumeration['path'], definition['path']), drop_empty=True), + 'path': 
definition['path'], + 'modifier': definition.get('modifier'), + } + yield self._make_metric_from_definition(value, definition, tags=item_tags) + + def _modify(self, value, modifier): + # type: (Any, Modifier) -> Tuple[float, str] + logger.debug('modify value=%r modifier=%r', value, modifier) + + if modifier == 'total': + return len(value), 'total' + + if modifier == 'ok_warning': + return AgentCheck.OK if value else AgentCheck.WARNING, '' + + if modifier == 'timestamp': + return to_timestamp(value), '' + + if isinstance(modifier, dict): + if modifier['name'] == 'total': + value = modifier['map'](value) + return self._modify(value, modifier='total') + + raise RuntimeError('Unknown modifier: {!r}'.format(modifier)) # pragma: no cover + + def _collect(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] + for document, tags in self.iter_documents(engine, conn): + logger.debug('%s %r', self.name, document) + + for definition in self.metrics: + yield self._make_metric_from_definition(document, definition, tags=tags) + + for enumeration in self.enumerations: + for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags): + yield metric + + # Collection function implementation. 
+ + def __call__(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] + logger.debug('collect_%s', self.name) + for metric in self._collect(engine, conn): + yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index 8d1cb9ae05658..59b0a3f2c2115 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -7,34 +7,32 @@ from ..connections import Connection from ..queries import QueryEngine from ..types import Metric +from ._base import DocumentMetricCollector logger = logging.getLogger(__name__) -def collect_current_issues(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +class CurrentIssuesCollector(DocumentMetricCollector): """ Collect metrics about current system issues. See: https://rethinkdb.com/docs/system-issues/ """ - logger.debug('collect_current_issues') - - totals = engine.query_current_issues_totals(conn) - logger.debug('current_issues totals=%r', totals) - - for issue_type, total in totals['issues_by_type'].items(): - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.total', - 'value': total, - 'tags': ['issue_type:{}'.format(issue_type)], - } - - for issue_type, total in totals['critical_issues_by_type'].items(): - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.critical.total', - 'value': total, - 'tags': ['issue_type:{}'.format(issue_type)], - } + + name = 'current_issues' + group = 'current_issues' + + def _collect(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] + totals = engine.query_current_issues_totals(conn) + + for issue_type, total in totals['issues_by_type'].items(): + tags = ['issue_type:{}'.format(issue_type)] + yield self._make_metric(type='gauge', name='total', value=total, tags=tags) + + for issue_type, total in totals['critical_issues_by_type'].items(): + 
tags = ['issue_type:{}'.format(issue_type)] + yield self._make_metric(type='gauge', name='critical.total', value=total, tags=tags) + + +collect_current_issues = CurrentIssuesCollector() diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 05019826cf13a..0c24b09acc90b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -1,276 +1,122 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import Iterator +from typing import Iterator, List, Tuple from ..connections import Connection from ..queries import QueryEngine -from ..types import Metric +from ..types import ClusterStats, ReplicaStats, ServerStats, TableStats +from ._base import DocumentMetricCollector -logger = logging.getLogger(__name__) - -def collect_cluster_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +class ClusterStatisticsCollector(DocumentMetricCollector[ClusterStats]): """ Collect metrics about cluster statistics. 
See: https://rethinkdb.com/docs/system-stats#cluster """ - logger.debug('collect_cluster_statistics') - - stats = engine.query_cluster_stats(conn) - logger.debug('cluster_statistics stats=%r', stats) - - query_engine = stats['query_engine'] - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.queries_per_sec', - 'value': query_engine['queries_per_sec'], - 'tags': [], - } + name = 'cluster_statistics' + group = 'stats.cluster' + metrics = [ + {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + ] - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': [], - } + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[ClusterStats, List[str]]] + yield engine.query_cluster_stats(conn), [] - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': [], - } - -def collect_server_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +class ServerStatisticsCollector(DocumentMetricCollector[ServerStats]): """ Collect metrics about server statistics. 
See: https://rethinkdb.com/docs/system-stats#server """ - logger.debug('collect_server_statistics') - - for server, stats in engine.query_servers_with_stats(conn): - logger.debug('server_statistics server=%r stats=%r', server, stats) - - name = server['name'] - server_tags = server['tags'] - query_engine = stats['query_engine'] - - tags = ['server:{}'.format(name)] - tags.extend(server_tags) - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.client_connections', - 'value': query_engine['client_connections'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.clients_active', - 'value': query_engine['clients_active'], - 'tags': tags, - } - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.queries_per_sec', - 'value': query_engine['queries_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.queries_total', - 'value': query_engine['queries_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.read_docs_total', - 'value': query_engine['read_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.written_docs_total', - 'value': query_engine['written_docs_total'], - 'tags': tags, - } - - -def collect_table_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + name = 'server_statistics' + group = 'stats.server' + metrics = [ + {'type': 'gauge', 'path': 'query_engine.client_connections'}, + {'type': 'gauge', 'path': 'query_engine.clients_active'}, + {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, + {'type': 
'monotonic_count', 'path': 'query_engine.queries_total'}, + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, + ] + + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[ServerStats, List[str]]] + for server, stats in engine.query_servers_with_stats(conn): + tags = ['server:{}'.format(server['name'])] + tags.extend(server['tags']) + yield stats, tags + + +class TableStatisticsCollector(DocumentMetricCollector[TableStats]): """ Collect metrics about table statistics. See: https://rethinkdb.com/docs/system-stats#table """ - logger.debug('collect_table_statistics') - - for table, stats in engine.query_tables_with_stats(conn): - logger.debug('table_statistics table=%r stats=%r', table, stats) - name = table['name'] - database = table['db'] - query_engine = stats['query_engine'] + name = 'table_statistics' + group = 'stats.table' + metrics = [ + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + ] - tags = ['table:{}'.format(name), 'database:{}'.format(database)] + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[TableStats, List[str]]] + for table, stats in engine.query_tables_with_stats(conn): + tags = ['table:{}'.format(table['name']), 'database:{}'.format(table['db'])] + yield stats, tags - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - -def collect_replica_statistics(engine, conn): - # type: (QueryEngine, Connection) -> 
Iterator[Metric] +class ReplicaStatisticsCollector(DocumentMetricCollector[ReplicaStats]): """ Collect metrics about replicas (table/server pairs) statistics. See: https://rethinkdb.com/docs/system-stats#replica """ - logger.debug('collect_replica_statistics') - - for table, server, replica, stats in engine.query_replicas_with_stats(conn): - logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) - - database = table['db'] - server_name = server['name'] - table_name = table['name'] - server_tags = server['tags'] - query_engine = stats['query_engine'] - storage_engine = stats['storage_engine'] - state = replica['state'] - - tags = [ - 'table:{}'.format(table_name), - 'database:{}'.format(database), - 'server:{}'.format(server_name), - 'state:{}'.format(state), - ] - tags.extend(server_tags) - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.read_docs_total', - 'value': query_engine['read_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.written_docs_total', - 'value': query_engine['written_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.cache.in_use_bytes', - 'value': storage_engine['cache']['in_use_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', - 'value': storage_engine['disk']['read_bytes_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.disk.read_bytes_total', - 'value': storage_engine['disk']['read_bytes_total'], 
- 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', - 'value': storage_engine['disk']['written_bytes_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.disk.written_bytes_total', - 'value': storage_engine['disk']['written_bytes_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.metadata_bytes', - 'value': storage_engine['disk']['space_usage']['metadata_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.data_bytes', - 'value': storage_engine['disk']['space_usage']['data_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.garbage_bytes', - 'value': storage_engine['disk']['space_usage']['garbage_bytes'], - 'tags': tags, - } - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.preallocated_bytes', - 'value': storage_engine['disk']['space_usage']['preallocated_bytes'], - 'tags': tags, - } + name = 'replica_statistics' + group = 'stats.table_server' + metrics = [ + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, + {'type': 'gauge', 'path': 'storage_engine.cache.in_use_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.read_bytes_per_sec'}, + {'type': 'monotonic_count', 'path': 'storage_engine.disk.read_bytes_total'}, + {'type': 'gauge', 'path': 'storage_engine.disk.written_bytes_per_sec'}, + {'type': 'monotonic_count', 'path': 'storage_engine.disk.written_bytes_total'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.metadata_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.data_bytes'}, + {'type': 
'gauge', 'path': 'storage_engine.disk.space_usage.garbage_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.preallocated_bytes'}, + ] + + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[ReplicaStats, List[str]]] + for table, server, replica, stats in engine.query_replicas_with_stats(conn): + tags = [ + 'table:{}'.format(table['name']), + 'database:{}'.format(table['db']), + 'server:{}'.format(server['name']), + 'state:{}'.format(replica['state']), + ] + tags.extend(server['tags']) + yield stats, tags + + +collect_cluster_statistics = ClusterStatisticsCollector() +collect_server_statistics = ServerStatisticsCollector() +collect_table_statistics = TableStatisticsCollector() +collect_replica_statistics = ReplicaStatisticsCollector() diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index dfe9e1a6d7411..266cfc1b28551 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -1,141 +1,99 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import datetime as dt import logging -import time -from typing import Iterator +from typing import Iterator, List, Tuple from datadog_checks.base import AgentCheck +from datadog_checks.base.types import ServiceCheckStatus from ..connections import Connection from ..queries import QueryEngine -from ..types import Metric +from ..types import ServerStatus, TableStatus +from ._base import DocumentMetricCollector logger = logging.getLogger(__name__) -def collect_table_status(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +def transform_status(status): + # type: (bool) -> ServiceCheckStatus + return AgentCheck.OK if status else AgentCheck.WARNING + + +class TableStatusCollector(DocumentMetricCollector[TableStatus]): """ Collect metrics about table statuses. See: https://rethinkdb.com/docs/system-tables/#table_status """ - logger.debug('collect_table_status') - - for table_status in engine.query_table_status(conn): - logger.debug('table_status %r', table_status) - - table = table_status['name'] - database = table_status['db'] - tags = ['table:{}'.format(table), 'database:{}'.format(database)] - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.ready_for_outdated_reads', - 'value': AgentCheck.OK if table_status['status']['ready_for_outdated_reads'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.ready_for_reads', - 'value': AgentCheck.OK if table_status['status']['ready_for_reads'] else AgentCheck.WARNING, - 'tags': tags, + name = 'table_status' + group = 'table_status' + + metrics = [ + {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'modifier': 'ok_warning'}, + {'type': 'service_check', 'path': 'status.ready_for_reads', 'modifier': 'ok_warning'}, + {'type': 'service_check', 'path': 'status.ready_for_writes', 'modifier': 'ok_warning'}, + 
{'type': 'service_check', 'path': 'status.all_replicas_ready', 'modifier': 'ok_warning'}, + {'type': 'gauge', 'path': 'shards', 'modifier': 'total'}, + ] + + enumerations = [ + { + 'path': 'shards', + 'index_tag': 'shard', + 'metrics': [ + {'type': 'gauge', 'path': 'replicas', 'modifier': 'total'}, + {'type': 'gauge', 'path': 'primary_replicas', 'modifier': 'total'}, + ], } + ] - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.ready_for_writes', - 'value': AgentCheck.OK if table_status['status']['ready_for_writes'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.all_replicas_ready', - 'value': AgentCheck.OK if table_status['status']['all_replicas_ready'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.total', - 'value': len(table_status['shards']), - 'tags': tags, - } - - for index, shard in enumerate(table_status['shards']): - shard_tags = tags + ['shard:{}'.format(index)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas.total', - 'value': len(shard['replicas']), - 'tags': shard_tags, - } + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[TableStatus, List[str]]] + for table_status in engine.query_table_status(conn): + tags = ['table:{}'.format(table_status['name']), 'database:{}'.format(table_status['db'])] + yield table_status, tags - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas.primary.total', - 'value': len(shard['primary_replicas']), - 'tags': shard_tags, - } - -def collect_server_status(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +class ServerStatusCollector(DocumentMetricCollector[ServerStatus]): """ Collect metrics about server statuses. 
See: https://rethinkdb.com/docs/system-tables/#server_status """ - logger.debug('collect_server_status') - - for server_status in engine.query_server_status(conn): - logger.debug('server_status %r', server_status) - - server = server_status['name'] - network = server_status['network'] - process = server_status['process'] - - tags = ['server:{}'.format(server)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.time_connected', - 'value': _to_timestamp(network['time_connected']), - 'tags': tags, - } - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.connected_to.total', - 'value': len([other for other, connected in network['connected_to'].items() if connected]), - 'tags': tags, - } + name = 'server_status' + group = 'server_status' - yield { + metrics = [ + {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 'timestamp'}, + { 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.connected_to.pending.total', - 'value': len([other for other, connected in network['connected_to'].items() if not connected]), - 'tags': tags, - } - - yield { + 'path': 'network.connected_to', + 'modifier': { + 'name': 'total', + 'map': lambda value: [other for other, connected in value.items() if connected], + }, + }, + { 'type': 'gauge', - 'name': 'rethinkdb.server_status.process.time_started', - 'value': _to_timestamp(process['time_started']), - 'tags': tags, - } - - -def _to_timestamp(datetime): - # type: (dt.datetime) -> float - try: - return datetime.timestamp() # type: ignore # (Mypy is run in --py2 mode.) - except AttributeError: # pragma: no cover - # Python 2. 
- return time.mktime(datetime.now().timetuple()) + 'path': 'network.connected_to', + 'name': 'network.not_connected_to', + 'modifier': { + 'name': 'total', + 'map': lambda value: [other for other, connected in value.items() if not connected], + }, + }, + {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'timestamp'}, + ] + + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[ServerStatus, List[str]]] + for server_status in engine.query_server_status(conn): + tags = ['server:{}'.format(server_status['name'])] + yield server_status, tags + + +collect_table_status = TableStatusCollector() +collect_server_status = ServerStatusCollector() diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index cf367ce71245b..15ea8d9663f21 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -2,87 +2,68 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import logging -from typing import Iterator, cast +from typing import Iterator, List, Tuple from ..connections import Connection from ..queries import QueryEngine -from ..types import BackfillInfo, IndexConstructionInfo, Metric +from ..types import Job +from ._base import DocumentMetricCollector logger = logging.getLogger(__name__) -def collect_system_jobs(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] +class SystemJobsCollector(DocumentMetricCollector[Job]): """ Collect metrics about system jobs. 
See: https://rethinkdb.com/docs/system-jobs/ """ - logger.debug('collect_system_jobs') - for job in engine.query_system_jobs(conn): - logger.debug('job %r', job) - - duration = job['duration_sec'] - servers = job['servers'] - - tags = ['server:{}'.format(server) for server in servers] - - if job['type'] == 'index_construction': - # NOTE: Using `cast()` is required until tagged unions are released in mypy stable. Until then, avoid using - # 'info' as a variable name in all cases (workaround for https://github.com/python/mypy/issues/6232). - # See: https://mypy.readthedocs.io/en/latest/literal_types.html#tagged-unions - index_construction_info = cast(IndexConstructionInfo, job['info']) - database = index_construction_info['db'] - table = index_construction_info['table'] - index = index_construction_info['index'] - progress = index_construction_info['progress'] - - index_construction_tags = tags + [ - 'database:{}'.format(database), - 'table:{}'.format(table), - 'index:{}'.format(index), - ] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.duration', - 'value': duration, - 'tags': index_construction_tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.progress', - 'value': progress, - 'tags': index_construction_tags, - } - - elif job['type'] == 'backfill': - backfill_info = cast(BackfillInfo, job['info']) - database = backfill_info['db'] - destination_server = backfill_info['destination_server'] - source_server = backfill_info['source_server'] - table = backfill_info['table'] - progress = backfill_info['progress'] - - backfill_tags = tags + [ - 'database:{}'.format(database), - 'destination_server:{}'.format(destination_server), - 'source_server:{}'.format(source_server), - 'table:{}'.format(table), - ] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.duration', - 'value': duration, - 'tags': backfill_tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.progress', - 
'value': progress, - 'tags': backfill_tags, - } + name = 'system_jobs' + group = 'jobs' + + metrics = [{'type': 'gauge', 'path': 'duration_sec'}] + + def iter_documents(self, engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Tuple[Job, List[str]]] + for job in engine.query_system_jobs(conn): + tags = ['job_type:{}'.format(job['type'])] + tags.extend('server:{}'.format(server) for server in job['servers']) + + # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema + + if job['type'] == 'query': + # NOTE: Request-response queries are typically too short-lived to be captured across Agent checks. + # Change feed queries however are long-running, so we'd be able to capture them. + # See: https://rethinkdb.com/docs/system-jobs/#query + # TODO(before-merging): submit within a `duration_sec` threshold instead of skipping entirely. + continue + elif job['type'] == 'disk_compaction': + # Ongoing task on each server -- no information provided (i.e. `info` is empty). 
+ # See: https://rethinkdb.com/docs/system-jobs/#disk_compaction + continue + if job['type'] == 'index_construction': + tags.extend( + [ + 'database:{}'.format(job['info']['db']), + 'table:{}'.format(job['info']['table']), + 'index:{}'.format(job['info']['index']), + ] + ) + elif job['type'] == 'backfill': + tags.extend( + [ + 'database:{}'.format(job['info']['db']), + 'destination_server:{}'.format(job['info']['destination_server']), + 'source_server:{}'.format(job['info']['source_server']), + 'table:{}'.format(job['info']['table']), + ] + ) + else: + info = job.get('info', {}) + raise RuntimeError('Unknown job type: {!r} (info: {!r})'.format(job['type'], info)) + + yield job, tags + + +collect_system_jobs = SystemJobsCollector() diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 3b1bb80d39f27..4a3e1d5db5c6d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -9,10 +9,9 @@ # Check interfaces. -Metric = TypedDict( - 'Metric', - {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, -) +MetricType = Literal['gauge', 'monotonic_count', 'service_check'] + +Metric = TypedDict('Metric', {'type': MetricType, 'name': str, 'value': float, 'tags': List[str]}) Instance = TypedDict( 'Instance', @@ -144,6 +143,32 @@ # System jobs documents. 
# See: https://rethinkdb.com/docs/system-jobs/ +QueryInfo = TypedDict('QueryInfo', {}) + +QueryJob = TypedDict( + 'QueryJob', + { + 'type': Literal['query'], + 'id': Tuple[Literal['query'], str], + 'duration_sec': float, + 'info': QueryInfo, + 'servers': List[str], + }, +) + +DiskCompactionInfo = TypedDict('DiskCompactionInfo', {}) + +DiskCompactionJob = TypedDict( + 'DiskCompactionJob', + { + 'type': Literal['disk_compaction'], + 'id': Tuple[Literal['disk_compaction'], str], + 'duration_sec': None, + 'info': DiskCompactionInfo, + 'servers': List[str], + }, +) + IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) IndexConstructionJob = TypedDict( diff --git a/rethinkdb/datadog_checks/rethinkdb/utils.py b/rethinkdb/datadog_checks/rethinkdb/utils.py new file mode 100644 index 0000000000000..da4757186c41b --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/utils.py @@ -0,0 +1,66 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +Miscellaneous utilities. +""" +import datetime as dt +import time +from typing import Any, Mapping, Sequence + + +def lookup_dotted(dct, path): + # type: (Mapping, str) -> Any + """ + Given a mapping and a dotted path `key1.key2...keyN`, return the item at `dct[key1][key2]...[keyN]`. 
+ """ + keys = [key for key in reversed(path.split('.'))] + + value = dct + + while keys: + key = keys.pop() + + if isinstance(value, Sequence): + try: + index = int(key) + except (TypeError, IndexError): + raise RuntimeError('Expected key to be an int ') + try: + value = value[index] + except IndexError as exc: + raise RuntimeError( + 'Failed to access index {!r} on value {!r} along path {!r}: {!r}'.format(index, value, path, exc) + ) + + elif isinstance(value, Mapping): + try: + value = value[key] + except KeyError as exc: + raise RuntimeError('Failed to retrieve key {!r} on value {!r}: {!r}'.format(key, value, exc)) + + else: + # We screwed up. + raise RuntimeError( + 'followed path {!r} with remaining keys {!r}, but value {!r} is not a sequence nor a mapping'.format( + path, value, keys + ) + ) + + return value + + +def dotted_join(values, drop_empty=False): + # type: (Sequence[str], bool) -> str + if drop_empty: + values = [value for value in values if value] + return '.'.join(values) + + +def to_timestamp(datetime): + # type: (dt.datetime) -> float + try: + return datetime.timestamp() # type: ignore # (mypy runs in `--py2` mode.) + except AttributeError: # pragma: no cover + # Python 2. + return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index ab2477219b331..a8e77c98d1987 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -99,52 +99,52 @@ ) # type: Tuple[Tuple[str, int, Union[int, Callable[[set], int]], List[str]], ...] 
CLUSTER_STATISTICS_METRICS = ( - ('rethinkdb.stats.cluster.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] SERVER_STATISTICS_METRICS = ( - ('rethinkdb.stats.server.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.queries_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.client_connections', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.queries_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.client_connections', AggregatorStub.GAUGE), ( # NOTE: submitted but not documented on the RethinkDB website. - 'rethinkdb.stats.server.clients_active', + 'rethinkdb.stats.server.query_engine.clients_active', AggregatorStub.GAUGE, ), ) # type: Tuple[Tuple[str, int], ...] 
TABLE_STATISTICS_METRICS = ( - ('rethinkdb.stats.table.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] REPLICA_STATISTICS_METRICS = ( - ('rethinkdb.stats.table_server.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.cache.in_use_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.read_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.disk.written_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.disk.metadata_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.data_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.garbage_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.preallocated_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes', AggregatorStub.GAUGE), + 
('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] TABLE_STATUS_SERVICE_CHECKS = ( - 'rethinkdb.table_status.ready_for_outdated_reads', - 'rethinkdb.table_status.ready_for_reads', - 'rethinkdb.table_status.ready_for_writes', - 'rethinkdb.table_status.all_replicas_ready', + 'rethinkdb.table_status.status.ready_for_outdated_reads', + 'rethinkdb.table_status.status.ready_for_reads', + 'rethinkdb.table_status.status.ready_for_writes', + 'rethinkdb.table_status.status.all_replicas_ready', ) TABLE_STATUS_METRICS = ( @@ -153,13 +153,13 @@ TABLE_STATUS_SHARDS_METRICS = ( ('rethinkdb.table_status.shards.replicas.total', AggregatorStub.GAUGE), - ('rethinkdb.table_status.shards.replicas.primary.total', AggregatorStub.GAUGE), + ('rethinkdb.table_status.shards.primary_replicas.total', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
SERVER_STATUS_METRICS = ( ('rethinkdb.server_status.network.time_connected', AggregatorStub.GAUGE), ('rethinkdb.server_status.network.connected_to.total', AggregatorStub.GAUGE), - ('rethinkdb.server_status.network.connected_to.pending.total', AggregatorStub.GAUGE), + ('rethinkdb.server_status.network.not_connected_to.total', AggregatorStub.GAUGE), ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 3ec9fe39cccf0..5c023fa8fe138 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -130,18 +130,9 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_outdated_reads', RethinkDBCheck.OK, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_reads', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_writes', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.all_replicas_ready', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) + for service_check in TABLE_STATUS_SERVICE_CHECKS: + status = RethinkDBCheck.OK if service_check.endswith('ready_for_outdated_reads') else RethinkDBCheck.WARNING + aggregator.assert_service_check(service_check, status, count=1, tags=table_status_tags) @pytest.mark.integration diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index a1cfeff60eb22..5ee50f2fdd400 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -8,7 +8,7 @@ from datadog_checks.rethinkdb.metrics.system_jobs import 
collect_system_jobs from datadog_checks.rethinkdb.queries import QueryEngine -from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob +from datadog_checks.rethinkdb.types import BackfillJob, DiskCompactionJob, IndexConstructionJob, QueryJob from ..utils import MockConnection @@ -23,6 +23,22 @@ def test_jobs_metrics(): We provide unit tests for these metrics because testing them in a live environment is tricky. """ + mock_query_job_row = { + 'type': 'query', + 'id': ('query', 'abcd1234'), + 'duration_sec': 0.12, + 'info': {}, + 'servers': ['server0'], + } # type: QueryJob + + mock_disk_compaction_row = { + 'type': 'disk_compaction', + 'id': ('disk_compaction', 'zero'), + 'duration_sec': None, + 'info': {}, + 'servers': ['server0'], + } # type: DiskCompactionJob + mock_backfill_job_row = { # See: https://rethinkdb.com/docs/system-jobs/#backfill 'type': 'backfill', @@ -42,25 +58,26 @@ def test_jobs_metrics(): # See: https://rethinkdb.com/docs/system-jobs/#index_construction 'type': 'index_construction', 'id': ('index_construction', 'abcd1234'), - 'duration_sec': 0.42, + 'duration_sec': 0.24, 'info': {'db': 'doghouse', 'table': 'heroes', 'index': 'appearances_count', 'progress': 42}, 'servers': ['server1'], } # type: IndexConstructionJob - mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} - - mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] + mock_rows = [mock_query_job_row, mock_disk_compaction_row, mock_backfill_job_row, mock_index_construction_job_row] engine = QueryEngine() with MockConnection(rows=mock_rows) as conn: metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ + # -- `query` job ignored -- + # -- `disk_compaction` job ignored -- { 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.duration', + 'name': 'rethinkdb.jobs.duration_sec', 'value': 0.42, 'tags': [ + 'job_type:backfill', 'server:server0', 'server:server2', 
'database:doghouse', @@ -71,27 +88,27 @@ def test_jobs_metrics(): }, { 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.progress', - 'value': 42, + 'name': 'rethinkdb.jobs.duration_sec', + 'value': 0.24, 'tags': [ - 'server:server0', - 'server:server2', + 'job_type:index_construction', + 'server:server1', 'database:doghouse', - 'destination_server:server2', - 'source_server:server0', 'table:heroes', + 'index:appearances_count', ], }, - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.duration', - 'value': 0.42, - 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], - }, - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.progress', - 'value': 42, - 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], - }, ] + + +def test_unknown_job(): + # type: () -> None + """ + If a new job type is added, an exception should be raised so we are notified via CI failures and can add support. + """ + mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} + + engine = QueryEngine() + with MockConnection(rows=[mock_unknown_job_row]) as conn: + with pytest.raises(RuntimeError): + list(collect_system_jobs(engine, conn)) From 26e7dc9fbbfeb5cff3682e91aaf57f1975ffa609 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 09:41:10 +0100 Subject: [PATCH 108/147] Simplify backends into a single class --- .../datadog_checks/rethinkdb/backends.py | 69 +++++++------------ rethinkdb/datadog_checks/rethinkdb/check.py | 4 +- rethinkdb/tests/test_rethinkdb.py | 8 +-- 3 files changed, 31 insertions(+), 50 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index 6db8cab4a8f68..8a0b21d8f5ac1 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -1,7 +1,7 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Callable, Iterator, List +from typing import Callable, Iterator, Sequence import rethinkdb @@ -25,60 +25,35 @@ class Backend(object): """ - Base interface for high-level operations performed during a RethinkDB check. + An interface for high-level operations performed during a RethinkDB check. - Abstracts any interfaces specific to the `rethinkdb` client library or our default metrics collection strategy - to facilitate swapping for alternative implementations (e.g. for testing purposes). + Abstracts away any interfaces specific to the `rethinkdb` client library, while providing a default + implementation that uses that library. """ - def connect(self, config): - # type: (Config) -> Connection - """ - Establish a connection with the configured RethinkDB server. - """ - raise NotImplementedError # pragma: no cover - - def collect_metrics(self, conn): - # type: (Connection) -> Iterator[Metric] - """ - Collect metrics from the RethinkDB cluster we are connected to. - """ - raise NotImplementedError # pragma: no cover - - def collect_connected_server_version(self, conn): - # type: (Connection) -> str - """ - Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. - """ - raise NotImplementedError # pragma: no cover - - -class DefaultBackend(Backend): - """ - A backend that uses the RethinkDB Python client library and the built-in metrics collection functions. 
- """ - - collect_funcs = [ - collect_config_totals, - collect_cluster_statistics, - collect_server_statistics, - collect_table_statistics, - collect_replica_statistics, - collect_server_status, - collect_table_status, - collect_system_jobs, - collect_current_issues, - ] # type: List[Callable[[QueryEngine, Connection], Iterator[Metric]]] - def __init__(self): # type: () -> None # NOTE: the name 'r' may look off-putting at first, but it was chosen for consistency with the officially # advertised ReQL usage. For example, see: https://rethinkdb.com/docs/guide/python/ self._r = rethinkdb.r self._query_engine = QueryEngine(r=self._r) + self._collect_funcs = ( + collect_config_totals, + collect_cluster_statistics, + collect_server_statistics, + collect_table_statistics, + collect_replica_statistics, + collect_server_status, + collect_table_status, + collect_system_jobs, + collect_current_issues, + ) # type: Sequence[Callable[[QueryEngine, Connection], Iterator[Metric]]] def connect(self, config): # type: (Config) -> Connection + """ + Establish a connection with the configured RethinkDB server. + """ try: conn = self._r.connect( host=config.host, @@ -94,11 +69,17 @@ def connect(self, config): def collect_metrics(self, conn): # type: (Connection) -> Iterator[Metric] - for collect in self.collect_funcs: + """ + Collect metrics from the RethinkDB cluster we are connected to. + """ + for collect in self._collect_funcs: for metric in collect(self._query_engine, conn): yield metric def collect_connected_server_version(self, conn): # type: (Connection) -> str + """ + Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
+ """ version_string = self._query_engine.query_connected_server_version_string(conn) return parse_version(version_string) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 409fc2a2e07bf..a1682a154e96c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -7,7 +7,7 @@ from datadog_checks.base import AgentCheck from datadog_checks.base.types import ServiceCheckStatus -from .backends import Backend, DefaultBackend +from .backends import Backend from .config import Config from .connections import Connection from .exceptions import CouldNotConnect, VersionCollectionFailed @@ -25,7 +25,7 @@ def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) self.config = Config(cast(Instance, self.instance)) - self.backend = DefaultBackend() # type: Backend + self.backend = Backend() @contextmanager def connect_submitting_service_checks(self): diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 5c023fa8fe138..87835763395bf 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -9,7 +9,7 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb.backends import DefaultBackend +from datadog_checks.rethinkdb.backends import Backend from datadog_checks.rethinkdb.connections import Connection from datadog_checks.rethinkdb.exceptions import CouldNotConnect, VersionCollectionFailed from datadog_checks.rethinkdb.types import Instance, Metric @@ -157,7 +157,7 @@ def test_connected_but_check_failed_unexpectedly(aggregator, instance): class Failure(Exception): pass - class MockBackend(DefaultBackend): + class MockBackend(Backend): def collect_metrics(self, conn): # type: (Connection) -> 
Iterator[Metric] yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} @@ -206,7 +206,7 @@ def test_metadata_version_malformed(instance, aggregator, datadog_agent, malform Verify that check still runs to completion if version provided by RethinkDB is malformed. """ - class MockBackend(DefaultBackend): + class MockBackend(Backend): def collect_connected_server_version(self, conn): # type: (Connection) -> str return malformed_version_string @@ -230,7 +230,7 @@ def test_metadata_version_failure(instance, aggregator, datadog_agent): Verify that check still runs to completion if it fails to retrieve the RethinkDB version. """ - class MockBackend(DefaultBackend): + class MockBackend(Backend): def collect_connected_server_version(self, conn): # type: (Connection) -> str raise VersionCollectionFailed('Oops!') From c64e1e5e7d588a65270d366094bf7443c3dfd14d Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 09:48:06 +0100 Subject: [PATCH 109/147] Simplify connection into a single class --- .../datadog_checks/rethinkdb/backends.py | 4 +- .../datadog_checks/rethinkdb/connections.py | 44 ++----------------- rethinkdb/tests/cluster.py | 10 ++--- rethinkdb/tests/unit/test_metrics.py | 11 ++--- 4 files changed, 16 insertions(+), 53 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index 8a0b21d8f5ac1..cdebdcc21aab6 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -6,7 +6,7 @@ import rethinkdb from .config import Config -from .connections import Connection, RethinkDBConnection +from .connections import Connection from .exceptions import CouldNotConnect from .metrics.config import collect_config_totals from .metrics.current_issues import collect_current_issues @@ -65,7 +65,7 @@ def connect(self, config): except rethinkdb.errors.ReqlDriverError as exc: raise CouldNotConnect(exc) - return 
RethinkDBConnection(conn) + return Connection(conn) def collect_metrics(self, conn): # type: (Connection) -> Iterator[Metric] diff --git a/rethinkdb/datadog_checks/rethinkdb/connections.py b/rethinkdb/datadog_checks/rethinkdb/connections.py index d1b203fa419da..88106df223a82 100644 --- a/rethinkdb/datadog_checks/rethinkdb/connections.py +++ b/rethinkdb/datadog_checks/rethinkdb/connections.py @@ -1,9 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -""" -RethinkDB connection interface and implementations. -""" from typing import Any, TypedDict import rethinkdb @@ -11,47 +8,12 @@ # See: https://rethinkdb.com/api/python/server ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) -ConnectionTags = TypedDict('ConnectionTags', {'server': str, 'host': str, 'port': int, 'proxy': bool}) - class Connection(object): """ - Base class and interface for connection objects. - - Abstracts any interfaces specific to the `rethinkdb` client library to facilitate swapping for alternative - implementations (e.g. for testing purposes). - """ - - def __enter__(self): - # type: () -> Connection - return self - - def __exit__(self, *args): - # type: (*Any) -> None - pass - - @property - def host(self): - # type: () -> str - raise NotImplementedError # pragma: no cover + Represents a connection to a RethinkDB server. - @property - def port(self): - # type: () -> int - raise NotImplementedError # pragma: no cover - - def server(self): - # type: () -> ConnectionServer - raise NotImplementedError # pragma: no cover - - def run(self, query): - # type: (rethinkdb.RqlQuery) -> Any - raise NotImplementedError # pragma: no cover - - -class RethinkDBConnection(Connection): - """ - A connection backed by an actual RethinkDB connection. + Abstracts away any interfaces specific to the `rethinkdb` client library. 
""" def __init__(self, conn): @@ -59,7 +21,7 @@ def __init__(self, conn): self._conn = conn def __enter__(self): - # type: () -> RethinkDBConnection + # type: () -> Connection self._conn.__enter__() return self diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 1e2ddec096ca7..3aa815cc246cf 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -10,7 +10,7 @@ from datadog_checks.dev.conditions import WaitFor from datadog_checks.dev.docker import temporarily_stop_service from datadog_checks.dev.structures import EnvVars -from datadog_checks.rethinkdb.connections import Connection, RethinkDBConnection +from datadog_checks.rethinkdb.connections import Connection from .common import ( AGENT_PASSWORD, @@ -37,7 +37,7 @@ def setup_cluster(): """ logger.debug('setup_cluster') - with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: # A test DB is automatically created, but we don't use it and it would skew our metrics. response = conn.run(r.db_drop('test')) assert response['dbs_dropped'] == 1 @@ -68,7 +68,7 @@ def setup_cluster(): # Simulate client activity. # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. 
- with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER)) as conn: + with Connection(r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER)) as conn: response = conn.run(r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS)) assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) @@ -120,10 +120,10 @@ def _server_reconnected(conn): with temporarily_stop_service(service, compose_file=COMPOSE_FILE): with EnvVars(COMPOSE_ENV_VARS): - with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: WaitFor(lambda: _server_disconnected(conn))() yield - with RethinkDBConnection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 5ee50f2fdd400..cfa4dded00962 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -66,8 +66,8 @@ def test_jobs_metrics(): mock_rows = [mock_query_job_row, mock_disk_compaction_row, mock_backfill_job_row, mock_index_construction_job_row] engine = QueryEngine() - with MockConnection(rows=mock_rows) as conn: - metrics = list(collect_system_jobs(engine, conn)) + conn = MockConnection(rows=mock_rows) + metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ # -- `query` job ignored -- @@ -109,6 +109,7 @@ def test_unknown_job(): mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} engine = QueryEngine() - with MockConnection(rows=[mock_unknown_job_row]) as conn: - with pytest.raises(RuntimeError): - list(collect_system_jobs(engine, conn)) + conn = MockConnection(rows=[mock_unknown_job_row]) + + with pytest.raises(RuntimeError): + 
list(collect_system_jobs(engine, conn)) From 114b1f5c2390a3c75291d3aeacc74c93353c25f0 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 10:29:36 +0100 Subject: [PATCH 110/147] Drop if/else switch on metric type --- rethinkdb/datadog_checks/rethinkdb/check.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index a1682a154e96c..59a7ff1ffe790 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -5,7 +5,6 @@ from typing import Any, Callable, Iterator, List, cast from datadog_checks.base import AgentCheck -from datadog_checks.base.types import ServiceCheckStatus from .backends import Backend from .config import Config @@ -71,12 +70,8 @@ def submit_metric(self, metric): self.log.debug('submit_metric type=%r name=%r value=%r tags=%r', metric_type, name, value, tags) - if metric_type == 'service_check': - value = cast(ServiceCheckStatus, value) - self.service_check(name, value, tags=tags) - else: - submit = getattr(self, metric_type) # type: Callable - submit(name, value, tags=tags) + submit = getattr(self, metric_type) # type: Callable + submit(name, value, tags=tags) def submit_version_metadata(self, conn): # type: (Connection) -> None From 4b8132250170ef267201755f17fcf37b379e1bcc Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 10:38:43 +0100 Subject: [PATCH 111/147] Always submit the same tags regardless of connect service check status --- rethinkdb/assets/service_checks.json | 4 +--- rethinkdb/datadog_checks/rethinkdb/check.py | 20 +++++------------ rethinkdb/tests/test_rethinkdb.py | 24 ++++++++++----------- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json index db3792907da4e..7b3a02ad8f00e 100644 --- a/rethinkdb/assets/service_checks.json +++ 
b/rethinkdb/assets/service_checks.json @@ -4,9 +4,7 @@ "integration": "RethinkDB", "groups": [ "host", - "port", - "server", - "proxy" + "port" ], "check": "rethinkdb.can_connect", "statuses": [ diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 59a7ff1ffe790..046b30e4a82c9 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from contextlib import contextmanager -from typing import Any, Callable, Iterator, List, cast +from typing import Any, Callable, Iterator, cast from datadog_checks.base import AgentCheck @@ -29,25 +29,15 @@ def __init__(self, *args, **kwargs): @contextmanager def connect_submitting_service_checks(self): # type: () -> Iterator[Connection] - tags = [] # type: List[str] + tags = [ + 'host:{}'.format(self.config.host), + 'port:{}'.format(self.config.port), + ] tags.extend(self.config.tags) try: with self.backend.connect(self.config) as conn: - server = conn.server() - - connection_tags = [ - 'host:{}'.format(conn.host), - 'port:{}'.format(conn.port), - 'server:{}'.format(server['name']), - 'proxy:{}'.format('true' if server['proxy'] else 'false'), - ] - - self.log.debug('connected connection_tags=%r', connection_tags) - tags.extend(connection_tags) - yield conn - except CouldNotConnect as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 87835763395bf..959d8e682420c 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -20,7 +20,6 @@ DATABASE, HEROES_TABLE, HEROES_TABLE_SERVERS, - HOST, MALFORMED_VERSION_STRING_PARAMS, RAW_VERSION, SERVER_PORTS, @@ -32,13 +31,11 @@ from .types import ServerName -def _get_connect_service_check_tags(server='server0'): - # type: (ServerName) 
-> List[str] +def _get_connect_service_check_tags(instance): + # type: (Instance) -> List[str] return [ - 'host:{}'.format(HOST), - 'port:{}'.format(SERVER_PORTS[server]), - 'server:{}'.format(server), - 'proxy:false', + 'host:{}'.format(instance['host']), + 'port:{}'.format(instance['port']), ] @@ -52,7 +49,7 @@ def test_check(aggregator, instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = TAGS + _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) for service_check in TABLE_STATUS_SERVICE_CHECKS: @@ -82,7 +79,7 @@ def test_check_without_credentials_uses_admin(aggregator, instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = TAGS + _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @@ -102,7 +99,7 @@ def test_check_connect_to_server_with_tls(aggregator, instance): assert_metrics(aggregator) aggregator.assert_all_metrics_covered() - service_check_tags = TAGS + _get_connect_service_check_tags(server=server) + service_check_tags = TAGS + _get_connect_service_check_tags(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) @@ -125,7 +122,7 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): assert_metrics(aggregator, disconnected_servers=disconnected_servers) aggregator.assert_all_metrics_covered() - service_check_tags = TAGS + _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) 
table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] @@ -147,7 +144,8 @@ def test_cannot_connect_unknown_host(aggregator, instance): with pytest.raises(CouldNotConnect): check.check(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=TAGS) + tags = TAGS + _get_connect_service_check_tags(instance) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=tags) @pytest.mark.integration @@ -169,7 +167,7 @@ def collect_metrics(self, conn): with pytest.raises(Failure): check.check(instance) - service_check_tags = TAGS + _get_connect_service_check_tags() + service_check_tags = TAGS + _get_connect_service_check_tags(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) From f123755eb0c738cfc3a8ef8d0721d9dcc6a107ed Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 10:52:38 +0100 Subject: [PATCH 112/147] Dry up access to the system database --- rethinkdb/datadog_checks/rethinkdb/queries.py | 51 ++++++++++++------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index 8b8cfa9b98243..60312d17a61ec 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -32,26 +32,35 @@ class QueryEngine(object): def __init__(self, r=None): # type: (rethinkdb.RethinkDB) -> None - self._r = rethinkdb.r if r is None else r + if r is None: + r = rethinkdb.r + + self._r = r + # NOTE: all system tables are located in this database. + # See: https://rethinkdb.com/docs/system-tables/ + self._system = r.db('rethinkdb') def query_connected_server_version_string(self, conn): # type: (Connection) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. 
""" - r = self._r + system = self._system + # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer - server_status = conn.run(r.db('rethinkdb').table('server_status').get(server['id'])) # type: ServerStatus + server_status = conn.run(system.table('server_status').get(server['id'])) # type: ServerStatus + return server_status['process']['version'] def query_config_totals(self, conn): # type: (Connection) -> ConfigTotals r = self._r + system = self._system - table_config = r.db('rethinkdb').table('table_config') - server_config = r.db('rethinkdb').table('server_config') - db_config = r.db('rethinkdb').table('db_config') + table_config = system.table('table_config') + server_config = system.table('server_config') + db_config = system.table('db_config') # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. # See: https://github.com/rethinkdb/rethinkdb/issues/2067 @@ -81,7 +90,7 @@ def query_cluster_stats(self, conn): """ Retrieve statistics about the cluster. """ - return conn.run(self._r.db('rethinkdb').table('stats').get(['cluster'])) + return conn.run(self._system.table('stats').get(['cluster'])) def query_servers_with_stats(self, conn): # type: (Connection) -> Iterator[Tuple[Server, ServerStats]] @@ -89,13 +98,14 @@ def query_servers_with_stats(self, conn): Retrieve each server in the cluster along with its statistics. 
""" r = self._r + system = self._system # For servers: stats['id'] = ['server', ''] is_server_stats_row = r.row['id'].nth(0) == 'server' server_id = r.row['id'].nth(1) - stats = r.db('rethinkdb').table('stats') - server_config = r.db('rethinkdb').table('server_config') + stats = system.table('stats') + server_config = system.table('server_config') rows = conn.run(stats.filter(is_server_stats_row).eq_join(server_id, server_config)) # type: Iterator[JoinRow] @@ -110,13 +120,14 @@ def query_tables_with_stats(self, conn): Retrieve each table in the cluster along with its statistics. """ r = self._r + system = self._system # For tables: stats['id'] = ['table', ''] is_table_stats_row = r.row['id'].nth(0) == 'table' table_id = r.row['id'].nth(1) - stats = r.db('rethinkdb').table('stats') - table_config = r.db('rethinkdb').table('table_config') + stats = system.table('stats') + table_config = system.table('table_config') rows = conn.run(stats.filter(is_table_stats_row).eq_join(table_id, table_config)) # type: Iterator[JoinRow] @@ -131,16 +142,17 @@ def query_replicas_with_stats(self, conn): Retrieve each replica (table/server pair) in the cluster along with its statistics. """ r = self._r + system = self._system # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation, i.e. ask RethinkDB # for a specific set of fields, instead of sending entire objects, which can be expensive when joining # data as we do here. # See: https://rethinkdb.com/api/python/pluck/ - stats = r.db('rethinkdb').table('stats') - server_config = r.db('rethinkdb').table('server_config') - table_config = r.db('rethinkdb').table('table_config') - table_status = r.db('rethinkdb').table( + stats = system.table('stats') + server_config = system.table('server_config') + table_config = system.table('table_config') + table_status = system.table( 'table_status', # Required so that we can join on 'server_config' below without having to look up UUIDs from names. 
# See: https://rethinkdb.com/api/python/table/#description @@ -195,21 +207,21 @@ def query_table_status(self, conn): """ Retrieve the status of each table in the cluster. """ - return conn.run(self._r.db('rethinkdb').table('table_status')) + return conn.run(self._system.table('table_status')) def query_server_status(self, conn): # type: (Connection) -> Iterator[ServerStatus] """ Retrieve the status of each server in the cluster. """ - return conn.run(self._r.db('rethinkdb').table('server_status')) + return conn.run(self._system.table('server_status')) def query_system_jobs(self, conn): # type: (Connection) -> Iterator[Job] """ Retrieve all the currently running system jobs. """ - return conn.run(self._r.db('rethinkdb').table('jobs')) + return conn.run(self._system.table('jobs')) def query_current_issues_totals(self, conn): # type: (Connection) -> CurrentIssuesTotals @@ -217,8 +229,9 @@ def query_current_issues_totals(self, conn): Retrieve all the problems detected with the cluster. """ r = self._r + system = self._system - current_issues = r.db('rethinkdb').table('current_issues').pluck('type', 'critical') + current_issues = system.table('current_issues').pluck('type', 'critical') # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw # expressions yet. 
See: https://github.com/rethinkdb/rethinkdb/issues/2067 From 617a33f5b2d7f1fd9d471cc3471173106e89e30a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 10:57:26 +0100 Subject: [PATCH 113/147] Clarify why job metrics are hard to test in integration/e2e --- rethinkdb/tests/unit/test_metrics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index cfa4dded00962..b80dd0a0be4e6 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -21,6 +21,13 @@ def test_jobs_metrics(): Verify jobs metrics submitted by RethinkDB are processed correctly. We provide unit tests for these metrics because testing them in a live environment is tricky. + + For example: + * Backfill jobs can only be seen by us when large amounts of data is rebalanced between servers, e.g. + when a new server is added to the cluster, or an existing server is shut down. + * Index construction jobs can only be seen by us when a secondary index is added to a relatively large table. + * Query jobs can only be seen by us when an external client issues queries to the cluster. + * Etc. 
""" mock_query_job_row = { From 63a2ddc771c24aff50f2439b3db2227d7baac2e7 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 10:58:58 +0100 Subject: [PATCH 114/147] Fix typo --- rethinkdb/tests/test_rethinkdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 959d8e682420c..ed31ca55e5534 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -173,7 +173,7 @@ def collect_metrics(self, conn): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_mestadata_version(instance, datadog_agent): +def test_metadata_version(instance, datadog_agent): # type: (Instance, DatadogAgentStub) -> None check_id = 'test' From ef58faca8f862ae548d526fbcaba7c3a9685ada2 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 11:05:56 +0100 Subject: [PATCH 115/147] Revert "TEMP: refactor metric collection" This reverts commit e60c27dd904496e5b080cc42049f25794e549f19. 
--- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- .../datadog_checks/rethinkdb/metrics/_base.py | 146 -------- .../rethinkdb/metrics/current_issues.py | 42 +-- .../rethinkdb/metrics/statistics.py | 322 +++++++++++++----- .../rethinkdb/metrics/statuses.py | 176 ++++++---- .../rethinkdb/metrics/system_jobs.py | 123 ++++--- rethinkdb/datadog_checks/rethinkdb/types.py | 33 +- rethinkdb/datadog_checks/rethinkdb/utils.py | 66 ---- rethinkdb/tests/common.py | 64 ++-- rethinkdb/tests/test_rethinkdb.py | 15 +- rethinkdb/tests/unit/test_metrics.py | 66 ++-- 11 files changed, 513 insertions(+), 542 deletions(-) delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/_base.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/utils.py diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 046b30e4a82c9..0f2896fc99e4c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -45,7 +45,7 @@ def connect_submitting_service_checks(self): raise except Exception as exc: message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) - self.log.exception(message) + self.log.error(message) self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise else: diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py b/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py deleted file mode 100644 index cfd27aab6d7c2..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/_base.py +++ /dev/null @@ -1,146 +0,0 @@ -# (C) Datadog, Inc. 
2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import ( - Any, - Callable, - ClassVar, - Generic, - Iterator, - List, - Literal, - Mapping, - Optional, - Sequence, - Tuple, - TypedDict, - TypeVar, - Union, -) - -from datadog_checks.base import AgentCheck - -from ..connections import Connection -from ..queries import QueryEngine -from ..types import Metric, MetricType -from ..utils import dotted_join, lookup_dotted, to_timestamp - -logger = logging.getLogger(__name__) - -T = TypeVar("T") -DocumentT = TypeVar("DocumentT", bound=Mapping) - -ModifierName = Literal['total', 'ok_warning', 'timestamp'] -TotalModifier = TypedDict('TotalModifier', {'name': Literal['total'], 'map': Callable[[Any], Sequence[T]]}) -Modifier = Union[ModifierName, TotalModifier] - -MetricDefinition = TypedDict( - 'MetricDefinition', - { - 'type': MetricType, - 'path': str, # Used as the default name. - 'name': str, # An explicit name for the metric. - 'modifier': Optional[Modifier], - }, - total=False, -) - -Enumeration = TypedDict('Enumeration', {'path': str, 'index_tag': str, 'metrics': List[MetricDefinition]}) - - -class DocumentMetricCollector(Generic[DocumentT]): - """ - TODO(before-merging): Explain how to use this. 
- """ - - name = '' # type: ClassVar[str] - group = '' # type: ClassVar[str] - - metrics = [] # type: ClassVar[List[MetricDefinition]] - enumerations = [] # type: ClassVar[List[Enumeration]] - - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[DocumentT, List[str]]] - raise NotImplementedError # pragma: no cover - - def _make_metric(self, type, name, value, tags=None): - # type: (MetricType, str, float, List[str]) -> Metric - name = dotted_join(('rethinkdb', self.group, name)) - tags = [] if tags is None else tags - return {'type': type, 'name': name, 'value': value, 'tags': tags} - - def _make_metric_from_definition(self, document, definition, tags): - # type: (DocumentT, MetricDefinition, List[str]) -> Metric - logger.debug('make_metric_from_definition definition=%r', definition) - - path = definition['path'] - name = definition.get('name', path) - value = lookup_dotted(document, path=path) - - if 'modifier' in definition and definition['modifier'] is not None: - value, suffix = self._modify(value, modifier=definition['modifier']) - name = dotted_join((name, suffix), drop_empty=True) - - if not isinstance(value, (int, float)): - raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) - - return self._make_metric(type=definition['type'], name=name, value=value, tags=tags) - - def _make_metrics_from_enumeration(self, document, enumeration, tags): - # type: (DocumentT, Enumeration, List[str]) -> Iterator[Metric] - logger.debug('make_metrics_from_enumeration enumeration=%r', enumeration) - - values = lookup_dotted(document, path=enumeration['path']) # type: Sequence - index_tag = enumeration['index_tag'] - - for index, value in enumerate(values): - item_tags = tags + ['{}:{}'.format(index_tag, index)] - for definition in enumeration['metrics']: - definition = { - 'type': definition['type'], - 'name': dotted_join((enumeration['path'], definition['path']), drop_empty=True), - 'path': 
definition['path'], - 'modifier': definition.get('modifier'), - } - yield self._make_metric_from_definition(value, definition, tags=item_tags) - - def _modify(self, value, modifier): - # type: (Any, Modifier) -> Tuple[float, str] - logger.debug('modify value=%r modifier=%r', value, modifier) - - if modifier == 'total': - return len(value), 'total' - - if modifier == 'ok_warning': - return AgentCheck.OK if value else AgentCheck.WARNING, '' - - if modifier == 'timestamp': - return to_timestamp(value), '' - - if isinstance(modifier, dict): - if modifier['name'] == 'total': - value = modifier['map'](value) - return self._modify(value, modifier='total') - - raise RuntimeError('Unknown modifier: {!r}'.format(modifier)) # pragma: no cover - - def _collect(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] - for document, tags in self.iter_documents(engine, conn): - logger.debug('%s %r', self.name, document) - - for definition in self.metrics: - yield self._make_metric_from_definition(document, definition, tags=tags) - - for enumeration in self.enumerations: - for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags): - yield metric - - # Collection function implementation. 
- - def __call__(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] - logger.debug('collect_%s', self.name) - for metric in self._collect(engine, conn): - yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index 59b0a3f2c2115..8d1cb9ae05658 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -7,32 +7,34 @@ from ..connections import Connection from ..queries import QueryEngine from ..types import Metric -from ._base import DocumentMetricCollector logger = logging.getLogger(__name__) -class CurrentIssuesCollector(DocumentMetricCollector): +def collect_current_issues(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about current system issues. See: https://rethinkdb.com/docs/system-issues/ """ - - name = 'current_issues' - group = 'current_issues' - - def _collect(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] - totals = engine.query_current_issues_totals(conn) - - for issue_type, total in totals['issues_by_type'].items(): - tags = ['issue_type:{}'.format(issue_type)] - yield self._make_metric(type='gauge', name='total', value=total, tags=tags) - - for issue_type, total in totals['critical_issues_by_type'].items(): - tags = ['issue_type:{}'.format(issue_type)] - yield self._make_metric(type='gauge', name='critical.total', value=total, tags=tags) - - -collect_current_issues = CurrentIssuesCollector() + logger.debug('collect_current_issues') + + totals = engine.query_current_issues_totals(conn) + logger.debug('current_issues totals=%r', totals) + + for issue_type, total in totals['issues_by_type'].items(): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.total', + 'value': total, + 'tags': ['issue_type:{}'.format(issue_type)], + } + + for issue_type, total in 
totals['critical_issues_by_type'].items(): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.current_issues.critical.total', + 'value': total, + 'tags': ['issue_type:{}'.format(issue_type)], + } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 0c24b09acc90b..05019826cf13a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -1,122 +1,276 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Iterator, List, Tuple +import logging +from typing import Iterator from ..connections import Connection from ..queries import QueryEngine -from ..types import ClusterStats, ReplicaStats, ServerStats, TableStats -from ._base import DocumentMetricCollector +from ..types import Metric +logger = logging.getLogger(__name__) -class ClusterStatisticsCollector(DocumentMetricCollector[ClusterStats]): + +def collect_cluster_statistics(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about cluster statistics. 
See: https://rethinkdb.com/docs/system-stats#cluster """ + logger.debug('collect_cluster_statistics') + + stats = engine.query_cluster_stats(conn) + logger.debug('cluster_statistics stats=%r', stats) + + query_engine = stats['query_engine'] - name = 'cluster_statistics' - group = 'stats.cluster' - metrics = [ - {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, - {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, - {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, - ] + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.cluster.queries_per_sec', + 'value': query_engine['queries_per_sec'], + 'tags': [], + } - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[ClusterStats, List[str]]] - yield engine.query_cluster_stats(conn), [] + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.cluster.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': [], + } + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.cluster.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': [], + } -class ServerStatisticsCollector(DocumentMetricCollector[ServerStats]): + +def collect_server_statistics(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about server statistics. 
See: https://rethinkdb.com/docs/system-stats#server """ + logger.debug('collect_server_statistics') + + for server, stats in engine.query_servers_with_stats(conn): + logger.debug('server_statistics server=%r stats=%r', server, stats) + + name = server['name'] + server_tags = server['tags'] + query_engine = stats['query_engine'] + + tags = ['server:{}'.format(name)] + tags.extend(server_tags) + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.client_connections', + 'value': query_engine['client_connections'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.clients_active', + 'value': query_engine['clients_active'], + 'tags': tags, + } - name = 'server_statistics' - group = 'stats.server' - metrics = [ - {'type': 'gauge', 'path': 'query_engine.client_connections'}, - {'type': 'gauge', 'path': 'query_engine.clients_active'}, - {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, - {'type': 'monotonic_count', 'path': 'query_engine.queries_total'}, - {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, - {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, - {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, - {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, - ] - - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[ServerStats, List[str]]] - for server, stats in engine.query_servers_with_stats(conn): - tags = ['server:{}'.format(server['name'])] - tags.extend(server['tags']) - yield stats, tags - - -class TableStatisticsCollector(DocumentMetricCollector[TableStats]): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.queries_per_sec', + 'value': query_engine['queries_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.queries_total', + 'value': query_engine['queries_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 
'rethinkdb.stats.server.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.read_docs_total', + 'value': query_engine['read_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.server.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.server.written_docs_total', + 'value': query_engine['written_docs_total'], + 'tags': tags, + } + + +def collect_table_statistics(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about table statistics. See: https://rethinkdb.com/docs/system-stats#table """ + logger.debug('collect_table_statistics') + + for table, stats in engine.query_tables_with_stats(conn): + logger.debug('table_statistics table=%r stats=%r', table, stats) - name = 'table_statistics' - group = 'stats.table' - metrics = [ - {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, - {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, - ] + name = table['name'] + database = table['db'] + query_engine = stats['query_engine'] - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[TableStats, List[str]]] - for table, stats in engine.query_tables_with_stats(conn): - tags = ['table:{}'.format(table['name']), 'database:{}'.format(table['db'])] - yield stats, tags + tags = ['table:{}'.format(name), 'database:{}'.format(database)] + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } -class ReplicaStatisticsCollector(DocumentMetricCollector[ReplicaStats]): + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + +def 
collect_replica_statistics(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about replicas (table/server pairs) statistics. See: https://rethinkdb.com/docs/system-stats#replica """ + logger.debug('collect_replica_statistics') + + for table, server, replica, stats in engine.query_replicas_with_stats(conn): + logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) + + database = table['db'] + server_name = server['name'] + table_name = table['name'] + server_tags = server['tags'] + query_engine = stats['query_engine'] + storage_engine = stats['storage_engine'] + state = replica['state'] + + tags = [ + 'table:{}'.format(table_name), + 'database:{}'.format(database), + 'server:{}'.format(server_name), + 'state:{}'.format(state), + ] + tags.extend(server_tags) + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.read_docs_per_sec', + 'value': query_engine['read_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.table_server.read_docs_total', + 'value': query_engine['read_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.written_docs_per_sec', + 'value': query_engine['written_docs_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.table_server.written_docs_total', + 'value': query_engine['written_docs_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.cache.in_use_bytes', + 'value': storage_engine['cache']['in_use_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', + 'value': storage_engine['disk']['read_bytes_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.table_server.disk.read_bytes_total', + 'value': 
storage_engine['disk']['read_bytes_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', + 'value': storage_engine['disk']['written_bytes_per_sec'], + 'tags': tags, + } + + yield { + 'type': 'monotonic_count', + 'name': 'rethinkdb.stats.table_server.disk.written_bytes_total', + 'value': storage_engine['disk']['written_bytes_total'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.metadata_bytes', + 'value': storage_engine['disk']['space_usage']['metadata_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.data_bytes', + 'value': storage_engine['disk']['space_usage']['data_bytes'], + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.garbage_bytes', + 'value': storage_engine['disk']['space_usage']['garbage_bytes'], + 'tags': tags, + } - name = 'replica_statistics' - group = 'stats.table_server' - metrics = [ - {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, - {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, - {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, - {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, - {'type': 'gauge', 'path': 'storage_engine.cache.in_use_bytes'}, - {'type': 'gauge', 'path': 'storage_engine.disk.read_bytes_per_sec'}, - {'type': 'monotonic_count', 'path': 'storage_engine.disk.read_bytes_total'}, - {'type': 'gauge', 'path': 'storage_engine.disk.written_bytes_per_sec'}, - {'type': 'monotonic_count', 'path': 'storage_engine.disk.written_bytes_total'}, - {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.metadata_bytes'}, - {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.data_bytes'}, - {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.garbage_bytes'}, - {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.preallocated_bytes'}, 
- ] - - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[ReplicaStats, List[str]]] - for table, server, replica, stats in engine.query_replicas_with_stats(conn): - tags = [ - 'table:{}'.format(table['name']), - 'database:{}'.format(table['db']), - 'server:{}'.format(server['name']), - 'state:{}'.format(replica['state']), - ] - tags.extend(server['tags']) - yield stats, tags - - -collect_cluster_statistics = ClusterStatisticsCollector() -collect_server_statistics = ServerStatisticsCollector() -collect_table_statistics = TableStatisticsCollector() -collect_replica_statistics = ReplicaStatisticsCollector() + yield { + 'type': 'gauge', + 'name': 'rethinkdb.stats.table_server.disk.preallocated_bytes', + 'value': storage_engine['disk']['space_usage']['preallocated_bytes'], + 'tags': tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index 266cfc1b28551..dfe9e1a6d7411 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -1,99 +1,141 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import datetime as dt import logging -from typing import Iterator, List, Tuple +import time +from typing import Iterator from datadog_checks.base import AgentCheck -from datadog_checks.base.types import ServiceCheckStatus from ..connections import Connection from ..queries import QueryEngine -from ..types import ServerStatus, TableStatus -from ._base import DocumentMetricCollector +from ..types import Metric logger = logging.getLogger(__name__) -def transform_status(status): - # type: (bool) -> ServiceCheckStatus - return AgentCheck.OK if status else AgentCheck.WARNING - - -class TableStatusCollector(DocumentMetricCollector[TableStatus]): +def collect_table_status(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about table statuses. See: https://rethinkdb.com/docs/system-tables/#table_status """ + logger.debug('collect_table_status') + + for table_status in engine.query_table_status(conn): + logger.debug('table_status %r', table_status) + + table = table_status['name'] + database = table_status['db'] - name = 'table_status' - group = 'table_status' - - metrics = [ - {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.ready_for_reads', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.ready_for_writes', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.all_replicas_ready', 'modifier': 'ok_warning'}, - {'type': 'gauge', 'path': 'shards', 'modifier': 'total'}, - ] - - enumerations = [ - { - 'path': 'shards', - 'index_tag': 'shard', - 'metrics': [ - {'type': 'gauge', 'path': 'replicas', 'modifier': 'total'}, - {'type': 'gauge', 'path': 'primary_replicas', 'modifier': 'total'}, - ], + tags = ['table:{}'.format(table), 'database:{}'.format(database)] + + yield { + 'type': 'service_check', + 'name': 
'rethinkdb.table_status.ready_for_outdated_reads', + 'value': AgentCheck.OK if table_status['status']['ready_for_outdated_reads'] else AgentCheck.WARNING, + 'tags': tags, + } + + yield { + 'type': 'service_check', + 'name': 'rethinkdb.table_status.ready_for_reads', + 'value': AgentCheck.OK if table_status['status']['ready_for_reads'] else AgentCheck.WARNING, + 'tags': tags, } - ] - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[TableStatus, List[str]]] - for table_status in engine.query_table_status(conn): - tags = ['table:{}'.format(table_status['name']), 'database:{}'.format(table_status['db'])] - yield table_status, tags + yield { + 'type': 'service_check', + 'name': 'rethinkdb.table_status.ready_for_writes', + 'value': AgentCheck.OK if table_status['status']['ready_for_writes'] else AgentCheck.WARNING, + 'tags': tags, + } + + yield { + 'type': 'service_check', + 'name': 'rethinkdb.table_status.all_replicas_ready', + 'value': AgentCheck.OK if table_status['status']['all_replicas_ready'] else AgentCheck.WARNING, + 'tags': tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.total', + 'value': len(table_status['shards']), + 'tags': tags, + } + + for index, shard in enumerate(table_status['shards']): + shard_tags = tags + ['shard:{}'.format(index)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.replicas.total', + 'value': len(shard['replicas']), + 'tags': shard_tags, + } + yield { + 'type': 'gauge', + 'name': 'rethinkdb.table_status.shards.replicas.primary.total', + 'value': len(shard['primary_replicas']), + 'tags': shard_tags, + } -class ServerStatusCollector(DocumentMetricCollector[ServerStatus]): + +def collect_server_status(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about server statuses. 
See: https://rethinkdb.com/docs/system-tables/#server_status """ + logger.debug('collect_server_status') + + for server_status in engine.query_server_status(conn): + logger.debug('server_status %r', server_status) + + server = server_status['name'] + network = server_status['network'] + process = server_status['process'] + + tags = ['server:{}'.format(server)] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server_status.network.time_connected', + 'value': _to_timestamp(network['time_connected']), + 'tags': tags, + } - name = 'server_status' - group = 'server_status' + yield { + 'type': 'gauge', + 'name': 'rethinkdb.server_status.network.connected_to.total', + 'value': len([other for other, connected in network['connected_to'].items() if connected]), + 'tags': tags, + } - metrics = [ - {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 'timestamp'}, - { + yield { 'type': 'gauge', - 'path': 'network.connected_to', - 'modifier': { - 'name': 'total', - 'map': lambda value: [other for other, connected in value.items() if connected], - }, - }, - { + 'name': 'rethinkdb.server_status.network.connected_to.pending.total', + 'value': len([other for other, connected in network['connected_to'].items() if not connected]), + 'tags': tags, + } + + yield { 'type': 'gauge', - 'path': 'network.connected_to', - 'name': 'network.not_connected_to', - 'modifier': { - 'name': 'total', - 'map': lambda value: [other for other, connected in value.items() if not connected], - }, - }, - {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'timestamp'}, - ] - - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[ServerStatus, List[str]]] - for server_status in engine.query_server_status(conn): - tags = ['server:{}'.format(server_status['name'])] - yield server_status, tags - - -collect_table_status = TableStatusCollector() -collect_server_status = ServerStatusCollector() + 'name': 
'rethinkdb.server_status.process.time_started', + 'value': _to_timestamp(process['time_started']), + 'tags': tags, + } + + +def _to_timestamp(datetime): + # type: (dt.datetime) -> float + try: + return datetime.timestamp() # type: ignore # (Mypy is run in --py2 mode.) + except AttributeError: # pragma: no cover + # Python 2. + return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index 15ea8d9663f21..cf367ce71245b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -2,68 +2,87 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import logging -from typing import Iterator, List, Tuple +from typing import Iterator, cast from ..connections import Connection from ..queries import QueryEngine -from ..types import Job -from ._base import DocumentMetricCollector +from ..types import BackfillInfo, IndexConstructionInfo, Metric logger = logging.getLogger(__name__) -class SystemJobsCollector(DocumentMetricCollector[Job]): +def collect_system_jobs(engine, conn): + # type: (QueryEngine, Connection) -> Iterator[Metric] """ Collect metrics about system jobs. See: https://rethinkdb.com/docs/system-jobs/ """ + logger.debug('collect_system_jobs') - name = 'system_jobs' - group = 'jobs' - - metrics = [{'type': 'gauge', 'path': 'duration_sec'}] - - def iter_documents(self, engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Tuple[Job, List[str]]] - for job in engine.query_system_jobs(conn): - tags = ['job_type:{}'.format(job['type'])] - tags.extend('server:{}'.format(server) for server in job['servers']) - - # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema - - if job['type'] == 'query': - # NOTE: Request-response queries are typically too short-lived to be captured across Agent checks. 
- # Change feed queries however are long-running, they we'd be able to capture them. - # See: https://rethinkdb.com/docs/system-jobs/#query - # TODO(before-merging): submit within a `duration_sec` threshold instead of skipping entirely. - continue - elif job['type'] == 'disk_compaction': - # Ongoing task on each server -- no information provided (i.e. `info` is empty). - # See: https://rethinkdb.com/docs/system-jobs/#disk_compaction - continue - if job['type'] == 'index_construction': - tags.extend( - [ - 'database:{}'.format(job['info']['db']), - 'table:{}'.format(job['info']['table']), - 'index:{}'.format(job['info']['index']), - ] - ) - elif job['type'] == 'backfill': - tags.extend( - [ - 'database:{}'.format(job['info']['db']), - 'destination_server:{}'.format(job['info']['destination_server']), - 'source_server:{}'.format(job['info']['source_server']), - 'table:{}'.format(job['info']['table']), - ] - ) - else: - info = job.get('info', {}) - raise RuntimeError('Unknown job type: {!r} (info: {!r})'.format(job['type'], info)) - - yield job, tags - - -collect_system_jobs = SystemJobsCollector() + for job in engine.query_system_jobs(conn): + logger.debug('job %r', job) + + duration = job['duration_sec'] + servers = job['servers'] + + tags = ['server:{}'.format(server) for server in servers] + + if job['type'] == 'index_construction': + # NOTE: Using `cast()` is required until tagged unions are released in mypy stable. Until then, avoid using + # 'info' as a variable name in all cases (workaround for https://github.com/python/mypy/issues/6232). 
+ # See: https://mypy.readthedocs.io/en/latest/literal_types.html#tagged-unions + index_construction_info = cast(IndexConstructionInfo, job['info']) + database = index_construction_info['db'] + table = index_construction_info['table'] + index = index_construction_info['index'] + progress = index_construction_info['progress'] + + index_construction_tags = tags + [ + 'database:{}'.format(database), + 'table:{}'.format(table), + 'index:{}'.format(index), + ] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.duration', + 'value': duration, + 'tags': index_construction_tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.progress', + 'value': progress, + 'tags': index_construction_tags, + } + + elif job['type'] == 'backfill': + backfill_info = cast(BackfillInfo, job['info']) + database = backfill_info['db'] + destination_server = backfill_info['destination_server'] + source_server = backfill_info['source_server'] + table = backfill_info['table'] + progress = backfill_info['progress'] + + backfill_tags = tags + [ + 'database:{}'.format(database), + 'destination_server:{}'.format(destination_server), + 'source_server:{}'.format(source_server), + 'table:{}'.format(table), + ] + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.backfill.duration', + 'value': duration, + 'tags': backfill_tags, + } + + yield { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.backfill.progress', + 'value': progress, + 'tags': backfill_tags, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 4a3e1d5db5c6d..3b1bb80d39f27 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -9,9 +9,10 @@ # Check interfaces. 
-MetricType = Literal['gauge', 'monotonic_count', 'service_check'] - -Metric = TypedDict('Metric', {'type': MetricType, 'name': str, 'value': float, 'tags': List[str]}) +Metric = TypedDict( + 'Metric', + {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, +) Instance = TypedDict( 'Instance', @@ -143,32 +144,6 @@ # System jobs documents. # See: https://rethinkdb.com/docs/system-jobs/ -QueryInfo = TypedDict('QueryInfo', {}) - -QueryJob = TypedDict( - 'QueryJob', - { - 'type': Literal['query'], - 'id': Tuple[Literal['query'], str], - 'duration_sec': float, - 'info': QueryInfo, - 'servers': List[str], - }, -) - -DiskCompactionInfo = TypedDict('DiskCompactionInfo', {}) - -DiskCompactionJob = TypedDict( - 'DiskCompactionJob', - { - 'type': Literal['disk_compaction'], - 'id': Tuple[Literal['disk_compaction'], str], - 'duration_sec': None, - 'info': DiskCompactionInfo, - 'servers': List[str], - }, -) - IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) IndexConstructionJob = TypedDict( diff --git a/rethinkdb/datadog_checks/rethinkdb/utils.py b/rethinkdb/datadog_checks/rethinkdb/utils.py deleted file mode 100644 index da4757186c41b..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -""" -Miscellaneous utilities. -""" -import datetime as dt -import time -from typing import Any, Mapping, Sequence - - -def lookup_dotted(dct, path): - # type: (Mapping, str) -> Any - """ - Given a mapping and a dotted path `key1.key2...keyN`, return the item at `dct[key1][key2]...[keyN]`. 
- """ - keys = [key for key in reversed(path.split('.'))] - - value = dct - - while keys: - key = keys.pop() - - if isinstance(value, Sequence): - try: - index = int(key) - except (TypeError, IndexError): - raise RuntimeError('Expected key to be an int ') - try: - value = value[index] - except IndexError as exc: - raise RuntimeError( - 'Failed to access index {!r} on value {!r} along path {!r}: {!r}'.format(index, value, path, exc) - ) - - elif isinstance(value, Mapping): - try: - value = value[key] - except KeyError as exc: - raise RuntimeError('Failed to retrieve key {!r} on value {!r}: {!r}'.format(key, value, exc)) - - else: - # We screwed up. - raise RuntimeError( - 'followed path {!r} with remaining keys {!r}, but value {!r} is not a sequence nor a mapping'.format( - path, value, keys - ) - ) - - return value - - -def dotted_join(values, drop_empty=False): - # type: (Sequence[str], bool) -> str - if drop_empty: - values = [value for value in values if value] - return '.'.join(values) - - -def to_timestamp(datetime): - # type: (dt.datetime) -> float - try: - return datetime.timestamp() # type: ignore # (mypy runs in `--py2` mode.) - except AttributeError: # pragma: no cover - # Python 2. - return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index a8e77c98d1987..ab2477219b331 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -99,52 +99,52 @@ ) # type: Tuple[Tuple[str, int, Union[int, Callable[[set], int]], List[str]], ...] 
CLUSTER_STATISTICS_METRICS = ( - ('rethinkdb.stats.cluster.query_engine.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] SERVER_STATISTICS_METRICS = ( - ('rethinkdb.stats.server.query_engine.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.query_engine.queries_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.query_engine.client_connections', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.queries_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.client_connections', AggregatorStub.GAUGE), ( # NOTE: submitted but not documented on the RethinkDB website. - 'rethinkdb.stats.server.query_engine.clients_active', + 'rethinkdb.stats.server.clients_active', AggregatorStub.GAUGE, ), ) # type: Tuple[Tuple[str, int], ...] 
TABLE_STATISTICS_METRICS = ( - ('rethinkdb.stats.table.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] REPLICA_STATISTICS_METRICS = ( - ('rethinkdb.stats.table_server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.written_docs_total', 
AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.cache.in_use_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.read_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.disk.written_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.disk.metadata_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.data_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.garbage_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.disk.preallocated_bytes', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] TABLE_STATUS_SERVICE_CHECKS = ( - 'rethinkdb.table_status.status.ready_for_outdated_reads', - 'rethinkdb.table_status.status.ready_for_reads', - 'rethinkdb.table_status.status.ready_for_writes', - 'rethinkdb.table_status.status.all_replicas_ready', + 'rethinkdb.table_status.ready_for_outdated_reads', + 'rethinkdb.table_status.ready_for_reads', + 'rethinkdb.table_status.ready_for_writes', + 'rethinkdb.table_status.all_replicas_ready', ) TABLE_STATUS_METRICS = ( @@ -153,13 +153,13 @@ TABLE_STATUS_SHARDS_METRICS = ( ('rethinkdb.table_status.shards.replicas.total', AggregatorStub.GAUGE), - ('rethinkdb.table_status.shards.primary_replicas.total', AggregatorStub.GAUGE), + ('rethinkdb.table_status.shards.replicas.primary.total', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
SERVER_STATUS_METRICS = ( ('rethinkdb.server_status.network.time_connected', AggregatorStub.GAUGE), ('rethinkdb.server_status.network.connected_to.total', AggregatorStub.GAUGE), - ('rethinkdb.server_status.network.not_connected_to.total', AggregatorStub.GAUGE), + ('rethinkdb.server_status.network.connected_to.pending.total', AggregatorStub.GAUGE), ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index ed31ca55e5534..0b59d02de6261 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -127,9 +127,18 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - for service_check in TABLE_STATUS_SERVICE_CHECKS: - status = RethinkDBCheck.OK if service_check.endswith('ready_for_outdated_reads') else RethinkDBCheck.WARNING - aggregator.assert_service_check(service_check, status, count=1, tags=table_status_tags) + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_outdated_reads', RethinkDBCheck.OK, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_reads', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.ready_for_writes', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) + aggregator.assert_service_check( + 'rethinkdb.table_status.all_replicas_ready', RethinkDBCheck.WARNING, count=1, tags=table_status_tags + ) @pytest.mark.integration diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index b80dd0a0be4e6..8123173e8daea 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -8,7 +8,7 @@ from datadog_checks.rethinkdb.metrics.system_jobs import 
collect_system_jobs from datadog_checks.rethinkdb.queries import QueryEngine -from datadog_checks.rethinkdb.types import BackfillJob, DiskCompactionJob, IndexConstructionJob, QueryJob +from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob from ..utils import MockConnection @@ -30,22 +30,6 @@ def test_jobs_metrics(): * Etc. """ - mock_query_job_row = { - 'type': 'query', - 'id': ('query', 'abcd1234'), - 'duration_sec': 0.12, - 'info': {}, - 'servers': ['server0'], - } # type: QueryJob - - mock_disk_compaction_row = { - 'type': 'disk_compaction', - 'id': ('disk_compaction', 'zero'), - 'duration_sec': None, - 'info': {}, - 'servers': ['server0'], - } # type: DiskCompactionJob - mock_backfill_job_row = { # See: https://rethinkdb.com/docs/system-jobs/#backfill 'type': 'backfill', @@ -65,26 +49,25 @@ def test_jobs_metrics(): # See: https://rethinkdb.com/docs/system-jobs/#index_construction 'type': 'index_construction', 'id': ('index_construction', 'abcd1234'), - 'duration_sec': 0.24, + 'duration_sec': 0.42, 'info': {'db': 'doghouse', 'table': 'heroes', 'index': 'appearances_count', 'progress': 42}, 'servers': ['server1'], } # type: IndexConstructionJob - mock_rows = [mock_query_job_row, mock_disk_compaction_row, mock_backfill_job_row, mock_index_construction_job_row] + mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} + + mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] engine = QueryEngine() conn = MockConnection(rows=mock_rows) metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ - # -- `query` job ignored -- - # -- `disk_compaction` job ignored -- { 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration_sec', + 'name': 'rethinkdb.jobs.backfill.duration', 'value': 0.42, 'tags': [ - 'job_type:backfill', 'server:server0', 'server:server2', 'database:doghouse', @@ -95,28 +78,27 @@ def test_jobs_metrics(): }, { 'type': 'gauge', - 
'name': 'rethinkdb.jobs.duration_sec', - 'value': 0.24, + 'name': 'rethinkdb.jobs.backfill.progress', + 'value': 42, 'tags': [ - 'job_type:index_construction', - 'server:server1', + 'server:server0', + 'server:server2', 'database:doghouse', + 'destination_server:server2', + 'source_server:server0', 'table:heroes', - 'index:appearances_count', ], }, + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.duration', + 'value': 0.42, + 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], + }, + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.index_construction.progress', + 'value': 42, + 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], + }, ] - - -def test_unknown_job(): - # type: () -> None - """ - If a new job type is added, an exception should be raised so we are notified via CI failures and can add support. - """ - mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} - - engine = QueryEngine() - conn = MockConnection(rows=[mock_unknown_job_row]) - - with pytest.raises(RuntimeError): - list(collect_system_jobs(engine, conn)) From f7d4c1c173379e64636ca2a02a05553f3bbc021f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 11:51:08 +0100 Subject: [PATCH 116/147] Drop Connection class --- .../datadog_checks/rethinkdb/backends.py | 13 ++--- rethinkdb/datadog_checks/rethinkdb/check.py | 7 ++- .../datadog_checks/rethinkdb/connections.py | 48 ---------------- .../rethinkdb/metrics/config.py | 5 +- .../rethinkdb/metrics/current_issues.py | 5 +- .../rethinkdb/metrics/statistics.py | 11 ++-- .../rethinkdb/metrics/statuses.py | 7 ++- .../rethinkdb/metrics/system_jobs.py | 5 +- rethinkdb/datadog_checks/rethinkdb/queries.py | 51 ++++++++--------- rethinkdb/datadog_checks/rethinkdb/types.py | 7 ++- rethinkdb/tests/cluster.py | 55 +++++++++---------- rethinkdb/tests/test_rethinkdb.py | 9 ++- 
rethinkdb/tests/unit/test_metrics.py | 9 +-- rethinkdb/tests/utils.py | 20 ------- 14 files changed, 95 insertions(+), 157 deletions(-) delete mode 100644 rethinkdb/datadog_checks/rethinkdb/connections.py delete mode 100644 rethinkdb/tests/utils.py diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index cdebdcc21aab6..342ada12e739d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -6,7 +6,6 @@ import rethinkdb from .config import Config -from .connections import Connection from .exceptions import CouldNotConnect from .metrics.config import collect_config_totals from .metrics.current_issues import collect_current_issues @@ -47,15 +46,15 @@ def __init__(self): collect_table_status, collect_system_jobs, collect_current_issues, - ) # type: Sequence[Callable[[QueryEngine, Connection], Iterator[Metric]]] + ) # type: Sequence[Callable[[QueryEngine, rethinkdb.net.Connection], Iterator[Metric]]] def connect(self, config): - # type: (Config) -> Connection + # type: (Config) -> rethinkdb.net.Connection """ Establish a connection with the configured RethinkDB server. """ try: - conn = self._r.connect( + return self._r.connect( host=config.host, port=config.port, user=config.user, @@ -65,10 +64,8 @@ def connect(self, config): except rethinkdb.errors.ReqlDriverError as exc: raise CouldNotConnect(exc) - return Connection(conn) - def collect_metrics(self, conn): - # type: (Connection) -> Iterator[Metric] + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics from the RethinkDB cluster we are connected to. """ @@ -77,7 +74,7 @@ def collect_metrics(self, conn): yield metric def collect_connected_server_version(self, conn): - # type: (Connection) -> str + # type: (rethinkdb.net.Connection) -> str """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
""" diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 0f2896fc99e4c..17bcc20b577a6 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -4,11 +4,12 @@ from contextlib import contextmanager from typing import Any, Callable, Iterator, cast +import rethinkdb + from datadog_checks.base import AgentCheck from .backends import Backend from .config import Config -from .connections import Connection from .exceptions import CouldNotConnect, VersionCollectionFailed from .types import Instance, Metric @@ -28,7 +29,7 @@ def __init__(self, *args, **kwargs): @contextmanager def connect_submitting_service_checks(self): - # type: () -> Iterator[Connection] + # type: () -> Iterator[rethinkdb.net.Connection] tags = [ 'host:{}'.format(self.config.host), 'port:{}'.format(self.config.port), @@ -64,7 +65,7 @@ def submit_metric(self, metric): submit(name, value, tags=tags) def submit_version_metadata(self, conn): - # type: (Connection) -> None + # type: (rethinkdb.net.Connection) -> None try: version = self.backend.collect_connected_server_version(conn) except VersionCollectionFailed as exc: diff --git a/rethinkdb/datadog_checks/rethinkdb/connections.py b/rethinkdb/datadog_checks/rethinkdb/connections.py deleted file mode 100644 index 88106df223a82..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/connections.py +++ /dev/null @@ -1,48 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, TypedDict - -import rethinkdb - -# See: https://rethinkdb.com/api/python/server -ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) - - -class Connection(object): - """ - Represents a connection to a RethinkDB server. - - Abstracts away any interfaces specific to the `rethinkdb` client library. 
- """ - - def __init__(self, conn): - # type: (rethinkdb.net.Connection) -> None - self._conn = conn - - def __enter__(self): - # type: () -> Connection - self._conn.__enter__() - return self - - def __exit__(self, *args): - # type: (*Any) -> Any - return self._conn.__exit__(*args) - - @property - def host(self): - # type: () -> str - return self._conn.host - - @property - def port(self): - # type: () -> int - return self._conn.port - - def server(self): - # type: () -> ConnectionServer - return self._conn.server() - - def run(self, query): - # type: (rethinkdb.RqlQuery) -> Any - return query.run(self._conn) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py index f2d22f50d9401..9f5e3c24b907d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py @@ -4,7 +4,8 @@ import logging from typing import Iterator -from ..connections import Connection +import rethinkdb + from ..queries import QueryEngine from ..types import Metric @@ -12,7 +13,7 @@ def collect_config_totals(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect aggregated metrics about cluster configuration. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index 8d1cb9ae05658..d7a75684e2cca 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -4,7 +4,8 @@ import logging from typing import Iterator -from ..connections import Connection +import rethinkdb + from ..queries import QueryEngine from ..types import Metric @@ -12,7 +13,7 @@ def collect_current_issues(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about current system issues. diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 05019826cf13a..77eb0cdf670c3 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -4,7 +4,8 @@ import logging from typing import Iterator -from ..connections import Connection +import rethinkdb + from ..queries import QueryEngine from ..types import Metric @@ -12,7 +13,7 @@ def collect_cluster_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about cluster statistics. @@ -48,7 +49,7 @@ def collect_cluster_statistics(engine, conn): def collect_server_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server statistics. @@ -124,7 +125,7 @@ def collect_server_statistics(engine, conn): def collect_table_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about table statistics. 
@@ -157,7 +158,7 @@ def collect_table_statistics(engine, conn): def collect_replica_statistics(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about replicas (table/server pairs) statistics. diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index dfe9e1a6d7411..6bfa18dd68bbd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -6,9 +6,10 @@ import time from typing import Iterator +import rethinkdb + from datadog_checks.base import AgentCheck -from ..connections import Connection from ..queries import QueryEngine from ..types import Metric @@ -16,7 +17,7 @@ def collect_table_status(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about table statuses. @@ -86,7 +87,7 @@ def collect_table_status(engine, conn): def collect_server_status(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server statuses. 
diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index cf367ce71245b..f59b5d9b6e93c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -4,7 +4,8 @@ import logging from typing import Iterator, cast -from ..connections import Connection +import rethinkdb + from ..queries import QueryEngine from ..types import BackfillInfo, IndexConstructionInfo, Metric @@ -12,7 +13,7 @@ def collect_system_jobs(engine, conn): - # type: (QueryEngine, Connection) -> Iterator[Metric] + # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about system jobs. diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index 60312d17a61ec..47918d40aa947 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -5,10 +5,10 @@ import rethinkdb -from .connections import Connection, ConnectionServer from .types import ( ClusterStats, ConfigTotals, + ConnectionServer, CurrentIssuesTotals, Job, JoinRow, @@ -41,7 +41,7 @@ def __init__(self, r=None): self._system = r.db('rethinkdb') def query_connected_server_version_string(self, conn): - # type: (Connection) -> str + # type: (rethinkdb.net.Connection) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. 
""" @@ -49,12 +49,12 @@ def query_connected_server_version_string(self, conn): # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer - server_status = conn.run(system.table('server_status').get(server['id'])) # type: ServerStatus + server_status = system.table('server_status').get(server['id']).run(conn) # type: ServerStatus return server_status['process']['version'] def query_config_totals(self, conn): - # type: (Connection) -> ConfigTotals + # type: (rethinkdb.net.Connection) -> ConfigTotals r = self._r system = self._system @@ -65,15 +65,16 @@ def query_config_totals(self, conn): # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. # See: https://github.com/rethinkdb/rethinkdb/issues/2067 - tables_per_database = conn.run(table_config.group('db').count()) # type: Mapping[str, int] + tables_per_database = table_config.group('db').count().run(conn) # type: Mapping[str, int] - secondary_indexes_per_table = conn.run( + secondary_indexes_per_table = ( # NOTE: this is an example of a map-reduce query. # See: https://rethinkdb.com/docs/map-reduce/#a-more-complex-example table_config.pluck('name', 'indexes') .concat_map(lambda row: row['indexes'].map(lambda _: {'table': row['name']})) .group('table') .count() + .run(conn) ) # type: Mapping[str, int] totals = { @@ -83,17 +84,17 @@ def query_config_totals(self, conn): 'secondary_indexes_per_table': secondary_indexes_per_table, } # type: ConfigTotals # Enforce keys to match. - return conn.run(r.expr(totals)) + return r.expr(totals).run(conn) def query_cluster_stats(self, conn): - # type: (Connection) -> ClusterStats + # type: (rethinkdb.net.Connection) -> ClusterStats """ Retrieve statistics about the cluster. 
""" - return conn.run(self._system.table('stats').get(['cluster'])) + return self._system.table('stats').get(['cluster']).run(conn) def query_servers_with_stats(self, conn): - # type: (Connection) -> Iterator[Tuple[Server, ServerStats]] + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] """ Retrieve each server in the cluster along with its statistics. """ @@ -107,7 +108,7 @@ def query_servers_with_stats(self, conn): stats = system.table('stats') server_config = system.table('server_config') - rows = conn.run(stats.filter(is_server_stats_row).eq_join(server_id, server_config)) # type: Iterator[JoinRow] + rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] for row in rows: server_stats = row['left'] # type: ServerStats @@ -115,7 +116,7 @@ def query_servers_with_stats(self, conn): yield server, server_stats def query_tables_with_stats(self, conn): - # type: (Connection) -> Iterator[Tuple[Table, TableStats]] + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] """ Retrieve each table in the cluster along with its statistics. """ @@ -129,7 +130,7 @@ def query_tables_with_stats(self, conn): stats = system.table('stats') table_config = system.table('table_config') - rows = conn.run(stats.filter(is_table_stats_row).eq_join(table_id, table_config)) # type: Iterator[JoinRow] + rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] for row in rows: table_stats = row['left'] # type: TableStats @@ -137,7 +138,7 @@ def query_tables_with_stats(self, conn): yield table, table_stats def query_replicas_with_stats(self, conn): - # type: (Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] """ Retrieve each replica (table/server pair) in the cluster along with its statistics. 
""" @@ -193,7 +194,7 @@ def query_replicas_with_stats(self, conn): ) ) - rows = conn.run(query) # type: Iterator[Mapping[str, Any]] + rows = query.run(conn) # type: Iterator[Mapping[str, Any]] for row in rows: table = row['table'] # type: Table @@ -203,28 +204,28 @@ def query_replicas_with_stats(self, conn): yield table, server, replica, replica_stats def query_table_status(self, conn): - # type: (Connection) -> Iterator[TableStatus] + # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] """ Retrieve the status of each table in the cluster. """ - return conn.run(self._system.table('table_status')) + return self._system.table('table_status').run(conn) def query_server_status(self, conn): - # type: (Connection) -> Iterator[ServerStatus] + # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] """ Retrieve the status of each server in the cluster. """ - return conn.run(self._system.table('server_status')) + return self._system.table('server_status').run(conn) def query_system_jobs(self, conn): - # type: (Connection) -> Iterator[Job] + # type: (rethinkdb.net.Connection) -> Iterator[Job] """ Retrieve all the currently running system jobs. """ - return conn.run(self._system.table('jobs')) + return self._system.table('jobs').run(conn) def query_current_issues_totals(self, conn): - # type: (Connection) -> CurrentIssuesTotals + # type: (rethinkdb.net.Connection) -> CurrentIssuesTotals """ Retrieve all the problems detected with the cluster. """ @@ -236,9 +237,9 @@ def query_current_issues_totals(self, conn): # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw # expressions yet. 
See: https://github.com/rethinkdb/rethinkdb/issues/2067 - issues_by_type = conn.run(current_issues.group('type').count()) # type: Mapping[str, int] - critical_issues_by_type = conn.run( - current_issues.filter(r.row['critical']).group('type').count() + issues_by_type = current_issues.group('type').count().run(conn) # type: Mapping[str, int] + critical_issues_by_type = ( + current_issues.filter(r.row['critical']).group('type').count().run(conn) ) # type: Mapping[str, int] return { diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 3b1bb80d39f27..3d64de570eb24 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -182,9 +182,12 @@ ) -# ReQL command results. -# See: https://rethinkdb.com/api/python/ +# Miscellaneous. +# See: https://rethinkdb.com/api/python/eq_join # NOTE: Ideally 'left' and 'right' would be generics here, but this isn't supported by 'TypedDict' yet. # See: https://github.com/python/mypy/issues/3863 JoinRow = TypedDict('JoinRow', {'left': Any, 'right': Any}) + +# See: https://rethinkdb.com/api/python/server +ConnectionServer = TypedDict('ConnectionServer', {'id': str, 'name': str, 'proxy': bool}) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 3aa815cc246cf..00e5218d5ed96 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -5,12 +5,12 @@ from contextlib import contextmanager from typing import Iterator, List +import rethinkdb from rethinkdb import r from datadog_checks.dev.conditions import WaitFor from datadog_checks.dev.docker import temporarily_stop_service from datadog_checks.dev.structures import EnvVars -from datadog_checks.rethinkdb.connections import Connection from .common import ( AGENT_PASSWORD, @@ -37,42 +37,42 @@ def setup_cluster(): """ logger.debug('setup_cluster') - with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with r.connect(host=HOST, 
port=SERVER_PORTS['server0']) as conn: # A test DB is automatically created, but we don't use it and it would skew our metrics. - response = conn.run(r.db_drop('test')) + response = r.db_drop('test').run(conn) assert response['dbs_dropped'] == 1 # Cluster content. - response = conn.run(r.db_create(DATABASE)) + response = r.db_create(DATABASE).run(conn) assert response['dbs_created'] == 1 - response = conn.run(r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG)) + response = r.db(DATABASE).table_create(HEROES_TABLE, **HEROES_TABLE_CONFIG).run(conn) assert response['tables_created'] == 1 - response = conn.run(r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD)) + response = r.db(DATABASE).table(HEROES_TABLE).index_create(HEROES_TABLE_INDEX_FIELD).run(conn) assert response['created'] == 1 - response = conn.run(r.db(DATABASE).table(HEROES_TABLE).wait(timeout=1)) + response = r.db(DATABASE).table(HEROES_TABLE).wait(timeout=1).run(conn) assert response['ready'] == 1 # Users. # See: https://rethinkdb.com/docs/permissions-and-accounts/ - response = conn.run(r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD})) + response = r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD}).run(conn) assert response['inserted'] == 1 - response = conn.run(r.db('rethinkdb').grant(AGENT_USER, {'read': True})) + response = r.db('rethinkdb').grant(AGENT_USER, {'read': True}).run(conn) assert response['granted'] == 1 - response = conn.run(r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False})) + response = r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False}).run(conn) assert response['inserted'] == 1 - response = conn.run(r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True})) + response = r.db(DATABASE).grant(CLIENT_USER, {'read': True, 'write': True}).run(conn) assert response['granted'] == 1 # Simulate client activity. 
# NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. - with Connection(r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER)) as conn: - response = conn.run(r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS)) + with r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER) as conn: + response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) - documents = list(conn.run(r.db(DATABASE).table(HEROES_TABLE))) + documents = list(r.db(DATABASE).table(HEROES_TABLE).run(conn)) assert len(documents) == len(HEROES_TABLE_DOCUMENTS) @@ -87,23 +87,22 @@ def temporarily_disconnect_server(server): logger.debug('temporarily_disconnect_server server=%r service=%r', server, service) def _server_exists(conn): - # type: (Connection) -> bool - servers = conn.run(r.db('rethinkdb').table('server_status').map(r.row['name'])) # type: List[str] + # type: (rethinkdb.net.Connection) -> bool + servers = r.db('rethinkdb').table('server_status').map(r.row['name']).run(conn) # type: List[str] logger.debug('server_exists server=%r servers=%r', server, servers) return server in servers def _leader_election_done(conn): - # type: (Connection) -> bool + # type: (rethinkdb.net.Connection) -> bool STABLE_REPLICA_STATES = {'ready', 'waiting_for_primary', 'disconnected'} replica_states = list( - conn.run( - r.db('rethinkdb') - .table('table_status') - .concat_map(r.row['shards']) - .concat_map(r.row['replicas']) - .map(r.row['state']) - ) + r.db('rethinkdb') + .table('table_status') + .concat_map(r.row['shards']) + .concat_map(r.row['replicas']) + .map(r.row['state']) + .run(conn) ) # type: List[str] logger.debug('replica_states %r', replica_states) @@ -111,19 +110,19 @@ def _leader_election_done(conn): return all(state in STABLE_REPLICA_STATES for state in replica_states) def _server_disconnected(conn): - # type: (Connection) -> bool + # type: 
(rethinkdb.net.Connection) -> bool return not _server_exists(conn) and _leader_election_done(conn) def _server_reconnected(conn): - # type: (Connection) -> bool + # type: (rethinkdb.net.Connection) -> bool return _server_exists(conn) and _leader_election_done(conn) with temporarily_stop_service(service, compose_file=COMPOSE_FILE): with EnvVars(COMPOSE_ENV_VARS): - with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: WaitFor(lambda: _server_disconnected(conn))() yield - with Connection(r.connect(host=HOST, port=SERVER_PORTS['server0'])) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 0b59d02de6261..fa1e39fa7d9a2 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import copy -from typing import Iterator, List +from typing import Any, Iterator, List import pytest @@ -10,7 +10,6 @@ from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb.backends import Backend -from datadog_checks.rethinkdb.connections import Connection from datadog_checks.rethinkdb.exceptions import CouldNotConnect, VersionCollectionFailed from datadog_checks.rethinkdb.types import Instance, Metric @@ -166,7 +165,7 @@ class Failure(Exception): class MockBackend(Backend): def collect_metrics(self, conn): - # type: (Connection) -> Iterator[Metric] + # type: (Any) -> Iterator[Metric] yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} raise Failure @@ -215,7 +214,7 @@ def test_metadata_version_malformed(instance, aggregator, datadog_agent, malform class MockBackend(Backend): def 
collect_connected_server_version(self, conn): - # type: (Connection) -> str + # type: (Any) -> str return malformed_version_string check_id = 'test' @@ -239,7 +238,7 @@ def test_metadata_version_failure(instance, aggregator, datadog_agent): class MockBackend(Backend): def collect_connected_server_version(self, conn): - # type: (Connection) -> str + # type: (Any) -> str raise VersionCollectionFailed('Oops!') check_id = 'test' diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 8123173e8daea..dee5e78452e1b 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -4,14 +4,13 @@ """ Unit tests for metrics that are hard to test using integration tests, eg. because they depend on cluster dynamics. """ +import mock import pytest from datadog_checks.rethinkdb.metrics.system_jobs import collect_system_jobs from datadog_checks.rethinkdb.queries import QueryEngine from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob -from ..utils import MockConnection - pytestmark = pytest.mark.unit @@ -59,8 +58,10 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] engine = QueryEngine() - conn = MockConnection(rows=mock_rows) - metrics = list(collect_system_jobs(engine, conn)) + conn = mock.Mock() + with mock.patch('rethinkdb.ast.RqlQuery.run') as run: + run.return_value = mock_rows + metrics = list(collect_system_jobs(engine, conn)) assert metrics == [ { diff --git a/rethinkdb/tests/utils.py b/rethinkdb/tests/utils.py deleted file mode 100644 index 2b7c7d3c2660d..0000000000000 --- a/rethinkdb/tests/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -# (C) Datadog, Inc. 
2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any - -from datadog_checks.rethinkdb.connections import Connection - - -class MockConnection(Connection): - """ - A connection class that returns a fixed set of rows regardless of the query. - """ - - def __init__(self, rows): - # type: (Any) -> None - self.rows = rows - - def run(self, query): - # type: (Any) -> Any - return self.rows From fc047d554ac3876c590eca5d3afbcdc548a004a6 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 11:57:50 +0100 Subject: [PATCH 117/147] Drop exceptions module --- rethinkdb/datadog_checks/rethinkdb/backends.py | 5 ++--- rethinkdb/datadog_checks/rethinkdb/check.py | 5 ++--- rethinkdb/datadog_checks/rethinkdb/exceptions.py | 15 --------------- rethinkdb/datadog_checks/rethinkdb/version.py | 4 +--- rethinkdb/tests/test_rethinkdb.py | 6 +++--- rethinkdb/tests/unit/test_version.py | 3 +-- 6 files changed, 9 insertions(+), 29 deletions(-) delete mode 100644 rethinkdb/datadog_checks/rethinkdb/exceptions.py diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py index 342ada12e739d..56450dd07a243 100644 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ b/rethinkdb/datadog_checks/rethinkdb/backends.py @@ -6,7 +6,6 @@ import rethinkdb from .config import Config -from .exceptions import CouldNotConnect from .metrics.config import collect_config_totals from .metrics.current_issues import collect_current_issues from .metrics.statistics import ( @@ -61,8 +60,8 @@ def connect(self, config): password=config.password, ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None, ) - except rethinkdb.errors.ReqlDriverError as exc: - raise CouldNotConnect(exc) + except rethinkdb.errors.ReqlDriverError: + raise # Failed to establish connection (catch and re-raise for explicitness). 
def collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 17bcc20b577a6..f48183354388c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -10,7 +10,6 @@ from .backends import Backend from .config import Config -from .exceptions import CouldNotConnect, VersionCollectionFailed from .types import Instance, Metric SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' @@ -39,7 +38,7 @@ def connect_submitting_service_checks(self): try: with self.backend.connect(self.config) as conn: yield conn - except CouldNotConnect as exc: + except rethinkdb.errors.ReqlDriverError as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) @@ -68,7 +67,7 @@ def submit_version_metadata(self, conn): # type: (rethinkdb.net.Connection) -> None try: version = self.backend.collect_connected_server_version(conn) - except VersionCollectionFailed as exc: + except ValueError as exc: self.log.error(exc) else: self.set_metadata('version', version) diff --git a/rethinkdb/datadog_checks/rethinkdb/exceptions.py b/rethinkdb/datadog_checks/rethinkdb/exceptions.py deleted file mode 100644 index 60a7485aa882a..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/exceptions.py +++ /dev/null @@ -1,15 +0,0 @@ -# (C) Datadog, Inc. 
2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) - - -class RethinkDBError(Exception): - """Base class for exceptions raised by the RethinkDB check.""" - - -class CouldNotConnect(RethinkDBError): - """Failed to connect to a RethinkDB server.""" - - -class VersionCollectionFailed(RethinkDBError): - """Failed to collect or parse the RethinkDB version from a server.""" diff --git a/rethinkdb/datadog_checks/rethinkdb/version.py b/rethinkdb/datadog_checks/rethinkdb/version.py index 9c9cd93816e10..12f18d1094ce9 100644 --- a/rethinkdb/datadog_checks/rethinkdb/version.py +++ b/rethinkdb/datadog_checks/rethinkdb/version.py @@ -3,8 +3,6 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import re -from .exceptions import VersionCollectionFailed - # See: https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 _RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P\S+)\s\(.*') @@ -25,6 +23,6 @@ def parse_version(rethinkdb_version_string): message = 'Version string {!r} did not match pattern {!r}'.format( rethinkdb_version_string, _RETHINKDB_VERSION_STR_REGEX ) - raise VersionCollectionFailed(message) + raise ValueError(message) return match.group('rethinkdb_version') diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index fa1e39fa7d9a2..7c416dfe362f9 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -5,12 +5,12 @@ from typing import Any, Iterator, List import pytest +import rethinkdb from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck from datadog_checks.rethinkdb.backends import Backend -from datadog_checks.rethinkdb.exceptions import CouldNotConnect, VersionCollectionFailed from datadog_checks.rethinkdb.types import Instance, Metric from .assertions import 
assert_metrics @@ -149,7 +149,7 @@ def test_cannot_connect_unknown_host(aggregator, instance): check = RethinkDBCheck('rethinkdb', {}, [instance]) - with pytest.raises(CouldNotConnect): + with pytest.raises(rethinkdb.errors.ReqlDriverError): check.check(instance) tags = TAGS + _get_connect_service_check_tags(instance) @@ -239,7 +239,7 @@ def test_metadata_version_failure(instance, aggregator, datadog_agent): class MockBackend(Backend): def collect_connected_server_version(self, conn): # type: (Any) -> str - raise VersionCollectionFailed('Oops!') + raise ValueError('Oops!') check_id = 'test' diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py index 09002382618fc..78b01fcb44a10 100644 --- a/rethinkdb/tests/unit/test_version.py +++ b/rethinkdb/tests/unit/test_version.py @@ -3,7 +3,6 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import pytest -from datadog_checks.rethinkdb.exceptions import VersionCollectionFailed from datadog_checks.rethinkdb.version import parse_version from ..common import MALFORMED_VERSION_STRING_PARAMS @@ -29,5 +28,5 @@ def test_parse_version(version_string, expected_version): @pytest.mark.parametrize('version_string', MALFORMED_VERSION_STRING_PARAMS) def test_parse_malformed_version(version_string): # type: (str) -> None - with pytest.raises(VersionCollectionFailed): + with pytest.raises(ValueError): parse_version(version_string) From 9973b5afc979270921b69600c0efe3613ca5d333 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 12:01:35 +0100 Subject: [PATCH 118/147] Fix unclear phrasing --- rethinkdb/tests/cluster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 00e5218d5ed96..a9cb3c2bef8c2 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -81,7 +81,8 @@ def temporarily_disconnect_server(server): # type: (str) -> Iterator[None] """ Gracefully disconnect a server from 
the cluster. - Ensures that the stable is left in a stable state inside and after exiting the context. + + Ensures that the cluster/replicas are in a stable state (not rebalancing) inside and after exiting the context. """ service = 'rethinkdb-{}'.format(server) logger.debug('temporarily_disconnect_server server=%r service=%r', server, service) From f056285cb4ce0a0610559908ac6e984f1ee60ba4 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 15:52:12 +0100 Subject: [PATCH 119/147] Get rid of Backend and QueryManager --- .../datadog_checks/rethinkdb/backends.py | 81 ------ rethinkdb/datadog_checks/rethinkdb/check.py | 65 ++++- .../rethinkdb/metrics/config.py | 8 +- .../rethinkdb/metrics/current_issues.py | 8 +- .../rethinkdb/metrics/statistics.py | 26 +- .../rethinkdb/metrics/statuses.py | 14 +- .../rethinkdb/metrics/system_jobs.py | 8 +- .../datadog_checks/rethinkdb/operations.py | 237 +++++++++++++++++ rethinkdb/datadog_checks/rethinkdb/queries.py | 248 ------------------ rethinkdb/tests/test_rethinkdb.py | 16 +- rethinkdb/tests/unit/test_metrics.py | 4 +- 11 files changed, 330 insertions(+), 385 deletions(-) delete mode 100644 rethinkdb/datadog_checks/rethinkdb/backends.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/operations.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/queries.py diff --git a/rethinkdb/datadog_checks/rethinkdb/backends.py b/rethinkdb/datadog_checks/rethinkdb/backends.py deleted file mode 100644 index 56450dd07a243..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/backends.py +++ /dev/null @@ -1,81 +0,0 @@ -# (C) Datadog, Inc. 
2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Callable, Iterator, Sequence - -import rethinkdb - -from .config import Config -from .metrics.config import collect_config_totals -from .metrics.current_issues import collect_current_issues -from .metrics.statistics import ( - collect_cluster_statistics, - collect_replica_statistics, - collect_server_statistics, - collect_table_statistics, -) -from .metrics.statuses import collect_server_status, collect_table_status -from .metrics.system_jobs import collect_system_jobs -from .queries import QueryEngine -from .types import Metric -from .version import parse_version - - -class Backend(object): - """ - An interface for high-level operations performed during a RethinkDB check. - - Abstracts away any interfaces specific to the `rethinkdb` client library, while providing a default - implementation that uses that library. - """ - - def __init__(self): - # type: () -> None - # NOTE: the name 'r' may look off-putting at first, but it was chosen for consistency with the officially - # advertised ReQL usage. For example, see: https://rethinkdb.com/docs/guide/python/ - self._r = rethinkdb.r - self._query_engine = QueryEngine(r=self._r) - self._collect_funcs = ( - collect_config_totals, - collect_cluster_statistics, - collect_server_statistics, - collect_table_statistics, - collect_replica_statistics, - collect_server_status, - collect_table_status, - collect_system_jobs, - collect_current_issues, - ) # type: Sequence[Callable[[QueryEngine, rethinkdb.net.Connection], Iterator[Metric]]] - - def connect(self, config): - # type: (Config) -> rethinkdb.net.Connection - """ - Establish a connection with the configured RethinkDB server. 
- """ - try: - return self._r.connect( - host=config.host, - port=config.port, - user=config.user, - password=config.password, - ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None, - ) - except rethinkdb.errors.ReqlDriverError: - raise # Failed to establish connection (catch and re-raise for explicitness). - - def collect_metrics(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics from the RethinkDB cluster we are connected to. - """ - for collect in self._collect_funcs: - for metric in collect(self._query_engine, conn): - yield metric - - def collect_connected_server_version(self, conn): - # type: (rethinkdb.net.Connection) -> str - """ - Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. - """ - version_string = self._query_engine.query_connected_server_version_string(conn) - return parse_version(version_string) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index f48183354388c..7d9f940bb19bd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -2,15 +2,26 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from contextlib import contextmanager -from typing import Any, Callable, Iterator, cast +from typing import Any, Callable, Iterator, Sequence, cast import rethinkdb from datadog_checks.base import AgentCheck -from .backends import Backend +from . 
import operations from .config import Config +from .metrics.config import collect_config_totals +from .metrics.current_issues import collect_current_issues +from .metrics.statistics import ( + collect_cluster_statistics, + collect_replica_statistics, + collect_server_statistics, + collect_table_statistics, +) +from .metrics.statuses import collect_server_status, collect_table_status +from .metrics.system_jobs import collect_system_jobs from .types import Instance, Metric +from .version import parse_version SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' @@ -24,19 +35,34 @@ def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) self.config = Config(cast(Instance, self.instance)) - self.backend = Backend() + self.collect_funcs = ( + collect_config_totals, + collect_cluster_statistics, + collect_server_statistics, + collect_table_statistics, + collect_replica_statistics, + collect_server_status, + collect_table_status, + collect_system_jobs, + collect_current_issues, + ) # type: Sequence[Callable] @contextmanager def connect_submitting_service_checks(self): # type: () -> Iterator[rethinkdb.net.Connection] - tags = [ - 'host:{}'.format(self.config.host), - 'port:{}'.format(self.config.port), - ] - tags.extend(self.config.tags) + config = self.config + + tags = ['host:{}'.format(config.host), 'port:{}'.format(config.port)] + tags.extend(config.tags) try: - with self.backend.connect(self.config) as conn: + with rethinkdb.r.connect( + host=config.host, + port=config.port, + user=config.user, + password=config.password, + ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None, + ) as conn: yield conn except rethinkdb.errors.ReqlDriverError as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) @@ -51,6 +77,23 @@ def connect_submitting_service_checks(self): else: self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) + def collect_metrics(self, conn): + # 
type: (rethinkdb.net.Connection) -> Iterator[Metric] + """ + Collect metrics from the RethinkDB cluster we are connected to. + """ + for collect in self.collect_funcs: + for metric in collect(conn): + yield metric + + def collect_connected_server_version(self, conn): + # type: (rethinkdb.net.Connection) -> str + """ + Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. + """ + version_string = operations.query_connected_server_version_string(conn) + return parse_version(version_string) + def submit_metric(self, metric): # type: (Metric) -> None metric_type = metric['type'] @@ -66,7 +109,7 @@ def submit_metric(self, metric): def submit_version_metadata(self, conn): # type: (rethinkdb.net.Connection) -> None try: - version = self.backend.collect_connected_server_version(conn) + version = self.collect_connected_server_version(conn) except ValueError as exc: self.log.error(exc) else: @@ -77,7 +120,7 @@ def check(self, instance): self.log.debug('check config=%r', self.config) with self.connect_submitting_service_checks() as conn: - for metric in self.backend.collect_metrics(conn): + for metric in self.collect_metrics(conn): self.submit_metric(metric) if self.is_metadata_collection_enabled(): diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py index 9f5e3c24b907d..bde630f5aadd5 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/config.py @@ -6,14 +6,14 @@ import rethinkdb -from ..queries import QueryEngine +from .. import operations from ..types import Metric logger = logging.getLogger(__name__) -def collect_config_totals(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_config_totals(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect aggregated metrics about cluster configuration. 
@@ -21,7 +21,7 @@ def collect_config_totals(engine, conn): """ logger.debug('collect_config_totals') - totals = engine.query_config_totals(conn) + totals = operations.query_config_totals(conn) logger.debug('config_totals totals=%r', totals) yield { diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index d7a75684e2cca..f08916ca5835b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -6,14 +6,14 @@ import rethinkdb -from ..queries import QueryEngine +from .. import operations from ..types import Metric logger = logging.getLogger(__name__) -def collect_current_issues(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_current_issues(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about current system issues. @@ -21,7 +21,7 @@ def collect_current_issues(engine, conn): """ logger.debug('collect_current_issues') - totals = engine.query_current_issues_totals(conn) + totals = operations.query_current_issues_totals(conn) logger.debug('current_issues totals=%r', totals) for issue_type, total in totals['issues_by_type'].items(): diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 77eb0cdf670c3..56e61eab9aeba 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -6,14 +6,14 @@ import rethinkdb -from ..queries import QueryEngine +from .. 
import operations from ..types import Metric logger = logging.getLogger(__name__) -def collect_cluster_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_cluster_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about cluster statistics. @@ -21,7 +21,7 @@ def collect_cluster_statistics(engine, conn): """ logger.debug('collect_cluster_statistics') - stats = engine.query_cluster_stats(conn) + stats = operations.query_cluster_stats(conn) logger.debug('cluster_statistics stats=%r', stats) query_engine = stats['query_engine'] @@ -48,8 +48,8 @@ def collect_cluster_statistics(engine, conn): } -def collect_server_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_server_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server statistics. @@ -57,7 +57,7 @@ def collect_server_statistics(engine, conn): """ logger.debug('collect_server_statistics') - for server, stats in engine.query_servers_with_stats(conn): + for server, stats in operations.query_servers_with_stats(conn): logger.debug('server_statistics server=%r stats=%r', server, stats) name = server['name'] @@ -124,8 +124,8 @@ def collect_server_statistics(engine, conn): } -def collect_table_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_table_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about table statistics. 
@@ -133,7 +133,7 @@ def collect_table_statistics(engine, conn): """ logger.debug('collect_table_statistics') - for table, stats in engine.query_tables_with_stats(conn): + for table, stats in operations.query_tables_with_stats(conn): logger.debug('table_statistics table=%r stats=%r', table, stats) name = table['name'] @@ -157,8 +157,8 @@ def collect_table_statistics(engine, conn): } -def collect_replica_statistics(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_replica_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about replicas (table/server pairs) statistics. @@ -166,7 +166,7 @@ def collect_replica_statistics(engine, conn): """ logger.debug('collect_replica_statistics') - for table, server, replica, stats in engine.query_replicas_with_stats(conn): + for table, server, replica, stats in operations.query_replicas_with_stats(conn): logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) database = table['db'] diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index 6bfa18dd68bbd..9365b254bb66e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -10,14 +10,14 @@ from datadog_checks.base import AgentCheck -from ..queries import QueryEngine +from .. import operations from ..types import Metric logger = logging.getLogger(__name__) -def collect_table_status(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_table_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about table statuses. 
@@ -25,7 +25,7 @@ def collect_table_status(engine, conn): """ logger.debug('collect_table_status') - for table_status in engine.query_table_status(conn): + for table_status in operations.query_table_status(conn): logger.debug('table_status %r', table_status) table = table_status['name'] @@ -86,8 +86,8 @@ def collect_table_status(engine, conn): } -def collect_server_status(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_server_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about server statuses. @@ -95,7 +95,7 @@ def collect_server_status(engine, conn): """ logger.debug('collect_server_status') - for server_status in engine.query_server_status(conn): + for server_status in operations.query_server_status(conn): logger.debug('server_status %r', server_status) server = server_status['name'] diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index f59b5d9b6e93c..fc88b4e3c2f99 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -6,14 +6,14 @@ import rethinkdb -from ..queries import QueryEngine +from .. import operations from ..types import BackfillInfo, IndexConstructionInfo, Metric logger = logging.getLogger(__name__) -def collect_system_jobs(engine, conn): - # type: (QueryEngine, rethinkdb.net.Connection) -> Iterator[Metric] +def collect_system_jobs(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect metrics about system jobs. 
@@ -21,7 +21,7 @@ def collect_system_jobs(engine, conn): """ logger.debug('collect_system_jobs') - for job in engine.query_system_jobs(conn): + for job in operations.query_system_jobs(conn): logger.debug('job %r', job) duration = job['duration_sec'] diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py new file mode 100644 index 0000000000000..9ee7174b982b1 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -0,0 +1,237 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +Definition of high-level RethinkDB operations used by the RethinkDB check. + +Python ReQL reference documentation: https://rethinkdb.com/api/python/ +""" + +from typing import Any, Iterator, Mapping, Tuple + +import rethinkdb + +from .types import ( + ClusterStats, + ConfigTotals, + ConnectionServer, + CurrentIssuesTotals, + Job, + JoinRow, + ReplicaStats, + Server, + ServerStats, + ServerStatus, + ShardReplica, + Table, + TableStats, + TableStatus, +) + +# The usual entrypoint for building ReQL queries. +r = rethinkdb.r + +# All system tables are located in this database. +# See: https://rethinkdb.com/docs/system-tables/ +system = r.db('rethinkdb') + + +def query_connected_server_version_string(conn): + # type: (rethinkdb.net.Connection) -> str + """ + Return the raw string of the RethinkDB version used by the server at the other end of the connection. + """ + # See: https://rethinkdb.com/docs/system-tables/#server_status + server = conn.server() # type: ConnectionServer + server_status = system.table('server_status').get(server['id']).run(conn) # type: ServerStatus + + return server_status['process']['version'] + + +def query_config_totals(conn): + # type: (rethinkdb.net.Connection) -> ConfigTotals + """ + Return a summary of the cluster configuration. 
+ """ + table_config = system.table('table_config') + server_config = system.table('server_config') + db_config = system.table('db_config') + + # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. + # See: https://github.com/rethinkdb/rethinkdb/issues/2067 + + tables_per_database = table_config.group('db').count().run(conn) # type: Mapping[str, int] + + secondary_indexes_per_table = ( + # NOTE: this is an example of a map-reduce query. + # See: https://rethinkdb.com/docs/map-reduce/#a-more-complex-example + table_config.pluck('name', 'indexes') + .concat_map(lambda row: row['indexes'].map(lambda _: {'table': row['name']})) + .group('table') + .count() + .run(conn) + ) # type: Mapping[str, int] + + totals = { + 'servers': server_config.count(), + 'databases': db_config.count(), + 'tables_per_database': tables_per_database, + 'secondary_indexes_per_table': secondary_indexes_per_table, + } # type: ConfigTotals # Enforce keys to match. + + return r.expr(totals).run(conn) + + +def query_cluster_stats(conn): + # type: (rethinkdb.net.Connection) -> ClusterStats + """ + Retrieve statistics about the cluster. + """ + return system.table('stats').get(['cluster']).run(conn) + + +def query_servers_with_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] + """ + Retrieve each server in the cluster along with its statistics. 
+ """ + # For servers: stats['id'] = ['server', ''] + is_server_stats_row = r.row['id'].nth(0) == 'server' + server_id = r.row['id'].nth(1) + + stats = system.table('stats') + server_config = system.table('server_config') + + rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] + + for row in rows: + server_stats = row['left'] # type: ServerStats + server = row['right'] # type: Server + yield server, server_stats + + +def query_tables_with_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] + """ + Retrieve each table in the cluster along with its statistics. + """ + # For tables: stats['id'] = ['table', ''] + is_table_stats_row = r.row['id'].nth(0) == 'table' + table_id = r.row['id'].nth(1) + + stats = system.table('stats') + table_config = system.table('table_config') + + rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] + + for row in rows: + table_stats = row['left'] # type: TableStats + table = row['right'] # type: Table + yield table, table_stats + + +def query_replicas_with_stats(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] + """ + Retrieve each replica (table/server pair) in the cluster along with its statistics. + """ + # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation, i.e. ask RethinkDB + # for a specific set of fields, instead of sending entire objects, which can be expensive when joining + # data as we do here. + # See: https://rethinkdb.com/api/python/pluck/ + + stats = system.table('stats') + server_config = system.table('server_config') + table_config = system.table('table_config') + table_status = system.table( + 'table_status', + # Required so that we can join on 'server_config' below without having to look up UUIDs from names. 
+ # See: https://rethinkdb.com/api/python/table/#description + identifier_format='uuid', + ) + + query = ( + # Start from table statuses, as they contain the list of replicas for each shard of the table. + # See: https://rethinkdb.com/docs/system-tables/#table_status + table_status.pluck('id', {'shards': ['replicas']}) + .merge({'table': r.row['id']}) + .without('id') + # Flatten each table status entry into one entry per shard and replica. + .concat_map(lambda row: row['shards'].map(lambda shard: row.merge(shard.pluck('replicas')))) + .without('shards') + .concat_map( + lambda row: (row['replicas'].map(lambda replica: row.merge({'replica': replica.pluck('server', 'state')}))) + ) + .without('replicas') + # Grab table information for each replica. + # See: https://rethinkdb.com/docs/system-tables#table_config + .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) + # Grab server information for each replica. + # See: https://rethinkdb.com/docs/system-tables#server_config + .merge({'server': server_config.get(r.row['replica']['server'])}) + .filter(r.row['server']) # Skip replicas stored on disconnected servers. + .merge({'server': r.row['server'].pluck('id', 'name', 'tags')}) + # Grab statistics for each replica. + # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair + .merge( + { + 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( + 'query_engine', 'storage_engine' + ), + } + ) + ) + + rows = query.run(conn) # type: Iterator[Mapping[str, Any]] + + for row in rows: + table = row['table'] # type: Table + server = row['server'] # type: Server + replica = row['replica'] # type: ShardReplica + replica_stats = row['stats'] # type: ReplicaStats + yield table, server, replica, replica_stats + + +def query_table_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] + """ + Retrieve the status of each table in the cluster. 
+ """ + return system.table('table_status').run(conn) + + +def query_server_status(conn): + # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] + """ + Retrieve the status of each server in the cluster. + """ + return system.table('server_status').run(conn) + + +def query_system_jobs(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Job] + """ + Retrieve all the currently running system jobs. + """ + return system.table('jobs').run(conn) + + +def query_current_issues_totals(conn): + # type: (rethinkdb.net.Connection) -> CurrentIssuesTotals + """ + Retrieve all the problems detected with the cluster. + """ + current_issues = system.table('current_issues').pluck('type', 'critical') + + # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw + # expressions yet. See: https://github.com/rethinkdb/rethinkdb/issues/2067 + + issues_by_type = current_issues.group('type').count().run(conn) # type: Mapping[str, int] + critical_issues_by_type = ( + current_issues.filter(r.row['critical']).group('type').count().run(conn) + ) # type: Mapping[str, int] + + return { + 'issues_by_type': issues_by_type, + 'critical_issues_by_type': critical_issues_by_type, + } diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py deleted file mode 100644 index 47918d40aa947..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ /dev/null @@ -1,248 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Iterator, Mapping, Tuple - -import rethinkdb - -from .types import ( - ClusterStats, - ConfigTotals, - ConnectionServer, - CurrentIssuesTotals, - Job, - JoinRow, - ReplicaStats, - Server, - ServerStats, - ServerStatus, - ShardReplica, - Table, - TableStats, - TableStatus, -) - - -class QueryEngine(object): - """ - Definition of RethinkDB queries used by the RethinkDB check. 
- - Python ReQL reference documentation: https://rethinkdb.com/api/python/ - """ - - def __init__(self, r=None): - # type: (rethinkdb.RethinkDB) -> None - if r is None: - r = rethinkdb.r - - self._r = r - # NOTE: all system tables are located in this database. - # See: https://rethinkdb.com/docs/system-tables/ - self._system = r.db('rethinkdb') - - def query_connected_server_version_string(self, conn): - # type: (rethinkdb.net.Connection) -> str - """ - Return the raw string of the RethinkDB version used by the server at the other end of the connection. - """ - system = self._system - - # See: https://rethinkdb.com/docs/system-tables/#server_status - server = conn.server() # type: ConnectionServer - server_status = system.table('server_status').get(server['id']).run(conn) # type: ServerStatus - - return server_status['process']['version'] - - def query_config_totals(self, conn): - # type: (rethinkdb.net.Connection) -> ConfigTotals - r = self._r - system = self._system - - table_config = system.table('table_config') - server_config = system.table('server_config') - db_config = system.table('db_config') - - # Need to `.run()` these separately because ReQL does not support putting grouped data in raw expressions yet. - # See: https://github.com/rethinkdb/rethinkdb/issues/2067 - - tables_per_database = table_config.group('db').count().run(conn) # type: Mapping[str, int] - - secondary_indexes_per_table = ( - # NOTE: this is an example of a map-reduce query. - # See: https://rethinkdb.com/docs/map-reduce/#a-more-complex-example - table_config.pluck('name', 'indexes') - .concat_map(lambda row: row['indexes'].map(lambda _: {'table': row['name']})) - .group('table') - .count() - .run(conn) - ) # type: Mapping[str, int] - - totals = { - 'servers': server_config.count(), - 'databases': db_config.count(), - 'tables_per_database': tables_per_database, - 'secondary_indexes_per_table': secondary_indexes_per_table, - } # type: ConfigTotals # Enforce keys to match. 
- - return r.expr(totals).run(conn) - - def query_cluster_stats(self, conn): - # type: (rethinkdb.net.Connection) -> ClusterStats - """ - Retrieve statistics about the cluster. - """ - return self._system.table('stats').get(['cluster']).run(conn) - - def query_servers_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] - """ - Retrieve each server in the cluster along with its statistics. - """ - r = self._r - system = self._system - - # For servers: stats['id'] = ['server', ''] - is_server_stats_row = r.row['id'].nth(0) == 'server' - server_id = r.row['id'].nth(1) - - stats = system.table('stats') - server_config = system.table('server_config') - - rows = stats.filter(is_server_stats_row).eq_join(server_id, server_config).run(conn) # type: Iterator[JoinRow] - - for row in rows: - server_stats = row['left'] # type: ServerStats - server = row['right'] # type: Server - yield server, server_stats - - def query_tables_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] - """ - Retrieve each table in the cluster along with its statistics. - """ - r = self._r - system = self._system - - # For tables: stats['id'] = ['table', ''] - is_table_stats_row = r.row['id'].nth(0) == 'table' - table_id = r.row['id'].nth(1) - - stats = system.table('stats') - table_config = system.table('table_config') - - rows = stats.filter(is_table_stats_row).eq_join(table_id, table_config).run(conn) # type: Iterator[JoinRow] - - for row in rows: - table_stats = row['left'] # type: TableStats - table = row['right'] # type: Table - yield table, table_stats - - def query_replicas_with_stats(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] - """ - Retrieve each replica (table/server pair) in the cluster along with its statistics. 
- """ - r = self._r - system = self._system - - # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation, i.e. ask RethinkDB - # for a specific set of fields, instead of sending entire objects, which can be expensive when joining - # data as we do here. - # See: https://rethinkdb.com/api/python/pluck/ - - stats = system.table('stats') - server_config = system.table('server_config') - table_config = system.table('table_config') - table_status = system.table( - 'table_status', - # Required so that we can join on 'server_config' below without having to look up UUIDs from names. - # See: https://rethinkdb.com/api/python/table/#description - identifier_format='uuid', - ) - - query = ( - # Start from table statuses, as they contain the list of replicas for each shard of the table. - # See: https://rethinkdb.com/docs/system-tables/#table_status - table_status.pluck('id', {'shards': ['replicas']}) - .merge({'table': r.row['id']}) - .without('id') - # Flatten each table status entry into one entry per shard and replica. - .concat_map(lambda row: row['shards'].map(lambda shard: row.merge(shard.pluck('replicas')))) - .without('shards') - .concat_map( - lambda row: ( - row['replicas'].map(lambda replica: row.merge({'replica': replica.pluck('server', 'state')})) - ) - ) - .without('replicas') - # Grab table information for each replica. - # See: https://rethinkdb.com/docs/system-tables#table_config - .merge({'table': table_config.get(r.row['table']).pluck('id', 'db', 'name')}) - # Grab server information for each replica. - # See: https://rethinkdb.com/docs/system-tables#server_config - .merge({'server': server_config.get(r.row['replica']['server'])}) - .filter(r.row['server']) # Skip replicas stored on disconnected servers. - .merge({'server': r.row['server'].pluck('id', 'name', 'tags')}) - # Grab statistics for each replica. 
- # See: https://rethinkdb.com/docs/system-stats/#replica-tableserver-pair - .merge( - { - 'stats': stats.get(['table_server', r.row['table']['id'], r.row['server']['id']]).pluck( - 'query_engine', 'storage_engine' - ), - } - ) - ) - - rows = query.run(conn) # type: Iterator[Mapping[str, Any]] - - for row in rows: - table = row['table'] # type: Table - server = row['server'] # type: Server - replica = row['replica'] # type: ShardReplica - replica_stats = row['stats'] # type: ReplicaStats - yield table, server, replica, replica_stats - - def query_table_status(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] - """ - Retrieve the status of each table in the cluster. - """ - return self._system.table('table_status').run(conn) - - def query_server_status(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] - """ - Retrieve the status of each server in the cluster. - """ - return self._system.table('server_status').run(conn) - - def query_system_jobs(self, conn): - # type: (rethinkdb.net.Connection) -> Iterator[Job] - """ - Retrieve all the currently running system jobs. - """ - return self._system.table('jobs').run(conn) - - def query_current_issues_totals(self, conn): - # type: (rethinkdb.net.Connection) -> CurrentIssuesTotals - """ - Retrieve all the problems detected with the cluster. - """ - r = self._r - system = self._system - - current_issues = system.table('current_issues').pluck('type', 'critical') - - # NOTE: Need to `.run()` these separately because ReQL does not support putting grouped data in raw - # expressions yet. 
See: https://github.com/rethinkdb/rethinkdb/issues/2067 - - issues_by_type = current_issues.group('type').count().run(conn) # type: Mapping[str, int] - critical_issues_by_type = ( - current_issues.filter(r.row['critical']).group('type').count().run(conn) - ) # type: Mapping[str, int] - - return { - 'issues_by_type': issues_by_type, - 'critical_issues_by_type': critical_issues_by_type, - } diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 7c416dfe362f9..9a93835cc9393 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -10,7 +10,6 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb.backends import Backend from datadog_checks.rethinkdb.types import Instance, Metric from .assertions import assert_metrics @@ -163,14 +162,13 @@ def test_connected_but_check_failed_unexpectedly(aggregator, instance): class Failure(Exception): pass - class MockBackend(Backend): + class MockRethinkDBCheck(RethinkDBCheck): def collect_metrics(self, conn): # type: (Any) -> Iterator[Metric] yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} raise Failure - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.backend = MockBackend() + check = MockRethinkDBCheck('rethinkdb', {}, [instance]) with pytest.raises(Failure): check.check(instance) @@ -212,16 +210,15 @@ def test_metadata_version_malformed(instance, aggregator, datadog_agent, malform Verify that check still runs to completion if version provided by RethinkDB is malformed. 
""" - class MockBackend(Backend): + class MockRethinkDBCheck(RethinkDBCheck): def collect_connected_server_version(self, conn): # type: (Any) -> str return malformed_version_string check_id = 'test' - check = RethinkDBCheck('rethinkdb', {}, [instance]) + check = MockRethinkDBCheck('rethinkdb', {}, [instance]) check.check_id = check_id - check.backend = MockBackend() check.check(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) @@ -236,16 +233,15 @@ def test_metadata_version_failure(instance, aggregator, datadog_agent): Verify that check still runs to completion if it fails to retrieve the RethinkDB version. """ - class MockBackend(Backend): + class MockRethinkDBCheck(RethinkDBCheck): def collect_connected_server_version(self, conn): # type: (Any) -> str raise ValueError('Oops!') check_id = 'test' - check = RethinkDBCheck('rethinkdb', {}, [instance]) + check = MockRethinkDBCheck('rethinkdb', {}, [instance]) check.check_id = check_id - check.backend = MockBackend() check.check(instance) aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index dee5e78452e1b..48b71a5862f9f 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -8,7 +8,6 @@ import pytest from datadog_checks.rethinkdb.metrics.system_jobs import collect_system_jobs -from datadog_checks.rethinkdb.queries import QueryEngine from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob pytestmark = pytest.mark.unit @@ -57,11 +56,10 @@ def test_jobs_metrics(): mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] - engine = QueryEngine() conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = list(collect_system_jobs(engine, conn)) + metrics = list(collect_system_jobs(conn)) assert metrics == [ { From 
a83cd9731c17fcba2f768d06787cc3c007dd5170 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 16:19:10 +0100 Subject: [PATCH 120/147] Align metric names on JSON structure --- rethinkdb/assets/service_checks.json | 8 +- rethinkdb/datadog_checks/rethinkdb/check.py | 4 +- .../metrics/{config.py => config_summary.py} | 28 +++--- .../rethinkdb/metrics/current_issues.py | 12 +-- .../rethinkdb/metrics/statistics.py | 52 +++++------ .../rethinkdb/metrics/statuses.py | 24 ++--- .../rethinkdb/metrics/system_jobs.py | 64 ++++---------- .../datadog_checks/rethinkdb/operations.py | 18 ++-- rethinkdb/datadog_checks/rethinkdb/types.py | 8 +- rethinkdb/tests/assertions.py | 8 +- rethinkdb/tests/common.py | 87 +++++++++---------- rethinkdb/tests/test_rethinkdb.py | 15 +--- rethinkdb/tests/unit/test_metrics.py | 28 ++---- 13 files changed, 147 insertions(+), 209 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/metrics/{config.py => config_summary.py} (55%) diff --git a/rethinkdb/assets/service_checks.json b/rethinkdb/assets/service_checks.json index 7b3a02ad8f00e..0c23cb2e530ba 100644 --- a/rethinkdb/assets/service_checks.json +++ b/rethinkdb/assets/service_checks.json @@ -21,7 +21,7 @@ "database", "table" ], - "check": "rethinkdb.table_status.ready_for_outdated_reads", + "check": "rethinkdb.table_status.status.ready_for_outdated_reads", "statuses": [ "ok", "warning" @@ -36,7 +36,7 @@ "database", "table" ], - "check": "rethinkdb.table_status.ready_for_reads", + "check": "rethinkdb.table_status.status.ready_for_reads", "statuses": [ "ok", "warning" @@ -51,7 +51,7 @@ "database", "table" ], - "check": "rethinkdb.table_status.ready_for_writes", + "check": "rethinkdb.table_status.status.ready_for_writes", "statuses": [ "ok", "warning" @@ -66,7 +66,7 @@ "database", "table" ], - "check": "rethinkdb.table_status.all_replicas_ready", + "check": "rethinkdb.table_status.status.all_replicas_ready", "statuses": [ "ok", "warning" diff --git 
a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 7d9f940bb19bd..848bc6dd6efab 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -10,7 +10,7 @@ from . import operations from .config import Config -from .metrics.config import collect_config_totals +from .metrics.config_summary import collect_config_summary from .metrics.current_issues import collect_current_issues from .metrics.statistics import ( collect_cluster_statistics, @@ -36,7 +36,7 @@ def __init__(self, *args, **kwargs): super(RethinkDBCheck, self).__init__(*args, **kwargs) self.config = Config(cast(Instance, self.instance)) self.collect_funcs = ( - collect_config_totals, + collect_config_summary, collect_cluster_statistics, collect_server_statistics, collect_table_statistics, diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py b/rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py similarity index 55% rename from rethinkdb/datadog_checks/rethinkdb/metrics/config.py rename to rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py index bde630f5aadd5..4ad91771c2e4f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py @@ -12,48 +12,48 @@ logger = logging.getLogger(__name__) -def collect_config_totals(conn): +def collect_config_summary(conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] """ Collect aggregated metrics about cluster configuration. 
See: https://rethinkdb.com/docs/system-tables/#configuration-tables """ - logger.debug('collect_config_totals') + logger.debug('collect_config_summary') - totals = operations.query_config_totals(conn) - logger.debug('config_totals totals=%r', totals) + summary = operations.query_config_summary(conn) + logger.debug('config_summary %r', summary) yield { 'type': 'gauge', - 'name': 'rethinkdb.server.total', - 'value': totals['servers'], + 'name': 'rethinkdb.config.servers', + 'value': summary['servers'], 'tags': [], } yield { 'type': 'gauge', - 'name': 'rethinkdb.database.total', - 'value': totals['databases'], + 'name': 'rethinkdb.config.databases', + 'value': summary['databases'], 'tags': [], } - for database, total in totals['tables_per_database'].items(): + for database, num_tables in summary['tables_per_database'].items(): tags = ['database:{}'.format(database)] yield { 'type': 'gauge', - 'name': 'rethinkdb.database.table.total', - 'value': total, + 'name': 'rethinkdb.config.tables_per_database', + 'value': num_tables, 'tags': tags, } - for table, total in totals['secondary_indexes_per_table'].items(): + for table, num_indexes in summary['secondary_indexes_per_table'].items(): tags = ['table:{}'.format(table)] yield { 'type': 'gauge', - 'name': 'rethinkdb.table.secondary_index.total', - 'value': total, + 'name': 'rethinkdb.config.secondary_indexes_per_table', + 'value': num_indexes, 'tags': tags, } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py index f08916ca5835b..a8f00c9f2101e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py @@ -21,21 +21,21 @@ def collect_current_issues(conn): """ logger.debug('collect_current_issues') - totals = operations.query_current_issues_totals(conn) - logger.debug('current_issues totals=%r', totals) + summary = operations.query_current_issues_summary(conn) + 
logger.debug('current_issues %r', summary) - for issue_type, total in totals['issues_by_type'].items(): + for issue_type, total in summary['issues'].items(): yield { 'type': 'gauge', - 'name': 'rethinkdb.current_issues.total', + 'name': 'rethinkdb.current_issues.issues', 'value': total, 'tags': ['issue_type:{}'.format(issue_type)], } - for issue_type, total in totals['critical_issues_by_type'].items(): + for issue_type, total in summary['critical_issues'].items(): yield { 'type': 'gauge', - 'name': 'rethinkdb.current_issues.critical.total', + 'name': 'rethinkdb.current_issues.critical_issues', 'value': total, 'tags': ['issue_type:{}'.format(issue_type)], } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py index 56e61eab9aeba..3a981822ba6c2 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py @@ -28,21 +28,21 @@ def collect_cluster_statistics(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.queries_per_sec', + 'name': 'rethinkdb.stats.cluster.query_engine.queries_per_sec', 'value': query_engine['queries_per_sec'], 'tags': [], } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.read_docs_per_sec', + 'name': 'rethinkdb.stats.cluster.query_engine.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': [], } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.written_docs_per_sec', + 'name': 'rethinkdb.stats.cluster.query_engine.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': [], } @@ -69,56 +69,56 @@ def collect_server_statistics(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.server.client_connections', + 'name': 'rethinkdb.stats.server.query_engine.client_connections', 'value': query_engine['client_connections'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.server.clients_active', + 'name': 
'rethinkdb.stats.server.query_engine.clients_active', 'value': query_engine['clients_active'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.server.queries_per_sec', + 'name': 'rethinkdb.stats.server.query_engine.queries_per_sec', 'value': query_engine['queries_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.queries_total', + 'name': 'rethinkdb.stats.server.query_engine.queries_total', 'value': query_engine['queries_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.server.read_docs_per_sec', + 'name': 'rethinkdb.stats.server.query_engine.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.read_docs_total', + 'name': 'rethinkdb.stats.server.query_engine.read_docs_total', 'value': query_engine['read_docs_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.server.written_docs_per_sec', + 'name': 'rethinkdb.stats.server.query_engine.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.written_docs_total', + 'name': 'rethinkdb.stats.server.query_engine.written_docs_total', 'value': query_engine['written_docs_total'], 'tags': tags, } @@ -144,14 +144,14 @@ def collect_table_statistics(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table.read_docs_per_sec', + 'name': 'rethinkdb.stats.table.query_engine.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table.written_docs_per_sec', + 'name': 'rethinkdb.stats.table.query_engine.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': tags, } @@ -187,91 +187,91 @@ def collect_replica_statistics(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.read_docs_per_sec', + 'name': 
'rethinkdb.stats.table_server.query_engine.read_docs_per_sec', 'value': query_engine['read_docs_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.read_docs_total', + 'name': 'rethinkdb.stats.table_server.query_engine.read_docs_total', 'value': query_engine['read_docs_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.written_docs_per_sec', + 'name': 'rethinkdb.stats.table_server.query_engine.written_docs_per_sec', 'value': query_engine['written_docs_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.written_docs_total', + 'name': 'rethinkdb.stats.table_server.query_engine.written_docs_total', 'value': query_engine['written_docs_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.cache.in_use_bytes', + 'name': 'rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes', 'value': storage_engine['cache']['in_use_bytes'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.read_bytes_per_sec', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec', 'value': storage_engine['disk']['read_bytes_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.disk.read_bytes_total', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total', 'value': storage_engine['disk']['read_bytes_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.written_bytes_per_sec', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec', 'value': storage_engine['disk']['written_bytes_per_sec'], 'tags': tags, } yield { 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.disk.written_bytes_total', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total', 'value': 
storage_engine['disk']['written_bytes_total'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.metadata_bytes', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes', 'value': storage_engine['disk']['space_usage']['metadata_bytes'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.data_bytes', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes', 'value': storage_engine['disk']['space_usage']['data_bytes'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.garbage_bytes', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes', 'value': storage_engine['disk']['space_usage']['garbage_bytes'], 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.disk.preallocated_bytes', + 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes', 'value': storage_engine['disk']['space_usage']['preallocated_bytes'], 'tags': tags, } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py index 9365b254bb66e..5885cccb81e6e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py @@ -35,35 +35,35 @@ def collect_table_status(conn): yield { 'type': 'service_check', - 'name': 'rethinkdb.table_status.ready_for_outdated_reads', + 'name': 'rethinkdb.table_status.status.ready_for_outdated_reads', 'value': AgentCheck.OK if table_status['status']['ready_for_outdated_reads'] else AgentCheck.WARNING, 'tags': tags, } yield { 'type': 'service_check', - 'name': 'rethinkdb.table_status.ready_for_reads', + 'name': 'rethinkdb.table_status.status.ready_for_reads', 'value': AgentCheck.OK if table_status['status']['ready_for_reads'] else AgentCheck.WARNING, 'tags': tags, } yield { 'type': 'service_check', 
- 'name': 'rethinkdb.table_status.ready_for_writes', + 'name': 'rethinkdb.table_status.status.ready_for_writes', 'value': AgentCheck.OK if table_status['status']['ready_for_writes'] else AgentCheck.WARNING, 'tags': tags, } yield { 'type': 'service_check', - 'name': 'rethinkdb.table_status.all_replicas_ready', + 'name': 'rethinkdb.table_status.status.all_replicas_ready', 'value': AgentCheck.OK if table_status['status']['all_replicas_ready'] else AgentCheck.WARNING, 'tags': tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.total', + 'name': 'rethinkdb.table_status.shards', 'value': len(table_status['shards']), 'tags': tags, } @@ -73,14 +73,14 @@ def collect_table_status(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas.total', + 'name': 'rethinkdb.table_status.shards.replicas', 'value': len(shard['replicas']), 'tags': shard_tags, } yield { 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas.primary.total', + 'name': 'rethinkdb.table_status.shards.primary_replicas', 'value': len(shard['primary_replicas']), 'tags': shard_tags, } @@ -113,18 +113,10 @@ def collect_server_status(conn): yield { 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.connected_to.total', + 'name': 'rethinkdb.server_status.network.connected_to', 'value': len([other for other, connected in network['connected_to'].items() if connected]), 'tags': tags, } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.connected_to.pending.total', - 'value': len([other for other, connected in network['connected_to'].items() if not connected]), - 'tags': tags, - } - yield { 'type': 'gauge', 'name': 'rethinkdb.server_status.process.time_started', diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py index fc88b4e3c2f99..70bb01eb1ebfe 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ 
b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py @@ -2,12 +2,12 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) import logging -from typing import Iterator, cast +from typing import Iterator import rethinkdb from .. import operations -from ..types import BackfillInfo, IndexConstructionInfo, Metric +from ..types import Metric logger = logging.getLogger(__name__) @@ -30,60 +30,30 @@ def collect_system_jobs(conn): tags = ['server:{}'.format(server) for server in servers] if job['type'] == 'index_construction': - # NOTE: Using `cast()` is required until tagged unions are released in mypy stable. Until then, avoid using - # 'info' as a variable name in all cases (workaround for https://github.com/python/mypy/issues/6232). - # See: https://mypy.readthedocs.io/en/latest/literal_types.html#tagged-unions - index_construction_info = cast(IndexConstructionInfo, job['info']) - database = index_construction_info['db'] - table = index_construction_info['table'] - index = index_construction_info['index'] - progress = index_construction_info['progress'] - - index_construction_tags = tags + [ - 'database:{}'.format(database), - 'table:{}'.format(table), - 'index:{}'.format(index), + job_tags = tags + [ + 'job_type:{}'.format(job['type']), + 'database:{}'.format(job['info']['db']), + 'table:{}'.format(job['info']['table']), + 'index:{}'.format(job['info']['index']), ] - yield { 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.duration', + 'name': 'rethinkdb.jobs.duration', 'value': duration, - 'tags': index_construction_tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.progress', - 'value': progress, - 'tags': index_construction_tags, + 'tags': job_tags, } elif job['type'] == 'backfill': - backfill_info = cast(BackfillInfo, job['info']) - database = backfill_info['db'] - destination_server = backfill_info['destination_server'] - source_server = backfill_info['source_server'] - table = 
backfill_info['table'] - progress = backfill_info['progress'] - - backfill_tags = tags + [ - 'database:{}'.format(database), - 'destination_server:{}'.format(destination_server), - 'source_server:{}'.format(source_server), - 'table:{}'.format(table), + job_tags = tags + [ + 'job_type:{}'.format(job['type']), + 'database:{}'.format(job['info']['db']), + 'destination_server:{}'.format(job['info']['destination_server']), + 'source_server:{}'.format(job['info']['source_server']), + 'table:{}'.format(job['info']['table']), ] - yield { 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.duration', + 'name': 'rethinkdb.jobs.duration', 'value': duration, - 'tags': backfill_tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.progress', - 'value': progress, - 'tags': backfill_tags, + 'tags': job_tags, } diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index 9ee7174b982b1..d5b0a0e3be500 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -13,9 +13,9 @@ from .types import ( ClusterStats, - ConfigTotals, + ConfigSummary, ConnectionServer, - CurrentIssuesTotals, + CurrentIssuesSummary, Job, JoinRow, ReplicaStats, @@ -48,8 +48,8 @@ def query_connected_server_version_string(conn): return server_status['process']['version'] -def query_config_totals(conn): - # type: (rethinkdb.net.Connection) -> ConfigTotals +def query_config_summary(conn): + # type: (rethinkdb.net.Connection) -> ConfigSummary """ Return a summary of the cluster configuration. """ @@ -77,7 +77,7 @@ def query_config_totals(conn): 'databases': db_config.count(), 'tables_per_database': tables_per_database, 'secondary_indexes_per_table': secondary_indexes_per_table, - } # type: ConfigTotals # Enforce keys to match. + } # type: ConfigSummary # Enforce keys to match. 
return r.expr(totals).run(conn) @@ -216,8 +216,8 @@ def query_system_jobs(conn): return system.table('jobs').run(conn) -def query_current_issues_totals(conn): - # type: (rethinkdb.net.Connection) -> CurrentIssuesTotals +def query_current_issues_summary(conn): + # type: (rethinkdb.net.Connection) -> CurrentIssuesSummary """ Retrieve all the problems detected with the cluster. """ @@ -232,6 +232,6 @@ def query_current_issues_totals(conn): ) # type: Mapping[str, int] return { - 'issues_by_type': issues_by_type, - 'critical_issues_by_type': critical_issues_by_type, + 'issues': issues_by_type, + 'critical_issues': critical_issues_by_type, } diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 3d64de570eb24..9dae6d201fded 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -28,8 +28,8 @@ Table = TypedDict('Table', {'id': str, 'name': str, 'db': str}) -ConfigTotals = TypedDict( - 'ConfigTotals', +ConfigSummary = TypedDict( + 'ConfigSummary', { 'servers': int, 'databases': int, @@ -177,8 +177,8 @@ # System current issues. 
-CurrentIssuesTotals = TypedDict( - 'CurrentIssuesTotals', {'issues_by_type': Mapping[str, int], 'critical_issues_by_type': Mapping[str, int]}, +CurrentIssuesSummary = TypedDict( + 'CurrentIssuesSummary', {'issues': Mapping[str, int], 'critical_issues': Mapping[str, int]}, ) diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index 5be034ea15581..35b798082d60d 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -7,7 +7,7 @@ from .common import ( CLUSTER_STATISTICS_METRICS, - CONFIG_TOTALS_METRICS, + CONFIG_METRICS, CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS, CURRENT_ISSUES_METRICS, DATABASE, @@ -33,7 +33,7 @@ def assert_metrics(aggregator, disconnected_servers=None): if disconnected_servers is None: disconnected_servers = set() - _assert_config_totals_metrics(aggregator, disconnected_servers=disconnected_servers) + _assert_config_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) @@ -44,9 +44,9 @@ def assert_metrics(aggregator, disconnected_servers=None): # test without introducing flakiness. -def _assert_config_totals_metrics(aggregator, disconnected_servers): +def _assert_config_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None - for metric, typ, value, tags in CONFIG_TOTALS_METRICS: + for metric, typ, value, tags in CONFIG_METRICS: if callable(value): value = value(disconnected_servers) aggregator.assert_metric(metric, metric_type=typ, count=1, tags=TAGS + tags, value=value) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index ab2477219b331..a37eb273c60e7 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -86,93 +86,90 @@ # Metrics lists. 
# NOTE: jobs metrics are not listed here as they're hard to trigger, so they're covered by unit tests instead. -CONFIG_TOTALS_METRICS = ( +CONFIG_METRICS = ( ( - 'rethinkdb.server.total', + 'rethinkdb.config.servers', AggregatorStub.GAUGE, lambda disconnected_servers: len(SERVERS) - len(disconnected_servers), [], ), - ('rethinkdb.database.total', AggregatorStub.GAUGE, 1, []), - ('rethinkdb.database.table.total', AggregatorStub.GAUGE, 1, ['database:{}'.format(DATABASE)]), - ('rethinkdb.table.secondary_index.total', AggregatorStub.GAUGE, 1, ['table:{}'.format(HEROES_TABLE)]), + ('rethinkdb.config.databases', AggregatorStub.GAUGE, 1, []), + ('rethinkdb.config.tables_per_database', AggregatorStub.GAUGE, 1, ['database:{}'.format(DATABASE)]), + ('rethinkdb.config.secondary_indexes_per_table', AggregatorStub.GAUGE, 1, ['table:{}'.format(HEROES_TABLE)]), ) # type: Tuple[Tuple[str, int, Union[int, Callable[[set], int]], List[str]], ...] CLUSTER_STATISTICS_METRICS = ( - ('rethinkdb.stats.cluster.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.cluster.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.cluster.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
SERVER_STATISTICS_METRICS = ( - ('rethinkdb.stats.server.queries_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.queries_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.server.client_connections', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.queries_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.queries_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.server.query_engine.client_connections', AggregatorStub.GAUGE), ( # NOTE: submitted but not documented on the RethinkDB website. - 'rethinkdb.stats.server.clients_active', + 'rethinkdb.stats.server.query_engine.clients_active', AggregatorStub.GAUGE, ), ) # type: Tuple[Tuple[str, int], ...] TABLE_STATISTICS_METRICS = ( - ('rethinkdb.stats.table.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
REPLICA_STATISTICS_METRICS = ( - ('rethinkdb.stats.table_server.read_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.read_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.written_docs_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.written_docs_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.cache.in_use_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.read_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.disk.written_bytes_per_sec', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), - ('rethinkdb.stats.table_server.disk.metadata_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.data_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.garbage_bytes', AggregatorStub.GAUGE), - ('rethinkdb.stats.table_server.disk.preallocated_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.read_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.read_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.query_engine.written_docs_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.query_engine.written_docs_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total', AggregatorStub.MONOTONIC_COUNT), + ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total', AggregatorStub.MONOTONIC_COUNT), + 
('rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes', AggregatorStub.GAUGE), + ('rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] TABLE_STATUS_SERVICE_CHECKS = ( - 'rethinkdb.table_status.ready_for_outdated_reads', - 'rethinkdb.table_status.ready_for_reads', - 'rethinkdb.table_status.ready_for_writes', - 'rethinkdb.table_status.all_replicas_ready', + 'rethinkdb.table_status.status.ready_for_outdated_reads', + 'rethinkdb.table_status.status.ready_for_reads', + 'rethinkdb.table_status.status.ready_for_writes', + 'rethinkdb.table_status.status.all_replicas_ready', ) -TABLE_STATUS_METRICS = ( - ('rethinkdb.table_status.shards.total', AggregatorStub.GAUGE), -) # type: Tuple[Tuple[str, int], ...] +TABLE_STATUS_METRICS = (('rethinkdb.table_status.shards', AggregatorStub.GAUGE),) # type: Tuple[Tuple[str, int], ...] TABLE_STATUS_SHARDS_METRICS = ( - ('rethinkdb.table_status.shards.replicas.total', AggregatorStub.GAUGE), - ('rethinkdb.table_status.shards.replicas.primary.total', AggregatorStub.GAUGE), + ('rethinkdb.table_status.shards.replicas', AggregatorStub.GAUGE), + ('rethinkdb.table_status.shards.primary_replicas', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] SERVER_STATUS_METRICS = ( ('rethinkdb.server_status.network.time_connected', AggregatorStub.GAUGE), - ('rethinkdb.server_status.network.connected_to.total', AggregatorStub.GAUGE), - ('rethinkdb.server_status.network.connected_to.pending.total', AggregatorStub.GAUGE), + ('rethinkdb.server_status.network.connected_to', AggregatorStub.GAUGE), ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] 
CURRENT_ISSUES_METRICS = ( - ('rethinkdb.current_issues.total', AggregatorStub.GAUGE), - ('rethinkdb.current_issues.critical.total', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.issues', AggregatorStub.GAUGE), + ('rethinkdb.current_issues.critical_issues', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS = ['table_availability'] E2E_METRICS = ( - tuple((name, typ) for name, typ, _, _ in CONFIG_TOTALS_METRICS) + tuple((name, typ) for name, typ, _, _ in CONFIG_METRICS) + CLUSTER_STATISTICS_METRICS + SERVER_STATISTICS_METRICS + TABLE_STATISTICS_METRICS diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 9a93835cc9393..b3d6346349f90 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -125,18 +125,9 @@ def test_check_with_disconnected_server(aggregator, instance, server_with_data): table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_outdated_reads', RethinkDBCheck.OK, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_reads', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.ready_for_writes', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) - aggregator.assert_service_check( - 'rethinkdb.table_status.all_replicas_ready', RethinkDBCheck.WARNING, count=1, tags=table_status_tags - ) + for service_check in TABLE_STATUS_SERVICE_CHECKS: + status = RethinkDBCheck.OK if service_check.endswith('ready_for_outdated_reads') else RethinkDBCheck.WARNING + aggregator.assert_service_check(service_check, status, count=1, tags=table_status_tags) @pytest.mark.integration diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_metrics.py index 48b71a5862f9f..2a8a15e80f9ae 
100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_metrics.py @@ -47,7 +47,7 @@ def test_jobs_metrics(): # See: https://rethinkdb.com/docs/system-jobs/#index_construction 'type': 'index_construction', 'id': ('index_construction', 'abcd1234'), - 'duration_sec': 0.42, + 'duration_sec': 0.24, 'info': {'db': 'doghouse', 'table': 'heroes', 'index': 'appearances_count', 'progress': 42}, 'servers': ['server1'], } # type: IndexConstructionJob @@ -64,11 +64,12 @@ def test_jobs_metrics(): assert metrics == [ { 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.duration', + 'name': 'rethinkdb.jobs.duration', 'value': 0.42, 'tags': [ 'server:server0', 'server:server2', + 'job_type:backfill', 'database:doghouse', 'destination_server:server2', 'source_server:server0', @@ -77,27 +78,14 @@ def test_jobs_metrics(): }, { 'type': 'gauge', - 'name': 'rethinkdb.jobs.backfill.progress', - 'value': 42, + 'name': 'rethinkdb.jobs.duration', + 'value': 0.24, 'tags': [ - 'server:server0', - 'server:server2', + 'server:server1', + 'job_type:index_construction', 'database:doghouse', - 'destination_server:server2', - 'source_server:server0', 'table:heroes', + 'index:appearances_count', ], }, - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.duration', - 'value': 0.42, - 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], - }, - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.index_construction.progress', - 'value': 42, - 'tags': ['server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count'], - }, ] From 130fc74e513c900f2a50fd3ac4d914cf7e5becf6 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 16:45:02 +0100 Subject: [PATCH 121/147] Introduce and use `DocumentQuery` --- rethinkdb/datadog_checks/rethinkdb/check.py | 44 ++- .../{metrics => document_db}/__init__.py | 3 + .../rethinkdb/document_db/query.py | 118 ++++++++ .../rethinkdb/document_db/types.py | 26 ++ 
.../rethinkdb/document_db/utils.py | 50 ++++ .../rethinkdb/metrics/config_summary.py | 59 ---- .../rethinkdb/metrics/current_issues.py | 41 --- .../rethinkdb/metrics/statistics.py | 277 ------------------ .../rethinkdb/metrics/statuses.py | 134 --------- .../rethinkdb/metrics/system_jobs.py | 59 ---- .../datadog_checks/rethinkdb/operations.py | 127 +++++--- rethinkdb/datadog_checks/rethinkdb/queries.py | 148 ++++++++++ rethinkdb/datadog_checks/rethinkdb/types.py | 31 +- rethinkdb/tests/test_rethinkdb.py | 3 +- ...metrics.py => test_system_jobs_metrics.py} | 52 +++- 15 files changed, 519 insertions(+), 653 deletions(-) rename rethinkdb/datadog_checks/rethinkdb/{metrics => document_db}/__init__.py (64%) create mode 100644 rethinkdb/datadog_checks/rethinkdb/document_db/query.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/document_db/types.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/document_db/utils.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py delete mode 100644 rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/queries.py rename rethinkdb/tests/unit/{test_metrics.py => test_system_jobs_metrics.py} (66%) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 848bc6dd6efab..a3bccf283a9c7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -8,19 +8,11 @@ from datadog_checks.base import AgentCheck -from . import operations +from . 
import operations, queries from .config import Config -from .metrics.config_summary import collect_config_summary -from .metrics.current_issues import collect_current_issues -from .metrics.statistics import ( - collect_cluster_statistics, - collect_replica_statistics, - collect_server_statistics, - collect_table_statistics, -) -from .metrics.statuses import collect_server_status, collect_table_status -from .metrics.system_jobs import collect_system_jobs -from .types import Instance, Metric +from .document_db import DocumentQuery +from .document_db.types import Metric +from .types import Instance from .version import parse_version SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' @@ -35,17 +27,17 @@ def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) self.config = Config(cast(Instance, self.instance)) - self.collect_funcs = ( - collect_config_summary, - collect_cluster_statistics, - collect_server_statistics, - collect_table_statistics, - collect_replica_statistics, - collect_server_status, - collect_table_status, - collect_system_jobs, - collect_current_issues, - ) # type: Sequence[Callable] + self.queries = ( + queries.config_summary, + queries.cluster_statistics, + queries.server_statistics, + queries.table_statistics, + queries.replica_statistics, + queries.table_statuses, + queries.server_statuses, + queries.system_jobs, + queries.current_issues_summary, + ) # type: Sequence[DocumentQuery] @contextmanager def connect_submitting_service_checks(self): @@ -82,8 +74,8 @@ def collect_metrics(self, conn): """ Collect metrics from the RethinkDB cluster we are connected to. 
""" - for collect in self.collect_funcs: - for metric in collect(conn): + for query in self.queries: + for metric in query.run(conn): yield metric def collect_connected_server_version(self, conn): @@ -91,7 +83,7 @@ def collect_connected_server_version(self, conn): """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. """ - version_string = operations.query_connected_server_version_string(conn) + version_string = operations.get_connected_server_version_string(conn) return parse_version(version_string) def submit_metric(self, metric): diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/__init__.py b/rethinkdb/datadog_checks/rethinkdb/document_db/__init__.py similarity index 64% rename from rethinkdb/datadog_checks/rethinkdb/metrics/__init__.py rename to rethinkdb/datadog_checks/rethinkdb/document_db/__init__.py index 46dd167dcde48..19f6ee93f9a16 100644 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/__init__.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/__init__.py @@ -1,3 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from .query import DocumentQuery + +__all__ = ['DocumentQuery'] diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py new file mode 100644 index 0000000000000..836eaee6d2c6d --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -0,0 +1,118 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import logging +from typing import Any, Callable, Iterator, List, Mapping, Sequence, Tuple + +from datadog_checks.base import AgentCheck + +from .types import Enumeration, Group, Metric, MetricSpec, Modifier +from .utils import dotted_join, lookup_dotted, to_timestamp + +logger = logging.getLogger(__name__) + + +class DocumentQuery(object): + """ + A helper for retrieving metrics from document-oriented ("JSON") databases. + """ + + def __init__( + self, + source, # type: Callable[..., Iterator[Tuple[Any, List[str]]]] + name, # type: str + prefix, # type: str + metrics=None, # type: List[MetricSpec] + enumerations=None, # type: List[Enumeration] + groups=None, # type: List[Group] + ): + self.source = source + self.name = name + self.prefix = prefix + self.metrics = [] if metrics is None else metrics + self.enumerations = [] if enumerations is None else enumerations + self.groups = [] if groups is None else groups + + def _make_metric_from_spec(self, document, spec, tags): + # type: (Any, MetricSpec, List[str]) -> Metric + logger.debug('make_metric_from_spec %r', spec) + + path = spec['path'] + name = spec.get('name', path) + value = lookup_dotted(document, path=path) + + if 'modifier' in spec and spec['modifier'] is not None: + value = self._modify(value, modifier=spec['modifier']) + + if not isinstance(value, (int, float)): # pragma: no cover + raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) + + name = dotted_join(('rethinkdb', self.prefix, name)) + + return {'type': spec['type'], 'name': name, 'value': value, 'tags': tags} + + def _make_metrics_from_enumeration(self, document, enumeration, tags): + # type: (Any, Enumeration, List[str]) -> Iterator[Metric] + logger.debug('make_metrics_from_enumeration enumeration=%r', enumeration) + + values = lookup_dotted(document, path=enumeration['path']) # type: Sequence + + for index, value in 
enumerate(values): + item_tags = tags + ['{}:{}'.format(enumeration['index_tag'], index)] + + for spec in enumeration['metrics']: + spec = { + 'type': spec['type'], + 'name': dotted_join((enumeration['path'], spec['path']), drop_empty=True), + 'path': spec['path'], + 'modifier': spec.get('modifier'), + } + yield self._make_metric_from_spec(value, spec, tags=item_tags) + + def _make_metrics_from_group(self, document, group, tags): + # type: (Any, Group, List[str]) -> Iterator[Metric] + logger.debug('make_metrics_from_group group=%r', group) + + mapping = lookup_dotted(document, path=group['path']) # type: Mapping + + for key in mapping: + item_tags = tags + ['{}:{}'.format(group['key_tag'], key)] + spec = { + 'type': group['value_metric_type'], + 'name': group['path'], + 'path': key, + } # type: MetricSpec + yield self._make_metric_from_spec(mapping, spec, tags=item_tags) + + def _modify(self, value, modifier): + # type: (Any, Modifier) -> float + logger.debug('modify value=%r modifier=%r', value, modifier) + + if modifier == 'total': + return len(value) + + if modifier == 'ok_warning': + return AgentCheck.OK if value else AgentCheck.WARNING + + if modifier == 'timestamp': + return to_timestamp(value) + + raise RuntimeError('Unknown modifier: {!r}'.format(modifier)) # pragma: no cover + + def run(self, *args, **kwargs): + # type: (*Any, **Any) -> Iterator[Metric] + logger.debug('query_%s', self.name) + + for document, tags in self.source(*args, **kwargs): + logger.debug('%s %r', self.name, document) + + for spec in self.metrics: + yield self._make_metric_from_spec(document, spec, tags=tags) + + for enumeration in self.enumerations: + for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags): + yield metric + + for group in self.groups: + for metric in self._make_metrics_from_group(document, group, tags=tags): + yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/types.py 
b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py new file mode 100644 index 0000000000000..40759cf37840f --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py @@ -0,0 +1,26 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Any, Callable, List, Literal, Optional, Sequence, TypedDict, Union + +MetricType = Literal['gauge', 'count', 'monotonic_count', 'rate', 'service_check'] +Metric = TypedDict('Metric', {'type': MetricType, 'name': str, 'value': float, 'tags': List[str]}) + +ModifierName = Literal['total', 'ok_warning', 'timestamp'] +TotalModifier = TypedDict('TotalModifier', {'name': Literal['total'], 'map': Callable[[Any], Sequence]}) +Modifier = Union[ModifierName, TotalModifier] + +MetricSpec = TypedDict( + 'MetricSpec', + { + 'type': MetricType, + 'path': str, # Used as the default name. + 'name': str, # An explicit name for the metric. + 'modifier': Optional[Modifier], + }, + total=False, +) + +Enumeration = TypedDict('Enumeration', {'path': str, 'index_tag': str, 'metrics': List[MetricSpec]}) + +Group = TypedDict('Group', {'path': str, 'key_tag': str, 'value_metric_type': MetricType}) diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py new file mode 100644 index 0000000000000..98fd0be566800 --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -0,0 +1,50 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +Miscellaneous utilities. +""" +import datetime as dt +import time +from typing import Any, Mapping, Sequence + + +def lookup_dotted(dct, path): + # type: (Mapping, str) -> Any + """ + Given a mapping and a dotted path `key1.key2...keyN`, return the item at `dct[key1][key2]...[keyN]`. 
+ """ + keys = [key for key in reversed(path.split('.'))] + + value = dct + + while keys: + if not isinstance(value, Mapping): # pragma: no cover + raise RuntimeError( + 'followed path {!r} with remaining keys {!r}, but value {!r} is not a mapping'.format(path, value, keys) + ) + + key = keys.pop() + + try: + value = value[key] + except KeyError as exc: # pragma: no cover + raise RuntimeError('Failed to retrieve key {!r} on value {!r}: {!r}'.format(key, value, exc)) + + return value + + +def dotted_join(values, drop_empty=False): + # type: (Sequence[str], bool) -> str + if drop_empty: + values = [value for value in values if value] + return '.'.join(values) + + +def to_timestamp(datetime): + # type: (dt.datetime) -> float + try: + return datetime.timestamp() # type: ignore # (mypy runs in `--py2` mode.) + except AttributeError: # pragma: no cover + # Python 2. + return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py b/rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py deleted file mode 100644 index 4ad91771c2e4f..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/config_summary.py +++ /dev/null @@ -1,59 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import Iterator - -import rethinkdb - -from .. import operations -from ..types import Metric - -logger = logging.getLogger(__name__) - - -def collect_config_summary(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect aggregated metrics about cluster configuration. 
- - See: https://rethinkdb.com/docs/system-tables/#configuration-tables - """ - logger.debug('collect_config_summary') - - summary = operations.query_config_summary(conn) - logger.debug('config_summary %r', summary) - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.config.servers', - 'value': summary['servers'], - 'tags': [], - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.config.databases', - 'value': summary['databases'], - 'tags': [], - } - - for database, num_tables in summary['tables_per_database'].items(): - tags = ['database:{}'.format(database)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.config.tables_per_database', - 'value': num_tables, - 'tags': tags, - } - - for table, num_indexes in summary['secondary_indexes_per_table'].items(): - tags = ['table:{}'.format(table)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.config.secondary_indexes_per_table', - 'value': num_indexes, - 'tags': tags, - } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py b/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py deleted file mode 100644 index a8f00c9f2101e..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/current_issues.py +++ /dev/null @@ -1,41 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import Iterator - -import rethinkdb - -from .. import operations -from ..types import Metric - -logger = logging.getLogger(__name__) - - -def collect_current_issues(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about current system issues. 
- - See: https://rethinkdb.com/docs/system-issues/ - """ - logger.debug('collect_current_issues') - - summary = operations.query_current_issues_summary(conn) - logger.debug('current_issues %r', summary) - - for issue_type, total in summary['issues'].items(): - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.issues', - 'value': total, - 'tags': ['issue_type:{}'.format(issue_type)], - } - - for issue_type, total in summary['critical_issues'].items(): - yield { - 'type': 'gauge', - 'name': 'rethinkdb.current_issues.critical_issues', - 'value': total, - 'tags': ['issue_type:{}'.format(issue_type)], - } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py deleted file mode 100644 index 3a981822ba6c2..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statistics.py +++ /dev/null @@ -1,277 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import Iterator - -import rethinkdb - -from .. import operations -from ..types import Metric - -logger = logging.getLogger(__name__) - - -def collect_cluster_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about cluster statistics. 
- - See: https://rethinkdb.com/docs/system-stats#cluster - """ - logger.debug('collect_cluster_statistics') - - stats = operations.query_cluster_stats(conn) - logger.debug('cluster_statistics stats=%r', stats) - - query_engine = stats['query_engine'] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.query_engine.queries_per_sec', - 'value': query_engine['queries_per_sec'], - 'tags': [], - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.query_engine.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': [], - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.cluster.query_engine.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': [], - } - - -def collect_server_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about server statistics. - - See: https://rethinkdb.com/docs/system-stats#server - """ - logger.debug('collect_server_statistics') - - for server, stats in operations.query_servers_with_stats(conn): - logger.debug('server_statistics server=%r stats=%r', server, stats) - - name = server['name'] - server_tags = server['tags'] - query_engine = stats['query_engine'] - - tags = ['server:{}'.format(name)] - tags.extend(server_tags) - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.query_engine.client_connections', - 'value': query_engine['client_connections'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.query_engine.clients_active', - 'value': query_engine['clients_active'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.query_engine.queries_per_sec', - 'value': query_engine['queries_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.query_engine.queries_total', - 'value': query_engine['queries_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 
'rethinkdb.stats.server.query_engine.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.query_engine.read_docs_total', - 'value': query_engine['read_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.server.query_engine.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.server.query_engine.written_docs_total', - 'value': query_engine['written_docs_total'], - 'tags': tags, - } - - -def collect_table_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about table statistics. - - See: https://rethinkdb.com/docs/system-stats#table - """ - logger.debug('collect_table_statistics') - - for table, stats in operations.query_tables_with_stats(conn): - logger.debug('table_statistics table=%r stats=%r', table, stats) - - name = table['name'] - database = table['db'] - query_engine = stats['query_engine'] - - tags = ['table:{}'.format(name), 'database:{}'.format(database)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table.query_engine.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table.query_engine.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - -def collect_replica_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about replicas (table/server pairs) statistics. 
- - See: https://rethinkdb.com/docs/system-stats#replica - """ - logger.debug('collect_replica_statistics') - - for table, server, replica, stats in operations.query_replicas_with_stats(conn): - logger.debug('replica_statistics table=%r server=%r replica=%r stats=%r', table, server, replica, stats) - - database = table['db'] - server_name = server['name'] - table_name = table['name'] - server_tags = server['tags'] - query_engine = stats['query_engine'] - storage_engine = stats['storage_engine'] - state = replica['state'] - - tags = [ - 'table:{}'.format(table_name), - 'database:{}'.format(database), - 'server:{}'.format(server_name), - 'state:{}'.format(state), - ] - tags.extend(server_tags) - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.query_engine.read_docs_per_sec', - 'value': query_engine['read_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.query_engine.read_docs_total', - 'value': query_engine['read_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.query_engine.written_docs_per_sec', - 'value': query_engine['written_docs_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.query_engine.written_docs_total', - 'value': query_engine['written_docs_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes', - 'value': storage_engine['cache']['in_use_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec', - 'value': storage_engine['disk']['read_bytes_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total', - 'value': storage_engine['disk']['read_bytes_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', 
- 'name': 'rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec', - 'value': storage_engine['disk']['written_bytes_per_sec'], - 'tags': tags, - } - - yield { - 'type': 'monotonic_count', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total', - 'value': storage_engine['disk']['written_bytes_total'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes', - 'value': storage_engine['disk']['space_usage']['metadata_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes', - 'value': storage_engine['disk']['space_usage']['data_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes', - 'value': storage_engine['disk']['space_usage']['garbage_bytes'], - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes', - 'value': storage_engine['disk']['space_usage']['preallocated_bytes'], - 'tags': tags, - } diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py b/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py deleted file mode 100644 index 5885cccb81e6e..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/statuses.py +++ /dev/null @@ -1,134 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import datetime as dt -import logging -import time -from typing import Iterator - -import rethinkdb - -from datadog_checks.base import AgentCheck - -from .. import operations -from ..types import Metric - -logger = logging.getLogger(__name__) - - -def collect_table_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about table statuses. 
- - See: https://rethinkdb.com/docs/system-tables/#table_status - """ - logger.debug('collect_table_status') - - for table_status in operations.query_table_status(conn): - logger.debug('table_status %r', table_status) - - table = table_status['name'] - database = table_status['db'] - - tags = ['table:{}'.format(table), 'database:{}'.format(database)] - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.status.ready_for_outdated_reads', - 'value': AgentCheck.OK if table_status['status']['ready_for_outdated_reads'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.status.ready_for_reads', - 'value': AgentCheck.OK if table_status['status']['ready_for_reads'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.status.ready_for_writes', - 'value': AgentCheck.OK if table_status['status']['ready_for_writes'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'service_check', - 'name': 'rethinkdb.table_status.status.all_replicas_ready', - 'value': AgentCheck.OK if table_status['status']['all_replicas_ready'] else AgentCheck.WARNING, - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards', - 'value': len(table_status['shards']), - 'tags': tags, - } - - for index, shard in enumerate(table_status['shards']): - shard_tags = tags + ['shard:{}'.format(index)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.replicas', - 'value': len(shard['replicas']), - 'tags': shard_tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.table_status.shards.primary_replicas', - 'value': len(shard['primary_replicas']), - 'tags': shard_tags, - } - - -def collect_server_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about server statuses. 
- - See: https://rethinkdb.com/docs/system-tables/#server_status - """ - logger.debug('collect_server_status') - - for server_status in operations.query_server_status(conn): - logger.debug('server_status %r', server_status) - - server = server_status['name'] - network = server_status['network'] - process = server_status['process'] - - tags = ['server:{}'.format(server)] - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.time_connected', - 'value': _to_timestamp(network['time_connected']), - 'tags': tags, - } - - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.network.connected_to', - 'value': len([other for other, connected in network['connected_to'].items() if connected]), - 'tags': tags, - } - yield { - 'type': 'gauge', - 'name': 'rethinkdb.server_status.process.time_started', - 'value': _to_timestamp(process['time_started']), - 'tags': tags, - } - - -def _to_timestamp(datetime): - # type: (dt.datetime) -> float - try: - return datetime.timestamp() # type: ignore # (Mypy is run in --py2 mode.) - except AttributeError: # pragma: no cover - # Python 2. - return time.mktime(datetime.now().timetuple()) diff --git a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py b/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py deleted file mode 100644 index 70bb01eb1ebfe..0000000000000 --- a/rethinkdb/datadog_checks/rethinkdb/metrics/system_jobs.py +++ /dev/null @@ -1,59 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import logging -from typing import Iterator - -import rethinkdb - -from .. import operations -from ..types import Metric - -logger = logging.getLogger(__name__) - - -def collect_system_jobs(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Metric] - """ - Collect metrics about system jobs. 
- - See: https://rethinkdb.com/docs/system-jobs/ - """ - logger.debug('collect_system_jobs') - - for job in operations.query_system_jobs(conn): - logger.debug('job %r', job) - - duration = job['duration_sec'] - servers = job['servers'] - - tags = ['server:{}'.format(server) for server in servers] - - if job['type'] == 'index_construction': - job_tags = tags + [ - 'job_type:{}'.format(job['type']), - 'database:{}'.format(job['info']['db']), - 'table:{}'.format(job['info']['table']), - 'index:{}'.format(job['info']['index']), - ] - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration', - 'value': duration, - 'tags': job_tags, - } - - elif job['type'] == 'backfill': - job_tags = tags + [ - 'job_type:{}'.format(job['type']), - 'database:{}'.format(job['info']['db']), - 'destination_server:{}'.format(job['info']['destination_server']), - 'source_server:{}'.format(job['info']['source_server']), - 'table:{}'.format(job['info']['table']), - ] - yield { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration', - 'value': duration, - 'tags': job_tags, - } diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index d5b0a0e3be500..83f5877ddaf1b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -7,7 +7,7 @@ Python ReQL reference documentation: https://rethinkdb.com/api/python/ """ -from typing import Any, Iterator, Mapping, Tuple +from typing import Any, Iterator, List, Mapping, Tuple import rethinkdb @@ -36,7 +36,7 @@ system = r.db('rethinkdb') -def query_connected_server_version_string(conn): +def get_connected_server_version_string(conn): # type: (rethinkdb.net.Connection) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. 
@@ -48,8 +48,8 @@ def query_connected_server_version_string(conn): return server_status['process']['version'] -def query_config_summary(conn): - # type: (rethinkdb.net.Connection) -> ConfigSummary +def get_config_summary(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ConfigSummary, List[str]]] """ Return a summary of the cluster configuration. """ @@ -72,28 +72,28 @@ def query_config_summary(conn): .run(conn) ) # type: Mapping[str, int] - totals = { + summary = { 'servers': server_config.count(), 'databases': db_config.count(), 'tables_per_database': tables_per_database, 'secondary_indexes_per_table': secondary_indexes_per_table, } # type: ConfigSummary # Enforce keys to match. - return r.expr(totals).run(conn) + yield r.expr(summary).run(conn), [] -def query_cluster_stats(conn): - # type: (rethinkdb.net.Connection) -> ClusterStats +def get_cluster_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ClusterStats, List[str]]] """ Retrieve statistics about the cluster. """ - return system.table('stats').get(['cluster']).run(conn) + yield system.table('stats').get(['cluster']).run(conn), [] -def query_servers_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Server, ServerStats]] +def get_servers_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ServerStats, List[str]]] """ - Retrieve each server in the cluster along with its statistics. + Retrieve statistics about each server in the cluster. 
""" # For servers: stats['id'] = ['server', ''] is_server_stats_row = r.row['id'].nth(0) == 'server' @@ -107,13 +107,15 @@ def query_servers_with_stats(conn): for row in rows: server_stats = row['left'] # type: ServerStats server = row['right'] # type: Server - yield server, server_stats + tags = ['server:{}'.format(server['name'])] + tags.extend(server['tags']) + yield server_stats, tags -def query_tables_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, TableStats]] +def get_tables_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[TableStats, List[str]]] """ - Retrieve each table in the cluster along with its statistics. + Retrieve statistics about each table in the cluster. """ # For tables: stats['id'] = ['table', ''] is_table_stats_row = r.row['id'].nth(0) == 'table' @@ -127,13 +129,14 @@ def query_tables_with_stats(conn): for row in rows: table_stats = row['left'] # type: TableStats table = row['right'] # type: Table - yield table, table_stats + tags = ['table:{}'.format(table['name']), 'database:{}'.format(table['db'])] + yield table_stats, tags -def query_replicas_with_stats(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Table, Server, ShardReplica, ReplicaStats]] +def get_replicas_statistics(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ReplicaStats, List[str]]] """ - Retrieve each replica (table/server pair) in the cluster along with its statistics. + Retrieve statistics about each replica (table/server pair) in the cluster. """ # NOTE: To reduce bandwidth usage, we make heavy use of the `.pluck()` operation, i.e. 
ask RethinkDB # for a specific set of fields, instead of sending entire objects, which can be expensive when joining @@ -189,37 +192,86 @@ def query_replicas_with_stats(conn): server = row['server'] # type: Server replica = row['replica'] # type: ShardReplica replica_stats = row['stats'] # type: ReplicaStats - yield table, server, replica, replica_stats + tags = [ + 'table:{}'.format(table['name']), + 'database:{}'.format(table['db']), + 'server:{}'.format(server['name']), + 'state:{}'.format(replica['state']), + ] + tags.extend(server['tags']) -def query_table_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[TableStatus] + yield replica_stats, tags + + +def get_table_statuses(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[TableStatus, List[str]]] """ Retrieve the status of each table in the cluster. """ - return system.table('table_status').run(conn) + for table_status in system.table('table_status').run(conn): # type: TableStatus + tags = ['table:{}'.format(table_status['name']), 'database:{}'.format(table_status['db'])] + yield table_status, tags -def query_server_status(conn): - # type: (rethinkdb.net.Connection) -> Iterator[ServerStatus] +def get_server_statuses(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ServerStatus, List[str]]] """ Retrieve the status of each server in the cluster. """ - return system.table('server_status').run(conn) + for server_status in system.table('server_status').run(conn): # type: ServerStatus + tags = ['server:{}'.format(server_status['name'])] + yield server_status, tags -def query_system_jobs(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Job] +def get_system_jobs(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Job, List[str]]] """ Retrieve all the currently running system jobs. 
""" - return system.table('jobs').run(conn) - - -def query_current_issues_summary(conn): - # type: (rethinkdb.net.Connection) -> CurrentIssuesSummary + for job in system.table('jobs').run(conn): # type: Job + tags = ['job_type:{}'.format(job['type'])] + tags.extend('server:{}'.format(server) for server in job['servers']) + + # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema + + if job['type'] == 'query': + # NOTE: Request-response queries are typically too short-lived to be captured across Agent checks. + # Change feed queries however are long-running, they we'd be able to capture them. + # See: https://rethinkdb.com/docs/system-jobs/#query + # TODO(before-merging): submit within a `duration_sec` threshold instead of skipping entirely. + continue + elif job['type'] == 'disk_compaction': + # Ongoing task on each server -- no information provided (i.e. `info` is empty). + # See: https://rethinkdb.com/docs/system-jobs/#disk_compaction + continue + if job['type'] == 'index_construction': + tags.extend( + [ + 'database:{}'.format(job['info']['db']), + 'table:{}'.format(job['info']['table']), + 'index:{}'.format(job['info']['index']), + ] + ) + elif job['type'] == 'backfill': + tags.extend( + [ + 'database:{}'.format(job['info']['db']), + 'destination_server:{}'.format(job['info']['destination_server']), + 'source_server:{}'.format(job['info']['source_server']), + 'table:{}'.format(job['info']['table']), + ] + ) + else: + raise RuntimeError('Unknown job type: {!r}'.format(job['type'])) + + yield job, tags + + +def get_current_issues_summary(conn): + # type: (rethinkdb.net.Connection) -> Iterator[Tuple[CurrentIssuesSummary, List[str]]] """ - Retrieve all the problems detected with the cluster. + Retrieve a summary of problems detected within the cluster. 
""" current_issues = system.table('current_issues').pluck('type', 'critical') @@ -231,7 +283,4 @@ def query_current_issues_summary(conn): current_issues.filter(r.row['critical']).group('type').count().run(conn) ) # type: Mapping[str, int] - return { - 'issues': issues_by_type, - 'critical_issues': critical_issues_by_type, - } + yield {'issues': issues_by_type, 'critical_issues': critical_issues_by_type}, [] diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py new file mode 100644 index 0000000000000..b64cdffa9d6df --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -0,0 +1,148 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from . import operations +from .document_db import DocumentQuery + +# System configuration. + +# See: https://rethinkdb.com/docs/system-tables/#configuration-tables +config_summary = DocumentQuery( + source=operations.get_config_summary, + name='config_summary', + prefix='config', + metrics=[{'type': 'gauge', 'path': 'servers'}, {'type': 'gauge', 'path': 'databases'}], + groups=[ + {'path': 'tables_per_database', 'key_tag': 'database', 'value_metric_type': 'gauge'}, + {'path': 'secondary_indexes_per_table', 'key_tag': 'table', 'value_metric_type': 'gauge'}, + ], +) + + +# System statistics. 
+ +# See: https://rethinkdb.com/docs/system-stats#cluster +cluster_statistics = DocumentQuery( + source=operations.get_cluster_statistics, + name='cluster_statistics', + prefix='stats.cluster', + metrics=[ + {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + ], +) + +# See: https://rethinkdb.com/docs/system-stats#server +server_statistics = DocumentQuery( + source=operations.get_servers_statistics, + name='server_statistics', + prefix='stats.server', + metrics=[ + {'type': 'gauge', 'path': 'query_engine.client_connections'}, + {'type': 'gauge', 'path': 'query_engine.clients_active'}, + {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.queries_total'}, + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, + ], +) + +# See: https://rethinkdb.com/docs/system-stats#table +table_statistics = DocumentQuery( + source=operations.get_tables_statistics, + name='table_statistics', + prefix='stats.table', + metrics=[ + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + ], +) + +# See: https://rethinkdb.com/docs/system-stats#replica +replica_statistics = DocumentQuery( + source=operations.get_replicas_statistics, + name='replica_statistics', + prefix='stats.table_server', + metrics=[ + {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, + {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, + {'type': 'monotonic_count', 'path': 'query_engine.written_docs_total'}, + {'type': 'gauge', 'path': 
'storage_engine.cache.in_use_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.read_bytes_per_sec'}, + {'type': 'monotonic_count', 'path': 'storage_engine.disk.read_bytes_total'}, + {'type': 'gauge', 'path': 'storage_engine.disk.written_bytes_per_sec'}, + {'type': 'monotonic_count', 'path': 'storage_engine.disk.written_bytes_total'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.metadata_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.data_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.garbage_bytes'}, + {'type': 'gauge', 'path': 'storage_engine.disk.space_usage.preallocated_bytes'}, + ], +) + + +# System status. + +# See: https://rethinkdb.com/docs/system-tables/#table_status +table_statuses = DocumentQuery( + source=operations.get_table_statuses, + name='table_status', + prefix='table_status', + metrics=[ + {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'modifier': 'ok_warning'}, + {'type': 'service_check', 'path': 'status.ready_for_reads', 'modifier': 'ok_warning'}, + {'type': 'service_check', 'path': 'status.ready_for_writes', 'modifier': 'ok_warning'}, + {'type': 'service_check', 'path': 'status.all_replicas_ready', 'modifier': 'ok_warning'}, + {'type': 'gauge', 'path': 'shards', 'modifier': 'total'}, + ], + enumerations=[ + { + 'path': 'shards', + 'index_tag': 'shard', + 'metrics': [ + {'type': 'gauge', 'path': 'replicas', 'modifier': 'total'}, + {'type': 'gauge', 'path': 'primary_replicas', 'modifier': 'total'}, + ], + } + ], +) + +# See: https://rethinkdb.com/docs/system-tables/#server_status +server_statuses = DocumentQuery( + source=operations.get_server_statuses, + name='server_status', + prefix='server_status', + metrics=[ + {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 'timestamp'}, + {'type': 'gauge', 'path': 'network.connected_to', 'modifier': 'total'}, + {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'timestamp'}, + ], +) + + +# 
System jobs. + +# See: https://rethinkdb.com/docs/system-jobs/ +system_jobs = DocumentQuery( + source=operations.get_system_jobs, + name='system_jobs', + prefix='jobs', + metrics=[{'type': 'gauge', 'path': 'duration_sec'}], +) + + +# System current issues. + +# See: https://rethinkdb.com/docs/system-issues/ +current_issues_summary = DocumentQuery( + source=operations.get_current_issues_summary, + name='current_issues', + prefix='current_issues', + groups=[ + {'path': 'issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, + {'path': 'critical_issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, + ], +) diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 9dae6d201fded..ff95d6f86c91c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -9,11 +9,6 @@ # Check interfaces. -Metric = TypedDict( - 'Metric', - {'type': Literal['gauge', 'monotonic_count', 'service_check'], 'name': str, 'value': float, 'tags': List[str]}, -) - Instance = TypedDict( 'Instance', {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': str, 'tags': List[str]}, @@ -144,6 +139,32 @@ # System jobs documents. 
# See: https://rethinkdb.com/docs/system-jobs/ +QueryInfo = TypedDict('QueryInfo', {}) + +QueryJob = TypedDict( + 'QueryJob', + { + 'type': Literal['query'], + 'id': Tuple[Literal['query'], str], + 'duration_sec': float, + 'info': QueryInfo, + 'servers': List[str], + }, +) + +DiskCompactionInfo = TypedDict('DiskCompactionInfo', {}) + +DiskCompactionJob = TypedDict( + 'DiskCompactionJob', + { + 'type': Literal['disk_compaction'], + 'id': Tuple[Literal['disk_compaction'], str], + 'duration_sec': None, + 'info': DiskCompactionInfo, + 'servers': List[str], + }, +) + IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) IndexConstructionJob = TypedDict( diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index b3d6346349f90..f1775420f5302 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -10,7 +10,8 @@ from datadog_checks.base.stubs.aggregator import AggregatorStub from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb.types import Instance, Metric +from datadog_checks.rethinkdb.document_db.types import Metric +from datadog_checks.rethinkdb.types import Instance from .assertions import assert_metrics from .cluster import temporarily_disconnect_server diff --git a/rethinkdb/tests/unit/test_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py similarity index 66% rename from rethinkdb/tests/unit/test_metrics.py rename to rethinkdb/tests/unit/test_system_jobs_metrics.py index 2a8a15e80f9ae..704204bd39fc7 100644 --- a/rethinkdb/tests/unit/test_metrics.py +++ b/rethinkdb/tests/unit/test_system_jobs_metrics.py @@ -1,14 +1,11 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -""" -Unit tests for metrics that are hard to test using integration tests, eg. 
because they depend on cluster dynamics. -""" import mock import pytest -from datadog_checks.rethinkdb.metrics.system_jobs import collect_system_jobs -from datadog_checks.rethinkdb.types import BackfillJob, IndexConstructionJob +from datadog_checks.rethinkdb import queries +from datadog_checks.rethinkdb.types import BackfillJob, DiskCompactionJob, IndexConstructionJob, QueryJob pytestmark = pytest.mark.unit @@ -28,6 +25,22 @@ def test_jobs_metrics(): * Etc. """ + mock_query_job_row = { + 'type': 'query', + 'id': ('query', 'abcd1234'), + 'duration_sec': 0.12, + 'info': {}, + 'servers': ['server0'], + } # type: QueryJob + + mock_disk_compaction_row = { + 'type': 'disk_compaction', + 'id': ('disk_compaction', 'zero'), + 'duration_sec': None, + 'info': {}, + 'servers': ['server0'], + } # type: DiskCompactionJob + mock_backfill_job_row = { # See: https://rethinkdb.com/docs/system-jobs/#backfill 'type': 'backfill', @@ -52,21 +65,22 @@ def test_jobs_metrics(): 'servers': ['server1'], } # type: IndexConstructionJob - mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} - - mock_rows = [mock_backfill_job_row, mock_index_construction_job_row, mock_unknown_job_row] + mock_rows = [mock_query_job_row, mock_disk_compaction_row, mock_backfill_job_row, mock_index_construction_job_row] conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = list(collect_system_jobs(conn)) + metrics = list(queries.system_jobs.run(conn)) assert metrics == [ + # -- `query` job ignored -- + # -- `disk_compaction` job ignored -- { 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration', + 'name': 'rethinkdb.jobs.duration_sec', 'value': 0.42, 'tags': [ + 'job_type:backfill', 'server:server0', 'server:server2', 'job_type:backfill', @@ -78,14 +92,28 @@ def test_jobs_metrics(): }, { 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration', + 'name': 'rethinkdb.jobs.duration_sec', 'value': 0.24, 
'tags': [ - 'server:server1', 'job_type:index_construction', + 'server:server1', 'database:doghouse', 'table:heroes', 'index:appearances_count', ], }, ] + + +def test_unknown_job(): + # type: () -> None + """ + If a new job type is added, an exception should be raised so we are notified via CI failures and can add support. + """ + mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} + + conn = mock.Mock() + with mock.patch('rethinkdb.ast.RqlQuery.run') as run: + run.return_value = [mock_unknown_job_row] + with pytest.raises(RuntimeError): + list(queries.system_jobs.run(conn)) From 69686d23f0f321ef38b042207108c93336d97173 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 16:58:38 +0100 Subject: [PATCH 122/147] Pass check logger through --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- rethinkdb/datadog_checks/rethinkdb/config.py | 2 - .../rethinkdb/document_db/query.py | 42 +++++++++---------- .../tests/unit/test_system_jobs_metrics.py | 12 ++++-- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index a3bccf283a9c7..38cb002c08c2d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -75,7 +75,7 @@ def collect_metrics(self, conn): Collect metrics from the RethinkDB cluster we are connected to. """ for query in self.queries: - for metric in query.run(conn): + for metric in query.run(conn, logger=self.log): yield metric def collect_connected_server_version(self, conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 1b41cf26b61ee..1f872825f41be 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -1,8 +1,6 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from __future__ import absolute_import - from typing import List, Optional from datadog_checks.base import ConfigurationError diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index 836eaee6d2c6d..0e3fd7c2feeaa 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -1,16 +1,14 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import logging from typing import Any, Callable, Iterator, List, Mapping, Sequence, Tuple from datadog_checks.base import AgentCheck +from datadog_checks.base.log import CheckLoggingAdapter from .types import Enumeration, Group, Metric, MetricSpec, Modifier from .utils import dotted_join, lookup_dotted, to_timestamp -logger = logging.getLogger(__name__) - class DocumentQuery(object): """ @@ -33,16 +31,16 @@ def __init__( self.enumerations = [] if enumerations is None else enumerations self.groups = [] if groups is None else groups - def _make_metric_from_spec(self, document, spec, tags): - # type: (Any, MetricSpec, List[str]) -> Metric - logger.debug('make_metric_from_spec %r', spec) + def _make_metric_from_spec(self, document, spec, tags, logger): + # type: (Any, MetricSpec, List[str], CheckLoggingAdapter) -> Metric + logger.trace('make_metric_from_spec %r', spec) path = spec['path'] name = spec.get('name', path) value = lookup_dotted(document, path=path) if 'modifier' in spec and spec['modifier'] is not None: - value = self._modify(value, modifier=spec['modifier']) + value = self._modify(value, modifier=spec['modifier'], logger=logger) if not isinstance(value, (int, float)): # pragma: no cover raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) @@ -51,9 +49,9 @@ def _make_metric_from_spec(self, 
document, spec, tags): return {'type': spec['type'], 'name': name, 'value': value, 'tags': tags} - def _make_metrics_from_enumeration(self, document, enumeration, tags): - # type: (Any, Enumeration, List[str]) -> Iterator[Metric] - logger.debug('make_metrics_from_enumeration enumeration=%r', enumeration) + def _make_metrics_from_enumeration(self, document, enumeration, tags, logger): + # type: (Any, Enumeration, List[str], CheckLoggingAdapter) -> Iterator[Metric] + logger.trace('make_metrics_from_enumeration enumeration=%r', enumeration) values = lookup_dotted(document, path=enumeration['path']) # type: Sequence @@ -67,11 +65,11 @@ def _make_metrics_from_enumeration(self, document, enumeration, tags): 'path': spec['path'], 'modifier': spec.get('modifier'), } - yield self._make_metric_from_spec(value, spec, tags=item_tags) + yield self._make_metric_from_spec(value, spec, tags=item_tags, logger=logger) - def _make_metrics_from_group(self, document, group, tags): - # type: (Any, Group, List[str]) -> Iterator[Metric] - logger.debug('make_metrics_from_group group=%r', group) + def _make_metrics_from_group(self, document, group, tags, logger): + # type: (Any, Group, List[str], CheckLoggingAdapter) -> Iterator[Metric] + logger.trace('make_metrics_from_group group=%r', group) mapping = lookup_dotted(document, path=group['path']) # type: Mapping @@ -82,11 +80,11 @@ def _make_metrics_from_group(self, document, group, tags): 'name': group['path'], 'path': key, } # type: MetricSpec - yield self._make_metric_from_spec(mapping, spec, tags=item_tags) + yield self._make_metric_from_spec(mapping, spec, tags=item_tags, logger=logger) - def _modify(self, value, modifier): - # type: (Any, Modifier) -> float - logger.debug('modify value=%r modifier=%r', value, modifier) + def _modify(self, value, modifier, logger): + # type: (Any, Modifier, CheckLoggingAdapter) -> float + logger.trace('modify value=%r modifier=%r', value, modifier) if modifier == 'total': return len(value) @@ -101,18 
+99,20 @@ def _modify(self, value, modifier): def run(self, *args, **kwargs): # type: (*Any, **Any) -> Iterator[Metric] + logger = kwargs.pop('logger') # type: CheckLoggingAdapter + logger.debug('query_%s', self.name) for document, tags in self.source(*args, **kwargs): logger.debug('%s %r', self.name, document) for spec in self.metrics: - yield self._make_metric_from_spec(document, spec, tags=tags) + yield self._make_metric_from_spec(document, spec, tags=tags, logger=logger) for enumeration in self.enumerations: - for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags): + for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags, logger=logger): yield metric for group in self.groups: - for metric in self._make_metrics_from_group(document, group, tags=tags): + for metric in self._make_metrics_from_group(document, group, tags=tags, logger=logger): yield metric diff --git a/rethinkdb/tests/unit/test_system_jobs_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py index 704204bd39fc7..4b9ae9d35da2b 100644 --- a/rethinkdb/tests/unit/test_system_jobs_metrics.py +++ b/rethinkdb/tests/unit/test_system_jobs_metrics.py @@ -1,6 +1,8 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import logging + import mock import pytest @@ -10,6 +12,11 @@ pytestmark = pytest.mark.unit +class MockLogger(logging.Logger): + def trace(self, *args, **kwargs): # type: ignore + pass # Called by queries. 
+ + def test_jobs_metrics(): # type: () -> None """ @@ -70,7 +77,7 @@ def test_jobs_metrics(): conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = list(queries.system_jobs.run(conn)) + metrics = list(queries.system_jobs.run(conn, logger=MockLogger('test'))) assert metrics == [ # -- `query` job ignored -- @@ -83,7 +90,6 @@ def test_jobs_metrics(): 'job_type:backfill', 'server:server0', 'server:server2', - 'job_type:backfill', 'database:doghouse', 'destination_server:server2', 'source_server:server0', @@ -116,4 +122,4 @@ def test_unknown_job(): with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = [mock_unknown_job_row] with pytest.raises(RuntimeError): - list(queries.system_jobs.run(conn)) + list(queries.system_jobs.run(conn, logger=MockLogger('test'))) From 177065e799b1e8821c90142b03887e0b917a487c Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 17:17:12 +0100 Subject: [PATCH 123/147] Update metadata.csv --- rethinkdb/metadata.csv | 78 ++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv index 011659d32d496..658f19c83378d 100644 --- a/rethinkdb/metadata.csv +++ b/rethinkdb/metadata.csv @@ -1,44 +1,40 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name -rethinkdb.server.total,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Server total -rethinkdb.database.total,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Database total -rethinkdb.database.table.total,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Database table total -rethinkdb.table.secondary_index.total,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Table secondary index total -rethinkdb.stats.cluster.queries_per_sec,gauge,,query,,Number of queries executed in a cluster per 
second.,0,rethinkdb,Stats cluster queries per sec -rethinkdb.stats.cluster.read_docs_per_sec,gauge,,document,,Number of documents read in a cluster per second.,0,rethinkdb,Stats cluster read docs per sec -rethinkdb.stats.cluster.written_docs_per_sec,gauge,,document,,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster written docs per sec -rethinkdb.stats.server.queries_per_sec,gauge,,query,,Number of queries executed on a server per second.,0,rethinkdb,Stats server queries per sec -rethinkdb.stats.server.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server queries total -rethinkdb.stats.server.read_docs_per_sec,gauge,,document,,Number of documents read from a server per second.,0,rethinkdb,Stats server read docs per sec -rethinkdb.stats.server.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server read docs total -rethinkdb.stats.server.written_docs_per_sec,gauge,,document,,Number of documents written to a server per second.,0,rethinkdb,Stats server written docs per sec -rethinkdb.stats.server.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server written docs total -rethinkdb.stats.server.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server client connections -rethinkdb.stats.server.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Stats server clients active -rethinkdb.stats.table.read_docs_per_sec,gauge,,document,,Number of documents read from a table per second.,0,rethinkdb,Stats table read docs per sec -rethinkdb.stats.table.written_docs_per_sec,gauge,,document,,Number of documents written to a table per second.,0,rethinkdb,Stats table written docs per sec -rethinkdb.stats.table_server.read_docs_per_sec,gauge,,document,,Number of documents read from a replica per 
second.,0,rethinkdb,Stats table server read docs per sec -rethinkdb.stats.table_server.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Stats table server read docs total -rethinkdb.stats.table_server.written_docs_per_sec,gauge,,document,,Number of documents written to a replica per second.,0,rethinkdb,Stats table server written docs per sec -rethinkdb.stats.table_server.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server written docs total -rethinkdb.stats.table_server.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server cache in use bytes -rethinkdb.stats.table_server.disk.read_bytes_per_sec,gauge,,byte,,Number of bytes read from the disk of a replica per second.,0,rethinkdb,Stats table server disk read bytes per sec -rethinkdb.stats.table_server.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server disk read bytes total -rethinkdb.stats.table_server.disk.written_bytes_per_sec,gauge,,byte,,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats table server disk written bytes per sec -rethinkdb.stats.table_server.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server disk written bytes total -rethinkdb.stats.table_server.disk.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server disk metadata bytes -rethinkdb.stats.table_server.disk.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server disk data bytes -rethinkdb.stats.table_server.disk.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server disk garbage bytes 
-rethinkdb.stats.table_server.disk.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server disk preallocated bytes -rethinkdb.table_status.shards.total,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards total -rethinkdb.table_status.shards.replicas.total,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas total -rethinkdb.table_status.shards.replicas.primary.total,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards replicas primary total +rethinkdb.config.servers,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Config servers +rethinkdb.config.databases,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Config databases +rethinkdb.config.tables_per_database,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Config tables per database +rethinkdb.config.secondary_indexes_per_table,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Config secondary indexes per table +rethinkdb.stats.cluster.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed in a cluster per second.,0,rethinkdb,Stats cluster query engine queries per sec +rethinkdb.stats.cluster.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read in a cluster per second.,0,rethinkdb,Stats cluster query engine read docs per sec +rethinkdb.stats.cluster.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster query engine written docs per sec +rethinkdb.stats.server.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed on a server per second.,0,rethinkdb,Stats server query engine queries per sec +rethinkdb.stats.server.query_engine.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server query engine 
queries total +rethinkdb.stats.server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a server per second.,0,rethinkdb,Stats server query engine read docs per sec +rethinkdb.stats.server.query_engine.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server query engine read docs total +rethinkdb.stats.server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a server per second.,0,rethinkdb,Stats server query engine written docs per sec +rethinkdb.stats.server.query_engine.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server query engine written docs total +rethinkdb.stats.server.query_engine.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server query engine client connections +rethinkdb.stats.server.query_engine.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Stats server query engine clients active +rethinkdb.stats.table.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a table per second.,0,rethinkdb,Stats table query engine read docs per sec +rethinkdb.stats.table.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a table per second.,0,rethinkdb,Stats table query engine written docs per sec +rethinkdb.stats.table_server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a replica per second.,0,rethinkdb,Stats table server query engine read docs per sec +rethinkdb.stats.table_server.query_engine.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Stats table server query engine read docs total +rethinkdb.stats.table_server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a replica per 
second.,0,rethinkdb,Stats table server query engine written docs per sec +rethinkdb.stats.table_server.query_engine.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server query engine written docs total +rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server storage engine cache in use bytes +rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec,gauge,,byte,second,Number of bytes read from the disk of a replica per second.,0,rethinkdb,Stats table server storage engine disk read bytes per sec +rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server storage engine disk read bytes total +rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec,gauge,,byte,second,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats table server storage engine disk written bytes per sec +rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server storage engine disk written bytes total +rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server storage engine disk space usage metadata bytes +rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server storage engine disk space usage data bytes +rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server storage engine disk space usage garbage bytes 
+rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server storage engine disk space usage preallocated bytes +rethinkdb.table_status.shards,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards +rethinkdb.table_status.shards.replicas,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas +rethinkdb.table_status.shards.primary_replicas,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards primary replicas rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected -rethinkdb.server_status.network.connected_to.total,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server status network connected to total -rethinkdb.server_status.network.connected_to.pending.total,gauge,,node,,Number of other RethinkDB servers a server knows about but is not currently connected to.,0,rethinkdb,Server status network connected to pending total +rethinkdb.server_status.network.connected_to,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server status network connected to rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started -rethinkdb.jobs.index_construction.duration,gauge,,second,,Duration of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction duration -rethinkdb.jobs.index_construction.progress,gauge,,percent,,Progress of a task that constructs secondary indexes in the background.,0,rethinkdb,Jobs index construction progress -rethinkdb.jobs.backfill.duration,gauge,,second,,Duration of a task that brings out of date 
shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill duration -rethinkdb.jobs.backfill.progress,gauge,,percent,,Progress of a task that brings out of date shards up to date (known as backfilling).,0,rethinkdb,Jobs backfill progress -rethinkdb.current_issues.total,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Current issues total -rethinkdb.current_issues.critical.total,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Current issues critical total +rethinkdb.jobs.duration,gauge,,second,,"Duration of a currently running system job, tagged with the job_type.",0,rethinkdb,Jobs duration +rethinkdb.current_issues.issues,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Current issues issues +rethinkdb.current_issues.critical_issues,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Current issues critical issues From 173658e6192796fdbb49fb8fc6816dff02b50025 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 17:43:36 +0100 Subject: [PATCH 124/147] Fix typo in `to_timestamp` util --- rethinkdb/datadog_checks/rethinkdb/document_db/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index 98fd0be566800..f87e05369d363 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -47,4 +47,4 @@ def to_timestamp(datetime): return datetime.timestamp() # type: ignore # (mypy runs in `--py2` mode.) except AttributeError: # pragma: no cover # Python 2. 
- return time.mktime(datetime.now().timetuple()) + return time.mktime(datetime.timetuple()) From 92ad5bd25c10a40f2bba020fdaa3578e49ef1db0 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 18:37:45 +0100 Subject: [PATCH 125/147] Add unit tests for `document_db` helpers --- .../rethinkdb/document_db/query.py | 4 +- .../rethinkdb/document_db/utils.py | 24 +-- rethinkdb/datadog_checks/rethinkdb/queries.py | 18 +- rethinkdb/tests/test_rethinkdb.py | 4 +- .../tests/unit/document_db/test_query.py | 157 ++++++++++++++++++ .../tests/unit/document_db/test_utils.py | 51 ++++++ 6 files changed, 234 insertions(+), 24 deletions(-) create mode 100644 rethinkdb/tests/unit/document_db/test_query.py create mode 100644 rethinkdb/tests/unit/document_db/test_utils.py diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index 0e3fd7c2feeaa..b19a496cfbf29 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -45,7 +45,7 @@ def _make_metric_from_spec(self, document, spec, tags, logger): if not isinstance(value, (int, float)): # pragma: no cover raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) - name = dotted_join(('rethinkdb', self.prefix, name)) + name = dotted_join((self.prefix, name)) return {'type': spec['type'], 'name': name, 'value': value, 'tags': tags} @@ -61,7 +61,7 @@ def _make_metrics_from_enumeration(self, document, enumeration, tags, logger): for spec in enumeration['metrics']: spec = { 'type': spec['type'], - 'name': dotted_join((enumeration['path'], spec['path']), drop_empty=True), + 'name': dotted_join((enumeration['path'], spec['path'])), 'path': spec['path'], 'modifier': spec.get('modifier'), } diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index f87e05369d363..6355143e4a957 100644 --- 
a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -13,32 +13,34 @@ def lookup_dotted(dct, path): # type: (Mapping, str) -> Any """ Given a mapping and a dotted path `key1.key2...keyN`, return the item at `dct[key1][key2]...[keyN]`. + + Raises `ValueError` if an issue is encountered while traversing `path`. """ - keys = [key for key in reversed(path.split('.'))] + if not path: + return dct + keys = [key for key in reversed(path.split('.'))] value = dct while keys: - if not isinstance(value, Mapping): # pragma: no cover - raise RuntimeError( - 'followed path {!r} with remaining keys {!r}, but value {!r} is not a mapping'.format(path, value, keys) + if not isinstance(value, Mapping): + raise ValueError( + 'followed path {!r} with remaining keys {!r}, but value {!r} is not a mapping'.format(path, keys, value) ) key = keys.pop() try: value = value[key] - except KeyError as exc: # pragma: no cover - raise RuntimeError('Failed to retrieve key {!r} on value {!r}: {!r}'.format(key, value, exc)) + except KeyError as exc: + raise ValueError('Failed to retrieve key {!r} on value {!r}: {!r}'.format(key, value, exc)) return value -def dotted_join(values, drop_empty=False): - # type: (Sequence[str], bool) -> str - if drop_empty: - values = [value for value in values if value] - return '.'.join(values) +def dotted_join(values): + # type: (Sequence[str]) -> str + return '.'.join(filter(None, values)) def to_timestamp(datetime): diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index b64cdffa9d6df..9d10949f26d9a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -10,7 +10,7 @@ config_summary = DocumentQuery( source=operations.get_config_summary, name='config_summary', - prefix='config', + prefix='rethinkdb.config', metrics=[{'type': 'gauge', 'path': 'servers'}, {'type': 'gauge', 'path': 
'databases'}], groups=[ {'path': 'tables_per_database', 'key_tag': 'database', 'value_metric_type': 'gauge'}, @@ -25,7 +25,7 @@ cluster_statistics = DocumentQuery( source=operations.get_cluster_statistics, name='cluster_statistics', - prefix='stats.cluster', + prefix='rethinkdb.stats.cluster', metrics=[ {'type': 'gauge', 'path': 'query_engine.queries_per_sec'}, {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, @@ -37,7 +37,7 @@ server_statistics = DocumentQuery( source=operations.get_servers_statistics, name='server_statistics', - prefix='stats.server', + prefix='rethinkdb.stats.server', metrics=[ {'type': 'gauge', 'path': 'query_engine.client_connections'}, {'type': 'gauge', 'path': 'query_engine.clients_active'}, @@ -54,7 +54,7 @@ table_statistics = DocumentQuery( source=operations.get_tables_statistics, name='table_statistics', - prefix='stats.table', + prefix='rethinkdb.stats.table', metrics=[ {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, {'type': 'gauge', 'path': 'query_engine.written_docs_per_sec'}, @@ -65,7 +65,7 @@ replica_statistics = DocumentQuery( source=operations.get_replicas_statistics, name='replica_statistics', - prefix='stats.table_server', + prefix='rethinkdb.stats.table_server', metrics=[ {'type': 'gauge', 'path': 'query_engine.read_docs_per_sec'}, {'type': 'monotonic_count', 'path': 'query_engine.read_docs_total'}, @@ -90,7 +90,7 @@ table_statuses = DocumentQuery( source=operations.get_table_statuses, name='table_status', - prefix='table_status', + prefix='rethinkdb.table_status', metrics=[ {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'modifier': 'ok_warning'}, {'type': 'service_check', 'path': 'status.ready_for_reads', 'modifier': 'ok_warning'}, @@ -114,7 +114,7 @@ server_statuses = DocumentQuery( source=operations.get_server_statuses, name='server_status', - prefix='server_status', + prefix='rethinkdb.server_status', metrics=[ {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 
'timestamp'}, {'type': 'gauge', 'path': 'network.connected_to', 'modifier': 'total'}, @@ -129,7 +129,7 @@ system_jobs = DocumentQuery( source=operations.get_system_jobs, name='system_jobs', - prefix='jobs', + prefix='rethinkdb.jobs', metrics=[{'type': 'gauge', 'path': 'duration_sec'}], ) @@ -140,7 +140,7 @@ current_issues_summary = DocumentQuery( source=operations.get_current_issues_summary, name='current_issues', - prefix='current_issues', + prefix='rethinkdb.current_issues', groups=[ {'path': 'issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, {'path': 'critical_issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index f1775420f5302..4c0e374f82a60 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -194,7 +194,7 @@ def test_metadata_version(instance, datadog_agent): datadog_agent.assert_metadata(check_id, version_metadata) -@pytest.mark.unit +@pytest.mark.integration @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) def test_metadata_version_malformed(instance, aggregator, datadog_agent, malformed_version_string): # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None @@ -218,7 +218,7 @@ def collect_connected_server_version(self, conn): datadog_agent.assert_metadata(check_id, {}) -@pytest.mark.unit +@pytest.mark.integration def test_metadata_version_failure(instance, aggregator, datadog_agent): # type: (Instance, AggregatorStub, DatadogAgentStub) -> None """ diff --git a/rethinkdb/tests/unit/document_db/test_query.py b/rethinkdb/tests/unit/document_db/test_query.py new file mode 100644 index 0000000000000..d9afeeaff6204 --- /dev/null +++ b/rethinkdb/tests/unit/document_db/test_query.py @@ -0,0 +1,157 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import logging +from collections import OrderedDict +from typing import Iterator, List, Tuple + +import pytest + +from datadog_checks.rethinkdb.document_db.query import DocumentQuery + +pytestmark = pytest.mark.unit + + +class MockLogger(logging.Logger): + def trace(self, *args, **kwargs): # type: ignore + pass # Called by queries. + + +def test_document_query(): + # type: () -> None + """ + A realistic unit test demonstrating the usage of `DocumentQuery`. + """ + + PRODUCTS_COLLECTION = [ + # Use OrderedDict's so that order of submitted metrics is deterministic on Python 2 too. + OrderedDict( + { + 'name': 'T-Shirt', + 'category': 'clothing', + 'sales': {'sales_per_day': 100, 'sales_total': 10000}, + 'locations': [{'name': 'London', 'stock': 1200}, {'name': 'Paris', 'stock': 700}], + 'total_sales_per_location': OrderedDict({'london': 2000, 'paris': 8000}), + } + ), + OrderedDict( + { + 'name': 'Laptop', + 'category': 'high-tech', + 'sales': {'sales_per_day': 5, 'sales_total': 400}, + 'locations': [{'name': 'New York', 'stock': 150}], + 'total_sales_per_location': {'new-york': 400}, + } + ), + ] + + def get_data_from_db(conn): + # type: (dict) -> Iterator[Tuple[dict, List[str]]] + for product in PRODUCTS_COLLECTION: + tags = ['category:{}'.format(product['category']), 'server:{}'.format(conn['server'])] + yield product, tags + + query = DocumentQuery( + source=get_data_from_db, + name='test', + prefix='products', + # Metrics obtained from a nested JSON key lookup (aka path lookup). + metrics=[ + {'type': 'gauge', 'path': 'sales.sales_per_day'}, + {'type': 'monotonic_count', 'path': 'sales.sales_total'}, + {'type': 'gauge', 'path': 'locations', 'modifier': 'total'}, + ], + # Metrics for each object in an array, tagged by the index in the array. 
+ enumerations=[ + {'path': 'locations', 'index_tag': 'location_index', 'metrics': [{'type': 'gauge', 'path': 'stock'}]} + ], + # Metrics from the result of a groupby() operation (aggregation). + groups=[{'path': 'total_sales_per_location', 'key_tag': 'location', 'value_metric_type': 'gauge'}], + ) + + conn = {'server': 'example'} + metrics = list(query.run(conn, logger=MockLogger('test'))) + + assert metrics == [ + # -- T-Shirt -- + # Metrics + { + 'type': 'gauge', + 'name': 'products.sales.sales_per_day', + 'value': 100, + 'tags': ['category:clothing', 'server:example'], + }, + { + 'type': 'monotonic_count', + 'name': 'products.sales.sales_total', + 'value': 10000, + 'tags': ['category:clothing', 'server:example'], + }, + {'type': 'gauge', 'name': 'products.locations', 'value': 2, 'tags': ['category:clothing', 'server:example']}, + # Enumerations + { + 'type': 'gauge', + 'name': 'products.locations.stock', + 'value': 1200, + 'tags': ['category:clothing', 'server:example', 'location_index:0'], + }, + { + 'type': 'gauge', + 'name': 'products.locations.stock', + 'value': 700, + 'tags': ['category:clothing', 'server:example', 'location_index:1'], + }, + # Groups + { + 'type': 'gauge', + 'name': 'products.total_sales_per_location', + 'value': 2000, + 'tags': ['category:clothing', 'server:example', 'location:london'], + }, + { + 'type': 'gauge', + 'name': 'products.total_sales_per_location', + 'value': 8000, + 'tags': ['category:clothing', 'server:example', 'location:paris'], + }, + # -- Laptop -- + # Metrics + { + 'type': 'gauge', + 'name': 'products.sales.sales_per_day', + 'value': 5, + 'tags': ['category:high-tech', 'server:example'], + }, + { + 'type': 'monotonic_count', + 'name': 'products.sales.sales_total', + 'value': 400, + 'tags': ['category:high-tech', 'server:example'], + }, + {'type': 'gauge', 'name': 'products.locations', 'value': 1, 'tags': ['category:high-tech', 'server:example']}, + # Enumerations + { + 'type': 'gauge', + 'name': 
'products.locations.stock', + 'value': 150, + 'tags': ['category:high-tech', 'server:example', 'location_index:0'], + }, + # Groups + { + 'type': 'gauge', + 'name': 'products.total_sales_per_location', + 'value': 400, + 'tags': ['category:high-tech', 'server:example', 'location:new-york'], + }, + ] + + +def test_document_query_empty(): + # type: () -> None + def get_data(): + # type: () -> Iterator[Tuple[dict, List[str]]] + yield {}, [] + + query = DocumentQuery(source=get_data, name='test', prefix='dogs') + metrics = list(query.run(logger=MockLogger('test'))) + assert metrics == [] diff --git a/rethinkdb/tests/unit/document_db/test_utils.py b/rethinkdb/tests/unit/document_db/test_utils.py new file mode 100644 index 0000000000000..7d02a9b82fad6 --- /dev/null +++ b/rethinkdb/tests/unit/document_db/test_utils.py @@ -0,0 +1,51 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import datetime as dt + +import pytest + +from datadog_checks.rethinkdb.document_db.utils import dotted_join, lookup_dotted, to_timestamp + +pytestmark = pytest.mark.unit + + +def test_dotted_join(): + # type: () -> None + assert dotted_join(()) == '' + assert dotted_join(('foo',)) == 'foo' + assert dotted_join(('foo', 'bar')) == 'foo.bar' + assert dotted_join(('foo', 'bar', 'baz')) == 'foo.bar.baz' + assert dotted_join(('foo', 'bar', '')) == 'foo.bar' + assert dotted_join(('foo', '', 'baz')) == 'foo.baz' + assert dotted_join(('', 'bar', 'baz')) == 'bar.baz' + + +def test_to_timestamp(): + # type: () -> None + datetime = dt.datetime(year=2020, month=1, day=1, hour=3, minute=45, second=0) + assert to_timestamp(datetime) == 1577846700.0 + + +def test_lookup_dotted(): + # type: () -> None + assert lookup_dotted({}, '') == {} + assert lookup_dotted({'tables': 10}, 'tables') == 10 + assert lookup_dotted({'tables': {'reads_per_sec': 500}}, 'tables.reads_per_sec') == 500 + assert lookup_dotted({'tables': {'all': ['heroes']}}, 
'tables.all') == ['heroes'] + + with pytest.raises(ValueError): + lookup_dotted([], 'test') # type: ignore + + with pytest.raises(ValueError): + lookup_dotted(True, 'test') # type: ignore + + with pytest.raises(ValueError): + lookup_dotted({'tables': 10}, 'tables.total') + + with pytest.raises(ValueError): + lookup_dotted({'tables': {'total': 10}}, 'tables.unknown') + + with pytest.raises(ValueError): + # Dotted keys are not supported. + lookup_dotted({'tables.total': 10}, 'tables.total') From 0ccb9ce30e08050f424b1317bece5bc630d40272 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 19 Mar 2020 19:09:31 +0100 Subject: [PATCH 126/147] Address feedback (part 1) --- .../datadog_checks/dev/docker.py | 8 ++--- rethinkdb/assets/configuration/spec.yaml | 7 ++--- rethinkdb/datadog_checks/rethinkdb/check.py | 25 +++++----------- .../rethinkdb/data/conf.yaml.example | 9 +++--- .../rethinkdb/document_db/query.py | 6 ++-- .../rethinkdb/document_db/types.py | 2 +- .../rethinkdb/document_db/utils.py | 9 ++---- rethinkdb/datadog_checks/rethinkdb/queries.py | 4 +-- rethinkdb/tests/cluster.py | 9 ++---- rethinkdb/tests/conftest.py | 3 +- .../tests/unit/document_db/test_query.py | 30 +++++++++---------- .../tests/unit/document_db/test_utils.py | 9 +++--- 12 files changed, 48 insertions(+), 73 deletions(-) diff --git a/datadog_checks_dev/datadog_checks/dev/docker.py b/datadog_checks_dev/datadog_checks/dev/docker.py index 04990ba410699..46e375c952f8f 100644 --- a/datadog_checks_dev/datadog_checks/dev/docker.py +++ b/datadog_checks_dev/datadog_checks/dev/docker.py @@ -253,9 +253,5 @@ def _read_example_logs_config(check_root): def temporarily_stop_service(service, compose_file, check=True): # type: (str, str, bool) -> Iterator[None] run_command(['docker-compose', '-f', compose_file, 'stop', service], capture=False, check=check) - try: - yield - except Exception: - raise - else: - run_command(['docker-compose', '-f', compose_file, 'start', service], capture=False, check=check) + 
yield + run_command(['docker-compose', '-f', compose_file, 'start', service], capture=False, check=check) diff --git a/rethinkdb/assets/configuration/spec.yaml b/rethinkdb/assets/configuration/spec.yaml index 3dc0a163b8df4..ecc79d4629ccb 100644 --- a/rethinkdb/assets/configuration/spec.yaml +++ b/rethinkdb/assets/configuration/spec.yaml @@ -11,33 +11,30 @@ files: - template: instances options: - name: host - required: false + required: true description: Host of the RethinkDB server. value: example: localhost type: string - name: port - required: false + required: true description: Driver port of the RethinkDB server. value: example: 28015 type: integer - name: username - required: false description: The user account to connect as. value: type: string - name: password - required: false description: The password for the user account to connect as. value: type: string - name: tls_ca_cert - required: false description: | Path to a TLS client certificate to use when connecting to the RethinkDB server. See also: https://rethinkdb.com/docs/security/#using-tls diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 38cb002c08c2d..572d99fa3751b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -15,14 +15,14 @@ from .types import Instance from .version import parse_version -SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' - class RethinkDBCheck(AgentCheck): """ Collect metrics from a RethinkDB cluster. 
""" + SERVICE_CHECK_CONNECT = 'rethinkdb.can_connect' + def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) @@ -59,15 +59,15 @@ def connect_submitting_service_checks(self): except rethinkdb.errors.ReqlDriverError as exc: message = 'Could not connect to RethinkDB server: {!r}'.format(exc) self.log.error(message) - self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) + self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise except Exception as exc: message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc) self.log.error(message) - self.service_check(SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) + self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message) raise else: - self.service_check(SERVICE_CHECK_CONNECT, self.OK, tags=tags) + self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=tags) def collect_metrics(self, conn): # type: (rethinkdb.net.Connection) -> Iterator[Metric] @@ -88,29 +88,20 @@ def collect_connected_server_version(self, conn): def submit_metric(self, metric): # type: (Metric) -> None - metric_type = metric['type'] - name = metric['name'] - value = metric['value'] - tags = self.config.tags + metric['tags'] - - self.log.debug('submit_metric type=%r name=%r value=%r tags=%r', metric_type, name, value, tags) - - submit = getattr(self, metric_type) # type: Callable - submit(name, value, tags=tags) + submit = getattr(self, metric['type']) # type: Callable + submit(metric['name'], metric['value'], tags=self.config.tags + metric['tags']) def submit_version_metadata(self, conn): # type: (rethinkdb.net.Connection) -> None try: version = self.collect_connected_server_version(conn) except ValueError as exc: - self.log.error(exc) + self.log.error('Error collecting version metadata: %r', exc) else: self.set_metadata('version', version) def 
check(self, instance): # type: (Any) -> None - self.log.debug('check config=%r', self.config) - with self.connect_submitting_service_checks() as conn: for metric in self.collect_metrics(conn): self.submit_metric(metric) diff --git a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example index 658f54ef38152..bfa0b51a582bd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +++ b/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example @@ -13,16 +13,15 @@ init_config: # instances: - - - ## @param host - string - optional - default: localhost + ## @param host - string - required ## Host of the RethinkDB server. # - # host: localhost + - host: localhost - ## @param port - integer - optional - default: 28015 + ## @param port - integer - required ## Driver port of the RethinkDB server. # - # port: 28015 + port: 28015 ## @param username - string - optional ## The user account to connect as. diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index b19a496cfbf29..c0aff2034529e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -7,7 +7,7 @@ from datadog_checks.base.log import CheckLoggingAdapter from .types import Enumeration, Group, Metric, MetricSpec, Modifier -from .utils import dotted_join, lookup_dotted, to_timestamp +from .utils import dotted_join, lookup_dotted, to_time_elapsed class DocumentQuery(object): @@ -92,8 +92,8 @@ def _modify(self, value, modifier, logger): if modifier == 'ok_warning': return AgentCheck.OK if value else AgentCheck.WARNING - if modifier == 'timestamp': - return to_timestamp(value) + if modifier == 'time_elapsed': + return to_time_elapsed(value) raise RuntimeError('Unknown modifier: {!r}'.format(modifier)) # pragma: no cover diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/types.py 
b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py index 40759cf37840f..e48aa74321de1 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py @@ -6,7 +6,7 @@ MetricType = Literal['gauge', 'count', 'monotonic_count', 'rate', 'service_check'] Metric = TypedDict('Metric', {'type': MetricType, 'name': str, 'value': float, 'tags': List[str]}) -ModifierName = Literal['total', 'ok_warning', 'timestamp'] +ModifierName = Literal['total', 'ok_warning', 'time_elapsed'] TotalModifier = TypedDict('TotalModifier', {'name': Literal['total'], 'map': Callable[[Any], Sequence]}) Modifier = Union[ModifierName, TotalModifier] diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index 6355143e4a957..6411c90f26ed7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -5,7 +5,6 @@ Miscellaneous utilities. """ import datetime as dt -import time from typing import Any, Mapping, Sequence @@ -43,10 +42,6 @@ def dotted_join(values): return '.'.join(filter(None, values)) -def to_timestamp(datetime): +def to_time_elapsed(datetime): # type: (dt.datetime) -> float - try: - return datetime.timestamp() # type: ignore # (mypy runs in `--py2` mode.) - except AttributeError: # pragma: no cover - # Python 2. 
- return time.mktime(datetime.timetuple()) + return (dt.datetime.now(datetime.tzinfo) - datetime).total_seconds() diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index 9d10949f26d9a..c7675a79e74ec 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -116,9 +116,9 @@ name='server_status', prefix='rethinkdb.server_status', metrics=[ - {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 'timestamp'}, + {'type': 'gauge', 'path': 'network.time_connected', 'modifier': 'time_elapsed'}, {'type': 'gauge', 'path': 'network.connected_to', 'modifier': 'total'}, - {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'timestamp'}, + {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'time_elapsed'}, ], ) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index a9cb3c2bef8c2..693b357542f79 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -10,13 +10,11 @@ from datadog_checks.dev.conditions import WaitFor from datadog_checks.dev.docker import temporarily_stop_service -from datadog_checks.dev.structures import EnvVars from .common import ( AGENT_PASSWORD, AGENT_USER, CLIENT_USER, - COMPOSE_ENV_VARS, COMPOSE_FILE, DATABASE, HEROES_TABLE, @@ -119,11 +117,10 @@ def _server_reconnected(conn): return _server_exists(conn) and _leader_election_done(conn) with temporarily_stop_service(service, compose_file=COMPOSE_FILE): - with EnvVars(COMPOSE_ENV_VARS): - with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: - WaitFor(lambda: _server_disconnected(conn))() + with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: + WaitFor(lambda: _server_disconnected(conn))() - yield + yield with r.connect(host=HOST, port=SERVER_PORTS['server0']) as conn: WaitFor(lambda: _server_reconnected(conn))() diff --git a/rethinkdb/tests/conftest.py b/rethinkdb/tests/conftest.py index 
60301b0117224..a5869402ca66e 100644 --- a/rethinkdb/tests/conftest.py +++ b/rethinkdb/tests/conftest.py @@ -45,5 +45,4 @@ def dd_environment(instance): conditions = [wait_servers_ready, setup_cluster] with docker_run(COMPOSE_FILE, conditions=conditions, env_vars=COMPOSE_ENV_VARS): - config = {'instances': [instance]} - yield config + yield instance diff --git a/rethinkdb/tests/unit/document_db/test_query.py b/rethinkdb/tests/unit/document_db/test_query.py index d9afeeaff6204..1826d4afdccd6 100644 --- a/rethinkdb/tests/unit/document_db/test_query.py +++ b/rethinkdb/tests/unit/document_db/test_query.py @@ -24,24 +24,24 @@ def test_document_query(): """ PRODUCTS_COLLECTION = [ - # Use OrderedDict's so that order of submitted metrics is deterministic on Python 2 too. + # NOTE: use ordered dicts so that order of submitted metrics is deterministic on Python 2 too. OrderedDict( - { - 'name': 'T-Shirt', - 'category': 'clothing', - 'sales': {'sales_per_day': 100, 'sales_total': 10000}, - 'locations': [{'name': 'London', 'stock': 1200}, {'name': 'Paris', 'stock': 700}], - 'total_sales_per_location': OrderedDict({'london': 2000, 'paris': 8000}), - } + ( + ('name', 'T-Shirt'), + ('category', 'clothing'), + ('sales', {'sales_per_day': 100, 'sales_total': 10000}), + ('locations', [{'name': 'London', 'stock': 1200}, {'name': 'Paris', 'stock': 700}]), + ('total_sales_per_location', OrderedDict((('london', 2000), ('paris', 8000)))), + ), ), OrderedDict( - { - 'name': 'Laptop', - 'category': 'high-tech', - 'sales': {'sales_per_day': 5, 'sales_total': 400}, - 'locations': [{'name': 'New York', 'stock': 150}], - 'total_sales_per_location': {'new-york': 400}, - } + ( + ('name', 'Laptop'), + ('category', 'high-tech'), + ('sales', {'sales_per_day': 5, 'sales_total': 400}), + ('locations', [{'name': 'New York', 'stock': 150}]), + ('total_sales_per_location', {'new-york': 400}), + ) ), ] diff --git a/rethinkdb/tests/unit/document_db/test_utils.py 
b/rethinkdb/tests/unit/document_db/test_utils.py index 7d02a9b82fad6..7acc623901ff2 100644 --- a/rethinkdb/tests/unit/document_db/test_utils.py +++ b/rethinkdb/tests/unit/document_db/test_utils.py @@ -4,8 +4,9 @@ import datetime as dt import pytest +import pytz -from datadog_checks.rethinkdb.document_db.utils import dotted_join, lookup_dotted, to_timestamp +from datadog_checks.rethinkdb.document_db.utils import dotted_join, lookup_dotted, to_time_elapsed pytestmark = pytest.mark.unit @@ -21,10 +22,10 @@ def test_dotted_join(): assert dotted_join(('', 'bar', 'baz')) == 'bar.baz' -def test_to_timestamp(): +def test_to_time_elapsed(): # type: () -> None - datetime = dt.datetime(year=2020, month=1, day=1, hour=3, minute=45, second=0) - assert to_timestamp(datetime) == 1577846700.0 + one_day_seconds = 3600 * 24 + assert to_time_elapsed(dt.datetime.now(pytz.utc) - dt.timedelta(days=1)) == pytest.approx(one_day_seconds, abs=1) def test_lookup_dotted(): From 444df24073fb5ef6c85d5f23d9403386f8a2d322 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 10:51:28 +0100 Subject: [PATCH 127/147] Let long-lived query jobs pass through --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- rethinkdb/datadog_checks/rethinkdb/config.py | 16 ++++- .../datadog_checks/rethinkdb/operations.py | 59 ++++++++++--------- rethinkdb/datadog_checks/rethinkdb/types.py | 10 +++- rethinkdb/tests/unit/test_config.py | 25 ++++---- .../tests/unit/test_system_jobs_metrics.py | 35 +++++++++-- 6 files changed, 100 insertions(+), 47 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 572d99fa3751b..b91a36c33fc3e 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -75,7 +75,7 @@ def collect_metrics(self, conn): Collect metrics from the RethinkDB cluster we are connected to. 
""" for query in self.queries: - for metric in query.run(conn, logger=self.log): + for metric in query.run(conn, config=self.config, logger=self.log): yield metric def collect_connected_server_version(self, conn): diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 1f872825f41be..529424e64200f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -15,13 +15,17 @@ class Config(object): Encapsulates the validation of an `instance` dictionary while improving type information. """ - def __init__(self, instance): + def __init__(self, instance=None): # type: (Instance) -> None + if instance is None: + instance = {} + host = instance.get('host', 'localhost') port = instance.get('port', 28015) user = instance.get('username') password = instance.get('password') tls_ca_cert = instance.get('tls_ca_cert') + min_collection_interval = instance.get('min_collection_interval', 15) tags = instance.get('tags', []) if not isinstance(host, str): @@ -33,12 +37,22 @@ def __init__(self, instance): if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) + try: + min_collection_interval = float(min_collection_interval) + except (ValueError, TypeError): + raise ConfigurationError( + 'min_collection_interval must be convertible to a number (got {!r})'.format( + type(min_collection_interval) + ) + ) + self.host = host # type: str self.port = port # type: int self.user = user # type: Optional[str] self.password = password # type: Optional[str] self.tls_ca_cert = tls_ca_cert # type: Optional[str] self.tags = tags # type: List[str] + self.min_collection_interval = min_collection_interval # type: float def __repr__(self): # type: () -> str diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index 83f5877ddaf1b..883cd898c5318 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ 
b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -11,6 +11,7 @@ import rethinkdb +from .config import Config from .types import ( ClusterStats, ConfigSummary, @@ -36,8 +37,11 @@ system = r.db('rethinkdb') -def get_connected_server_version_string(conn): - # type: (rethinkdb.net.Connection) -> str +QUERY_JOB_DURATION_SEC_THRESHOLD = 15 + + +def get_connected_server_version_string(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> str """ Return the raw string of the RethinkDB version used by the server at the other end of the connection. """ @@ -48,8 +52,8 @@ def get_connected_server_version_string(conn): return server_status['process']['version'] -def get_config_summary(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ConfigSummary, List[str]]] +def get_config_summary(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[ConfigSummary, List[str]]] """ Return a summary of the cluster configuration. """ @@ -82,16 +86,16 @@ def get_config_summary(conn): yield r.expr(summary).run(conn), [] -def get_cluster_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ClusterStats, List[str]]] +def get_cluster_statistics(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[ClusterStats, List[str]]] """ Retrieve statistics about the cluster. """ yield system.table('stats').get(['cluster']).run(conn), [] -def get_servers_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ServerStats, List[str]]] +def get_servers_statistics(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[ServerStats, List[str]]] """ Retrieve statistics about each server in the cluster. 
""" @@ -112,8 +116,8 @@ def get_servers_statistics(conn): yield server_stats, tags -def get_tables_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[TableStats, List[str]]] +def get_tables_statistics(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[TableStats, List[str]]] """ Retrieve statistics about each table in the cluster. """ @@ -133,8 +137,8 @@ def get_tables_statistics(conn): yield table_stats, tags -def get_replicas_statistics(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ReplicaStats, List[str]]] +def get_replicas_statistics(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[ReplicaStats, List[str]]] """ Retrieve statistics about each replica (table/server pair) in the cluster. """ @@ -204,8 +208,8 @@ def get_replicas_statistics(conn): yield replica_stats, tags -def get_table_statuses(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[TableStatus, List[str]]] +def get_table_statuses(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[TableStatus, List[str]]] """ Retrieve the status of each table in the cluster. """ @@ -214,8 +218,8 @@ def get_table_statuses(conn): yield table_status, tags -def get_server_statuses(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[ServerStatus, List[str]]] +def get_server_statuses(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[ServerStatus, List[str]]] """ Retrieve the status of each server in the cluster. """ @@ -224,8 +228,8 @@ def get_server_statuses(conn): yield server_status, tags -def get_system_jobs(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[Job, List[str]]] +def get_system_jobs(conn, config, **kwargs): + # type: (rethinkdb.net.Connection, Config, **Any) -> Iterator[Tuple[Job, List[str]]] """ Retrieve all the currently running system jobs. 
""" @@ -236,16 +240,15 @@ def get_system_jobs(conn): # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema if job['type'] == 'query': - # NOTE: Request-response queries are typically too short-lived to be captured across Agent checks. - # Change feed queries however are long-running, they we'd be able to capture them. - # See: https://rethinkdb.com/docs/system-jobs/#query - # TODO(before-merging): submit within a `duration_sec` threshold instead of skipping entirely. - continue + # NOTE: we can only consistently collect metrics about queries that span more than an Agent collection + # interval. (There will be many short-lived queries within two checks that we can't capture.) + # Here, this means only changefeed queries and abnormally long request-response queries will pass through. + if job['duration_sec'] < config.min_collection_interval: + continue elif job['type'] == 'disk_compaction': - # Ongoing task on each server -- no information provided (i.e. `info` is empty). - # See: https://rethinkdb.com/docs/system-jobs/#disk_compaction + # Ongoing task on each server. Duration is `null` and `info` is empty, so nothing interesting there. continue - if job['type'] == 'index_construction': + elif job['type'] == 'index_construction': tags.extend( [ 'database:{}'.format(job['info']['db']), @@ -268,8 +271,8 @@ def get_system_jobs(conn): yield job, tags -def get_current_issues_summary(conn): - # type: (rethinkdb.net.Connection) -> Iterator[Tuple[CurrentIssuesSummary, List[str]]] +def get_current_issues_summary(conn, **kwargs): + # type: (rethinkdb.net.Connection, **Any) -> Iterator[Tuple[CurrentIssuesSummary, List[str]]] """ Retrieve a summary of problems detected within the cluster. 
""" diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index ff95d6f86c91c..aef6b6080ab73 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -11,7 +11,15 @@ Instance = TypedDict( 'Instance', - {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': str, 'tags': List[str]}, + { + 'host': str, + 'port': int, + 'username': str, + 'password': str, + 'tls_ca_cert': str, + 'min_collection_interval': Union[int, float], + 'tags': List[str], + }, total=False, ) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 55d8d2bebdd31..904b5ae18431e 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -14,8 +14,7 @@ def test_default_config(): # type: () -> None - instance = {} # type: Instance - config = Config(instance) + config = Config() assert config.host == 'localhost' assert config.port == 28015 assert config.user is None @@ -43,20 +42,26 @@ def test_config(): def test_config_repr(): # type: () -> None - instance = {} # type: Instance - config = Config(instance) + config = Config() assert repr(config) == "Config(host='localhost', port=28015, user=None, password='', tls_ca_cert=None, tags=[])" -@pytest.mark.parametrize('host', [42, True, object()]) -def test_invalid_host(host): +@pytest.mark.parametrize('value', [42, True, object()]) +def test_invalid_host(value): + # type: (Any) -> None + with pytest.raises(ConfigurationError): + Config(instance={'host': value}) + + +@pytest.mark.parametrize('value', [42.42, -42, True, object()]) +def test_invalid_port(value): # type: (Any) -> None with pytest.raises(ConfigurationError): - Config(instance={'host': host}) + Config(instance={'port': value}) -@pytest.mark.parametrize('port', [42.42, -42, True, object()]) -def test_invalid_port(port): +@pytest.mark.parametrize('value', ['not-a-number', object()]) +def 
test_invalid_min_collection_interval(value): # type: (Any) -> None with pytest.raises(ConfigurationError): - Config(instance={'port': port}) + Config(instance={'min_collection_interval': value}) diff --git a/rethinkdb/tests/unit/test_system_jobs_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py index 4b9ae9d35da2b..1c80c336d8506 100644 --- a/rethinkdb/tests/unit/test_system_jobs_metrics.py +++ b/rethinkdb/tests/unit/test_system_jobs_metrics.py @@ -7,6 +7,7 @@ import pytest from datadog_checks.rethinkdb import queries +from datadog_checks.rethinkdb.config import Config from datadog_checks.rethinkdb.types import BackfillJob, DiskCompactionJob, IndexConstructionJob, QueryJob pytestmark = pytest.mark.unit @@ -32,7 +33,7 @@ def test_jobs_metrics(): * Etc. """ - mock_query_job_row = { + mock_request_response_query_job_row = { 'type': 'query', 'id': ('query', 'abcd1234'), 'duration_sec': 0.12, @@ -40,6 +41,14 @@ def test_jobs_metrics(): 'servers': ['server0'], } # type: QueryJob + mock_changefeed_query_job_row = { + 'type': 'query', + 'id': ('query', 'abcd1234'), + 'duration_sec': 10, + 'info': {}, + 'servers': ['server1'], + } # type: QueryJob + mock_disk_compaction_row = { 'type': 'disk_compaction', 'id': ('disk_compaction', 'zero'), @@ -72,16 +81,30 @@ def test_jobs_metrics(): 'servers': ['server1'], } # type: IndexConstructionJob - mock_rows = [mock_query_job_row, mock_disk_compaction_row, mock_backfill_job_row, mock_index_construction_job_row] + mock_rows = [ + mock_request_response_query_job_row, + mock_changefeed_query_job_row, + mock_disk_compaction_row, + mock_backfill_job_row, + mock_index_construction_job_row, + ] conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = list(queries.system_jobs.run(conn, logger=MockLogger('test'))) + metrics = list( + queries.system_jobs.run(conn, config=Config({'min_collection_interval': 5}), logger=MockLogger('test')) + ) assert metrics == [ - # -- `query` 
job ignored -- - # -- `disk_compaction` job ignored -- + # short request-response `query` job ignored + { + 'type': 'gauge', + 'name': 'rethinkdb.jobs.duration_sec', + 'value': 10, + 'tags': ['job_type:query', 'server:server1'], + }, + # `disk_compaction` job ignored { 'type': 'gauge', 'name': 'rethinkdb.jobs.duration_sec', @@ -122,4 +145,4 @@ def test_unknown_job(): with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = [mock_unknown_job_row] with pytest.raises(RuntimeError): - list(queries.system_jobs.run(conn, logger=MockLogger('test'))) + list(queries.system_jobs.run(conn, config=Config(), logger=MockLogger('test'))) From 2d61475bbd2f7333dd3ef4f1edcfbf0116040868 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 10:53:20 +0100 Subject: [PATCH 128/147] Drop unused Config repr --- rethinkdb/datadog_checks/rethinkdb/config.py | 18 ------------------ rethinkdb/tests/unit/test_config.py | 6 ------ 2 files changed, 24 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 529424e64200f..3579c649a8ead 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -53,21 +53,3 @@ def __init__(self, instance=None): self.tls_ca_cert = tls_ca_cert # type: Optional[str] self.tags = tags # type: List[str] self.min_collection_interval = min_collection_interval # type: float - - def __repr__(self): - # type: () -> str - return ( - 'Config(host={host!r}, ' - 'port={port!r}, ' - 'user={user!r}, ' - "password={password!r}, " - 'tls_ca_cert={tls_ca_cert!r}, ' - 'tags={tags!r})' - ).format( - host=self.host, - port=self.port, - user=self.user, - password='********' if self.password else '', - tls_ca_cert=self.tls_ca_cert, - tags=self.tags, - ) diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 904b5ae18431e..2c9c00b251f91 100644 --- a/rethinkdb/tests/unit/test_config.py +++ 
b/rethinkdb/tests/unit/test_config.py @@ -40,12 +40,6 @@ def test_config(): assert config.tags == ['env:testing'] -def test_config_repr(): - # type: () -> None - config = Config() - assert repr(config) == "Config(host='localhost', port=28015, user=None, password='', tls_ca_cert=None, tags=[])" - - @pytest.mark.parametrize('value', [42, True, object()]) def test_invalid_host(value): # type: (Any) -> None From 862b5ab1a8b184571f3b854e83320b582354e7e5 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 12:57:21 +0100 Subject: [PATCH 129/147] Tweak version collection --- rethinkdb/datadog_checks/rethinkdb/check.py | 6 ++---- rethinkdb/datadog_checks/rethinkdb/operations.py | 11 ++++------- rethinkdb/datadog_checks/rethinkdb/version.py | 4 ++-- rethinkdb/tests/unit/test_version.py | 11 ++++++----- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index b91a36c33fc3e..386a8e8506bcd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -13,7 +13,6 @@ from .document_db import DocumentQuery from .document_db.types import Metric from .types import Instance -from .version import parse_version class RethinkDBCheck(AgentCheck): @@ -53,7 +52,7 @@ def connect_submitting_service_checks(self): port=config.port, user=config.user, password=config.password, - ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else None, + ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else {}, ) as conn: yield conn except rethinkdb.errors.ReqlDriverError as exc: @@ -83,8 +82,7 @@ def collect_connected_server_version(self, conn): """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
""" - version_string = operations.get_connected_server_version_string(conn) - return parse_version(version_string) + return operations.get_connected_server_version(conn) def submit_metric(self, metric): # type: (Metric) -> None diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index 883cd898c5318..669c7c126b1ee 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -28,6 +28,7 @@ TableStats, TableStatus, ) +from .version import parse_version # The usual entrypoint for building ReQL queries. r = rethinkdb.r @@ -37,19 +38,15 @@ system = r.db('rethinkdb') -QUERY_JOB_DURATION_SEC_THRESHOLD = 15 - - -def get_connected_server_version_string(conn, **kwargs): +def get_connected_server_version(conn, **kwargs): # type: (rethinkdb.net.Connection, **Any) -> str """ - Return the raw string of the RethinkDB version used by the server at the other end of the connection. + Return the RethinkDB version used by the server at the other end of the connection. 
""" # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer server_status = system.table('server_status').get(server['id']).run(conn) # type: ServerStatus - - return server_status['process']['version'] + return parse_version(server_status['process']['version']) def get_config_summary(conn, **kwargs): diff --git a/rethinkdb/datadog_checks/rethinkdb/version.py b/rethinkdb/datadog_checks/rethinkdb/version.py index 12f18d1094ce9..4265350411ed5 100644 --- a/rethinkdb/datadog_checks/rethinkdb/version.py +++ b/rethinkdb/datadog_checks/rethinkdb/version.py @@ -4,7 +4,7 @@ import re # See: https://github.com/rethinkdb/rethinkdb/blob/95cfed8a62f08e3198ac25417c9b6900be8b6877/src/utils.hpp#L117 -_RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P\S+)\s\(.*') +_RETHINKDB_VERSION_STR_REGEX = re.compile(r'^rethinkdb\s+(?P[\d\.]+)') def parse_version(rethinkdb_version_string): @@ -15,7 +15,7 @@ def parse_version(rethinkdb_version_string): Example ------- >>> parse_version('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 (tags/RELEASE_600/final))') - '2.4.0~0bionic' + '2.4.0' """ match = _RETHINKDB_VERSION_STR_REGEX.match(rethinkdb_version_string) diff --git a/rethinkdb/tests/unit/test_version.py b/rethinkdb/tests/unit/test_version.py index 78b01fcb44a10..39a8d22558547 100644 --- a/rethinkdb/tests/unit/test_version.py +++ b/rethinkdb/tests/unit/test_version.py @@ -12,11 +12,12 @@ @pytest.mark.parametrize( 'version_string, expected_version', [ - pytest.param('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 (tags/RELEASE_600/final))', '2.4.0~0bionic', id='2.4'), - pytest.param('rethinkdb 2.4.0-beta~0bionic (debug)', '2.4.0-beta~0bionic', id='2.4-beta'), - pytest.param('rethinkdb 2.4.0~0bionic (debug)', '2.4.0~0bionic', id='2.4-debug'), - pytest.param('rethinkdb 2.3.3~0jessie (GCC 4.9.2)', '2.3.3~0jessie', id='2.3'), - pytest.param('rethinkdb 2.3.3 (GCC 4.9.2)', '2.3.3', id='2.3-no-build'), + pytest.param('rethinkdb 2.4.0~0bionic (CLANG 6.0.0 
(tags/RELEASE_600/final))', '2.4.0', id='2.4'), + pytest.param('rethinkdb 2.4.0-beta~0bionic (debug)', '2.4.0', id='2.4-beta'), + pytest.param('rethinkdb 2.4.0~0bionic (debug)', '2.4.0', id='2.4-debug'), + pytest.param('rethinkdb 2.3.3~0jessie (GCC 4.9.2)', '2.3.3', id='2.3'), + pytest.param('rethinkdb 2.3.6 (GCC 4.9.2)', '2.3.6', id='2.3-no-build'), + pytest.param('rethinkdb 2.3.3', '2.3.3', id='no-compilation-string'), ], ) def test_parse_version(version_string, expected_version): From dc1a2376bb05716d5c8a5cfde63dd9c6cb8512a3 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 13:12:43 +0100 Subject: [PATCH 130/147] Test on 2.3 and latest --- rethinkdb/README.md | 2 ++ rethinkdb/tests/cluster.py | 13 ++++++++----- rethinkdb/tests/common.py | 14 +++++++++++--- rethinkdb/tests/test_rethinkdb.py | 3 ++- rethinkdb/tox.ini | 10 +++++++--- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/rethinkdb/README.md b/rethinkdb/README.md index 2541ca7c562ea..413a4d00dbecf 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -6,6 +6,8 @@ This check monitors a RethinkDB cluster through the Datadog Agent and collects metrics about performance, data availability, cluster configuration, and more. +**NOTE**: this integration is compatible with RethinkDB **version 2.3.6 and above**. + ## Setup Follow the instructions below to install and configure this check for an Agent running on a host. For containerized environments, see the [Autodiscovery Integration Templates][2] for guidance on applying these instructions. diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index 693b357542f79..ff9adae9f6517 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -53,10 +53,13 @@ def setup_cluster(): # Users. 
# See: https://rethinkdb.com/docs/permissions-and-accounts/ - response = r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD}).run(conn) - assert response['inserted'] == 1 - response = r.db('rethinkdb').grant(AGENT_USER, {'read': True}).run(conn) - assert response['granted'] == 1 + + if AGENT_USER != 'admin': + # Setup a dedicated Agent user. + response = r.db('rethinkdb').table('users').insert({'id': AGENT_USER, 'password': AGENT_PASSWORD}).run(conn) + assert response['inserted'] == 1 + response = r.db('rethinkdb').grant(AGENT_USER, {'read': True}).run(conn) + assert response['granted'] == 1 response = r.db('rethinkdb').table('users').insert({'id': CLIENT_USER, 'password': False}).run(conn) assert response['inserted'] == 1 @@ -98,7 +101,7 @@ def _leader_election_done(conn): replica_states = list( r.db('rethinkdb') .table('table_status') - .concat_map(r.row['shards']) + .concat_map(r.row['shards'].default([])) # May be `None` on 2.3.x. .concat_map(r.row['replicas']) .map(r.row['state']) .run(conn) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index a37eb273c60e7..94a30d7904d85 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -36,8 +36,17 @@ # Users. -AGENT_USER = 'datadog-agent' -AGENT_PASSWORD = 'r3th1nK' +if RAW_VERSION.startswith('2.3.'): + # In RethinkDB 2.3.x, only the admin user could access the `rethinkdb` system database by default, but it was + # not possible to grant permissions to any other user. This was resolved in 2.4.0. + # See: https://github.com/rethinkdb/rethinkdb/issues/5692 + AGENT_USER = 'admin' + AGENT_PASSWORD = '' +else: + # Use a dedicated user for metric collection. + AGENT_USER = 'datadog-agent' + AGENT_PASSWORD = 'r3th1nK' + CLIENT_USER = 'doggo' # TLS. @@ -197,7 +206,6 @@ # Pytest common test data. 
MALFORMED_VERSION_STRING_PARAMS = [ - pytest.param('rethinkdb 2.3.3', id='no-compilation-string'), pytest.param('rethinkdb (GCC 4.9.2)', id='no-version'), pytest.param('rethinkdb', id='prefix-only'), pytest.param('abc 2.4.0~0bionic (GCC 4.9.2)', id='wrong-prefix'), diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 4c0e374f82a60..0e24934e0b36f 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -61,7 +61,7 @@ def test_check(aggregator, instance): def test_check_without_credentials_uses_admin(aggregator, instance): # type: (AggregatorStub, Instance) -> None """ - Verify that when no credentials are configured (not recommended though), the check still runs successfully provided + Verify that when no credentials are configured, the check still runs successfully provided the admin account doesn't have a password set. """ instance = instance.copy() @@ -169,6 +169,7 @@ def collect_metrics(self, conn): aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) +@pytest.mark.skipif(not RAW_VERSION, reason='Requires RAW_VERSION to be set') @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_metadata_version(instance, datadog_agent): diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index 2a7c91e576a8c..f7ba04d52bd2c 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -3,7 +3,7 @@ minversion = 2.0 skip_missing_interpreters = true basepython = py38 envlist = - py{27,38}-{2.4} + py{27,38}-{2.3,latest} [testenv] dd_check_style = true @@ -23,5 +23,9 @@ commands = pip install -r requirements.in pytest -v {posargs} setenv = - 2.4: RETHINKDB_IMAGE = rethinkdb:2.4.0 - 2.4: RETHINKDB_RAW_VERSION = 2.4.0~0bionic + ; Can't support lower 2.3 patch versions due to: https://github.com/rethinkdb/rethinkdb/issues/6108 + 2.3: RETHINKDB_IMAGE = rethinkdb:2.3.6 + 2.3: RETHINKDB_RAW_VERSION = 2.3.6 + 2.3: 
RETHINKDB_SUPPORTS_SYSTEM_DB_PERMISSIONS = false + latest: RETHINKDB_IMAGE = rethinkdb:latest + latest: RETHINKDB_RAW_VERSION = From 5b4e2e99ec00febf835d5dce21e2f36af32c4afb Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 13:30:51 +0100 Subject: [PATCH 131/147] Update README with 2.3-compatible instructions, fix service check names --- rethinkdb/README.md | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/rethinkdb/README.md b/rethinkdb/README.md index 413a4d00dbecf..e9f382fff8292 100644 --- a/rethinkdb/README.md +++ b/rethinkdb/README.md @@ -6,7 +6,7 @@ This check monitors a RethinkDB cluster through the Datadog Agent and collects metrics about performance, data availability, cluster configuration, and more. -**NOTE**: this integration is compatible with RethinkDB **version 2.3.6 and above**. +**Note**: this integration is compatible with RethinkDB **version 2.3.6 and above**. ## Setup @@ -18,14 +18,16 @@ The RethinkDB check is included in the [Datadog Agent][3] package. No additional ### Configuration -1. Add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. Use the following ReQL commands, referring to [Permissions and user accounts][4] for details: +1. If using RethinkDB 2.4+, add a `datadog-agent` user with read-only permissions on the `rethinkdb` database. You can use the following ReQL commands, and refer to [Permissions and user accounts][4] for details: ```python r.db('rethinkdb').table('users').insert({'id': 'datadog-agent', 'password': ''}) r.db('rethinkdb').grant('datadog-agent', {'read': True}) ``` -2. Edit the `rethinkdb.d/conf.yaml` file in the `conf.d/` folder at the root of your [Agent's configuration directory][5]. See the [sample rethinkdb.d/conf.yaml][6] for all available configuration options. + **Note**: on RethinkDB 2.3.x, granting permissions on the `rethinkdb` database is not supported. 
Skip this step and use your [admin account][5] below instead. + +2. Edit the `rethinkdb.d/conf.yaml` file in the `conf.d/` folder at the root of your [Agent's configuration directory][6]. See the [sample rethinkdb.d/conf.yaml][7] for all available configuration options. ```yaml init_config: @@ -33,31 +35,31 @@ The RethinkDB check is included in the [Datadog Agent][3] package. No additional instances: - host: localhost port: 28015 - user: datadog-agent + user: "" password: "" ``` -3. [Restart the Agent][7]. +3. [Restart the Agent][8]. **Note**: this integration collects metrics from all servers in the cluster, so you only need a single Agent. ### Validation -[Run the Agent's status subcommand][8] and look for `rethinkdb` under the Checks section. +[Run the Agent's status subcommand][9] and look for `rethinkdb` under the Checks section. ## Data Collected ### Metrics -See [metadata.csv][9] for a list of metrics provided by this check. +See [metadata.csv][10] for a list of metrics provided by this check. ### Service Checks - `rethinkdb.can_connect`: Returns `CRITICAL` if the Agent cannot reach the configured RethinkDB server, `OK` otherwise. -- `rethinkdb.table_status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` otherwise. -- `rethinkdb.table_status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` otherwise. -- `rethinkdb.table_status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` otherwise. -- `rethinkdb.table_status.all_replicas_ready`: Returns `OK` if all replicas are ready for reads and writes, `WARNING` otherwise (e.g. if backfills are in progress). +- `rethinkdb.table_status.status.ready_for_outdated_reads`: Returns `OK` if all shards of a table are ready to accept outdated read queries, `WARNING` otherwise. 
+- `rethinkdb.table_status.status.ready_for_reads`: Returns `OK` if all shards of a table are ready to accept read queries, `WARNING` otherwise. +- `rethinkdb.table_status.status.ready_for_writes`: Returns `OK` if all shards of a table are ready to accept write queries, `WARNING` otherwise. +- `rethinkdb.table_status.status.all_replicas_ready`: Returns `OK` if all replicas are ready for reads and writes, `WARNING` otherwise (e.g. if backfills are in progress). ### Events @@ -65,15 +67,16 @@ RethinkDB does not include any events. ## Troubleshooting -Need help? Contact [Datadog support][10]. +Need help? Contact [Datadog support][11]. [1]: https://rethinkdb.com/ [2]: https://docs.datadoghq.com/agent/autodiscovery/integrations [3]: https://docs.datadoghq.com/agent [4]: https://rethinkdb.com/docs/permissions-and-accounts/ -[5]: https://docs.datadoghq.com/agent/guide/agent-configuration-files/#agent-configuration-directory -[6]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example -[7]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent -[8]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information -[9]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/metadata.csv -[10]: https://docs.datadoghq.com/help +[5]: https://rethinkdb.com/docs/security/#the-admin-account +[6]: https://docs.datadoghq.com/agent/guide/agent-configuration-files/#agent-configuration-directory +[7]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/data/conf.yaml.example +[8]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent +[9]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information +[10]: https://github.com/DataDog/integrations-core/blob/master/rethinkdb/metadata.csv +[11]: https://docs.datadoghq.com/help From 
efb97f73235d12b9b210889dc1ea60629c467036 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 13:42:44 +0100 Subject: [PATCH 132/147] Nits --- rethinkdb/datadog_checks/rethinkdb/config.py | 2 +- rethinkdb/tests/common.py | 8 ++------ rethinkdb/tox.ini | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 3579c649a8ead..a16d00d29cdb2 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -25,8 +25,8 @@ def __init__(self, instance=None): user = instance.get('username') password = instance.get('password') tls_ca_cert = instance.get('tls_ca_cert') - min_collection_interval = instance.get('min_collection_interval', 15) tags = instance.get('tags', []) + min_collection_interval = instance.get('min_collection_interval', 15) if not isinstance(host, str): raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index 94a30d7904d85..a190f63c037e1 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -13,8 +13,6 @@ HERE = get_here() -CHECK_NAME = 'rethinkdb' - IMAGE = os.environ.get('RETHINKDB_IMAGE', '') RAW_VERSION = os.environ.get('RETHINKDB_RAW_VERSION', '') @@ -37,8 +35,8 @@ # Users. if RAW_VERSION.startswith('2.3.'): - # In RethinkDB 2.3.x, only the admin user could access the `rethinkdb` system database by default, but it was - # not possible to grant permissions to any other user. This was resolved in 2.4.0. + # In RethinkDB 2.3.x, granting permissions onto `rethinkdb` database to non-admin users is not supported. + # So we must use the admin account. 
# See: https://github.com/rethinkdb/rethinkdb/issues/5692 AGENT_USER = 'admin' AGENT_PASSWORD = '' @@ -176,7 +174,6 @@ CURRENT_ISSUE_TYPES_SUBMITTED_IF_DISCONNECTED_SERVERS = ['table_availability'] - E2E_METRICS = ( tuple((name, typ) for name, typ, _, _ in CONFIG_METRICS) + CLUSTER_STATISTICS_METRICS @@ -202,7 +199,6 @@ 'RETHINKDB_TLS_DRIVER_CERT': TLS_DRIVER_CERT, } - # Pytest common test data. MALFORMED_VERSION_STRING_PARAMS = [ diff --git a/rethinkdb/tox.ini b/rethinkdb/tox.ini index f7ba04d52bd2c..383fc0ceefe8d 100644 --- a/rethinkdb/tox.ini +++ b/rethinkdb/tox.ini @@ -26,6 +26,5 @@ setenv = ; Can't support lower 2.3 patch versions due to: https://github.com/rethinkdb/rethinkdb/issues/6108 2.3: RETHINKDB_IMAGE = rethinkdb:2.3.6 2.3: RETHINKDB_RAW_VERSION = 2.3.6 - 2.3: RETHINKDB_SUPPORTS_SYSTEM_DB_PERMISSIONS = false latest: RETHINKDB_IMAGE = rethinkdb:latest latest: RETHINKDB_RAW_VERSION = From 5e7fc9f86a80edf6e066ea019b347784e20ede17 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 20 Mar 2020 16:00:04 +0100 Subject: [PATCH 133/147] Address feedback --- rethinkdb/datadog_checks/rethinkdb/check.py | 4 +--- rethinkdb/datadog_checks/rethinkdb/config.py | 20 ++++++++++++++----- .../rethinkdb/document_db/utils.py | 6 +++++- rethinkdb/tests/unit/test_config.py | 13 ++++++++---- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 386a8e8506bcd..f4e9ce075efff 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -42,9 +42,7 @@ def __init__(self, *args, **kwargs): def connect_submitting_service_checks(self): # type: () -> Iterator[rethinkdb.net.Connection] config = self.config - - tags = ['host:{}'.format(config.host), 'port:{}'.format(config.port)] - tags.extend(config.tags) + tags = config.service_check_tags try: with rethinkdb.r.connect( diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py 
b/rethinkdb/datadog_checks/rethinkdb/config.py index a16d00d29cdb2..489e8b8183a4c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -29,20 +29,25 @@ def __init__(self, instance=None): min_collection_interval = instance.get('min_collection_interval', 15) if not isinstance(host, str): - raise ConfigurationError('host must be a string (got {!r})'.format(type(host))) + raise ConfigurationError('host {!r} must be a string (got {!r})'.format(host, type(host))) - if not isinstance(port, int) or isinstance(port, bool): - raise ConfigurationError('port must be an integer (got {!r})'.format(type(port))) + try: + port = int(port) + except (ValueError, TypeError): + raise ConfigurationError('port {!r} must be convertible to an integer (got {!r})'.format(port, type(port))) if port < 0: raise ConfigurationError('port must be positive (got {!r})'.format(port)) + if not isinstance(tags, list): + raise ConfigurationError('tags {!r} must be a list (got {!r})'.format(tags, type(tags))) + try: min_collection_interval = float(min_collection_interval) except (ValueError, TypeError): raise ConfigurationError( - 'min_collection_interval must be convertible to a number (got {!r})'.format( - type(min_collection_interval) + 'min_collection_interval {!r} must be convertible to a number (got {!r})'.format( + min_collection_interval, type(min_collection_interval) ) ) @@ -53,3 +58,8 @@ def __init__(self, instance=None): self.tls_ca_cert = tls_ca_cert # type: Optional[str] self.tags = tags # type: List[str] self.min_collection_interval = min_collection_interval # type: float + + @property + def service_check_tags(self): + # type: () -> List[str] + return ['host:{}'.format(self.host), 'port:{}'.format(self.port)] + self.tags diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index 6411c90f26ed7..11e5b34fd681d 100644 --- 
a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -7,6 +7,8 @@ import datetime as dt from typing import Any, Mapping, Sequence +from datadog_checks.base.utils.db.utils import normalize_datetime + def lookup_dotted(dct, path): # type: (Mapping, str) -> Any @@ -44,4 +46,6 @@ def dotted_join(values): def to_time_elapsed(datetime): # type: (dt.datetime) -> float - return (dt.datetime.now(datetime.tzinfo) - datetime).total_seconds() + datetime = normalize_datetime(datetime) + elapsed = dt.datetime.now(datetime.tzinfo) - datetime + return elapsed.total_seconds() diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index 2c9c00b251f91..fe685b513d273 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -22,22 +22,27 @@ def test_default_config(): assert config.tags == [] -def test_config(): - # type: () -> None +@pytest.mark.parametrize('port_28016,', [28016, '28016']) +@pytest.mark.parametrize('min_collection_interval_10', [10, '10', '10.0']) +def test_config(port_28016, min_collection_interval_10): + # type: (Any, Any) -> None instance = { 'host': '192.168.121.1', - 'port': 28016, + 'port': port_28016, 'username': 'datadog-agent', 'password': 's3kr3t', 'tls_ca_cert': '/path/to/client.cert', 'tags': ['env:testing'], + 'min_collection_interval': min_collection_interval_10, } # type: Instance + config = Config(instance) assert config.host == '192.168.121.1' assert config.port == 28016 assert config.user == 'datadog-agent' assert config.tls_ca_cert == '/path/to/client.cert' assert config.tags == ['env:testing'] + assert config.min_collection_interval == 10 @pytest.mark.parametrize('value', [42, True, object()]) @@ -47,7 +52,7 @@ def test_invalid_host(value): Config(instance={'host': value}) -@pytest.mark.parametrize('value', [42.42, -42, True, object()]) +@pytest.mark.parametrize('value', [-28016, '280.16', 'true', object()]) def 
test_invalid_port(value): # type: (Any) -> None with pytest.raises(ConfigurationError): From edf3da5c5ba7f01a5032682810f3687cca23388e Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 24 Mar 2020 10:42:50 +0100 Subject: [PATCH 134/147] Store service_check_tags as a tuple --- rethinkdb/datadog_checks/rethinkdb/config.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 489e8b8183a4c..8442944e85df1 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -57,9 +57,5 @@ def __init__(self, instance=None): self.password = password # type: Optional[str] self.tls_ca_cert = tls_ca_cert # type: Optional[str] self.tags = tags # type: List[str] + self.service_check_tags = ('host:{}'.format(self.host), 'port:{}'.format(self.port)) + tuple(self.tags) self.min_collection_interval = min_collection_interval # type: float - - @property - def service_check_tags(self): - # type: () -> List[str] - return ['host:{}'.format(self.host), 'port:{}'.format(self.port)] + self.tags From 6d58c6ccdacbaa6fe9b3011daa18c25ee88c81ac Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 24 Mar 2020 11:28:08 +0100 Subject: [PATCH 135/147] Resolve flaky cluster setup on RethinkDB 2.3 --- rethinkdb/tests/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/tests/cluster.py b/rethinkdb/tests/cluster.py index ff9adae9f6517..12ef6c1615d40 100644 --- a/rethinkdb/tests/cluster.py +++ b/rethinkdb/tests/cluster.py @@ -69,7 +69,7 @@ def setup_cluster(): # Simulate client activity. # NOTE: ensures that 'written_docs_*' and 'read_docs_*' metrics have non-zero values. 
- with r.connect(host=HOST, port=SERVER_PORTS['proxy'], user=CLIENT_USER) as conn: + with r.connect(host=HOST, port=SERVER_PORTS['server0'], user=CLIENT_USER) as conn: response = r.db(DATABASE).table(HEROES_TABLE).insert(HEROES_TABLE_DOCUMENTS).run(conn) assert response['inserted'] == len(HEROES_TABLE_DOCUMENTS) From 66b0dbc1df4eae0dcce9d06ade6fa20f67550418 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 24 Mar 2020 11:44:31 +0100 Subject: [PATCH 136/147] Add proxy check test, fix version handling for proxy --- rethinkdb/datadog_checks/rethinkdb/check.py | 7 ++++--- .../datadog_checks/rethinkdb/operations.py | 13 ++++++++++--- rethinkdb/tests/test_rethinkdb.py | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index f4e9ce075efff..4d864f7b0f693 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from contextlib import contextmanager -from typing import Any, Callable, Iterator, Sequence, cast +from typing import Any, Callable, Iterator, Optional, Sequence, cast import rethinkdb @@ -76,7 +76,7 @@ def collect_metrics(self, conn): yield metric def collect_connected_server_version(self, conn): - # type: (rethinkdb.net.Connection) -> str + # type: (rethinkdb.net.Connection) -> Optional[str] """ Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. 
""" @@ -94,7 +94,8 @@ def submit_version_metadata(self, conn): except ValueError as exc: self.log.error('Error collecting version metadata: %r', exc) else: - self.set_metadata('version', version) + if version is not None: + self.set_metadata('version', version) def check(self, instance): # type: (Any) -> None diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index 669c7c126b1ee..00b96b70df38d 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -7,7 +7,7 @@ Python ReQL reference documentation: https://rethinkdb.com/api/python/ """ -from typing import Any, Iterator, List, Mapping, Tuple +from typing import Any, Iterator, List, Mapping, Optional, Tuple import rethinkdb @@ -39,13 +39,20 @@ def get_connected_server_version(conn, **kwargs): - # type: (rethinkdb.net.Connection, **Any) -> str + # type: (rethinkdb.net.Connection, **Any) -> Optional[str] """ Return the RethinkDB version used by the server at the other end of the connection. """ # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer - server_status = system.table('server_status').get(server['id']).run(conn) # type: ServerStatus + server_status = system.table('server_status').get(server['id']).run(conn) # type: Optional[ServerStatus] + + if server_status is None: + # Only proxies don't have an entry in the `server_status` table. 
+ if not server['proxy']: + raise RuntimeError('No `server_status` entry for server {!r}'.format(server['id'])) + return None + return parse_version(server_status['process']['version']) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 0e24934e0b36f..377c81ee25771 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -82,6 +82,23 @@ def test_check_without_credentials_uses_admin(aggregator, instance): aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_check_connect_to_proxy(aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = instance.copy() + instance['port'] = SERVER_PORTS['proxy'] + + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check(instance) + + assert_metrics(aggregator) + aggregator.assert_all_metrics_covered() + + service_check_tags = TAGS + _get_connect_service_check_tags(instance) + aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) + + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_check_connect_to_server_with_tls(aggregator, instance): From 4e7f320469aea4077ed9a757796d7610f687fb5f Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 24 Mar 2020 12:05:22 +0100 Subject: [PATCH 137/147] Refactor version metadata tests --- rethinkdb/datadog_checks/rethinkdb/check.py | 29 ++--- .../datadog_checks/rethinkdb/operations.py | 16 +-- rethinkdb/tests/test_rethinkdb.py | 109 +++++++----------- 3 files changed, 64 insertions(+), 90 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 4d864f7b0f693..ae7a9413ac5a0 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -2,7 +2,7 @@ # All rights reserved 
# Licensed under a 3-clause BSD style license (see LICENSE) from contextlib import contextmanager -from typing import Any, Callable, Iterator, Optional, Sequence, cast +from typing import Any, Callable, Iterator, Sequence, cast import rethinkdb @@ -13,6 +13,7 @@ from .document_db import DocumentQuery from .document_db.types import Metric from .types import Instance +from .version import parse_version class RethinkDBCheck(AgentCheck): @@ -75,13 +76,6 @@ def collect_metrics(self, conn): for metric in query.run(conn, config=self.config, logger=self.log): yield metric - def collect_connected_server_version(self, conn): - # type: (rethinkdb.net.Connection) -> Optional[str] - """ - Return the version of RethinkDB run by the server at the other end of the connection, in SemVer format. - """ - return operations.get_connected_server_version(conn) - def submit_metric(self, metric): # type: (Metric) -> None submit = getattr(self, metric['type']) # type: Callable @@ -90,12 +84,21 @@ def submit_metric(self, metric): def submit_version_metadata(self, conn): # type: (rethinkdb.net.Connection) -> None try: - version = self.collect_connected_server_version(conn) + raw_version = operations.get_connected_server_raw_version(conn) + except Exception as exc: + self.log.error('Error collecting version metadata: %s', exc) + return + + if raw_version is None: + return + + try: + version = parse_version(raw_version) except ValueError as exc: - self.log.error('Error collecting version metadata: %r', exc) - else: - if version is not None: - self.set_metadata('version', version) + self.log.error('Failed to parse version: %s', exc) + return + + self.set_metadata('version', version) def check(self, instance): # type: (Any) -> None diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index 00b96b70df38d..d92898814c788 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ 
-28,7 +28,6 @@ TableStats, TableStatus, ) -from .version import parse_version # The usual entrypoint for building ReQL queries. r = rethinkdb.r @@ -38,22 +37,23 @@ system = r.db('rethinkdb') -def get_connected_server_version(conn, **kwargs): +def get_connected_server_raw_version(conn, **kwargs): # type: (rethinkdb.net.Connection, **Any) -> Optional[str] """ - Return the RethinkDB version used by the server at the other end of the connection. + Return the RethinkDB version used by the server at the other end of the connection, in raw string format. """ # See: https://rethinkdb.com/docs/system-tables/#server_status server = conn.server() # type: ConnectionServer server_status = system.table('server_status').get(server['id']).run(conn) # type: Optional[ServerStatus] if server_status is None: - # Only proxies don't have an entry in the `server_status` table. - if not server['proxy']: - raise RuntimeError('No `server_status` entry for server {!r}'.format(server['id'])) - return None + if server['proxy']: + # Proxies don't have an entry in the `server_status` table. 
+ return None + else: # pragma: no cover + raise RuntimeError('Expected a `server_status` entry for server {!r}, got none'.format(server)) - return parse_version(server_status['process']['version']) + return server_status['process']['version'] def get_config_summary(conn, **kwargs): diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py index 377c81ee25771..202bfd89a6593 100644 --- a/rethinkdb/tests/test_rethinkdb.py +++ b/rethinkdb/tests/test_rethinkdb.py @@ -4,6 +4,7 @@ import copy from typing import Any, Iterator, List +import mock import pytest import rethinkdb @@ -186,74 +187,44 @@ def collect_metrics(self, conn): aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) -@pytest.mark.skipif(not RAW_VERSION, reason='Requires RAW_VERSION to be set') @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_metadata_version(instance, datadog_agent): - # type: (Instance, DatadogAgentStub) -> None - check_id = 'test' - - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check_id = check_id - - check.check(instance) - - raw_version = RAW_VERSION - version, _, build = raw_version.partition('~') - major, minor, patch = version.split('.') - version_metadata = { - 'version.scheme': 'semver', - 'version.major': major, - 'version.minor': minor, - 'version.patch': patch, - 'version.raw': raw_version, - } - - datadog_agent.assert_metadata(check_id, version_metadata) - - -@pytest.mark.integration -@pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) -def test_metadata_version_malformed(instance, aggregator, datadog_agent, malformed_version_string): - # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None - """ - Verify that check still runs to completion if version provided by RethinkDB is malformed. 
- """ - - class MockRethinkDBCheck(RethinkDBCheck): - def collect_connected_server_version(self, conn): - # type: (Any) -> str - return malformed_version_string - - check_id = 'test' - - check = MockRethinkDBCheck('rethinkdb', {}, [instance]) - check.check_id = check_id - - check.check(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) - - datadog_agent.assert_metadata(check_id, {}) - - -@pytest.mark.integration -def test_metadata_version_failure(instance, aggregator, datadog_agent): - # type: (Instance, AggregatorStub, DatadogAgentStub) -> None - """ - Verify that check still runs to completion if it fails to retrieve the RethinkDB version. - """ - - class MockRethinkDBCheck(RethinkDBCheck): - def collect_connected_server_version(self, conn): - # type: (Any) -> str - raise ValueError('Oops!') - - check_id = 'test' - - check = MockRethinkDBCheck('rethinkdb', {}, [instance]) - check.check_id = check_id - - check.check(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK) - - datadog_agent.assert_metadata(check_id, {}) +class TestVersionMetadata: + VERSION_MOCK_TARGET = 'datadog_checks.rethinkdb.operations.get_connected_server_raw_version' + + def run_test(self, instance, datadog_agent, metadata): + # type: (Instance, DatadogAgentStub, dict) -> None + check_id = 'test' + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check_id = check_id + check.check(instance) + datadog_agent.assert_metadata(check_id, metadata) + + @pytest.mark.skipif(not RAW_VERSION, reason='Requires RAW_VERSION to be set') + def test_success(self, instance, datadog_agent): + # type: (Instance, DatadogAgentStub) -> None + raw_version = RAW_VERSION + version, _, build = raw_version.partition('~') + major, minor, patch = version.split('.') + metadata = { + 'version.scheme': 'semver', + 'version.major': major, + 'version.minor': minor, + 'version.patch': patch, + 'version.raw': raw_version, + } + + 
self.run_test(instance, datadog_agent, metadata=metadata) + + @pytest.mark.integration + @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) + def test_malformed(self, instance, aggregator, datadog_agent, malformed_version_string): + # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None + with mock.patch(self.VERSION_MOCK_TARGET, return_value=malformed_version_string): + self.run_test(instance, datadog_agent, metadata={}) + + @pytest.mark.integration + def test_failure(self, instance, aggregator, datadog_agent): + # type: (Instance, AggregatorStub, DatadogAgentStub) -> None + with mock.patch(self.VERSION_MOCK_TARGET, side_effect=ValueError('Oops!')): + self.run_test(instance, datadog_agent, metadata={}) From d2b56a8e64f634db04d688fa2b303b62c8256b93 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Tue, 24 Mar 2020 12:46:14 +0100 Subject: [PATCH 138/147] Factorize integration tests --- rethinkdb/tests/assertions.py | 22 +++ rethinkdb/tests/test_integration.py | 159 +++++++++++++++++++ rethinkdb/tests/test_rethinkdb.py | 230 ---------------------------- 3 files changed, 181 insertions(+), 230 deletions(-) create mode 100644 rethinkdb/tests/test_integration.py delete mode 100644 rethinkdb/tests/test_rethinkdb.py diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index 35b798082d60d..6255d75c54b1e 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -3,7 +3,10 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from typing import Set +from datadog_checks.base import AgentCheck from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.base.types import ServiceCheckStatus +from datadog_checks.rethinkdb.types import Instance from .common import ( CLUSTER_STATISTICS_METRICS, @@ -22,12 +25,31 @@ SERVERS, TABLE_STATISTICS_METRICS, TABLE_STATUS_METRICS, + TABLE_STATUS_SERVICE_CHECKS, TABLE_STATUS_SHARDS_METRICS, TAGS, ) from .types import 
ServerName +def assert_service_checks(aggregator, instance, connect_status=AgentCheck.OK, disconnected_servers=None): + # type: (AggregatorStub, Instance, ServiceCheckStatus, Set[ServerName]) -> None + connect_tags = TAGS + ['host:{}'.format(instance['host']), 'port:{}'.format(instance['port'])] + aggregator.assert_service_check('rethinkdb.can_connect', connect_status, count=1, tags=connect_tags) + + for service_check in TABLE_STATUS_SERVICE_CHECKS: + count = 0 if connect_status == AgentCheck.CRITICAL else 1 + + if disconnected_servers: + status = AgentCheck.OK if service_check.endswith('ready_for_outdated_reads') else AgentCheck.WARNING + else: + status = AgentCheck.OK + + tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] + + aggregator.assert_service_check(service_check, status, count=count, tags=tags) + + def assert_metrics(aggregator, disconnected_servers=None): # type: (AggregatorStub, Set[ServerName]) -> None if disconnected_servers is None: diff --git a/rethinkdb/tests/test_integration.py b/rethinkdb/tests/test_integration.py new file mode 100644 index 0000000000000..d9aae8b272439 --- /dev/null +++ b/rethinkdb/tests/test_integration.py @@ -0,0 +1,159 @@ +# (C) Datadog, Inc. 
2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import copy +from typing import ContextManager, Set + +import mock +import pytest +import rethinkdb + +from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub +from datadog_checks.base.types import ServiceCheckStatus +from datadog_checks.rethinkdb import RethinkDBCheck +from datadog_checks.rethinkdb.types import Instance + +from .assertions import assert_metrics, assert_service_checks +from .cluster import temporarily_disconnect_server +from .common import ( + HEROES_TABLE_SERVERS, + MALFORMED_VERSION_STRING_PARAMS, + RAW_VERSION, + SERVER_PORTS, + TLS_CLIENT_CERT, + TLS_SERVER, +) +from .types import ServerName + +try: + from contextlib import nullcontext # type: ignore +except ImportError: + from contextlib2 import nullcontext + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +class TestCheck: + METRICS_COLLECTION_MOCK_TARGET = 'datadog_checks.rethinkdb.check.RethinkDBCheck.collect_metrics' + + def run_test( + self, aggregator, instance, check_context=None, connect_status=RethinkDBCheck.OK, disconnected_servers=None + ): + # type: (AggregatorStub, Instance, ContextManager[None], ServiceCheckStatus, Set[ServerName]) -> None + check = RethinkDBCheck('rethinkdb', {}, [instance]) + + with check_context if check_context is not None else nullcontext(): + check.check(instance) + + if connect_status == RethinkDBCheck.OK: + assert_metrics(aggregator, disconnected_servers=disconnected_servers) + aggregator.assert_all_metrics_covered() + + assert_service_checks( + aggregator, instance, connect_status=connect_status, disconnected_servers=disconnected_servers + ) + + def test_default(self, aggregator, instance): + # type: (AggregatorStub, Instance) -> None + self.run_test(aggregator, instance) + + def test_connect_proxy_ok(self, aggregator, instance): + # type: (AggregatorStub, 
Instance) -> None + instance = instance.copy() + instance['port'] = SERVER_PORTS['proxy'] + self.run_test(aggregator, instance) + + def test_connect_tls_ok(self, aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = instance.copy() + instance['port'] = SERVER_PORTS[TLS_SERVER] + instance['tls_ca_cert'] = TLS_CLIENT_CERT + self.run_test(aggregator, instance) + + def test_no_credentials_ok(self, aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = instance.copy() + + # RethinkDB will default to 'admin' w/o password. + # Should work assuming admin account in our test cluster doesn't have a password. + instance.pop('username') + instance.pop('password') + + self.run_test(aggregator, instance) + + @pytest.mark.parametrize('server_with_data', list(HEROES_TABLE_SERVERS)) + def test_disconnected_data_server_ok(self, aggregator, instance, server_with_data): + # type: (AggregatorStub, Instance, ServerName) -> None + # Simulate the scenario where one of the servers in the cluster is down, but not the one we're + # connecting to. 
+ self.run_test( + aggregator, + instance, + check_context=temporarily_disconnect_server(server_with_data), + disconnected_servers={server_with_data}, + ) + + def test_connection_failure(self, aggregator, instance): + # type: (AggregatorStub, Instance) -> None + instance = copy.deepcopy(instance) + instance['host'] = 'doesnotexist' + self.run_test( + aggregator, + instance, + check_context=pytest.raises(rethinkdb.errors.ReqlDriverError), + connect_status=RethinkDBCheck.CRITICAL, + ) + + def test_metric_collection_failure(self, aggregator, instance): + # type: (AggregatorStub, Instance) -> None + class Failure(Exception): + pass + + with mock.patch(self.METRICS_COLLECTION_MOCK_TARGET, side_effect=Failure): + self.run_test( + aggregator, instance, check_context=pytest.raises(Failure), connect_status=RethinkDBCheck.CRITICAL + ) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +class TestVersionMetadata: + VERSION_MOCK_TARGET = 'datadog_checks.rethinkdb.operations.get_connected_server_raw_version' + + def run_test(self, instance, datadog_agent, metadata): + # type: (Instance, DatadogAgentStub, dict) -> None + check_id = 'test' + check = RethinkDBCheck('rethinkdb', {}, [instance]) + check.check_id = check_id + check.check(instance) + datadog_agent.assert_metadata(check_id, metadata) + + @pytest.mark.skipif(not RAW_VERSION, reason='Requires RAW_VERSION to be set') + def test_default(self, instance, datadog_agent): + # type: (Instance, DatadogAgentStub) -> None + raw_version = RAW_VERSION + version, _, build = raw_version.partition('~') + major, minor, patch = version.split('.') + metadata = { + 'version.scheme': 'semver', + 'version.major': major, + 'version.minor': minor, + 'version.patch': patch, + 'version.raw': raw_version, + } + + self.run_test(instance, datadog_agent, metadata=metadata) + + @pytest.mark.integration + @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) + def test_malformed(self, instance, 
aggregator, datadog_agent, malformed_version_string): + # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None + with mock.patch(self.VERSION_MOCK_TARGET, return_value=malformed_version_string): + self.run_test(instance, datadog_agent, metadata={}) + + @pytest.mark.integration + def test_failure(self, instance, aggregator, datadog_agent): + # type: (Instance, AggregatorStub, DatadogAgentStub) -> None + with mock.patch(self.VERSION_MOCK_TARGET, side_effect=ValueError('Oops!')): + self.run_test(instance, datadog_agent, metadata={}) diff --git a/rethinkdb/tests/test_rethinkdb.py b/rethinkdb/tests/test_rethinkdb.py deleted file mode 100644 index 202bfd89a6593..0000000000000 --- a/rethinkdb/tests/test_rethinkdb.py +++ /dev/null @@ -1,230 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import copy -from typing import Any, Iterator, List - -import mock -import pytest -import rethinkdb - -from datadog_checks.base.stubs.aggregator import AggregatorStub -from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub -from datadog_checks.rethinkdb import RethinkDBCheck -from datadog_checks.rethinkdb.document_db.types import Metric -from datadog_checks.rethinkdb.types import Instance - -from .assertions import assert_metrics -from .cluster import temporarily_disconnect_server -from .common import ( - DATABASE, - HEROES_TABLE, - HEROES_TABLE_SERVERS, - MALFORMED_VERSION_STRING_PARAMS, - RAW_VERSION, - SERVER_PORTS, - TABLE_STATUS_SERVICE_CHECKS, - TAGS, - TLS_CLIENT_CERT, - TLS_SERVER, -) -from .types import ServerName - - -def _get_connect_service_check_tags(instance): - # type: (Instance) -> List[str] - return [ - 'host:{}'.format(instance['host']), - 'port:{}'.format(instance['port']), - ] - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_check(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - check = 
RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) - - assert_metrics(aggregator) - aggregator.assert_all_metrics_covered() - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - - for service_check in TABLE_STATUS_SERVICE_CHECKS: - tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - aggregator.assert_service_check(service_check, RethinkDBCheck.OK, count=1, tags=tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_check_without_credentials_uses_admin(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - """ - Verify that when no credentials are configured, the check still runs successfully provided - the admin account doesn't have a password set. - """ - instance = instance.copy() - - # Remove any credentials so that the Python driver uses the default credentials (i.e. admin account w/o password) - # when connecting to RethinkDB. 
- # See: https://rethinkdb.com/api/python/connect/#description - instance.pop('username') - instance.pop('password') - - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) - - assert_metrics(aggregator) - aggregator.assert_all_metrics_covered() - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_check_connect_to_proxy(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - instance = instance.copy() - instance['port'] = SERVER_PORTS['proxy'] - - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) - - assert_metrics(aggregator) - aggregator.assert_all_metrics_covered() - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_check_connect_to_server_with_tls(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - server = TLS_SERVER - - instance = instance.copy() - instance['port'] = SERVER_PORTS[server] - instance['tls_ca_cert'] = TLS_CLIENT_CERT - - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check(instance) - - assert_metrics(aggregator) - aggregator.assert_all_metrics_covered() - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - - -@pytest.mark.integration -@pytest.mark.parametrize('server_with_data', list(HEROES_TABLE_SERVERS)) -@pytest.mark.usefixtures('dd_environment') -def test_check_with_disconnected_server(aggregator, instance, server_with_data): - # type: (AggregatorStub, Instance, ServerName) -> None - """ - 
Verify that the check still runs to completion and sends appropriate service checks if one of the - servers that holds data is disconnected. - """ - check = RethinkDBCheck('rethinkdb', {}, [instance]) - - with temporarily_disconnect_server(server_with_data): - check.check(instance) - - disconnected_servers = {server_with_data} - - assert_metrics(aggregator, disconnected_servers=disconnected_servers) - aggregator.assert_all_metrics_covered() - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.OK, count=1, tags=service_check_tags) - - table_status_tags = TAGS + ['table:{}'.format(HEROES_TABLE), 'database:{}'.format(DATABASE)] - - for service_check in TABLE_STATUS_SERVICE_CHECKS: - status = RethinkDBCheck.OK if service_check.endswith('ready_for_outdated_reads') else RethinkDBCheck.WARNING - aggregator.assert_service_check(service_check, status, count=1, tags=table_status_tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_cannot_connect_unknown_host(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - instance = copy.deepcopy(instance) - instance['host'] = 'doesnotexist' - - check = RethinkDBCheck('rethinkdb', {}, [instance]) - - with pytest.raises(rethinkdb.errors.ReqlDriverError): - check.check(instance) - - tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_connected_but_check_failed_unexpectedly(aggregator, instance): - # type: (AggregatorStub, Instance) -> None - class Failure(Exception): - pass - - class MockRethinkDBCheck(RethinkDBCheck): - def collect_metrics(self, conn): - # type: (Any) -> Iterator[Metric] - yield {'type': 'gauge', 'name': 'rethinkdb.some.metric', 'value': 42, 'tags': []} - raise Failure - - check = 
MockRethinkDBCheck('rethinkdb', {}, [instance]) - - with pytest.raises(Failure): - check.check(instance) - - service_check_tags = TAGS + _get_connect_service_check_tags(instance) - aggregator.assert_service_check('rethinkdb.can_connect', RethinkDBCheck.CRITICAL, count=1, tags=service_check_tags) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -class TestVersionMetadata: - VERSION_MOCK_TARGET = 'datadog_checks.rethinkdb.operations.get_connected_server_raw_version' - - def run_test(self, instance, datadog_agent, metadata): - # type: (Instance, DatadogAgentStub, dict) -> None - check_id = 'test' - check = RethinkDBCheck('rethinkdb', {}, [instance]) - check.check_id = check_id - check.check(instance) - datadog_agent.assert_metadata(check_id, metadata) - - @pytest.mark.skipif(not RAW_VERSION, reason='Requires RAW_VERSION to be set') - def test_success(self, instance, datadog_agent): - # type: (Instance, DatadogAgentStub) -> None - raw_version = RAW_VERSION - version, _, build = raw_version.partition('~') - major, minor, patch = version.split('.') - metadata = { - 'version.scheme': 'semver', - 'version.major': major, - 'version.minor': minor, - 'version.patch': patch, - 'version.raw': raw_version, - } - - self.run_test(instance, datadog_agent, metadata=metadata) - - @pytest.mark.integration - @pytest.mark.parametrize('malformed_version_string', MALFORMED_VERSION_STRING_PARAMS) - def test_malformed(self, instance, aggregator, datadog_agent, malformed_version_string): - # type: (Instance, AggregatorStub, DatadogAgentStub, str) -> None - with mock.patch(self.VERSION_MOCK_TARGET, return_value=malformed_version_string): - self.run_test(instance, datadog_agent, metadata={}) - - @pytest.mark.integration - def test_failure(self, instance, aggregator, datadog_agent): - # type: (Instance, AggregatorStub, DatadogAgentStub) -> None - with mock.patch(self.VERSION_MOCK_TARGET, side_effect=ValueError('Oops!')): - self.run_test(instance, datadog_agent, 
metadata={}) From 97db598a060cecaeaae191634231de060c20ad36 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 25 Mar 2020 10:30:49 +0100 Subject: [PATCH 139/147] Add password sanitization --- rethinkdb/datadog_checks/rethinkdb/check.py | 5 +++++ rethinkdb/setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index ae7a9413ac5a0..79ead804e0e42 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -26,7 +26,12 @@ class RethinkDBCheck(AgentCheck): def __init__(self, *args, **kwargs): # type: (*Any, **Any) -> None super(RethinkDBCheck, self).__init__(*args, **kwargs) + self.config = Config(cast(Instance, self.instance)) + + if self.config.password: + self.register_secret(self.config.password) + self.queries = ( queries.config_summary, queries.cluster_statistics, diff --git a/rethinkdb/setup.py b/rethinkdb/setup.py index f95b97d83b18e..fe36d1ac3761a 100644 --- a/rethinkdb/setup.py +++ b/rethinkdb/setup.py @@ -18,7 +18,7 @@ long_description = f.read() -CHECKS_BASE_REQ = 'datadog-checks-base>=11.0.0' +CHECKS_BASE_REQ = 'datadog-checks-base>=11.2.0' setup( From 3b65f86df4a961a31f83642777a63f87bee9b1f7 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Wed, 25 Mar 2020 17:05:33 +0100 Subject: [PATCH 140/147] Address feedback * Modifiers -> Transformers * Add docs on `DocumentQuery` parameters and usage. * Add and test an example script for `DocumentQuery`. * Drop hard requirement for a logger on `query.run()`. * Drop trace logs (too noisy to be debug logs). 
--- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- .../rethinkdb/document_db/_example.py | 44 +++++++ .../rethinkdb/document_db/query.py | 121 ++++++++++-------- .../rethinkdb/document_db/transformers.py | 29 +++++ .../rethinkdb/document_db/types.py | 10 +- .../rethinkdb/document_db/utils.py | 11 +- rethinkdb/datadog_checks/rethinkdb/queries.py | 30 ++--- .../tests/unit/document_db/test_query.py | 25 ++-- .../unit/document_db/test_transformers.py | 11 ++ .../tests/unit/document_db/test_utils.py | 89 +++++++------ .../tests/unit/test_system_jobs_metrics.py | 13 +- 11 files changed, 235 insertions(+), 150 deletions(-) create mode 100644 rethinkdb/datadog_checks/rethinkdb/document_db/_example.py create mode 100644 rethinkdb/datadog_checks/rethinkdb/document_db/transformers.py create mode 100644 rethinkdb/tests/unit/document_db/test_transformers.py diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index 79ead804e0e42..ba19267e56c5c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -78,7 +78,7 @@ def collect_metrics(self, conn): Collect metrics from the RethinkDB cluster we are connected to. """ for query in self.queries: - for metric in query.run(conn, config=self.config, logger=self.log): + for metric in query.run(conn=conn, config=self.config, log_debug=self.log.debug): yield metric def submit_metric(self, metric): diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py b/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py new file mode 100644 index 0000000000000..6d446a609b01f --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py @@ -0,0 +1,44 @@ +from datadog_checks.rethinkdb.document_db import DocumentQuery, transformers + + +def make_fake_query(): + # type: () -> list + # These documents would typically come from calls to a database client library. 
+ document = { + 'memory': {'total_mb': 1000}, + 'disk_used_bytes_mb': 2500, + 'cpus': [{'usage': 50}, {'usage': 10}], + 'threads_per_process': {'server': 12, 'worker': 4}, + } + + # You may construct these tags from data retrieved from the database. + tags = ['db:main'] + + # Return any number of document/tags pairs. + # Note: yield syntax is supported too, eg `yield (document, tags)`. + return [(document, tags)] + + +query = DocumentQuery( + source=make_fake_query, + name='system_usage', + prefix='system', + metrics=[ + {'type': 'gauge', 'path': 'memory.total_mb'}, + {'type': 'gauge', 'path': 'disk_used_bytes_mb'}, + {'type': 'gauge', 'path': 'cpus', 'name': 'cpus.total', 'transformer': transformers.length}, + ], + enumerations=[{'path': 'cpus', 'index_tag': 'cpu', 'metrics': [{'type': 'gauge', 'path': 'usage'}]}], + groups=[{'type': 'gauge', 'path': 'threads_per_process', 'key_tag': 'process'}], +) + + +assert list(query.run()) == [ + {'type': 'gauge', 'name': 'system.memory.total_mb', 'value': 1000, 'tags': ['db:main']}, + {'type': 'gauge', 'name': 'system.disk_used_bytes_mb', 'value': 2500, 'tags': ['db:main']}, + {'type': 'gauge', 'name': 'system.cpus.total', 'value': 2, 'tags': ['db:main']}, + {'type': 'gauge', 'name': 'system.cpus.usage', 'value': 50, 'tags': ['db:main', 'cpu:0']}, + {'type': 'gauge', 'name': 'system.cpus.usage', 'value': 10, 'tags': ['db:main', 'cpu:1']}, + {'type': 'gauge', 'name': 'system.threads_per_process', 'value': 12, 'tags': ['db:main', 'process:server']}, + {'type': 'gauge', 'name': 'system.threads_per_process', 'value': 4, 'tags': ['db:main', 'process:worker']}, +] diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index c0aff2034529e..aad87b4d0127c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -1,23 +1,59 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Callable, Iterator, List, Mapping, Sequence, Tuple +from typing import Any, Callable, Iterable, Iterator, List, Mapping, Sequence, Tuple -from datadog_checks.base import AgentCheck -from datadog_checks.base.log import CheckLoggingAdapter - -from .types import Enumeration, Group, Metric, MetricSpec, Modifier -from .utils import dotted_join, lookup_dotted, to_time_elapsed +from .types import Enumeration, Group, Metric, MetricSpec +from .utils import dotted_join, lookup_dotted, no_op class DocumentQuery(object): """ - A helper for retrieving metrics from document-oriented ("JSON") databases. + A generic helper for retrieving metrics from document-oriented ("JSON") databases. + + Example + ------- + See: + https://github.com/DataDog/integrations-core/blob/master/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py + + Parameters + ---------- + source: + A callable that returns an iterable of `(document, tags)` pairs. + * Should accept the same `**kwargs` as will be passed to `.run()`. + * `tags` will be applied to all metrics built from the corresponding `document`. + * All documents should have the same structure. + name: + A verbose name for the query, for logging purposes. Example: `'memory_usage'`. + prefix: + Will be prepended to all metric names. Example: `'my_integration.memory'`. + metrics: + Each item in this list corresponds to a metric that will be submitted to Datadog. + * `type` (required): metric type. Example: `'gauge'`. + * `path` (required): dotted path to the value of interest in a `document`. Example: `'memory_usage.memory_mb'`. + * `name`: an explicit metric name. If not set, the `path` is used. Example: `'memory_consumption'`. + * `transformer`: a callable applied to metric values before submission. See `document_db.transformers` for + built-in transformers. 
+    enumerations: + Each item in this list corresponds to a set of metrics built from items in a JSON array. + The name comes from the `enumerate()` Python built-in, as enumerations allow tagging by index in the array. + * `path` (required): dotted path to the array of interest in a `document`. + * `index_tag` (required): indexes will be attached as this tag. Example: `'cpu_position'`. + * `metrics` (required): a list of metrics -- same structure as the `metrics` parameter. One copy will be + submitted for each item in the array. The enumeration `path` is automatically prepended to each metric `path`. + groups: + Each item in this list corresponds to a metric built from a JSON object (mapping) that represents aggregated + results, such as those returned by a GROUP BY operation. One copy of the metric will be submitted for each + key/value item in the mapping. + Keys: + * `path` (required): dotted path to the mapping of interest in a `document`. + * `key_tag` (required): keys of the mapping will be submitted as this tag. Example: `'country'`. + * `type` (required): metric type of values in the mapping. Example: `'gauge'`. 
""" def __init__( self, - source, # type: Callable[..., Iterator[Tuple[Any, List[str]]]] + source, # type: Callable[..., Iterable[Tuple[Any, List[str]]]] name, # type: str prefix, # type: str metrics=None, # type: List[MetricSpec] @@ -31,16 +67,14 @@ def __init__( self.enumerations = [] if enumerations is None else enumerations self.groups = [] if groups is None else groups - def _make_metric_from_spec(self, document, spec, tags, logger): - # type: (Any, MetricSpec, List[str], CheckLoggingAdapter) -> Metric - logger.trace('make_metric_from_spec %r', spec) - + def _make_metric_from_spec(self, document, spec, tags): + # type: (Any, MetricSpec, List[str]) -> Metric path = spec['path'] name = spec.get('name', path) value = lookup_dotted(document, path=path) - if 'modifier' in spec and spec['modifier'] is not None: - value = self._modify(value, modifier=spec['modifier'], logger=logger) + if 'transformer' in spec and spec['transformer'] is not None: + value = spec['transformer'](value) if not isinstance(value, (int, float)): # pragma: no cover raise RuntimeError('Expected float or int, got {!r} of type {}', value, type(value)) @@ -49,70 +83,45 @@ def _make_metric_from_spec(self, document, spec, tags, logger): return {'type': spec['type'], 'name': name, 'value': value, 'tags': tags} - def _make_metrics_from_enumeration(self, document, enumeration, tags, logger): - # type: (Any, Enumeration, List[str], CheckLoggingAdapter) -> Iterator[Metric] - logger.trace('make_metrics_from_enumeration enumeration=%r', enumeration) - + def _make_metrics_from_enumeration(self, document, enumeration, tags): + # type: (Any, Enumeration, List[str]) -> Iterator[Metric] values = lookup_dotted(document, path=enumeration['path']) # type: Sequence for index, value in enumerate(values): item_tags = tags + ['{}:{}'.format(enumeration['index_tag'], index)] for spec in enumeration['metrics']: - spec = { - 'type': spec['type'], - 'name': dotted_join((enumeration['path'], spec['path'])), - 'path': 
spec['path'], - 'modifier': spec.get('modifier'), - } - yield self._make_metric_from_spec(value, spec, tags=item_tags, logger=logger) - - def _make_metrics_from_group(self, document, group, tags, logger): - # type: (Any, Group, List[str], CheckLoggingAdapter) -> Iterator[Metric] - logger.trace('make_metrics_from_group group=%r', group) + spec = spec.copy() + spec['name'] = dotted_join((enumeration['path'], spec['path'])) + yield self._make_metric_from_spec(value, spec, tags=item_tags) + def _make_metrics_from_group(self, document, group, tags): + # type: (Any, Group, List[str]) -> Iterator[Metric] mapping = lookup_dotted(document, path=group['path']) # type: Mapping for key in mapping: item_tags = tags + ['{}:{}'.format(group['key_tag'], key)] spec = { - 'type': group['value_metric_type'], + 'type': group['type'], 'name': group['path'], 'path': key, } # type: MetricSpec - yield self._make_metric_from_spec(mapping, spec, tags=item_tags, logger=logger) - - def _modify(self, value, modifier, logger): - # type: (Any, Modifier, CheckLoggingAdapter) -> float - logger.trace('modify value=%r modifier=%r', value, modifier) - - if modifier == 'total': - return len(value) - - if modifier == 'ok_warning': - return AgentCheck.OK if value else AgentCheck.WARNING - - if modifier == 'time_elapsed': - return to_time_elapsed(value) - - raise RuntimeError('Unknown modifier: {!r}'.format(modifier)) # pragma: no cover - - def run(self, *args, **kwargs): - # type: (*Any, **Any) -> Iterator[Metric] - logger = kwargs.pop('logger') # type: CheckLoggingAdapter + yield self._make_metric_from_spec(mapping, spec, tags=item_tags) - logger.debug('query_%s', self.name) + def run(self, log_debug=no_op, **kwargs): + # type: (Callable, **Any) -> Iterator[Metric] + log_debug('document_query %s', self.name) - for document, tags in self.source(*args, **kwargs): - logger.debug('%s %r', self.name, document) + for document, tags in self.source(**kwargs): + log_debug('%s %r', self.name, document) for spec 
in self.metrics: - yield self._make_metric_from_spec(document, spec, tags=tags, logger=logger) + yield self._make_metric_from_spec(document, spec, tags=tags) for enumeration in self.enumerations: - for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags, logger=logger): + for metric in self._make_metrics_from_enumeration(document, enumeration, tags=tags): yield metric for group in self.groups: - for metric in self._make_metrics_from_group(document, group, tags=tags, logger=logger): + for metric in self._make_metrics_from_group(document, group, tags=tags): yield metric diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/transformers.py b/rethinkdb/datadog_checks/rethinkdb/document_db/transformers.py new file mode 100644 index 0000000000000..791485dbe17fd --- /dev/null +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/transformers.py @@ -0,0 +1,29 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +Built-in value transformers. 
+""" +import datetime as dt +from typing import Any, Sequence + +from datadog_checks.base import AgentCheck +from datadog_checks.base.types import ServiceCheckStatus +from datadog_checks.base.utils.db.utils import normalize_datetime + + +def length(value): + # type: (Sequence) -> int + return len(value) + + +def to_time_elapsed(datetime): + # type: (dt.datetime) -> float + datetime = normalize_datetime(datetime) + elapsed = dt.datetime.now(datetime.tzinfo) - datetime + return elapsed.total_seconds() + + +def ok_warning(value): + # type: (Any) -> ServiceCheckStatus + return AgentCheck.OK if value else AgentCheck.WARNING diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/types.py b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py index e48aa74321de1..f617911a62bdd 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/types.py @@ -1,26 +1,22 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Callable, List, Literal, Optional, Sequence, TypedDict, Union +from typing import Any, Callable, List, Literal, Optional, TypedDict, Union MetricType = Literal['gauge', 'count', 'monotonic_count', 'rate', 'service_check'] Metric = TypedDict('Metric', {'type': MetricType, 'name': str, 'value': float, 'tags': List[str]}) -ModifierName = Literal['total', 'ok_warning', 'time_elapsed'] -TotalModifier = TypedDict('TotalModifier', {'name': Literal['total'], 'map': Callable[[Any], Sequence]}) -Modifier = Union[ModifierName, TotalModifier] - MetricSpec = TypedDict( 'MetricSpec', { 'type': MetricType, 'path': str, # Used as the default name. 'name': str, # An explicit name for the metric. 
- 'modifier': Optional[Modifier], + 'transformer': Optional[Callable[[Any], Union[int, float]]], }, total=False, ) Enumeration = TypedDict('Enumeration', {'path': str, 'index_tag': str, 'metrics': List[MetricSpec]}) -Group = TypedDict('Group', {'path': str, 'key_tag': str, 'value_metric_type': MetricType}) +Group = TypedDict('Group', {'type': MetricType, 'path': str, 'key_tag': str}) diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index 11e5b34fd681d..a20bd38d86641 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -4,11 +4,8 @@ """ Miscellaneous utilities. """ -import datetime as dt from typing import Any, Mapping, Sequence -from datadog_checks.base.utils.db.utils import normalize_datetime - def lookup_dotted(dct, path): # type: (Mapping, str) -> Any @@ -44,8 +41,6 @@ def dotted_join(values): return '.'.join(filter(None, values)) -def to_time_elapsed(datetime): - # type: (dt.datetime) -> float - datetime = normalize_datetime(datetime) - elapsed = dt.datetime.now(datetime.tzinfo) - datetime - return elapsed.total_seconds() +def no_op(*args, **kwargs): + # type: (*Any, **Any) -> None + pass diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index c7675a79e74ec..fd0323fd70263 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -2,7 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from . import operations -from .document_db import DocumentQuery +from .document_db import DocumentQuery, transformers # System configuration. 
@@ -13,8 +13,8 @@ prefix='rethinkdb.config', metrics=[{'type': 'gauge', 'path': 'servers'}, {'type': 'gauge', 'path': 'databases'}], groups=[ - {'path': 'tables_per_database', 'key_tag': 'database', 'value_metric_type': 'gauge'}, - {'path': 'secondary_indexes_per_table', 'key_tag': 'table', 'value_metric_type': 'gauge'}, + {'type': 'gauge', 'path': 'tables_per_database', 'key_tag': 'database'}, + {'type': 'gauge', 'path': 'secondary_indexes_per_table', 'key_tag': 'table'}, ], ) @@ -92,19 +92,19 @@ name='table_status', prefix='rethinkdb.table_status', metrics=[ - {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.ready_for_reads', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.ready_for_writes', 'modifier': 'ok_warning'}, - {'type': 'service_check', 'path': 'status.all_replicas_ready', 'modifier': 'ok_warning'}, - {'type': 'gauge', 'path': 'shards', 'modifier': 'total'}, + {'type': 'service_check', 'path': 'status.ready_for_outdated_reads', 'transformer': transformers.ok_warning}, + {'type': 'service_check', 'path': 'status.ready_for_reads', 'transformer': transformers.ok_warning}, + {'type': 'service_check', 'path': 'status.ready_for_writes', 'transformer': transformers.ok_warning}, + {'type': 'service_check', 'path': 'status.all_replicas_ready', 'transformer': transformers.ok_warning}, + {'type': 'gauge', 'path': 'shards', 'transformer': transformers.length}, ], enumerations=[ { 'path': 'shards', 'index_tag': 'shard', 'metrics': [ - {'type': 'gauge', 'path': 'replicas', 'modifier': 'total'}, - {'type': 'gauge', 'path': 'primary_replicas', 'modifier': 'total'}, + {'type': 'gauge', 'path': 'replicas', 'transformer': transformers.length}, + {'type': 'gauge', 'path': 'primary_replicas', 'transformer': transformers.length}, ], } ], @@ -116,9 +116,9 @@ name='server_status', prefix='rethinkdb.server_status', metrics=[ - {'type': 'gauge', 'path': 
'network.time_connected', 'modifier': 'time_elapsed'}, - {'type': 'gauge', 'path': 'network.connected_to', 'modifier': 'total'}, - {'type': 'gauge', 'path': 'process.time_started', 'modifier': 'time_elapsed'}, + {'type': 'gauge', 'path': 'network.time_connected', 'transformer': transformers.to_time_elapsed}, + {'type': 'gauge', 'path': 'network.connected_to', 'transformer': transformers.length}, + {'type': 'gauge', 'path': 'process.time_started', 'transformer': transformers.to_time_elapsed}, ], ) @@ -142,7 +142,7 @@ name='current_issues', prefix='rethinkdb.current_issues', groups=[ - {'path': 'issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, - {'path': 'critical_issues', 'key_tag': 'issue_type', 'value_metric_type': 'gauge'}, + {'type': 'gauge', 'path': 'issues', 'key_tag': 'issue_type'}, + {'type': 'gauge', 'path': 'critical_issues', 'key_tag': 'issue_type'}, ], ) diff --git a/rethinkdb/tests/unit/document_db/test_query.py b/rethinkdb/tests/unit/document_db/test_query.py index 1826d4afdccd6..29fc40197426c 100644 --- a/rethinkdb/tests/unit/document_db/test_query.py +++ b/rethinkdb/tests/unit/document_db/test_query.py @@ -1,22 +1,17 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import logging from collections import OrderedDict from typing import Iterator, List, Tuple import pytest +from six import PY3 -from datadog_checks.rethinkdb.document_db.query import DocumentQuery +from datadog_checks.rethinkdb.document_db import DocumentQuery, transformers pytestmark = pytest.mark.unit -class MockLogger(logging.Logger): - def trace(self, *args, **kwargs): # type: ignore - pass # Called by queries. 
- - def test_document_query(): # type: () -> None """ @@ -59,18 +54,18 @@ def get_data_from_db(conn): metrics=[ {'type': 'gauge', 'path': 'sales.sales_per_day'}, {'type': 'monotonic_count', 'path': 'sales.sales_total'}, - {'type': 'gauge', 'path': 'locations', 'modifier': 'total'}, + {'type': 'gauge', 'path': 'locations', 'transformer': transformers.length}, ], # Metrics for each object in an array, tagged by the index in the array. enumerations=[ {'path': 'locations', 'index_tag': 'location_index', 'metrics': [{'type': 'gauge', 'path': 'stock'}]} ], # Metrics from the result of a groupby() operation (aggregation). - groups=[{'path': 'total_sales_per_location', 'key_tag': 'location', 'value_metric_type': 'gauge'}], + groups=[{'type': 'gauge', 'path': 'total_sales_per_location', 'key_tag': 'location'}], ) conn = {'server': 'example'} - metrics = list(query.run(conn, logger=MockLogger('test'))) + metrics = list(query.run(conn=conn)) assert metrics == [ # -- T-Shirt -- @@ -153,5 +148,13 @@ def get_data(): yield {}, [] query = DocumentQuery(source=get_data, name='test', prefix='dogs') - metrics = list(query.run(logger=MockLogger('test'))) + metrics = list(query.run()) assert metrics == [] + + +@pytest.mark.skipif( + not PY3, reason='Assertions fail randomly due to Python 2 dicts not being ordered (example should stay simple)' +) +def test_example(): + # type: () -> None + import datadog_checks.rethinkdb.document_db._example # noqa: F401 diff --git a/rethinkdb/tests/unit/document_db/test_transformers.py b/rethinkdb/tests/unit/document_db/test_transformers.py new file mode 100644 index 0000000000000..5b39de267e90e --- /dev/null +++ b/rethinkdb/tests/unit/document_db/test_transformers.py @@ -0,0 +1,11 @@ +import datetime as dt + +import pytz + +from datadog_checks.rethinkdb.document_db import transformers + + +def test_to_time_elapsed(): + # type: () -> None + one_day_seconds = 3600 * 24 + assert transformers.to_time_elapsed(dt.datetime.now(pytz.utc) - dt.timedelta(days=1)) >= 
one_day_seconds diff --git a/rethinkdb/tests/unit/document_db/test_utils.py b/rethinkdb/tests/unit/document_db/test_utils.py index 7acc623901ff2..929e86fa07570 100644 --- a/rethinkdb/tests/unit/document_db/test_utils.py +++ b/rethinkdb/tests/unit/document_db/test_utils.py @@ -1,52 +1,59 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import datetime as dt +from typing import Any import pytest -import pytz -from datadog_checks.rethinkdb.document_db.utils import dotted_join, lookup_dotted, to_time_elapsed +from datadog_checks.rethinkdb.document_db.utils import dotted_join, lookup_dotted pytestmark = pytest.mark.unit -def test_dotted_join(): - # type: () -> None - assert dotted_join(()) == '' - assert dotted_join(('foo',)) == 'foo' - assert dotted_join(('foo', 'bar')) == 'foo.bar' - assert dotted_join(('foo', 'bar', 'baz')) == 'foo.bar.baz' - assert dotted_join(('foo', 'bar', '')) == 'foo.bar' - assert dotted_join(('foo', '', 'baz')) == 'foo.baz' - assert dotted_join(('', 'bar', 'baz')) == 'bar.baz' - - -def test_to_time_elapsed(): - # type: () -> None - one_day_seconds = 3600 * 24 - to_time_elapsed(dt.datetime.now(pytz.utc) - dt.timedelta(days=1)) == one_day_seconds - - -def test_lookup_dotted(): - # type: () -> None - assert lookup_dotted({}, '') == {} - assert lookup_dotted({'tables': 10}, 'tables') == 10 - assert lookup_dotted({'tables': {'reads_per_sec': 500}}, 'tables.reads_per_sec') == 500 - assert lookup_dotted({'tables': {'all': ['heroes']}}, 'tables.all') == ['heroes'] - - with pytest.raises(ValueError): - lookup_dotted([], 'test') # type: ignore - - with pytest.raises(ValueError): - lookup_dotted(True, 'test') # type: ignore - - with pytest.raises(ValueError): - lookup_dotted({'tables': 10}, 'tables.total') - - with pytest.raises(ValueError): - lookup_dotted({'tables': {'total': 10}}, 'tables.unknown') - +@pytest.mark.parametrize( + 'value, output', + [ + ((), ''), + (('foo',), 'foo'), + 
(('foo', 'bar'), 'foo.bar'), + (('foo', 'bar', 'baz'), 'foo.bar.baz'), + (('foo', 'bar', ''), 'foo.bar'), + (('foo', '', 'baz'), 'foo.baz'), + (('', 'bar', 'baz'), 'bar.baz'), + ], +) +def test_dotted_join(value, output): + # type: (tuple, str) -> None + assert dotted_join(value) == output + + +@pytest.mark.parametrize( + 'dct, path, output', + [ + ({}, '', {}), + ({'tables': 10}, 'tables', 10), + ({'tables': {'reads_per_sec': 500}}, 'tables.reads_per_sec', 500), + ({'tables': {'all': ['heroes']}}, 'tables.all', ['heroes']), + ({}, '', {}), + ], +) +def test_lookup_dotted(dct, path, output): + # type: (dict, str, Any) -> None + assert lookup_dotted(dct, path) == output + + +@pytest.mark.parametrize( + 'value, path', + [ + pytest.param([], 'test', id='root-not-a-mapping'), + pytest.param(True, 'test', id='root-not-a-mapping'), + pytest.param({'tables': 10}, 'tables.total', id='node-not-a-mapping'), + pytest.param({}, 'unknown', id='key-does-not-exist'), + pytest.param({'tables': {'total': 10}}, 'tables.unknown', id='key-does-not-exist'), + pytest.param({'tables.total': 10}, 'tables.total', id='dotted-key-not-supported'), + ], +) +def test_lookup_dotted_invalid(value, path): + # type: (Any, str) -> None with pytest.raises(ValueError): - # Dotted keys are not supported. - lookup_dotted({'tables.total': 10}, 'tables.total') + lookup_dotted(value, path) diff --git a/rethinkdb/tests/unit/test_system_jobs_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py index 1c80c336d8506..6db4ccc47c8ea 100644 --- a/rethinkdb/tests/unit/test_system_jobs_metrics.py +++ b/rethinkdb/tests/unit/test_system_jobs_metrics.py @@ -1,8 +1,6 @@ # (C) Datadog, Inc. 2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import logging - import mock import pytest @@ -13,11 +11,6 @@ pytestmark = pytest.mark.unit -class MockLogger(logging.Logger): - def trace(self, *args, **kwargs): # type: ignore - pass # Called by queries. 
- - def test_jobs_metrics(): # type: () -> None """ @@ -92,9 +85,7 @@ def test_jobs_metrics(): conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = list( - queries.system_jobs.run(conn, config=Config({'min_collection_interval': 5}), logger=MockLogger('test')) - ) + metrics = list(queries.system_jobs.run(conn=conn, config=Config({'min_collection_interval': 5}))) assert metrics == [ # short request-response `query` job ignored @@ -145,4 +136,4 @@ def test_unknown_job(): with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = [mock_unknown_job_row] with pytest.raises(RuntimeError): - list(queries.system_jobs.run(conn, config=Config(), logger=MockLogger('test'))) + list(queries.system_jobs.run(conn=conn, config=Config())) From b472756e69f5fa6f1d43a1fa9223f207e29055e3 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 11:48:02 +0100 Subject: [PATCH 141/147] Refactor passing of logger to queries --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- .../rethinkdb/document_db/_example.py | 3 +++ .../rethinkdb/document_db/query.py | 17 +++++++++++------ .../rethinkdb/document_db/utils.py | 4 ++++ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index ba19267e56c5c..c4f1fcd2eef1a 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -78,7 +78,7 @@ def collect_metrics(self, conn): Collect metrics from the RethinkDB cluster we are connected to. 
""" for query in self.queries: - for metric in query.run(conn=conn, config=self.config, log_debug=self.log.debug): + for metric in query.run(check=self, conn=conn, config=self.config): yield metric def submit_metric(self, metric): diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py b/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py index 6d446a609b01f..cce4088b981e7 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/_example.py @@ -1,3 +1,6 @@ +# (C) Datadog, Inc. 2020-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) from datadog_checks.rethinkdb.document_db import DocumentQuery, transformers diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index aad87b4d0127c..5c82ae3e58ffe 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -1,10 +1,13 @@ # (C) Datadog, Inc. 
2020-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Any, Callable, Iterable, Iterator, List, Mapping, Sequence, Tuple +import logging +from typing import Any, Callable, Iterable, Iterator, List, Mapping, Sequence, Tuple, Union + +from datadog_checks.base import AgentCheck from .types import Enumeration, Group, Metric, MetricSpec -from .utils import dotted_join, lookup_dotted, no_op +from .utils import dotted_join, lookup_dotted, null_logger class DocumentQuery(object): @@ -108,12 +111,14 @@ def _make_metrics_from_group(self, document, group, tags): } # type: MetricSpec yield self._make_metric_from_spec(mapping, spec, tags=item_tags) - def run(self, log_debug=no_op, **kwargs): - # type: (Callable, **Any) -> Iterator[Metric] - log_debug('document_query %s', self.name) + def run(self, check=None, **kwargs): + # type: (AgentCheck, **Any) -> Iterator[Metric] + logger = check.log if check is not None else null_logger # type: Union[logging.Logger, logging.LoggerAdapter] + + logger.debug('document_query %s', self.name) for document, tags in self.source(**kwargs): - log_debug('%s %r', self.name, document) + logger.debug('%s %r', self.name, document) for spec in self.metrics: yield self._make_metric_from_spec(document, spec, tags=tags) diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py index a20bd38d86641..0c35c4d7b069b 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/utils.py @@ -4,8 +4,12 @@ """ Miscellaneous utilities. 
""" +import logging from typing import Any, Mapping, Sequence +null_logger = logging.getLogger('null') +null_logger.addHandler(logging.NullHandler()) + def lookup_dotted(dct, path): # type: (Mapping, str) -> Any From f03beb0714b3483412d602c1f13f0ebbf37a2195 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 11:57:07 +0100 Subject: [PATCH 142/147] Drop query duration metric --- rethinkdb/datadog_checks/rethinkdb/config.py | 11 ----------- rethinkdb/datadog_checks/rethinkdb/operations.py | 9 ++++----- rethinkdb/datadog_checks/rethinkdb/types.py | 1 - rethinkdb/tests/unit/test_config.py | 9 --------- rethinkdb/tests/unit/test_system_jobs_metrics.py | 10 ++-------- 5 files changed, 6 insertions(+), 34 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/config.py b/rethinkdb/datadog_checks/rethinkdb/config.py index 8442944e85df1..e2b51ab160716 100644 --- a/rethinkdb/datadog_checks/rethinkdb/config.py +++ b/rethinkdb/datadog_checks/rethinkdb/config.py @@ -26,7 +26,6 @@ def __init__(self, instance=None): password = instance.get('password') tls_ca_cert = instance.get('tls_ca_cert') tags = instance.get('tags', []) - min_collection_interval = instance.get('min_collection_interval', 15) if not isinstance(host, str): raise ConfigurationError('host {!r} must be a string (got {!r})'.format(host, type(host))) @@ -42,15 +41,6 @@ def __init__(self, instance=None): if not isinstance(tags, list): raise ConfigurationError('tags {!r} must be a list (got {!r})'.format(tags, type(tags))) - try: - min_collection_interval = float(min_collection_interval) - except (ValueError, TypeError): - raise ConfigurationError( - 'min_collection_interval {!r} must be convertible to a number (got {!r})'.format( - min_collection_interval, type(min_collection_interval) - ) - ) - self.host = host # type: str self.port = port # type: int self.user = user # type: Optional[str] @@ -58,4 +48,3 @@ def __init__(self, instance=None): self.tls_ca_cert = tls_ca_cert # type: Optional[str] 
self.tags = tags # type: List[str] self.service_check_tags = ('host:{}'.format(self.host), 'port:{}'.format(self.port)) + tuple(self.tags) - self.min_collection_interval = min_collection_interval # type: float diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index d92898814c788..c8c218d759a59 100644 --- a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -244,11 +244,10 @@ def get_system_jobs(conn, config, **kwargs): # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema if job['type'] == 'query': - # NOTE: we can only consistently collect metrics about queries that span more than an Agent collection - # interval. (There will be many short-lived queries within two checks that we can't capture.) - # Here, this means only changefeed queries and abnormally long request-response queries will pass through. - if job['duration_sec'] < config.min_collection_interval: - continue + # A query job only exists while the query is running, and its `duration` is unstable (it changes depending + # on when the check is executed), so it doesn't make sense to submit metrics from these documents. + # So let's skip them. (Query duration information should come from a persistent source, eg slow logs.) + continue elif job['type'] == 'disk_compaction': # Ongoing task on each server. Duration is `null` and `info` is empty, so nothing interesting there. 
continue diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index aef6b6080ab73..6af0014a9e1ee 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -17,7 +17,6 @@ 'username': str, 'password': str, 'tls_ca_cert': str, - 'min_collection_interval': Union[int, float], 'tags': List[str], }, total=False, diff --git a/rethinkdb/tests/unit/test_config.py b/rethinkdb/tests/unit/test_config.py index fe685b513d273..f2605a42497dd 100644 --- a/rethinkdb/tests/unit/test_config.py +++ b/rethinkdb/tests/unit/test_config.py @@ -33,7 +33,6 @@ def test_config(port_28016, min_collection_interval_10): 'password': 's3kr3t', 'tls_ca_cert': '/path/to/client.cert', 'tags': ['env:testing'], - 'min_collection_interval': min_collection_interval_10, } # type: Instance config = Config(instance) @@ -42,7 +41,6 @@ def test_config(port_28016, min_collection_interval_10): assert config.user == 'datadog-agent' assert config.tls_ca_cert == '/path/to/client.cert' assert config.tags == ['env:testing'] - assert config.min_collection_interval == 10 @pytest.mark.parametrize('value', [42, True, object()]) @@ -57,10 +55,3 @@ def test_invalid_port(value): # type: (Any) -> None with pytest.raises(ConfigurationError): Config(instance={'port': value}) - - -@pytest.mark.parametrize('value', ['not-a-number', object()]) -def test_invalid_min_collection_interval(value): - # type: (Any) -> None - with pytest.raises(ConfigurationError): - Config(instance={'min_collection_interval': value}) diff --git a/rethinkdb/tests/unit/test_system_jobs_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py index 6db4ccc47c8ea..0fa4f28a4db28 100644 --- a/rethinkdb/tests/unit/test_system_jobs_metrics.py +++ b/rethinkdb/tests/unit/test_system_jobs_metrics.py @@ -85,16 +85,10 @@ def test_jobs_metrics(): conn = mock.Mock() with mock.patch('rethinkdb.ast.RqlQuery.run') as run: run.return_value = mock_rows - metrics = 
list(queries.system_jobs.run(conn=conn, config=Config({'min_collection_interval': 5}))) + metrics = list(queries.system_jobs.run(conn=conn, config=Config())) assert metrics == [ - # short request-response `query` job ignored - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration_sec', - 'value': 10, - 'tags': ['job_type:query', 'server:server1'], - }, + # `query` jobs ignored # `disk_compaction` job ignored { 'type': 'gauge', From b9d3e92184424c188b06db6d4a104ed2b963715a Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 12:13:04 +0100 Subject: [PATCH 143/147] Address feedback --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- rethinkdb/datadog_checks/rethinkdb/document_db/query.py | 9 ++++----- rethinkdb/datadog_checks/rethinkdb/types.py | 9 +-------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index c4f1fcd2eef1a..c6033eeefe84f 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -78,7 +78,7 @@ def collect_metrics(self, conn): Collect metrics from the RethinkDB cluster we are connected to. 
""" for query in self.queries: - for metric in query.run(check=self, conn=conn, config=self.config): + for metric in query.run(logger=self.log, conn=conn, config=self.config): yield metric def submit_metric(self, metric): diff --git a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py index 5c82ae3e58ffe..dc76b932f3580 100644 --- a/rethinkdb/datadog_checks/rethinkdb/document_db/query.py +++ b/rethinkdb/datadog_checks/rethinkdb/document_db/query.py @@ -4,8 +4,6 @@ import logging from typing import Any, Callable, Iterable, Iterator, List, Mapping, Sequence, Tuple, Union -from datadog_checks.base import AgentCheck - from .types import Enumeration, Group, Metric, MetricSpec from .utils import dotted_join, lookup_dotted, null_logger @@ -111,9 +109,10 @@ def _make_metrics_from_group(self, document, group, tags): } # type: MetricSpec yield self._make_metric_from_spec(mapping, spec, tags=item_tags) - def run(self, check=None, **kwargs): - # type: (AgentCheck, **Any) -> Iterator[Metric] - logger = check.log if check is not None else null_logger # type: Union[logging.Logger, logging.LoggerAdapter] + def run(self, logger=None, **kwargs): + # type: (Union[logging.Logger, logging.LoggerAdapter], **Any) -> Iterator[Metric] + if logger is None: + logger = null_logger # For convenience in unit tests and example scripts. 
logger.debug('document_query %s', self.name) diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index 6af0014a9e1ee..ff95d6f86c91c 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -11,14 +11,7 @@ Instance = TypedDict( 'Instance', - { - 'host': str, - 'port': int, - 'username': str, - 'password': str, - 'tls_ca_cert': str, - 'tags': List[str], - }, + {'host': str, 'port': int, 'username': str, 'password': str, 'tls_ca_cert': str, 'tags': List[str]}, total=False, ) From 7d962c772464bc9249e34debe09efc98ecae99ed Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 15:07:20 +0100 Subject: [PATCH 144/147] Drop detailed jobs metrics in favor of jobs summary --- rethinkdb/datadog_checks/rethinkdb/check.py | 2 +- .../datadog_checks/rethinkdb/operations.py | 45 +----- rethinkdb/datadog_checks/rethinkdb/queries.py | 10 +- rethinkdb/datadog_checks/rethinkdb/types.py | 61 +------- rethinkdb/metadata.csv | 2 +- rethinkdb/tests/assertions.py | 22 ++- rethinkdb/tests/common.py | 3 +- rethinkdb/tests/test_integration.py | 6 +- .../tests/unit/test_system_jobs_metrics.py | 133 ------------------ 9 files changed, 39 insertions(+), 245 deletions(-) delete mode 100644 rethinkdb/tests/unit/test_system_jobs_metrics.py diff --git a/rethinkdb/datadog_checks/rethinkdb/check.py b/rethinkdb/datadog_checks/rethinkdb/check.py index c6033eeefe84f..5656857539625 100644 --- a/rethinkdb/datadog_checks/rethinkdb/check.py +++ b/rethinkdb/datadog_checks/rethinkdb/check.py @@ -40,7 +40,7 @@ def __init__(self, *args, **kwargs): queries.replica_statistics, queries.table_statuses, queries.server_statuses, - queries.system_jobs, + queries.jobs_summary, queries.current_issues_summary, ) # type: Sequence[DocumentQuery] diff --git a/rethinkdb/datadog_checks/rethinkdb/operations.py b/rethinkdb/datadog_checks/rethinkdb/operations.py index c8c218d759a59..cccb21a2cc840 100644 --- 
a/rethinkdb/datadog_checks/rethinkdb/operations.py +++ b/rethinkdb/datadog_checks/rethinkdb/operations.py @@ -17,7 +17,7 @@ ConfigSummary, ConnectionServer, CurrentIssuesSummary, - Job, + JobSummary, JoinRow, ReplicaStats, Server, @@ -232,46 +232,13 @@ def get_server_statuses(conn, **kwargs): yield server_status, tags -def get_system_jobs(conn, config, **kwargs): - # type: (rethinkdb.net.Connection, Config, **Any) -> Iterator[Tuple[Job, List[str]]] +def get_jobs_summary(conn, config, **kwargs): + # type: (rethinkdb.net.Connection, Config, **Any) -> Iterator[Tuple[JobSummary, List[str]]] """ - Retrieve all the currently running system jobs. + Retrieve a summary of system jobs currently running in the cluster. """ - for job in system.table('jobs').run(conn): # type: Job - tags = ['job_type:{}'.format(job['type'])] - tags.extend('server:{}'.format(server) for server in job['servers']) - - # Follow job types listed on: https://rethinkdb.com/docs/system-jobs/#document-schema - - if job['type'] == 'query': - # A query job only exists while the query is running, and its `duration` is unstable (it changes depending - # on when the check is executed), so it doesn't make sense to submit metrics from these documents. - # So let's skip them. (Query duration information should come from a persistent source, eg slow logs.) - continue - elif job['type'] == 'disk_compaction': - # Ongoing task on each server. Duration is `null` and `info` is empty, so nothing interesting there. 
- continue - elif job['type'] == 'index_construction': - tags.extend( - [ - 'database:{}'.format(job['info']['db']), - 'table:{}'.format(job['info']['table']), - 'index:{}'.format(job['info']['index']), - ] - ) - elif job['type'] == 'backfill': - tags.extend( - [ - 'database:{}'.format(job['info']['db']), - 'destination_server:{}'.format(job['info']['destination_server']), - 'source_server:{}'.format(job['info']['source_server']), - 'table:{}'.format(job['info']['table']), - ] - ) - else: - raise RuntimeError('Unknown job type: {!r}'.format(job['type'])) - - yield job, tags + jobs_per_type = system.table('jobs').group('type').count().run(conn) + yield {'jobs': jobs_per_type}, [] def get_current_issues_summary(conn, **kwargs): diff --git a/rethinkdb/datadog_checks/rethinkdb/queries.py b/rethinkdb/datadog_checks/rethinkdb/queries.py index fd0323fd70263..d449cd8f40f95 100644 --- a/rethinkdb/datadog_checks/rethinkdb/queries.py +++ b/rethinkdb/datadog_checks/rethinkdb/queries.py @@ -126,11 +126,11 @@ # System jobs. # See: https://rethinkdb.com/docs/system-jobs/ -system_jobs = DocumentQuery( - source=operations.get_system_jobs, - name='system_jobs', - prefix='rethinkdb.jobs', - metrics=[{'type': 'gauge', 'path': 'duration_sec'}], +jobs_summary = DocumentQuery( + source=operations.get_jobs_summary, + name='jobs', + prefix='rethinkdb.system_jobs', + groups=[{'type': 'gauge', 'path': 'jobs', 'key_tag': 'job_type'}], ) diff --git a/rethinkdb/datadog_checks/rethinkdb/types.py b/rethinkdb/datadog_checks/rethinkdb/types.py index ff95d6f86c91c..4e75cb0b42324 100644 --- a/rethinkdb/datadog_checks/rethinkdb/types.py +++ b/rethinkdb/datadog_checks/rethinkdb/types.py @@ -5,7 +5,7 @@ Declarations used for type checking our code (e.g. manipulation of JSON documents returned by RethinkDB). """ import datetime as dt -from typing import Any, List, Literal, Mapping, Tuple, TypedDict, Union +from typing import Any, List, Literal, Mapping, Tuple, TypedDict # Check interfaces. 
@@ -136,64 +136,9 @@ ServerStatus = TypedDict('ServerStatus', {'id': str, 'name': str, 'network': ServerNetwork, 'process': ServerProcess}) -# System jobs documents. -# See: https://rethinkdb.com/docs/system-jobs/ +# System jobs. -QueryInfo = TypedDict('QueryInfo', {}) - -QueryJob = TypedDict( - 'QueryJob', - { - 'type': Literal['query'], - 'id': Tuple[Literal['query'], str], - 'duration_sec': float, - 'info': QueryInfo, - 'servers': List[str], - }, -) - -DiskCompactionInfo = TypedDict('DiskCompactionInfo', {}) - -DiskCompactionJob = TypedDict( - 'DiskCompactionJob', - { - 'type': Literal['disk_compaction'], - 'id': Tuple[Literal['disk_compaction'], str], - 'duration_sec': None, - 'info': DiskCompactionInfo, - 'servers': List[str], - }, -) - -IndexConstructionInfo = TypedDict('IndexConstructionInfo', {'db': str, 'table': str, 'index': str, 'progress': int}) - -IndexConstructionJob = TypedDict( - 'IndexConstructionJob', - { - 'type': Literal['index_construction'], - 'id': Tuple[Literal['index_construction'], str], - 'duration_sec': float, - 'info': IndexConstructionInfo, - 'servers': List[str], - }, -) - -BackfillInfo = TypedDict( - 'BackfillInfo', {'db': str, 'destination_server': str, 'source_server': str, 'table': str, 'progress': int} -) - -BackfillJob = TypedDict( - 'BackfillJob', - { - 'type': Literal['backfill'], - 'id': Tuple[Literal['backfill'], str], - 'duration_sec': float, - 'info': BackfillInfo, - 'servers': List[str], - }, -) - -Job = Union[IndexConstructionJob, BackfillJob] +JobSummary = TypedDict('JobSummary', {'jobs': Mapping[str, int]}) # System current issues. 
diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv index 658f19c83378d..a8ea1ca4a8f10 100644 --- a/rethinkdb/metadata.csv +++ b/rethinkdb/metadata.csv @@ -35,6 +35,6 @@ rethinkdb.table_status.shards.primary_replicas,gauge,,node,,Total number of prim rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected rethinkdb.server_status.network.connected_to,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server status network connected to rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started -rethinkdb.jobs.duration,gauge,,second,,"Duration of a currently running system job, tagged with the job_type.",0,rethinkdb,Jobs duration +rethinkdb.system_jobs.jobs,gauge,,job,,"Total number of currently running system jobs, tagged by `job_type`.",0,rethinkdb,Jobs rethinkdb.current_issues.issues,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Current issues issues rethinkdb.current_issues.critical_issues,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Current issues critical issues diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index 6255d75c54b1e..9c37426dce45f 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -18,6 +18,7 @@ HEROES_TABLE_PRIMARY_REPLICA, HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, + IS_RETHINKDB_2_3, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, @@ -50,8 +51,8 @@ def assert_service_checks(aggregator, instance, connect_status=AgentCheck.OK, di aggregator.assert_service_check(service_check, status, count=count, tags=tags) -def assert_metrics(aggregator, disconnected_servers=None): - # type: (AggregatorStub, Set[ServerName]) -> None +def 
assert_metrics(aggregator, is_proxy, disconnected_servers=None): + # type: (AggregatorStub, bool, Set[ServerName]) -> None if disconnected_servers is None: disconnected_servers = set() @@ -60,10 +61,7 @@ def assert_metrics(aggregator, disconnected_servers=None): _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) - - # NOTE: system jobs metrics are not asserted here because they are only emitted when the cluster is - # changing (eg. an index is being created, or data is being rebalanced across servers), which is hard to - # test without introducing flakiness. + _assert_jobs_metrics(aggregator, is_proxy=is_proxy) def _assert_config_metrics(aggregator, disconnected_servers): @@ -138,3 +136,15 @@ def _assert_current_issues_metrics(aggregator, disconnected_servers): aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) else: aggregator.assert_metric(metric, metric_type=typ, count=0) + + +def _assert_jobs_metrics(aggregator, is_proxy): + # type: (AggregatorStub, bool) -> None + if is_proxy and IS_RETHINKDB_2_3: + # For some reason, queries issued to retrieve metrics via a proxy server are not included + # in system jobs under RethinkDB 2.3. + return + + aggregator.assert_metric( + 'rethinkdb.system_jobs.jobs', metric_type=AggregatorStub.GAUGE, value=1, count=1, tags=TAGS + ['job_type:query'] + ) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index a190f63c037e1..cb4d7bab8bf7b 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -15,6 +15,7 @@ IMAGE = os.environ.get('RETHINKDB_IMAGE', '') RAW_VERSION = os.environ.get('RETHINKDB_RAW_VERSION', '') +IS_RETHINKDB_2_3 = RAW_VERSION.startswith('2.3.') HOST = get_docker_hostname() @@ -34,7 +35,7 @@ # Users. 
-if RAW_VERSION.startswith('2.3.'): +if IS_RETHINKDB_2_3: # In RethinkDB 2.3.x, granting permissions onto `rethinkdb` database to non-admin users is not supported. # So we must use the admin account. # See: https://github.com/rethinkdb/rethinkdb/issues/5692 diff --git a/rethinkdb/tests/test_integration.py b/rethinkdb/tests/test_integration.py index d9aae8b272439..2be221ddd6373 100644 --- a/rethinkdb/tests/test_integration.py +++ b/rethinkdb/tests/test_integration.py @@ -47,7 +47,11 @@ def run_test( check.check(instance) if connect_status == RethinkDBCheck.OK: - assert_metrics(aggregator, disconnected_servers=disconnected_servers) + assert_metrics( + aggregator, + is_proxy=instance['port'] == SERVER_PORTS['proxy'], + disconnected_servers=disconnected_servers, + ) aggregator.assert_all_metrics_covered() assert_service_checks( diff --git a/rethinkdb/tests/unit/test_system_jobs_metrics.py b/rethinkdb/tests/unit/test_system_jobs_metrics.py deleted file mode 100644 index 0fa4f28a4db28..0000000000000 --- a/rethinkdb/tests/unit/test_system_jobs_metrics.py +++ /dev/null @@ -1,133 +0,0 @@ -# (C) Datadog, Inc. 2020-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) -import mock -import pytest - -from datadog_checks.rethinkdb import queries -from datadog_checks.rethinkdb.config import Config -from datadog_checks.rethinkdb.types import BackfillJob, DiskCompactionJob, IndexConstructionJob, QueryJob - -pytestmark = pytest.mark.unit - - -def test_jobs_metrics(): - # type: () -> None - """ - Verify jobs metrics submitted by RethinkDB are processed correctly. - - We provide unit tests for these metrics because testing them in a live environment is tricky. - - For example: - * Backfill jobs can only be seen by us when large amounts of data is rebalanced between servers, e.g. - when a new server is added to the cluster, or an existing server is shut down. 
- * Index construction jobs can only be seen by us when a secondary index is added to a relatively large table. - * Query jobs can only be seen by us when an external client issues queries to the cluster. - * Etc. - """ - - mock_request_response_query_job_row = { - 'type': 'query', - 'id': ('query', 'abcd1234'), - 'duration_sec': 0.12, - 'info': {}, - 'servers': ['server0'], - } # type: QueryJob - - mock_changefeed_query_job_row = { - 'type': 'query', - 'id': ('query', 'abcd1234'), - 'duration_sec': 10, - 'info': {}, - 'servers': ['server1'], - } # type: QueryJob - - mock_disk_compaction_row = { - 'type': 'disk_compaction', - 'id': ('disk_compaction', 'zero'), - 'duration_sec': None, - 'info': {}, - 'servers': ['server0'], - } # type: DiskCompactionJob - - mock_backfill_job_row = { - # See: https://rethinkdb.com/docs/system-jobs/#backfill - 'type': 'backfill', - 'id': ('backfill', 'abcd1234'), - 'duration_sec': 0.42, - 'info': { - 'db': 'doghouse', - 'table': 'heroes', - 'destination_server': 'server2', - 'source_server': 'server0', - 'progress': 42, - }, - 'servers': ['server0', 'server2'], - } # type: BackfillJob - - mock_index_construction_job_row = { - # See: https://rethinkdb.com/docs/system-jobs/#index_construction - 'type': 'index_construction', - 'id': ('index_construction', 'abcd1234'), - 'duration_sec': 0.24, - 'info': {'db': 'doghouse', 'table': 'heroes', 'index': 'appearances_count', 'progress': 42}, - 'servers': ['server1'], - } # type: IndexConstructionJob - - mock_rows = [ - mock_request_response_query_job_row, - mock_changefeed_query_job_row, - mock_disk_compaction_row, - mock_backfill_job_row, - mock_index_construction_job_row, - ] - - conn = mock.Mock() - with mock.patch('rethinkdb.ast.RqlQuery.run') as run: - run.return_value = mock_rows - metrics = list(queries.system_jobs.run(conn=conn, config=Config())) - - assert metrics == [ - # `query` jobs ignored - # `disk_compaction` job ignored - { - 'type': 'gauge', - 'name': 
'rethinkdb.jobs.duration_sec', - 'value': 0.42, - 'tags': [ - 'job_type:backfill', - 'server:server0', - 'server:server2', - 'database:doghouse', - 'destination_server:server2', - 'source_server:server0', - 'table:heroes', - ], - }, - { - 'type': 'gauge', - 'name': 'rethinkdb.jobs.duration_sec', - 'value': 0.24, - 'tags': [ - 'job_type:index_construction', - 'server:server1', - 'database:doghouse', - 'table:heroes', - 'index:appearances_count', - ], - }, - ] - - -def test_unknown_job(): - # type: () -> None - """ - If a new job type is added, an exception should be raised so we are notified via CI failures and can add support. - """ - mock_unknown_job_row = {'type': 'an_unknown_type_that_should_be_ignored', 'duration_sec': 0.42, 'servers': []} - - conn = mock.Mock() - with mock.patch('rethinkdb.ast.RqlQuery.run') as run: - run.return_value = [mock_unknown_job_row] - with pytest.raises(RuntimeError): - list(queries.system_jobs.run(conn=conn, config=Config())) From f478cbd23acbdaf65a4e5abecbb3f3dfc76c4a01 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 15:11:38 +0100 Subject: [PATCH 145/147] Update metadata.csv --- rethinkdb/metadata.csv | 74 +++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/rethinkdb/metadata.csv b/rethinkdb/metadata.csv index a8ea1ca4a8f10..f2cb88619f452 100644 --- a/rethinkdb/metadata.csv +++ b/rethinkdb/metadata.csv @@ -1,40 +1,40 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name -rethinkdb.config.servers,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Config servers -rethinkdb.config.databases,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Config databases -rethinkdb.config.tables_per_database,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Config tables per database -rethinkdb.config.secondary_indexes_per_table,gauge,,index,,Number of secondary indexes in a given 
table.,0,rethinkdb,Config secondary indexes per table -rethinkdb.stats.cluster.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed in a cluster per second.,0,rethinkdb,Stats cluster query engine queries per sec -rethinkdb.stats.cluster.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read in a cluster per second.,0,rethinkdb,Stats cluster query engine read docs per sec -rethinkdb.stats.cluster.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written in a cluster per second.,0,rethinkdb,Stats cluster query engine written docs per sec -rethinkdb.stats.server.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed on a server per second.,0,rethinkdb,Stats server query engine queries per sec -rethinkdb.stats.server.query_engine.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Stats server query engine queries total -rethinkdb.stats.server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a server per second.,0,rethinkdb,Stats server query engine read docs per sec -rethinkdb.stats.server.query_engine.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Stats server query engine read docs total -rethinkdb.stats.server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a server per second.,0,rethinkdb,Stats server query engine written docs per sec -rethinkdb.stats.server.query_engine.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Stats server query engine written docs total -rethinkdb.stats.server.query_engine.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Stats server query engine client connections -rethinkdb.stats.server.query_engine.clients_active,gauge,,host,,Current number of clients actively connected to a 
server.,0,rethinkdb,Stats server query engine clients active -rethinkdb.stats.table.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a table per second.,0,rethinkdb,Stats table query engine read docs per sec -rethinkdb.stats.table.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a table per second.,0,rethinkdb,Stats table query engine written docs per sec -rethinkdb.stats.table_server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a replica per second.,0,rethinkdb,Stats table server query engine read docs per sec -rethinkdb.stats.table_server.query_engine.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Stats table server query engine read docs total -rethinkdb.stats.table_server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a replica per second.,0,rethinkdb,Stats table server query engine written docs per sec -rethinkdb.stats.table_server.query_engine.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Stats table server query engine written docs total -rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Stats table server storage engine cache in use bytes -rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec,gauge,,byte,second,Number of bytes read from the disk of a replica per second.,0,rethinkdb,Stats table server storage engine disk read bytes per sec -rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Stats table server storage engine disk read bytes total -rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec,gauge,,byte,second,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Stats 
table server storage engine disk written bytes per sec -rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Stats table server storage engine disk written bytes total -rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Stats table server storage engine disk space usage metadata bytes -rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Stats table server storage engine disk space usage data bytes -rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Stats table server storage engine disk space usage garbage bytes -rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Stats table server storage engine disk space usage preallocated bytes +rethinkdb.config.servers,gauge,,node,,Number of connected servers in the cluster.,0,rethinkdb,Servers +rethinkdb.config.databases,gauge,,,,Number of databases in the cluster.,0,rethinkdb,Databases +rethinkdb.config.tables_per_database,gauge,,table,,Number of tables in a given database.,0,rethinkdb,Tables per database +rethinkdb.config.secondary_indexes_per_table,gauge,,index,,Number of secondary indexes in a given table.,0,rethinkdb,Secondary indexes per table +rethinkdb.stats.cluster.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed in a cluster per second.,0,rethinkdb,Cluster queries per sec +rethinkdb.stats.cluster.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read in a cluster per second.,0,rethinkdb,Cluster read docs per sec 
+rethinkdb.stats.cluster.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written in a cluster per second.,0,rethinkdb,Cluster written docs per sec +rethinkdb.stats.server.query_engine.queries_per_sec,gauge,,query,second,Number of queries executed on a server per second.,0,rethinkdb,Server queries per sec +rethinkdb.stats.server.query_engine.queries_total,count,,query,,Total number of queries executed on a server.,0,rethinkdb,Server queries total +rethinkdb.stats.server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a server per second.,0,rethinkdb,Server read docs per sec +rethinkdb.stats.server.query_engine.read_docs_total,count,,document,,Total number of documents read from a server.,0,rethinkdb,Server read docs total +rethinkdb.stats.server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a server per second.,0,rethinkdb,Server written docs per sec +rethinkdb.stats.server.query_engine.written_docs_total,count,,document,,Total number of documents written to a server.,0,rethinkdb,Server written docs total +rethinkdb.stats.server.query_engine.client_connections,gauge,,connection,,Current number of client connections to a server.,0,rethinkdb,Server client connections +rethinkdb.stats.server.query_engine.clients_active,gauge,,host,,Current number of clients actively connected to a server.,0,rethinkdb,Server clients active +rethinkdb.stats.table.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a table per second.,0,rethinkdb,Table read docs per sec +rethinkdb.stats.table.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a table per second.,0,rethinkdb,Table written docs per sec +rethinkdb.stats.table_server.query_engine.read_docs_per_sec,gauge,,document,second,Number of documents read from a replica per second.,0,rethinkdb,Replica read docs per sec 
+rethinkdb.stats.table_server.query_engine.read_docs_total,count,,document,,Total number of documents read from a replica.,0,rethinkdb,Replica read docs total +rethinkdb.stats.table_server.query_engine.written_docs_per_sec,gauge,,document,second,Number of documents written to a replica per second.,0,rethinkdb,Replica written docs per sec +rethinkdb.stats.table_server.query_engine.written_docs_total,count,,document,,Total number of documents written to a replica.,0,rethinkdb,Replica written docs total +rethinkdb.stats.table_server.storage_engine.cache.in_use_bytes,gauge,,byte,,Current amount of memory used by the cache on a replica.,0,rethinkdb,Replica cache bytes +rethinkdb.stats.table_server.storage_engine.disk.read_bytes_per_sec,gauge,,byte,second,Number of bytes read from the disk of a replica per second.,0,rethinkdb,Replica disk read bytes per sec +rethinkdb.stats.table_server.storage_engine.disk.read_bytes_total,count,,byte,,Total number of bytes read from the disk of a replica.,0,rethinkdb,Replica disk read bytes total +rethinkdb.stats.table_server.storage_engine.disk.written_bytes_per_sec,gauge,,byte,second,Number of bytes written to the disk of a replica per second.,0,rethinkdb,Replica disk written bytes per sec +rethinkdb.stats.table_server.storage_engine.disk.written_bytes_total,count,,byte,,Total number of bytes written to the disk of a replica.,0,rethinkdb,Replica disk written bytes total +rethinkdb.stats.table_server.storage_engine.disk.space_usage.metadata_bytes,gauge,,byte,,Current disk space used by metadata on a replica.,0,rethinkdb,Replica disk metadata bytes +rethinkdb.stats.table_server.storage_engine.disk.space_usage.data_bytes,gauge,,byte,,Current disk space used by data on a replica.,0,rethinkdb,Replica disk data bytes +rethinkdb.stats.table_server.storage_engine.disk.space_usage.garbage_bytes,gauge,,byte,,Current disk space used by the garbage collector on a replica.,0,rethinkdb,Replica disk garbage bytes 
+rethinkdb.stats.table_server.storage_engine.disk.space_usage.preallocated_bytes,gauge,,byte,,Current disk space preallocated on a replica.,0,rethinkdb,Replica disk preallocated bytes rethinkdb.table_status.shards,gauge,,shard,,Total number of shards for a table.,0,rethinkdb,Table status shards -rethinkdb.table_status.shards.replicas,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table status shards replicas -rethinkdb.table_status.shards.primary_replicas,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table status shards primary replicas -rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server status network time connected -rethinkdb.server_status.network.connected_to,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server status network connected to -rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server status process time started +rethinkdb.table_status.shards.replicas,gauge,,node,,Total number of replicas for a table shard.,0,rethinkdb,Table shard replicas +rethinkdb.table_status.shards.primary_replicas,gauge,,node,,Total number of primary replicas for a table shard.,0,rethinkdb,Table shard primary replicas +rethinkdb.server_status.network.time_connected,gauge,,second,,Current total time a server has been connected to the network.,0,rethinkdb,Server time connected +rethinkdb.server_status.network.connected_to,gauge,,node,,Number of other RethinkDB servers a server is currently connected to.,0,rethinkdb,Server num connected to +rethinkdb.server_status.process.time_started,gauge,,second,,Time when the RethinkDB server process started.,0,rethinkdb,Server process time started rethinkdb.system_jobs.jobs,gauge,,job,,"Total number of currently running system jobs, tagged by `job_type`.",0,rethinkdb,Jobs 
-rethinkdb.current_issues.issues,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Current issues issues -rethinkdb.current_issues.critical_issues,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Current issues critical issues +rethinkdb.current_issues.issues,gauge,,,,Total number of current issues of a given issue_type.,0,rethinkdb,Issues +rethinkdb.current_issues.critical_issues,gauge,,,,Total number of critical current issues of a given issue_type.,0,rethinkdb,Critical issues From a146b41cd28f339fcc36d3ca11a7d794267a2a40 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Thu, 26 Mar 2020 15:34:39 +0100 Subject: [PATCH 146/147] Fix E2E --- rethinkdb/tests/assertions.py | 28 +++++++++++++++------------- rethinkdb/tests/common.py | 5 +++++ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/rethinkdb/tests/assertions.py b/rethinkdb/tests/assertions.py index 9c37426dce45f..2e1895d97c810 100644 --- a/rethinkdb/tests/assertions.py +++ b/rethinkdb/tests/assertions.py @@ -19,6 +19,7 @@ HEROES_TABLE_REPLICAS_BY_SHARD, HEROES_TABLE_SERVERS, IS_RETHINKDB_2_3, + JOBS_METRICS, REPLICA_STATISTICS_METRICS, SERVER_STATISTICS_METRICS, SERVER_STATUS_METRICS, @@ -60,8 +61,8 @@ def assert_metrics(aggregator, is_proxy, disconnected_servers=None): _assert_statistics_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_table_status_metrics(aggregator) _assert_server_status_metrics(aggregator, disconnected_servers=disconnected_servers) - _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) _assert_jobs_metrics(aggregator, is_proxy=is_proxy) + _assert_current_issues_metrics(aggregator, disconnected_servers=disconnected_servers) def _assert_config_metrics(aggregator, disconnected_servers): @@ -127,6 +128,19 @@ def _assert_server_status_metrics(aggregator, disconnected_servers): aggregator.assert_metric(metric, metric_type=typ, count=count, tags=tags) +def 
_assert_jobs_metrics(aggregator, is_proxy): + # type: (AggregatorStub, bool) -> None + for metric, typ, value, tags in JOBS_METRICS: + if 'job_type:query' in tags and is_proxy and IS_RETHINKDB_2_3: + # For some reason, queries issued to retrieve metrics via a proxy server are not included + # in system jobs under RethinkDB 2.3. + count = 0 + else: + count = 1 + + aggregator.assert_metric(metric, metric_type=typ, value=value, count=count, tags=TAGS + tags) + + def _assert_current_issues_metrics(aggregator, disconnected_servers): # type: (AggregatorStub, Set[ServerName]) -> None for metric, typ in CURRENT_ISSUES_METRICS: @@ -136,15 +150,3 @@ def _assert_current_issues_metrics(aggregator, disconnected_servers): aggregator.assert_metric(metric, metric_type=typ, count=1, tags=tags) else: aggregator.assert_metric(metric, metric_type=typ, count=0) - - -def _assert_jobs_metrics(aggregator, is_proxy): - # type: (AggregatorStub, bool) -> None - if is_proxy and IS_RETHINKDB_2_3: - # For some reason, queries issued to retrieve metrics via a proxy server are not included - # in system jobs under RethinkDB 2.3. - return - - aggregator.assert_metric( - 'rethinkdb.system_jobs.jobs', metric_type=AggregatorStub.GAUGE, value=1, count=1, tags=TAGS + ['job_type:query'] - ) diff --git a/rethinkdb/tests/common.py b/rethinkdb/tests/common.py index cb4d7bab8bf7b..48a950d72a6b2 100644 --- a/rethinkdb/tests/common.py +++ b/rethinkdb/tests/common.py @@ -168,6 +168,10 @@ ('rethinkdb.server_status.process.time_started', AggregatorStub.GAUGE), ) # type: Tuple[Tuple[str, int], ...] +JOBS_METRICS = ( + ('rethinkdb.system_jobs.jobs', AggregatorStub.GAUGE, 1, ['job_type:query'],), +) # type: Tuple[Tuple[str, int, int, List[str]], ...] 
+ CURRENT_ISSUES_METRICS = ( ('rethinkdb.current_issues.issues', AggregatorStub.GAUGE), ('rethinkdb.current_issues.critical_issues', AggregatorStub.GAUGE), @@ -184,6 +188,7 @@ + TABLE_STATUS_METRICS + TABLE_STATUS_SHARDS_METRICS + SERVER_STATUS_METRICS + + tuple((name, typ) for name, typ, _, _ in JOBS_METRICS) ) # type: Tuple[Tuple[str, int], ...] # Docker Compose configuration. From 8eee355c6fa1533c5320029efff0343fc145e3d5 Mon Sep 17 00:00:00 2001 From: Florimond Manca Date: Fri, 27 Mar 2020 14:18:40 +0100 Subject: [PATCH 147/147] Fix metric_to_check --- rethinkdb/manifest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rethinkdb/manifest.json b/rethinkdb/manifest.json index 199a7c2ed9551..312379f8f12af 100644 --- a/rethinkdb/manifest.json +++ b/rethinkdb/manifest.json @@ -4,7 +4,7 @@ "manifest_version": "1.0.0", "name": "rethinkdb", "metric_prefix": "rethinkdb.", - "metric_to_check": "rethinkdb.server.total", + "metric_to_check": "rethinkdb.config.servers", "creates_events": false, "short_description": "Collect status, performance and other metrics from a RethinkDB cluster.", "guid": "a09f3ed3-c947-413c-a9c6-0dcb641ea890",