diff --git a/checks.d/postgres.py b/checks.d/postgres.py index ec2d7be5fd..1b5e795317 100644 --- a/checks.d/postgres.py +++ b/checks.d/postgres.py @@ -74,6 +74,9 @@ class PostgreSql(AgentCheck): 'tup_inserted' : ('postgresql.rows_inserted', RATE), 'tup_updated' : ('postgresql.rows_updated', RATE), 'tup_deleted' : ('postgresql.rows_deleted', RATE), + } + + DATABASE_SIZE_METRICS = { 'pg_database_size(datname) as pg_database_size' : ('postgresql.database_size', GAUGE), } @@ -349,7 +352,7 @@ def _is_9_1_or_above(self, key, db): def _is_9_2_or_above(self, key, db): return self._is_above(key, db, [9,2,0]) - def _get_instance_metrics(self, key, db): + def _get_instance_metrics(self, key, db, database_size_metrics): """Use either COMMON_METRICS or COMMON_METRICS + NEWER_92_METRICS depending on the postgres version. Uses a dictionnary to save the result for each instance @@ -374,6 +377,10 @@ def _get_instance_metrics(self, key, db): self.instance_metrics[key] = dict(self.COMMON_METRICS, **self.NEWER_92_METRICS) else: self.instance_metrics[key] = dict(self.COMMON_METRICS) + + if database_size_metrics: + self.instance_metrics[key] = dict(self.instance_metrics[key], **self.DATABASE_SIZE_METRICS) + metrics = self.instance_metrics.get(key) return metrics @@ -438,7 +445,7 @@ def _build_relations_config(self, yamlconfig): self.log.warn('Failed to parse config element=%s, check syntax' % str(element)) return config - def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, function_metrics, count_metrics, interface_error, programming_error): + def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, function_metrics, count_metrics, database_size_metrics, interface_error, programming_error): """Query pg_stat_* for various metrics If relations is not an empty list, gather per-relation metrics on top of that. @@ -457,7 +464,7 @@ def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, func metric_scope.append(self.COUNT_METRICS) # These are added only once per PG server, thus the test - db_instance_metrics = self._get_instance_metrics(key, db) + db_instance_metrics = self._get_instance_metrics(key, db, database_size_metrics) bgw_instance_metrics = self._get_bgw_metrics(key, db) if db_instance_metrics is not None: @@ -668,6 +675,7 @@ def check(self, instance): function_metrics = _is_affirmative(instance.get('collect_function_metrics', False)) # Default value for `count_metrics` is True for backward compatibility count_metrics = _is_affirmative(instance.get('collect_count_metrics', True)) + database_size_metrics = _is_affirmative(instance.get('collect_database_size_metrics', True)) if relations and not dbname: self.warning('"dbname" parameter must be set when using the "relations" parameter.') @@ -702,11 +710,11 @@ def check(self, instance): db = self.get_connection(key, host, port, user, password, dbname, ssl, connect_fct) version = self._get_version(key, db) self.log.debug("Running check against version %s" % version) - self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics, interface_error, programming_error) + self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics, database_size_metrics, interface_error, programming_error) except ShouldRestartException: self.log.info("Resetting the connection") db = self.get_connection(key, host, port, user, password, dbname, ssl, connect_fct, use_cached=False) - self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics, interface_error, programming_error) + self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics, database_size_metrics, interface_error, programming_error) if db is not None: service_check_tags = self._get_service_check_tags(host, port, dbname) diff --git a/conf.d/postgres.yaml.example b/conf.d/postgres.yaml.example index 32e8b839af..c3bbb49705 100644 --- a/conf.d/postgres.yaml.example +++ b/conf.d/postgres.yaml.example @@ -64,3 +64,7 @@ instances: # suggested value is False. # collect_count_metrics: False # + +# Collect database size metrics. Default value is True but they might be slow with large databases +# collect_database_size_metrics: False +# diff --git a/tests/checks/integration/test_postgres.py b/tests/checks/integration/test_postgres.py index 95f18bcfb4..e19b44539d 100644 --- a/tests/checks/integration/test_postgres.py +++ b/tests/checks/integration/test_postgres.py @@ -10,6 +10,89 @@ class TestPostgres(AgentCheckTest): CHECK_NAME = 'postgres' + COMMON_METRICS = [ + 'postgresql.connections', + 'postgresql.commits', + 'postgresql.rollbacks', + 'postgresql.disk_read', + 'postgresql.buffer_hit', + 'postgresql.rows_returned', + 'postgresql.rows_fetched', + 'postgresql.rows_inserted', + 'postgresql.rows_updated', + 'postgresql.rows_deleted', + ] + + DATABASE_SIZE_METRICS = [ + 'postgresql.database_size', + ] + + NEWER_92_METRICS = [ + 'postgresql.deadlocks', + 'postgresql.temp_bytes', + 'postgresql.temp_files', + ] + + NEWER_91_BGW_METRICS = [ + 'postgresql.bgwriter.buffers_backend_fsync', + ] + + NEWER_92_BGW_METRICS = [ + 'postgresql.bgwriter.write_time', + 'postgresql.bgwriter.sync_time', + ] + + COMMON_BGW_METRICS = [ + 'postgresql.bgwriter.checkpoints_timed', + 'postgresql.bgwriter.checkpoints_requested', + 'postgresql.bgwriter.buffers_checkpoint', + 'postgresql.bgwriter.buffers_clean', + 'postgresql.bgwriter.maxwritten_clean', + 'postgresql.bgwriter.buffers_backend', + 'postgresql.bgwriter.buffers_alloc', + ] + + RELATION_METRICS = [ + 'postgresql.seq_scans', + 'postgresql.seq_rows_read', + 'postgresql.index_scans', + 'postgresql.index_rows_fetched', + 'postgresql.rows_inserted', + 'postgresql.rows_updated', + 'postgresql.rows_deleted', + 'postgresql.rows_hot_updated', + 'postgresql.live_rows', + 'postgresql.dead_rows', + ] + + SIZE_METRICS = [ + 'postgresql.table_size', + 'postgresql.index_size', + 'postgresql.total_size', + ] + + STATIO_METRICS = [ + 'postgresql.heap_blocks_read', + 'postgresql.heap_blocks_hit', + 'postgresql.index_blocks_read', + 'postgresql.index_blocks_hit', + 'postgresql.toast_blocks_read', + 'postgresql.toast_blocks_hit', + 'postgresql.toast_index_blocks_read', + 'postgresql.toast_index_blocks_hit', + ] + + IDX_METRICS = [ + 'postgresql.index_scans', + 'postgresql.index_rows_read', + 'postgresql.index_rows_fetched', + ] + + CONNECTION_METRICS = [ + 'postgresql.max_connections', + 'postgresql.percent_usage_connections', + ] + def test_checks(self): host = 'localhost' port = 15432 @@ -50,104 +133,39 @@ def test_checks(self): db = self.check.dbs[key] # Testing DB_METRICS scope - COMMON_METRICS = [ - 'postgresql.connections', - 'postgresql.commits', - 'postgresql.rollbacks', - 'postgresql.disk_read', - 'postgresql.buffer_hit', - 'postgresql.rows_returned', - 'postgresql.rows_fetched', - 'postgresql.rows_inserted', - 'postgresql.rows_updated', - 'postgresql.rows_deleted', - 'postgresql.database_size', - ] - - for mname in COMMON_METRICS: + for mname in self.COMMON_METRICS: for db in ('datadog_test', 'dogs'): self.assertMetric(mname, count=1, tags=['db:%s' % db]) - NEWER_92_METRICS = [ - 'postgresql.deadlocks', - 'postgresql.temp_bytes', - 'postgresql.temp_files', - ] + for mname in self.DATABASE_SIZE_METRICS: + for db in ('datadog_test', 'dogs'): + self.assertMetric(mname, count=1, tags=['db:%s' % db]) if self.check._is_9_2_or_above(key, db): - for mname in NEWER_92_METRICS: + for mname in self.NEWER_92_METRICS: for db in ('datadog_test', 'dogs'): self.assertMetric(mname, count=1, tags=['db:%s' % db]) # Testing BGW_METRICS scope - COMMON_BGW_METRICS = [ - 'postgresql.bgwriter.checkpoints_timed', - 'postgresql.bgwriter.checkpoints_requested', - 'postgresql.bgwriter.buffers_checkpoint', - 'postgresql.bgwriter.buffers_clean', - 'postgresql.bgwriter.maxwritten_clean', - 'postgresql.bgwriter.buffers_backend', - 'postgresql.bgwriter.buffers_alloc', - ] - - for mname in COMMON_BGW_METRICS: + for mname in self.COMMON_BGW_METRICS: self.assertMetric(mname, count=1) - NEWER_91_BGW_METRICS = [ - 'postgresql.bgwriter.buffers_backend_fsync', - ] - if self.check._is_9_1_or_above(key, db): - for mname in NEWER_91_BGW_METRICS: + for mname in self.NEWER_91_BGW_METRICS: self.assertMetric(mname, count=1) - NEWER_92_BGW_METRICS = [ - 'postgresql.bgwriter.write_time', - 'postgresql.bgwriter.sync_time', - ] - if self.check._is_9_2_or_above(key, db): - for mname in NEWER_92_BGW_METRICS: + for mname in self.NEWER_92_BGW_METRICS: self.assertMetric(mname, count=1) # FIXME: Test postgresql.locks # Relation specific metrics - RELATION_METRICS = [ - 'postgresql.seq_scans', - 'postgresql.seq_rows_read', - 'postgresql.index_scans', - 'postgresql.index_rows_fetched', - 'postgresql.rows_inserted', - 'postgresql.rows_updated', - 'postgresql.rows_deleted', - 'postgresql.rows_hot_updated', - 'postgresql.live_rows', - 'postgresql.dead_rows', - ] - - SIZE_METRICS = [ - 'postgresql.table_size', - 'postgresql.index_size', - 'postgresql.total_size', - ] - - STATIO_METRICS = [ - 'postgresql.heap_blocks_read', - 'postgresql.heap_blocks_hit', - 'postgresql.index_blocks_read', - 'postgresql.index_blocks_hit', - 'postgresql.toast_blocks_read', - 'postgresql.toast_blocks_hit', - 'postgresql.toast_index_blocks_read', - 'postgresql.toast_index_blocks_hit', - ] - for inst in instances: for rel in inst.get('relations', []): expected_tags = ['db:%s' % inst['dbname'], 'table:%s' % rel] expected_rel_tags = ['db:%s' % inst['dbname'], 'table:%s' % rel, 'schema:public'] - for mname in RELATION_METRICS: + for mname in self.RELATION_METRICS: count = 1 # We only build a test index and stimulate it on breed # in the dogs DB, so the other index metrics shouldn't be @@ -156,10 +174,10 @@ def test_checks(self): count = 0 self.assertMetric(mname, count=count, tags=expected_rel_tags) - for mname in SIZE_METRICS: + for mname in self.SIZE_METRICS: self.assertMetric(mname, count=1, tags=expected_tags) - for mname in STATIO_METRICS: + for mname in self.STATIO_METRICS: at_least = None count = 1 if '.index' in mname and rel != 'breed': @@ -172,23 +190,13 @@ def test_checks(self): self.assertMetric(mname, count=count, at_least=at_least, tags=expected_rel_tags) # Index metrics - IDX_METRICS = [ - 'postgresql.index_scans', - 'postgresql.index_rows_read', - 'postgresql.index_rows_fetched', - ] - # we have a single index defined! expected_tags = ['db:dogs', 'table:breed', 'index:breed_names', 'schema:public'] - for mname in IDX_METRICS: + for mname in self.IDX_METRICS: self.assertMetric(mname, count=1, tags=expected_tags) # instance connection metrics - CONNECTION_METRICS = [ - 'postgresql.max_connections', - 'postgresql.percent_usage_connections', - ] - for mname in CONNECTION_METRICS: + for mname in self.CONNECTION_METRICS: self.assertMetric(mname, count=1) # db level connections @@ -262,104 +270,39 @@ def test_psycopg2(self): db = self.check.dbs[key] # Testing DB_METRICS scope - COMMON_METRICS = [ - 'postgresql.connections', - 'postgresql.commits', - 'postgresql.rollbacks', - 'postgresql.disk_read', - 'postgresql.buffer_hit', - 'postgresql.rows_returned', - 'postgresql.rows_fetched', - 'postgresql.rows_inserted', - 'postgresql.rows_updated', - 'postgresql.rows_deleted', - 'postgresql.database_size', - ] - - for mname in COMMON_METRICS: + for mname in self.COMMON_METRICS: for db in ('datadog_test', 'dogs'): self.assertMetric(mname, count=1, tags=['db:%s' % db]) - NEWER_92_METRICS = [ - 'postgresql.deadlocks', - 'postgresql.temp_bytes', - 'postgresql.temp_files', - ] + for mname in self.DATABASE_SIZE_METRICS: + for db in ('datadog_test', 'dogs'): + self.assertMetric(mname, count=1, tags=['db:%s' % db]) if self.check._is_9_2_or_above(key, db): - for mname in NEWER_92_METRICS: + for mname in self.NEWER_92_METRICS: for db in ('datadog_test', 'dogs'): self.assertMetric(mname, count=1, tags=['db:%s' % db]) # Testing BGW_METRICS scope - COMMON_BGW_METRICS = [ - 'postgresql.bgwriter.checkpoints_timed', - 'postgresql.bgwriter.checkpoints_requested', - 'postgresql.bgwriter.buffers_checkpoint', - 'postgresql.bgwriter.buffers_clean', - 'postgresql.bgwriter.maxwritten_clean', - 'postgresql.bgwriter.buffers_backend', - 'postgresql.bgwriter.buffers_alloc', - ] - - for mname in COMMON_BGW_METRICS: + for mname in self.COMMON_BGW_METRICS: self.assertMetric(mname, count=1) - NEWER_91_BGW_METRICS = [ - 'postgresql.bgwriter.buffers_backend_fsync', - ] - if self.check._is_9_1_or_above(key, db): - for mname in NEWER_91_BGW_METRICS: + for mname in self.NEWER_91_BGW_METRICS: self.assertMetric(mname, count=1) - NEWER_92_BGW_METRICS = [ - 'postgresql.bgwriter.write_time', - 'postgresql.bgwriter.sync_time', - ] - if self.check._is_9_2_or_above(key, db): - for mname in NEWER_92_BGW_METRICS: + for mname in self.NEWER_92_BGW_METRICS: self.assertMetric(mname, count=1) # FIXME: Test postgresql.locks # Relation specific metrics - RELATION_METRICS = [ - 'postgresql.seq_scans', - 'postgresql.seq_rows_read', - 'postgresql.index_scans', - 'postgresql.index_rows_fetched', - 'postgresql.rows_inserted', - 'postgresql.rows_updated', - 'postgresql.rows_deleted', - 'postgresql.rows_hot_updated', - 'postgresql.live_rows', - 'postgresql.dead_rows', - ] - - SIZE_METRICS = [ - 'postgresql.table_size', - 'postgresql.index_size', - 'postgresql.total_size', - ] - - STATIO_METRICS = [ - 'postgresql.heap_blocks_read', - 'postgresql.heap_blocks_hit', - 'postgresql.index_blocks_read', - 'postgresql.index_blocks_hit', - 'postgresql.toast_blocks_read', - 'postgresql.toast_blocks_hit', - 'postgresql.toast_index_blocks_read', - 'postgresql.toast_index_blocks_hit', - ] - for inst in instances: for rel in inst.get('relations', []): expected_tags = ['db:%s' % inst['dbname'], 'table:%s' % rel] expected_rel_tags = ['db:%s' % inst['dbname'], 'table:%s' % rel, 'schema:public'] - for mname in RELATION_METRICS: + for mname in self.RELATION_METRICS: count = 1 # We only build a test index and stimulate it on breed # in the dogs DB, so the other index metrics shouldn't be @@ -368,10 +311,10 @@ def test_psycopg2(self): count = 0 self.assertMetric(mname, count=count, tags=expected_rel_tags) - for mname in SIZE_METRICS: + for mname in self.SIZE_METRICS: self.assertMetric(mname, count=1, tags=expected_tags) - for mname in STATIO_METRICS: + for mname in self.STATIO_METRICS: at_least = None count = 1 if '.index' in mname and rel != 'breed': @@ -384,23 +327,13 @@ def test_psycopg2(self): self.assertMetric(mname, count=count, at_least=at_least, tags=expected_rel_tags) # Index metrics - IDX_METRICS = [ - 'postgresql.index_scans', - 'postgresql.index_rows_read', - 'postgresql.index_rows_fetched', - ] - # we have a single index defined! expected_tags = ['db:dogs', 'table:breed', 'index:breed_names', 'schema:public'] - for mname in IDX_METRICS: + for mname in self.IDX_METRICS: self.assertMetric(mname, count=1, tags=expected_tags) # instance connection metrics - CONNECTION_METRICS = [ - 'postgresql.max_connections', - 'postgresql.percent_usage_connections', - ] - for mname in CONNECTION_METRICS: + for mname in self.CONNECTION_METRICS: self.assertMetric(mname, count=1) # db level connections @@ -433,3 +366,90 @@ def test_psycopg2(self): from psycopg2.extensions import connection self.assertTrue(type(self.check.dbs[key]) == connection) self.check.dbs[key].close() + + def test_collect_database_size_metrics_disabled(self): + host = 'localhost' + port = 15432 + dbname = 'datadog_test' + + instances = [ + { + 'host': host, + 'port': port, + 'username': 'datadog', + 'password': 'datadog', + 'dbname': dbname, + 'collect_database_size_metrics': False + }, + { + 'host': host, + 'port': port, + 'username': 'datadog', + 'password': 'datadog', + 'dbname': 'dogs', + 'collect_database_size_metrics': False + } + ] + + self.run_check_twice(dict(instances=instances), force_reload=True) + + # Useful to get server version + # FIXME: Not great, should have a function like that available + key = (host, port, dbname) + db = self.check.dbs[key] + + for mname in self.COMMON_METRICS: + for db in ('datadog_test', 'dogs'): + self.assertMetric(mname, count=1, tags=['db:%s' % db]) + + for mname in self.DATABASE_SIZE_METRICS: + for db in ('datadog_test', 'dogs'): + self.assertMetric(mname, count=0, tags=['db:%s' % db]) + + if self.check._is_9_2_or_above(key, db): + for mname in self.NEWER_92_METRICS: + for db in ('datadog_test', 'dogs'): + self.assertMetric(mname, count=1, tags=['db:%s' % db]) + + # Testing BGW_METRICS scope + for mname in self.COMMON_BGW_METRICS: + self.assertMetric(mname, count=1) + + if self.check._is_9_1_or_above(key, db): + for mname in self.NEWER_91_BGW_METRICS: + self.assertMetric(mname, count=1) + + if self.check._is_9_2_or_above(key, db): + for mname in self.NEWER_92_BGW_METRICS: + self.assertMetric(mname, count=1) + + # FIXME: Test postgresql.locks + + # instance connection metrics + for mname in self.CONNECTION_METRICS: + self.assertMetric(mname, count=1) + + # db level connections + for inst in instances: + expected_tags = ['db:%s' % inst['dbname']] + self.assertMetric('postgresql.connections', count=1, tags=expected_tags) + + # By schema metrics + self.assertMetric('postgresql.table.count', value=2, count=1, tags=['schema:public']) + self.assertMetric('postgresql.db.count', value=2, count=1) + + # Test service checks + self.assertServiceCheck('postgres.can_connect', + count=1, status=AgentCheck.OK, + tags=['host:localhost', 'port:15432', 'db:datadog_test'] + ) + self.assertServiceCheck('postgres.can_connect', + count=1, status=AgentCheck.OK, + tags=['host:localhost', 'port:15432', 'db:dogs'] + ) + + # Assert service metadata + self.assertServiceMetadata(['version'], count=2) + + self.coverage_report() + self.check.dbs[key].close()