Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[redis] new post 2.8 replication metrics #1350

Merged
merged 1 commit into from
Feb 24, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions checks.d/redisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
# 3rd party
import redis


class Redis(AgentCheck):
db_key_pattern = re.compile(r'^db\d+')
slave_key_pattern = re.compile(r'^slave\d+')
subkeys = ['keys', 'expires']

SOURCE_TYPE_NAME = 'redis'
Expand Down Expand Up @@ -63,7 +65,9 @@ class Redis(AgentCheck):
'master_last_io_seconds_ago': 'redis.replication.last_io_seconds_ago',
'master_sync_in_progress': 'redis.replication.sync',
'master_sync_left_bytes': 'redis.replication.sync_left_bytes',

'repl_backlog_histlen': 'redis.replication.backlog_histlen',
'master_repl_offset': 'redis.replication.master_repl_offset',
'slave_repl_offset': 'redis.replication.slave_repl_offset',
}

RATE_KEYS = {
Expand Down Expand Up @@ -109,7 +113,7 @@ def _get_conn(self, instance):

# Only send useful parameters to the redis client constructor
list_params = ['host', 'port', 'db', 'password', 'socket_timeout',
'connection_pool', 'charset', 'errors', 'unix_socket_path']
'connection_pool', 'charset', 'errors', 'unix_socket_path']

# Set a default timeout (in seconds) if no timeout is specified in the instance config
instance['socket_timeout'] = instance.get('socket_timeout', 5)
Expand All @@ -130,7 +134,7 @@ def _check_db(self, instance, custom_tags=None):
if 'unix_socket_path' in instance:
tags_to_add = ["unix_socket_path:%s" % instance.get("unix_socket_path")]
else:
tags_to_add = ["redis_host:%s" % instance.get('host'), "redis_port:%s" % instance.get('port')]
tags_to_add = ["redis_host:%s" % instance.get('host'), "redis_port:%s" % instance.get('port')]

if instance.get('db') is not None:
tags_to_add.append("db:%s" % instance.get('db'))
Expand Down Expand Up @@ -215,8 +219,23 @@ def _check_db(self, instance, custom_tags=None):
self.warning("{0} key not found in redis".format(key))
self.gauge('redis.key.length', 0, tags=key_tags)

# Save the replication delay for each slave
for key in info:
if self.slave_key_pattern.match(key) and isinstance(info[key], dict):
slave_offset = info[key].get('offset')
master_offset = info.get('master_repl_offset')
if slave_offset and master_offset and master_offset - slave_offset >= 0:
delay = master_offset - slave_offset
# Add id, ip, and port tags for the slave
slave_tags = tags[:]
for slave_tag in ('ip', 'port'):
if slave_tag in info[key]:
slave_tags.append('slave_{0}:{1}'.format(slave_tag, info[key][slave_tag]))
slave_tags.append('slave_id:%s' % key.lstrip('slave'))
self.gauge('redis.replication.delay', delay, tags=slave_tags)

def check(self, instance):
if (not "host" in instance or not "port" in instance) and not "unix_socket_path" in instance:
if ("host" not in instance or "port" not in instance) and "unix_socket_path" not in instance:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure that's exactly what we want, see https://gist.github.com/LeoCavaille/801f5bb9288a94ca8d94
Especially the 4th example?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LeoCavaille I only moved the "not" keywords to the right place, that didn't change the behavior, which I think is what we want.

As long as there's a socket_path we're good, if there is no socket path, there must be a host and a port. We probably don't want to raise an exception if a user switch from a host:port couple to a socket and forget to remove the host from the config.

See https://github.com/DataDog/dd-agent/blob/haissam/redis-repl-metrics/checks.d/redisdb.py#L103-L107 for the use case.

raise Exception("You must specify a host/port couple or a unix_socket_path")
custom_tags = instance.get('tags', [])
self._check_db(instance,custom_tags)
self._check_db(instance, custom_tags)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🍰

23 changes: 21 additions & 2 deletions ci/redis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,27 @@ def redis_rootdir
Rake::Task['ci:common:run_tests'].invoke(this_provides)
end

task :cleanup => ['ci:common:cleanup']
# FIXME: stop redis
task :cleanup => ['ci:common:cleanup'] do
# Shutdown redis
conf_files = ["#{ENV['TRAVIS_BUILD_DIR']}/ci/resources/redis/auth.conf",
"#{ENV['TRAVIS_BUILD_DIR']}/ci/resources/redis/noauth.conf"]
for f in conf_files do
pass, port = nil, nil
File.readlines(f).each do |line|
param = line.split(' ')
if param[0] == 'port'
port = param[1]
elsif param[0] == 'requirepass'
pass = param[1]
end
end
if pass and port
sh %(#{redis_rootdir}/src/redis-cli -p #{port} -a #{pass} SHUTDOWN)
elsif port
sh %(#{redis_rootdir}/src/redis-cli -p #{port} SHUTDOWN)
end
end
end

task :execute do
exception = nil
Expand Down
1 change: 1 addition & 0 deletions ci/resources/redis/auth.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pidfile /tmp/dd-redis-auth.pid
bind 127.0.0.1
port 26379
requirepass datadog-is-devops-best-friend
slaveof 127.0.0.1 16379
62 changes: 52 additions & 10 deletions tests/test_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def test_redis_auth(self):
except Exception as e:
self.assertTrue(
# 2.8
'noauth authentication required' in str(e).lower()\
'noauth authentication required' in str(e).lower()
# previously
or 'operation not permitted' in str(e).lower()
, str(e))
or 'operation not permitted' in str(e).lower(),
str(e))

r = load_check('redisdb', {}, {})
try:
Expand All @@ -68,7 +68,7 @@ def test_redis_default(self):
'port': port
}

db = redis.Redis(port=port, db=14) # Datadog's test db
db = redis.Redis(port=port, db=14) # Datadog's test db
db.flushdb()
db.set("key1", "value")
db.set("key2", "value")
Expand Down Expand Up @@ -101,8 +101,8 @@ def assert_key_present(expected, present, tolerance):

# Assert that the keys metrics are tagged by db. just check db0, since
# it's the only one we can guarantee is there.
db_metrics = self._sort_metrics([m for m in metrics if m[0] in ['redis.keys',
'redis.expires'] and "redis_db:db14" in m[3]["tags"]])
db_metrics = self._sort_metrics(
[m for m in metrics if m[0] in ['redis.keys', 'redis.expires'] and "redis_db:db14" in m[3]["tags"]])
self.assertEquals(2, len(db_metrics))

self.assertEquals('redis.expires', db_metrics[0][0])
Expand All @@ -114,12 +114,19 @@ def assert_key_present(expected, present, tolerance):
# Service checks
service_checks = r.get_service_checks()
service_checks_count = len(service_checks)
self.assertTrue(type(service_checks) == type([]))
self.assertTrue(isinstance(service_checks, list))
self.assertTrue(service_checks_count > 0)
self.assertEquals(len([sc for sc in service_checks if sc['check'] == "redis.can_connect"]), 1, service_checks)
self.assertEquals(
len([sc for sc in service_checks if sc['check'] == "redis.can_connect"]), 1, service_checks)
# Assert that all service checks have the proper tags: host and port
self.assertEquals(len([sc for sc in service_checks if "redis_host:localhost" in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(len([sc for sc in service_checks if "redis_port:%s" % port in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(
len([sc for sc in service_checks if "redis_host:localhost" in sc['tags']]),
service_checks_count,
service_checks)
self.assertEquals(
len([sc for sc in service_checks if "redis_port:%s" % port in sc['tags']]),
service_checks_count,
service_checks)

# Run one more check and ensure we get total command count
# and other rates
Expand All @@ -129,6 +136,41 @@ def assert_key_present(expected, present, tolerance):
keys = [m[0] for m in metrics]
assert 'redis.net.commands' in keys

def test_redis_repl(self):
master_instance = {
'host': 'localhost',
'port': NOAUTH_PORT
}

slave_instance = {
'host': 'localhost',
'port': AUTH_PORT,
'password': 'datadog-is-devops-best-friend'
}

repl_metrics = [
'redis.replication.delay',
'redis.replication.backlog_histlen',
'redis.replication.delay',
'redis.replication.master_repl_offset',
]

master_db = redis.Redis(port=NOAUTH_PORT, db=14)
slave_db = redis.Redis(port=AUTH_PORT, password=slave_instance['password'], db=14)
master_db.flushdb()

# Assert that the replication works
master_db.set('replicated:test', 'true')
self.assertEquals(slave_db.get('replicated:test'), 'true')

r = load_check('redisdb', {}, {})
r.check(master_instance)
metrics = self._sort_metrics(r.get_metrics())

# Assert the presence of replication metrics
keys = [m[0] for m in metrics]
assert [x in keys for x in repl_metrics]

def _sort_metrics(self, metrics):
def sort_by(m):
return m[0], m[1], m[3]
Expand Down