Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker support #844

Merged
merged 14 commits into from
Mar 7, 2014
177 changes: 177 additions & 0 deletions checks.d/docker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import urllib2
import urllib
import httplib
import socket
import os
from urlparse import urlsplit, urljoin
from util import json, headers
from checks import AgentCheck


# Per-hierarchy metric definitions read from the LXC cgroup pseudo-files.
# "cgroup" names the hierarchy whose mountpoint is looked up once at init;
# "file" is a path template relative to that mountpoint, with {0} replaced
# by the container Id; "metrics" maps each cgroup stat key to a
# (Datadog metric name, metric type) pair.
LXC_METRICS = [
    {
        "cgroup": "memory",
        "file": "lxc/{0}/memory.stat",
        "metrics": {
            "active_anon": ("docker.mem.active_anon", "gauge"),
            "active_file": ("docker.mem.active_file", "gauge"),
            "cache": ("docker.mem.cache", "gauge"),
            "hierarchical_memory_limit": ("docker.mem.hierarchical_memory_limit", "gauge"),
            "hierarchical_memsw_limit": ("docker.mem.hierarchical_memsw_limit", "gauge"),
            "inactive_anon": ("docker.mem.inactive_anon", "gauge"),
            "inactive_file": ("docker.mem.inactive_file", "gauge"),
            "mapped_file": ("docker.mem.mapped_file", "gauge"),
            "pgfault": ("docker.mem.pgfault", "gauge"),
            "pgmajfault": ("docker.mem.pgmajfault", "gauge"),
            "pgpgin": ("docker.mem.pgpgin", "gauge"),
            "pgpgout": ("docker.mem.pgpgout", "gauge"),
            "rss": ("docker.mem.rss", "gauge"),
            "swap": ("docker.mem.swap", "gauge"),
            "unevictable": ("docker.mem.unevictable", "gauge"),
            "total_active_anon": ("docker.mem.total_active_anon", "gauge"),
            "total_active_file": ("docker.mem.total_active_file", "gauge"),
            "total_cache": ("docker.mem.total_cache", "gauge"),
            "total_inactive_anon": ("docker.mem.total_inactive_anon", "gauge"),
            "total_inactive_file": ("docker.mem.total_inactive_file", "gauge"),
            "total_mapped_file": ("docker.mem.total_mapped_file", "gauge"),
            "total_pgfault": ("docker.mem.total_pgfault", "gauge"),
            "total_pgmajfault": ("docker.mem.total_pgmajfault", "gauge"),
            "total_pgpgin": ("docker.mem.total_pgpgin", "gauge"),
            "total_pgpgout": ("docker.mem.total_pgpgout", "gauge"),
            "total_rss": ("docker.mem.total_rss", "gauge"),
            "total_swap": ("docker.mem.total_swap", "gauge"),
            "total_unevictable": ("docker.mem.total_unevictable", "gauge"),
        }
    },
    {
        "cgroup": "cpuacct",
        "file": "lxc/{0}/cpuacct.stat",
        "metrics": {
            # NOTE: cpuacct.stat reports cumulative jiffies, not seconds.
            "user": ("docker.cpu.user", "gauge"),
            "system": ("docker.cpu.system", "gauge"),
        },
    },
]

# Container-level metrics taken directly from the Docker API
# "containers/json" payload (requested with size=1 so SizeRw is populated).
DOCKER_METRICS = {
    "SizeRw": ("docker.disk.size", "gauge"),
}

# Keys from the container "inspect" payload that are attached as
# "key:value" tags (key lowercased) to every metric of that container.
DOCKER_TAGS = [
    # NOTE(review): tagging by ID was questioned — the raw Id is not human
    # readable, and every recreated container produces a new value, which
    # inflates metric contexts. Name is the human-friendly identifier but
    # is not guaranteed unique. Confirm whether ID should stay.
    "ID",
    "Name",
    # NOTE(review): Created has the same high-cardinality problem as ID
    # (one distinct value per container instance); reviewers asked for it
    # to be removed and the author agreed.
    "Created",
    "Driver",
    "Path",
]

# Keys from the container's Config section intended to be used as tags.
# NOTE(review): this list was not referenced anywhere in the original
# check body — acknowledged by the author as a bug; it must either be
# wired into check() or deleted.
DOCKER_CONFIG_TAGS = [
    "Hostname",
    "Image"
]



class UnixHTTPConnection(httplib.HTTPConnection, object):
    """Class used in conjunction with UnixSocketHandler to make urllib2
    compatible with Unix sockets."""

    def __init__(self, unix_socket):
        # Only the socket path is stored here; the real
        # HTTPConnection.__init__ is deferred to __call__ (see below).
        self._unix_socket = unix_socket

    def connect(self):
        # Override the TCP connect: dial the Unix domain socket instead of
        # (host, port).
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.connect(self._unix_socket)
        self.sock = sock

    def __call__(self, *args, **kwargs):
        # urllib2's do_open() expects a connection *class* that it calls
        # with (host, timeout=...). By making instances callable and
        # completing HTTPConnection.__init__ here, a pre-configured
        # instance (carrying the socket path) can be passed where a class
        # is expected.
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        return self


class UnixSocketHandler(urllib2.AbstractHTTPHandler):
    """Class that makes Unix sockets work with urllib2 without any additional
    dependencies."""

    def unix_open(self, req):
        # Rebuild "<netloc><path>" from the unix:// URL, e.g.
        # "unix://var/run/docker.sock/containers/json" ->
        # "var/run/docker.sock/containers/json".
        full_path = "%s%s" % urlsplit(req.get_full_url())[1:3]
        # Walk the path components from the root; the longest existing
        # prefix is taken to be the socket file, the remainder the HTTP
        # request path.
        # NOTE(review): if every component exists on disk the loop never
        # breaks and the whole path is treated as the socket — assumed not
        # to happen with real API endpoints; confirm.
        path = os.path.sep
        for part in full_path.split("/"):
            path = os.path.join(path, part)
            if not os.path.exists(path):
                break
            unix_socket = path
        # add a host or else urllib2 complains
        url = req.get_full_url().replace(unix_socket, "/localhost")
        new_req = urllib2.Request(url, req.get_data(), dict(req.header_items()))
        new_req.timeout = req.timeout
        # Pass a pre-bound UnixHTTPConnection instance as the "class";
        # its __call__ finishes construction (see UnixHTTPConnection).
        return self.do_open(UnixHTTPConnection(unix_socket), new_req)

    # urllib2 dispatches "<scheme>_request" for each handler; reuse the
    # stock HTTP request pre-processing for the "unix" scheme.
    unix_request = urllib2.AbstractHTTPHandler.do_request_


class Docker(AgentCheck):
    """Agent check collecting per-container metrics from the Docker API
    (disk size) and from the LXC cgroup pseudo-files (memory and CPU
    accounting)."""

    def __init__(self, *args, **kwargs):
        super(Docker, self).__init__(*args, **kwargs)
        # Route "unix://" URLs through the Unix-socket-aware opener.
        urllib2.install_opener(urllib2.build_opener(UnixSocketHandler()))
        # Cache the mountpoint of each cgroup hierarchy we will read.
        # (Renamed from the original misspelled "_mounpoints".)
        self._mountpoints = {}
        for metric in LXC_METRICS:
            self._mountpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"])

    def check(self, instance):
        tags = instance.get("tags") or []

        containers = self._get_containers(instance)
        if not containers:
            # Surface the problem on the agent info page instead of
            # silently reporting nothing (requested in review).
            self.warning("No running containers were reported by Docker.")

        for container in containers:
            container_details = self._get_container(instance, container["Id"])
            container_tags = list(tags)
            for key in DOCKER_TAGS:
                container_tags.append("%s:%s" % (key.lower(), container_details[key]))
            # Tags from the container's Config section. DOCKER_CONFIG_TAGS
            # was previously declared but never used — acknowledged as a
            # bug in review; wired up here, guarded in case the inspect
            # payload lacks a Config section.
            config = container_details.get("Config") or {}
            for key in DOCKER_CONFIG_TAGS:
                if key in config:
                    container_tags.append("%s:%s" % (key.lower(), config[key]))
            for key, (dd_key, metric_type) in DOCKER_METRICS.items():
                if key in container:
                    getattr(self, metric_type)(dd_key, int(container[key]), tags=container_tags)
            for metric in LXC_METRICS:
                mountpoint = self._mountpoints[metric["cgroup"]]
                stat_file = os.path.join(mountpoint, metric["file"].format(container["Id"]))
                stats = self._parse_cgroup_file(stat_file)
                for key, (dd_key, metric_type) in metric["metrics"].items():
                    if key in stats:
                        # Values are integer counters/byte counts
                        # (cpuacct.stat reports jiffies), so int() is safe.
                        getattr(self, metric_type)(dd_key, int(stats[key]), tags=container_tags)

    def _get_containers(self, instance):
        """Gets the list of running containers in Docker.

        size=1 asks the daemon to also compute SizeRw for each container."""
        return self._get_json("%(url)s/containers/json" % instance, params={"size": 1})

    def _get_container(self, instance, cid):
        """Get container information from Docker, given a container Id."""
        return self._get_json("%s/containers/%s/json" % (instance["url"], cid))

    def _get_json(self, uri, params=None):
        """Utility method to get and parse JSON streams."""
        if params:
            uri = "%s?%s" % (uri, urllib.urlencode(params))
        req = urllib2.Request(uri, None)
        response = urllib2.urlopen(req)
        try:
            body = response.read()
        finally:
            # Close the response explicitly instead of leaking the socket.
            response.close()
        return json.loads(body)

    def _find_cgroup(self, hierarchy):
        """Finds the mount point for a specified cgroup hierarchy.

        Works with old style (single mount) and new style (one mount per
        hierarchy) layouts. Returns None when no matching mount exists."""
        # "with" guarantees the file is closed; the previous try/finally
        # raised NameError (fp unbound) whenever open() itself failed.
        with open("/proc/mounts") as fp:
            mounts = [line.split() for line in fp.read().splitlines()]
        cgroup_mounts = [mount for mount in mounts if mount[2] == "cgroup"]
        # Old cgroup style: one mount carries every hierarchy.
        if len(cgroup_mounts) == 1:
            return cgroup_mounts[0][1]
        # New style: match the hierarchy name against the mount options.
        for _, mountpoint, _, opts, _, _ in cgroup_mounts:
            if hierarchy in opts:
                return mountpoint

    def _parse_cgroup_file(self, file_):
        """Parses a cgroup pseudo file of "key value" lines into a dict."""
        # Same NameError fix as in _find_cgroup: use a context manager.
        with open(file_) as fp:
            return dict(line.split() for line in fp.read().splitlines())
4 changes: 4 additions & 0 deletions conf.d/docker.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
init_config:

instances:
# URL of the Docker API endpoint; "unix://" dials the local control socket.
- url: "unix://var/run/docker.sock"