diff --git a/openstack/conf.yaml.example b/openstack/conf.yaml.example index 5718794151188..5cb3f7fb7815b 100644 --- a/openstack/conf.yaml.example +++ b/openstack/conf.yaml.example @@ -44,6 +44,10 @@ instances: # The authorization scope that will be used to request a token from Identity API v3 # The auth scope must resolve to 1 of the following structures: # {'project': {'name': 'my_project', 'domain': 'my_domain} OR {'project': {'id': 'my_project_id'}} + # + # ALPHA: DO NOT ADD an auth_scope if you wish to have unscoped access + # Simply add the relevant user credentials. + # auth_scope: project: id: my_project_id @@ -79,3 +83,11 @@ instances: # Set (optional) custom tags for each metric # tags: # - optional:tag1 + + # If you need additional tags to submit with your server metrics. + # Please note that server metrics override the host tag and thus do not get + # the agent-level tags you may have set. + # + # server_tags: + # - foo:bar + # - baz:qux diff --git a/openstack/datadog_checks/openstack/__init__.py b/openstack/datadog_checks/openstack/__init__.py index 116e8e09f3351..6e7722a9a4590 100644 --- a/openstack/datadog_checks/openstack/__init__.py +++ b/openstack/datadog_checks/openstack/__init__.py @@ -2,6 +2,6 @@ OpenStackCheck = openstack.OpenStackCheck -__version__ = "1.2.0" +__version__ = "1.3.0" __all__ = ['openstack'] diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 45420b2ba35fd..04b6f644b0255 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -4,6 +4,9 @@ from datetime import datetime, timedelta from urlparse import urljoin import re +import time +import random +import copy import requests import simplejson as json @@ -26,7 +29,7 @@ FALLBACK_NOVA_API_VERSION = 'v2' DEFAULT_NEUTRON_API_VERSION = 'v2.0' -DEFAULT_API_REQUEST_TIMEOUT = 5 # seconds +DEFAULT_API_REQUEST_TIMEOUT = 10 # seconds NOVA_HYPERVISOR_METRICS = [ 'current_workload', @@ -92,57 +95,134 @@ 'ACTIVE' ] +REMOVED_STATES = [ + 'DELETED', + 'SHUTOFF' +] + +UNSCOPED_AUTH = 'unscoped' + +BASE_BACKOFF_SECS = 15 +MAX_BACKOFF_SECS = 300 + + class OpenStackAuthFailure(Exception): pass + class InstancePowerOffFailure(Exception): pass + class IncompleteConfig(Exception): pass + class IncompleteAuthScope(IncompleteConfig): pass + class IncompleteIdentity(IncompleteConfig): pass + class MissingEndpoint(Exception): pass + class MissingNovaEndpoint(MissingEndpoint): pass + class MissingNeutronEndpoint(MissingEndpoint): pass + class KeystoneUnreachable(Exception): pass -class OpenStackProjectScope(object): - """ - Container class for a single project's authorization scope - Embeds the auth token to be included with API requests, and refreshes - the token on expiry - """ - def __init__(self, auth_token, auth_scope, service_catalog): +class OpenStackScope(object): + def __init__(self, auth_token): self.auth_token = auth_token - # Store some identifiers for this project - self.project_name = auth_scope["project"].get("name") - self.domain_id = auth_scope["project"].get("domain", {}).get("id") - self.tenant_id = auth_scope["project"].get("id") - self.service_catalog = service_catalog + @classmethod + def request_auth_token(cls, auth_scope, identity, keystone_server_url, ssl_verify, proxy=None): + if not auth_scope: + auth_scope = UNSCOPED_AUTH + + payload = {'auth': {'identity': identity, 'scope': auth_scope}} + auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) + headers = {'Content-Type': 'application/json'} + + resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp @classmethod - def from_config(cls, init_config, instance_config, proxy_config=None): + def get_user_identity(cls, instance_config): + """ + Parse user identity out of init_config + + To guarantee a uniquely identifiable user, expects + {"user": {"name": "my_username", "password": "my_password", + "domain": {"id": "my_domain_id"} + } + } + """ + user = instance_config.get('user') + + if not ( + user and + user.get('name') and + user.get('password') and + user.get("domain") and + user.get("domain").get("id") + ): + raise IncompleteIdentity() + + identity = { + "methods": ['password'], + "password": {"user": user} + } + return identity + + @classmethod + def get_auth_scope(cls, instance_config): + """ + Parse authorization scope out of init_config + + To guarantee a uniquely identifiable scope, expects either: + {'project': {'name': 'my_project', 'domain': {'id': 'my_domain_id'}}} + OR + {'project': {'id': 'my_project_id'}} + """ + auth_scope = instance_config.get('auth_scope') + if not auth_scope: + return None + + if not auth_scope.get('project'): + raise IncompleteAuthScope() + + if auth_scope['project'].get('name'): + # We need to add a domain scope to avoid name clashes. Search for one. If not raise IncompleteAuthScope + if not auth_scope['project'].get('domain', {}).get('id'): + raise IncompleteAuthScope() + else: + # Assume a unique project id has been given + if not auth_scope['project'].get('id'): + raise IncompleteAuthScope() + + return auth_scope + + @classmethod + def get_auth_response_from_config(cls, init_config, instance_config, proxy_config=None): keystone_server_url = init_config.get("keystone_server_url") if not keystone_server_url: raise IncompleteConfig() ssl_verify = init_config.get("ssl_verify", False) - nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) auth_scope = cls.get_auth_scope(instance_config) identity = cls.get_user_identity(instance_config) @@ -161,22 +241,137 @@ def from_config(cls, init_config, instance_config, proxy_config=None): try: identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') - if 'domain' in auth_scope['project']: - auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') - else: - auth_scope['project']['name'] = auth_scope['project'].pop('id') + if auth_scope: + if 'domain' in auth_scope['project']: + auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') + else: + auth_scope['project']['name'] = auth_scope['project'].pop('id') auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: - exception_msg = "{msg} and also failed keystone auth with identity:{id} domain:{domain} scope:{scope} @{url}: {ex}".format( + exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} scope:{scope} @{url}: {ex}".format( msg=exception_msg, user=identity['password']['user']['name'], - domain = identity['password']['user']['domain']['name'], + domain=identity['password']['user']['domain']['name'], scope=auth_scope, url=keystone_server_url, ex=e) raise KeystoneUnreachable(exception_msg) - auth_token = auth_resp.headers.get('X-Subject-Token') + return auth_scope, auth_resp.headers.get('X-Subject-Token'), auth_resp + + +class OpenStackUnscoped(OpenStackScope): + def __init__(self, auth_token, project_scope_map): + super(OpenStackUnscoped, self).__init__(auth_token) + self.project_scope_map = project_scope_map + + @classmethod + def from_config(cls, init_config, instance_config, proxy_config=None): + keystone_server_url = init_config.get("keystone_server_url") + if not keystone_server_url: + raise IncompleteConfig() + + ssl_verify = init_config.get("ssl_verify", True) + nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) + + _, auth_token, _ = cls.get_auth_response_from_config(init_config, instance_config, proxy_config) + + try: + project_resp = cls.request_project_list(auth_token, keystone_server_url, ssl_verify, proxy_config) + projects = project_resp.json().get('projects') + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + exception_msg = "unable to retrieve project list from keystone auth with identity: @{url}: {ex}".format( + url=keystone_server_url, + ex=e) + raise KeystoneUnreachable(exception_msg) + + project_scope_map = {} + for project in projects: + try: + project_key = project['name'], project['id'] + token_resp = cls.get_token_for_project(auth_token, project, keystone_server_url, + ssl_verify, proxy_config) + project_auth_token = token_resp.headers.get('X-Subject-Token') + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + exception_msg = "unable to retrieve project from keystone auth with identity: @{url}: {ex}".format( + url=keystone_server_url, + ex=e) + raise KeystoneUnreachable(exception_msg) + + try: + service_catalog = KeystoneCatalog.from_auth_response( + token_resp.json(), nova_api_version + ) + except MissingNovaEndpoint: + service_catalog = KeystoneCatalog.from_auth_response( + token_resp.json(), FALLBACK_NOVA_API_VERSION + ) + + project_auth_scope = { + 'project': { + 'name': project['name'], + 'id': project['id'], + 'domain': {} if project['domain_id'] is None else {'id': project['domain_id']} + } + } + project_scope = OpenStackProjectScope(project_auth_token, project_auth_scope, service_catalog) + project_scope_map[project_key] = project_scope + + return cls(auth_token, project_scope_map) + + @classmethod + def get_token_for_project(cls, auth_token, project, keystone_server_url, ssl_verify, proxy=None): + identity = { + "methods": ['token'], + "token": {"id": auth_token} + } + scope = { + 'project': {'id': project['id']} + } + payload = {'auth': {'identity': identity, 'scope': scope}} + headers = {'Content-Type': 'application/json'} + auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) + + resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp + + @classmethod + def request_project_list(cls, auth_token, keystone_server_url, ssl_verify, proxy=None): + auth_url = urljoin(keystone_server_url, "{0}/auth/projects".format(DEFAULT_KEYSTONE_API_VERSION)) + headers = {'X-Auth-Token': auth_token} + + resp = requests.get(auth_url, headers=headers, verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp + + +class OpenStackProjectScope(OpenStackScope): + """ + Container class for a single project's authorization scope + Embeds the auth token to be included with API requests, and refreshes + the token on expiry + """ + def __init__(self, auth_token, auth_scope, service_catalog): + super(OpenStackProjectScope, self).__init__(auth_token) + + # Store some identifiers for this project + self.project_name = auth_scope["project"].get("name") + self.domain_id = auth_scope["project"].get("domain", {}).get("id") + self.tenant_id = auth_scope["project"].get("id") + self.service_catalog = service_catalog + + @classmethod + def from_config(cls, init_config, instance_config, proxy_config=None): + keystone_server_url = init_config.get("keystone_server_url") + if not keystone_server_url: + raise IncompleteConfig() + + nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) + + auth_scope, auth_token, auth_resp = cls.get_auth_response_from_config(init_config, instance_config, proxy_config) try: service_catalog = KeystoneCatalog.from_auth_response( @@ -203,68 +398,6 @@ def from_config(cls, init_config, instance_config, proxy_config=None): return cls(auth_token, auth_scope, service_catalog) - @classmethod - def get_auth_scope(cls, instance_config): - """ - Parse authorization scope out of init_config - - To guarantee a uniquely identifiable scope, expects either: - {'project': {'name': 'my_project', 'domain': {'id': 'my_domain_id'}}} - OR - {'project': {'id': 'my_project_id'}} - """ - auth_scope = instance_config.get('auth_scope') - if not auth_scope or not auth_scope.get('project'): - raise IncompleteAuthScope() - - if auth_scope['project'].get('name'): - # We need to add a domain scope to avoid name clashes. Search for one. If not raise IncompleteConfig - if not auth_scope['project'].get('domain', {}).get('id'): - raise IncompleteAuthScope() - else: - # Assume a unique project id has been given - if not auth_scope['project'].get('id'): - raise IncompleteAuthScope() - - return auth_scope - - @classmethod - def get_user_identity(cls, instance_config): - """ - Parse user identity out of init_config - - To guarantee a uniquely identifiable user, expects - {"user": {"name": "my_username", "password": "my_password", - "domain": {"id": "my_domain_id"} - } - } - """ - user = instance_config.get('user') - if not user\ - or not user.get('name')\ - or not user.get('password')\ - or not user.get("domain")\ - or not user.get("domain").get("id"): - - raise IncompleteIdentity() - - identity = { - "methods": ['password'], - "password": {"user": user} - } - return identity - - @classmethod - def request_auth_token(cls, auth_scope, identity, keystone_server_url, ssl_verify, proxy=None): - payload = {"auth": {"scope": auth_scope, "identity": identity}} - auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) - headers = {'Content-Type': 'application/json'} - - resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) - resp.raise_for_status() - - return resp - class KeystoneCatalog(object): """ @@ -276,10 +409,36 @@ def __init__(self, nova_endpoint, neutron_endpoint): self.neutron_endpoint = neutron_endpoint @classmethod - def from_auth_response(cls, json_response, nova_api_version): + def from_auth_response(cls, json_response, nova_api_version, + keystone_server_url=None, auth_token=None, proxy=None): + try: + return cls( + nova_endpoint=cls.get_nova_endpoint(json_response, nova_api_version), + neutron_endpoint=cls.get_neutron_endpoint(json_response) + ) + except (MissingNeutronEndpoint, MissingNovaEndpoint) as e: + if keystone_server_url and auth_token: + return cls.from_unscoped_token(keystone_server_url, auth_token, + nova_api_version, proxy) + else: + raise e + + @classmethod + def from_unscoped_token(cls, keystone_server_url, auth_token, + nova_api_version, ssl_verify=True, proxy=None): + catalog_url = urljoin(keystone_server_url, "{0}/auth/catalog".format( + DEFAULT_KEYSTONE_API_VERSION)) + headers = {'X-Auth-Token': auth_token} + + resp = requests.get(catalog_url, headers=headers, verify=ssl_verify, + timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + json_resp = resp.json() + json_resp = {'token': json_resp} + return cls( - nova_endpoint=cls.get_nova_endpoint(json_response, nova_api_version), - neutron_endpoint=cls.get_neutron_endpoint(json_response) + nova_endpoint=cls.get_nova_endpoint(json_resp, nova_api_version), + neutron_endpoint=cls.get_neutron_endpoint(json_resp) ) @classmethod @@ -296,14 +455,14 @@ def get_neutron_endpoint(cls, json_resp): if entry['name'] == match or 'Networking' in entry['name']: valid_endpoints = {} for ep in entry['endpoints']: - interface = ep.get('interface','') + interface = ep.get('interface', '') if interface in ['public', 'internal']: valid_endpoints[interface] = ep['url'] if valid_endpoints: # Favor public endpoints over internal neutron_endpoint = valid_endpoints.get("public", - valid_endpoints.get("internal")) + valid_endpoints.get("internal")) break else: raise MissingNeutronEndpoint() @@ -326,14 +485,14 @@ def get_nova_endpoint(cls, json_resp, nova_api_version=None): # Collect any endpoints on the public or internal interface valid_endpoints = {} for ep in entry['endpoints']: - interface = ep.get('interface','') + interface = ep.get('interface', '') if interface in ['public', 'internal']: valid_endpoints[interface] = ep['url'] if valid_endpoints: # Favor public endpoints over internal nova_endpoint = valid_endpoints.get("public", - valid_endpoints.get("internal")) + valid_endpoints.get("internal")) return nova_endpoint else: raise MissingNovaEndpoint() @@ -341,7 +500,7 @@ def get_nova_endpoint(cls, json_resp, nova_api_version=None): class OpenStackCheck(AgentCheck): CACHE_TTL = { - "aggregates": 300, # seconds + "aggregates": 300, # seconds "physical_hosts": 300, "hypervisors": 300 } @@ -350,7 +509,6 @@ class OpenStackCheck(AgentCheck): "aggregates": "_last_aggregate_fetch_time", "physical_hosts": "_last_host_fetch_time", "hypervisors": "_last_hypervisor_fetch_time" - } HYPERVISOR_STATE_UP = 'up' @@ -365,18 +523,23 @@ class OpenStackCheck(AgentCheck): HYPERVISOR_SC = 'openstack.nova.hypervisor.up' NETWORK_SC = 'openstack.neutron.network.up' - - HYPERVISOR_CACHE_EXPIRY = 120 # seconds + HYPERVISOR_CACHE_EXPIRY = 120 # seconds def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) self._ssl_verify = init_config.get("ssl_verify", True) self.keystone_server_url = init_config.get("keystone_server_url") + self._hypervisor_name_cache = {} + if not self.keystone_server_url: raise IncompleteConfig() - ### Cache some things between runs for values that change rarely + # Current authentication scopes + self._parent_scope = None + self._current_scope = None + + # Cache some things between runs for values that change rarely self._aggregate_list = None # Mapping of check instances to associated OpenStack project scopes @@ -396,16 +559,31 @@ def __init__(self, name, init_config, agentConfig, instances=None): skip_proxy = not init_config.get('use_agent_proxy', True) self.proxy_config = None if skip_proxy else self.proxies + self.backoff = {} + random.seed() + + # ISO8601 date time: used to filter the call to get the list of nova servers + self.changes_since_time = {} + + # Ex: server_details_by_id = { + # UUID: {UUID: , etc} + # 1: {id: 1, name: hostA}, + # 2: {id: 2, name: hostB} + # } + self.server_details_by_id = {} + def _make_request_with_auth_fallback(self, url, headers=None, params=None): """ Generic request handler for OpenStack API requests Raises specialized Exceptions for commonly encountered error codes """ + self.log.debug("Request URL and Params: %s, %s", url, params) try: resp = requests.get(url, headers=headers, verify=self._ssl_verify, params=params, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) resp.raise_for_status() - except requests.exceptions.HTTPError: + except requests.exceptions.HTTPError as e: + self.log.debug("Error contacting openstack endpoint: %s", e) if resp.status_code == 401: self.log.info('Need to reauthenticate before next check') @@ -413,6 +591,8 @@ def _make_request_with_auth_fallback(self, url, headers=None, params=None): self.delete_current_scope() elif resp.status_code == 409: raise InstancePowerOffFailure() + elif resp.status_code == 404: + raise e else: raise @@ -426,12 +606,51 @@ def _instance_key(self, instance): return i_key def delete_current_scope(self): - scope_to_delete = self._current_scope + scope_to_delete = self._parent_scope if self._parent_scope else self._current_scope for i_key, scope in self.instance_map.items(): if scope is scope_to_delete: self.log.debug("Deleting current scope: %s", i_key) del self.instance_map[i_key] + def should_run(self, instance): + i_key = self._instance_key(instance) + if i_key not in self.backoff: + self.backoff[i_key] = { + 'retries': 0, + 'scheduled': time.time(), + } + + if self.backoff[i_key]['scheduled'] <= time.time(): + return True + + return False + + def do_backoff(self, instance): + i_key = self._instance_key(instance) + tracker = self.backoff[i_key] + + self.backoff[i_key]['retries'] += 1 + jitter = min(MAX_BACKOFF_SECS, BASE_BACKOFF_SECS * 2 ** self.backoff[i_key]['retries']) + + # let's add some jitter (half jitter) + backoff_interval = jitter / 2 + backoff_interval += random.randint(0, backoff_interval) + + tags = instance.get('tags', []) + hypervisor_name = self._hypervisor_name_cache.get(i_key) + if hypervisor_name: + tags.extend("hypervisor:{}".format(hypervisor_name)) + + self.gauge("openstack.backoff.interval", backoff_interval, tags=tags) + self.gauge("openstack.backoff.retries", self.backoff[i_key]['retries'], tags=tags) + + tracker['scheduled'] = time.time() + backoff_interval + + def reset_backoff(self, instance): + i_key = self._instance_key(instance) + self.backoff[i_key]['retries'] = 0 + self.backoff[i_key]['scheduled'] = time.time() + def get_scope_for_instance(self, instance): i_key = self._instance_key(instance) self.log.debug("Getting scope for instance %s", i_key) @@ -497,6 +716,8 @@ def get_all_network_ids(self): network_ids.append(network['id']) except Exception as e: self.warning('Unable to get the list of all network ids: {0}'.format(str(e))) + raise e + return network_ids def get_stats_for_single_network(self, network_id, tags): @@ -518,9 +739,8 @@ def get_stats_for_single_network(self, network_id, tags): self.service_check(self.NETWORK_SC, AgentCheck.OK, tags=service_check_tags) else: self.service_check(self.NETWORK_SC, AgentCheck.CRITICAL, tags=service_check_tags) - ### - ### Compute + # Compute def get_nova_endpoint(self, instance=None): if not instance: # Assume instance scope is populated on self @@ -539,7 +759,6 @@ def _parse_uptime_string(self, uptime): 'uptime_sec': uptime_sec } - def get_all_hypervisor_ids(self, filter_by_host=None): nova_version = self.init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) if nova_version >= V21_NOVA_API_VERSION: @@ -557,6 +776,7 @@ def get_all_hypervisor_ids(self, filter_by_host=None): hypervisor_ids.append(hv['id']) except Exception as e: self.warning('Unable to get the list of all hypervisors: {0}'.format(str(e))) + raise e return hypervisor_ids else: @@ -582,6 +802,7 @@ def get_all_aggregate_hypervisors(self): except Exception as e: self.warning('Unable to get the list of aggregates: {0}'.format(str(e))) + raise e return hypervisor_aggregate_map @@ -593,12 +814,13 @@ def get_uptime_for_single_hypervisor(self, hyp_id): uptime = resp['hypervisor']['uptime'] return self._parse_uptime_string(uptime) - def get_stats_for_single_hypervisor(self, hyp_id, host_tags=None, custom_tags=None): + def get_stats_for_single_hypervisor(self, hyp_id, instance, host_tags=None, custom_tags=None): url = '{0}/os-hypervisors/{1}'.format(self.get_nova_endpoint(), hyp_id) headers = {'X-Auth-Token': self.get_auth_token()} resp = self._make_request_with_auth_fallback(url, headers) hyp = resp['hypervisor'] host_tags = host_tags or [] + self._hypervisor_name_cache[self._instance_key(instance)] = hyp['hypervisor_hostname'] custom_tags = custom_tags or [] tags = [ 'hypervisor:{0}'.format(hyp['hypervisor_hostname']), @@ -648,53 +870,131 @@ def get_stats_for_single_hypervisor(self, hyp_id, host_tags=None, custom_tags=No for i, avg in enumerate([1, 5, 15]): self.gauge('openstack.nova.hypervisor_load.{0}'.format(avg), load_averages[i], tags=tags) - def get_all_server_ids(self, filter_by_host=None): + # Get all of the server IDs and their metadata and cache them + # After the first run, we will only get servers that have changed state since the last collection run + def get_all_servers(self, i_key, collect_all_tenants, filter_by_host=None): query_params = {} if filter_by_host: query_params["host"] = filter_by_host - url = '{0}/servers'.format(self.get_nova_endpoint()) + # If we don't have a timestamp for this instance, default to None + if i_key in self.changes_since_time: + query_params['changes-since'] = self.changes_since_time.get(i_key) + + url = '{0}/servers/detail'.format(self.get_nova_endpoint()) headers = {'X-Auth-Token': self.get_auth_token()} - server_ids = [] + if collect_all_tenants: + query_params["all_tenants"] = True + servers = [] + try: + # Get a list of active servers + query_params['status'] = 'ACTIVE' resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) + + # Don't collect Deleted or Shut off VMs on the first run: + if i_key in self.changes_since_time: + + # Get a list of deleted serversTimestamp used to filter the call to get the list + # Need to have admin perms for this to take affect + query_params['deleted'] = 'true' + del query_params['status'] + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + + servers.extend(resp['servers']) + query_params['deleted'] = 'false' + + # Get a list of shut off servers + query_params['status'] = 'SHUTOFF' + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) + + self.changes_since_time[i_key] = datetime.utcnow().isoformat() - server_ids = [s['id'] for s in resp['servers']] except Exception as e: self.warning('Unable to get the list of all servers: {0}'.format(str(e))) + raise e + + for server in servers: + new_server = {} + + new_server['server_id'] = server.get('id') + new_server['state'] = server.get('status') + new_server['server_name'] = server.get('name') + new_server['hypervisor_hostname'] = server.get('OS-EXT-SRV-ATTR:hypervisor_hostname') + new_server['tenant_id'] = server.get('tenant_id') + + # Update our cached list of servers + if new_server['server_id'] not in self.server_details_by_id and new_server['state'] in DIAGNOSTICABLE_STATES: + self.log.debug("Adding server to cache: %s", new_server) + # The project may not exist if the server isn't in an active state + # Query for the project name here to avoid 404s + new_server['project_name'] = self.get_project_name_from_id(new_server['tenant_id']) + self.server_details_by_id[new_server['server_id']] = new_server + elif new_server['server_id'] in self.server_details_by_id and new_server['state'] in REMOVED_STATES: + self.log.debug("Removing server from cache: %s", new_server) + try: + del self.server_details_by_id[new_server['server_id']] + except KeyError as e: + self.log.debug("Server: %s has already been removed from the cache", new_server['server_id']) + + return self.server_details_by_id + + def get_project_name_from_id(self, tenant_id): + url = "{0}/{1}/{2}/{3}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects", tenant_id) + self.log.debug("Project URL is %s", url) + headers = {'X-Auth-Token': self.get_auth_token()} + try: + r = self._make_request_with_auth_fallback(url, headers) + return r['project']['name'] - return server_ids + except Exception as e: + self.warning('Unable to get project name: {0}'.format(str(e))) + raise e - def get_stats_for_single_server(self, server_id, tags=None): + def get_stats_for_single_server(self, server_details, tags=None): def _is_valid_metric(label): return label in NOVA_SERVER_METRICS or any(seg in label for seg in NOVA_SERVER_INTERFACE_SEGMENTS) + server_id = server_details.get('server_id') + state = server_details.get('state') + server_name = server_details.get('server_name') + hypervisor_hostname = server_details.get('hypervisor_hostname') + tenant_id = server_details.get('tenant_id') + project_name = server_details.get('project_name') - url = '{0}/servers/{1}'.format(self.get_nova_endpoint(), server_id) + server_stats = {} headers = {'X-Auth-Token': self.get_auth_token()} - state = None + url = '{0}/servers/{1}/diagnostics'.format(self.get_nova_endpoint(), server_id) try: - server_details = self._make_request_with_auth_fallback(url, headers) - state = server_details['server'].get('status') + server_stats = self._make_request_with_auth_fallback(url, headers) + except InstancePowerOffFailure: # 409 response code came back fro nova + self.log.debug("Server %s is powered off and cannot be monitored", server_id) + del self.server_details_by_id[server_id] + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + self.log.debug("Server %s is not in an ACTIVE state and cannot be monitored, %s", server_id, e) + del self.server_details_by_id[server_id] + else: + self.log.debug("Received HTTP Error when reaching the nova endpoint") + raise e except Exception as e: - self.warning("Unable to collect details for server %s : %s" % (server_id, e)) - - server_stats = {} - if state and state.upper() in DIAGNOSTICABLE_STATES: - url = '{0}/servers/{1}/diagnostics'.format(self.get_nova_endpoint(), server_id) - try: - server_stats = self._make_request_with_auth_fallback(url, headers) - except InstancePowerOffFailure: - self.warning("Server %s is powered off and cannot be monitored" % server_id) - except Exception as e: - self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) + self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) + raise e if server_stats: tags = tags or [] + if project_name: + tags.append("project_name:{}".format(project_name)) + if hypervisor_hostname: + tags.append("hypervisor:{0}".format(hypervisor_hostname)) + if server_name: + tags.append("server_name:{0}".format(server_name)) for st in server_stats: if _is_valid_metric(st): - self.gauge("openstack.nova.server.{0}".format(st.replace("-", "_")), server_stats[st], tags=tags, hostname=server_id) - + self.gauge("openstack.nova.server.{0}".format( + st.replace("-", "_")), server_stats[st], tags=tags, hostname=server_id) def get_stats_for_single_project(self, project, tags=None): def _is_valid_metric(label): @@ -703,6 +1003,8 @@ def _is_valid_metric(label): if tags is None: tags = [] + server_tags = copy.deepcopy(tags) + project_name = project.get('name') self.log.debug("Collecting metrics for project. name: {0} id: {1}".format(project_name, project['id'])) @@ -711,24 +1013,23 @@ def _is_valid_metric(label): headers = {'X-Auth-Token': self.get_auth_token()} server_stats = self._make_request_with_auth_fallback(url, headers, params={"tenant_id": project['id']}) - tags.append('tenant_id:{0}'.format(project['id'])) + server_tags.append('tenant_id:{0}'.format(project['id'])) if project_name: - tags.append('project_name:{0}'.format(project['name'])) + server_tags.append('project_name:{0}'.format(project['name'])) for st in server_stats['limits']['absolute']: if _is_valid_metric(st): metric_key = PROJECT_METRICS[st] - self.gauge("openstack.nova.limits.{0}".format(metric_key), server_stats['limits']['absolute'][st], tags=tags) + self.gauge("openstack.nova.limits.{0}".format(metric_key), server_stats['limits']['absolute'][st], tags=server_tags) def get_stats_for_all_projects(self, projects, tags=None): if tags is None: tags = [] for project in projects: self.get_stats_for_single_project(project, tags) - ### - ### Cache util + # Cache util def _is_expired(self, entry): assert entry in ["aggregates", "physical_hosts", "hypervisors"] ttl = self.CACHE_TTL.get(entry) @@ -741,15 +1042,15 @@ def _get_and_set_aggregate_list(self): self._last_aggregate_fetch_time = datetime.now() return self._aggregate_list - ### - def _send_api_service_checks(self, instance_scope, tags): + def _send_api_service_checks(self, scope, tags): # Nova - headers = {"X-Auth-Token": instance_scope.auth_token} + headers = {"X-Auth-Token": scope.auth_token} try: - requests.get(instance_scope.service_catalog.nova_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + requests.get(scope.service_catalog.nova_endpoint, headers=headers, + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, + proxies=self.proxy_config) self.service_check(self.COMPUTE_API_SC, AgentCheck.OK, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + tags) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): @@ -758,8 +1059,9 @@ def _send_api_service_checks(self, instance_scope, tags): # Neutron try: - requests.get(instance_scope.service_catalog.neutron_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + requests.get(scope.service_catalog.neutron_endpoint, headers=headers, + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, + proxies=self.proxy_config) self.service_check(self.NETWORK_API_SC, AgentCheck.OK, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + tags) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): @@ -785,8 +1087,12 @@ def ensure_auth_scope(self, instance): # We're missing a project scope for this instance # Let's populate it now try: - instance_scope = OpenStackProjectScope.from_config(self.init_config, instance, self.proxy_config) - self.service_check(self.IDENTITY_API_SC, AgentCheck.OK, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + custom_tags) + if 'auth_scope' in instance: + instance_scope = OpenStackProjectScope.from_config(self.init_config, instance, self.proxy_config) + else: + instance_scope = OpenStackUnscoped.from_config(self.init_config, instance, self.proxy_config) + + self.service_check(self.IDENTITY_API_SC, AgentCheck.OK, tags=["server:%s" % self.init_config.get("keystone_server_url")] + custom_tags) except KeystoneUnreachable as e: self.warning("The agent could not contact the specified identity server at %s . Are you sure it is up at that address?" % self.init_config.get("keystone_server_url")) self.log.debug("Problem grabbing auth token: %s", e) @@ -796,8 +1102,9 @@ def ensure_auth_scope(self, instance): self.service_check(self.NETWORK_API_SC, AgentCheck.UNKNOWN, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + custom_tags) self.service_check(self.COMPUTE_API_SC, AgentCheck.UNKNOWN, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + custom_tags) - except MissingNovaEndpoint: + except MissingNovaEndpoint as e: self.warning("The agent could not find a compatible Nova endpoint in your service catalog!") + self.log.debug("Failed to get nova endpoint for response catalog: %s", e) self.service_check(self.COMPUTE_API_SC, AgentCheck.CRITICAL, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")] + custom_tags) except MissingNeutronEndpoint: @@ -809,60 +1116,86 @@ def ensure_auth_scope(self, instance): return instance_scope def check(self, instance): + # have we been backed off + if not self.should_run(instance): + self.log.info('Skipping run due to exponential backoff in effect') + return + custom_tags = instance.get("tags", []) if custom_tags is None: custom_tags = [] try: instance_scope = self.ensure_auth_scope(instance) - + split_hostname_on_first_period = instance.get('split_hostname_on_first_period', False) if not instance_scope: # Fast fail in the absence of an instance_scope return - self._send_api_service_checks(instance_scope, custom_tags) - # Store the scope on the object so we don't have to keep passing it around - self._current_scope = instance_scope + scope_map = {} + if isinstance(instance_scope, OpenStackProjectScope): + # Key could be anything but same format for consistency + scope_key = (instance_scope.project_name, instance_scope.tenant_id) + scope_map[scope_key] = instance_scope + self._parent_scope = None + elif isinstance(instance_scope, OpenStackUnscoped): + scope_map.update(instance_scope.project_scope_map) + self._parent_scope = instance_scope + + # The scopes we iterate over should all be OpenStackProjectScope + # instances + projects = [] + for _, scope in scope_map.iteritems(): + # Store the scope on the object so we don't have to keep passing it around + self._current_scope = scope - collect_all_projects = instance.get("collect_all_projects", False) + self._send_api_service_checks(scope, custom_tags) - self.log.debug("Running check with credentials: \n") - self.log.debug("Nova Url: %s", self.get_nova_endpoint()) - self.log.debug("Neutron Url: %s", self.get_neutron_endpoint()) + collect_all_projects = instance.get("collect_all_projects", False) + collect_all_tenants = instance.get('collect_all_tenants', False) - # Restrict monitoring to this (host, hypervisor, project) - # and it's guest servers + self.log.debug("Running check with credentials: \n") + self.log.debug("Nova Url: %s", self.get_nova_endpoint()) + self.log.debug("Neutron Url: %s", self.get_neutron_endpoint()) - hyp = self.get_local_hypervisor() + # Restrict monitoring to this (host, hypervisor, project) + # and it's guest servers - project = self.get_scoped_project(instance) - projects = [] + hyp = self.get_local_hypervisor(split_hostname_on_first_period) - if collect_all_projects: - projects = self.get_all_projects(instance) - else: - projects.append(project) + project = self.get_scoped_project(scope) - # Restrict monitoring to non-excluded servers - server_ids = self.get_servers_managed_by_hypervisor() + if collect_all_projects or project is None: + scope_projects = self.get_all_projects(scope) + if scope_projects: + projects.extend(scope_projects) + else: + projects.append(project) - host_tags = self._get_tags_for_host() + # Restrict monitoring to non-excluded servers + i_key = self._instance_key(instance) + servers = self.get_servers_managed_by_hypervisor(i_key, split_hostname_on_first_period, collect_all_tenants) - for sid in server_ids: - server_tags = ["nova_managed_server"] - if instance_scope.tenant_id: - server_tags.append("tenant_id:%s" % instance_scope.tenant_id) - if project and 'name' in project: - server_tags.append('project_name:{0}'.format(project['name'])) + host_tags = self._get_tags_for_host(split_hostname_on_first_period) - self.external_host_tags[sid] = host_tags - self.get_stats_for_single_server(sid, tags=server_tags + custom_tags) + # Deep copy the cache so we can remove things from the Original during the iteration + server_cache_copy = copy.deepcopy(self.server_details_by_id) - if hyp: - self.get_stats_for_single_hypervisor(hyp, host_tags=host_tags, custom_tags=custom_tags) - else: - self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname()) + for server in server_cache_copy: + server_tags = copy.deepcopy(custom_tags) + server_tags.append("nova_managed_server") + + if scope.tenant_id: + server_tags.append("tenant_id:%s" % scope.tenant_id) + + self.external_host_tags[server] = host_tags + self.get_stats_for_single_server(servers[server], tags=server_tags) - if projects and project: + if hyp: + self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags, custom_tags=custom_tags) + else: + self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname(split_hostname_on_first_period)) + + if projects: # Ensure projects list and scoped project exists self.get_stats_for_all_projects(projects, custom_tags) @@ -872,6 +1205,10 @@ def check(self, instance): if set_external_tags is not None: set_external_tags(self.get_external_host_tags()) + if projects: + # Ensure projects list and scoped project exists + self.get_stats_for_all_projects(projects) + except IncompleteConfig as e: if isinstance(e, IncompleteAuthScope): self.warning("""Please specify the auth scope via the `auth_scope` variable in your init_config.\n @@ -886,38 +1223,55 @@ def check(self, instance): "The user should look like: {'password': 'my_password', 'name': 'my_name', 'domain': {'id': 'my_domain_id'}}") else: self.warning("Configuration Incomplete! Check your openstack.yaml file") + except requests.exceptions.HTTPError as e: + if e.response.status_code >= 500: + # exponential backoff + self.do_backoff(instance) + self.warning("There were some problems reaching the nova API - applying exponential backoff") + else: + self.warning("Error reaching nova API") - #### Local Info accessors - def get_local_hypervisor(self): + return + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + # exponential backoff + self.do_backoff(instance) + self.warning("There were some problems reaching the nova API - applying exponential backoff") + return + + self.reset_backoff(instance) + + # Local Info accessors + def get_local_hypervisor(self, split_hostname_on_first_period): """ Returns the hypervisor running on this host, and assumes a 1-1 between host and hypervisor """ # Look up hypervisors available filtered by my hostname - host = self.get_my_hostname() + host = self.get_my_hostname(split_hostname_on_first_period) hyp = self.get_all_hypervisor_ids(filter_by_host=host) if hyp: return hyp[0] - def get_all_projects(self, instance): + def get_all_projects(self, scope): """ Returns all projects in the domain """ url = "{0}/{1}/{2}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects") - headers = {'X-Auth-Token': self.get_auth_token(instance)} + headers = {'X-Auth-Token': scope.auth_token} try: r = self._make_request_with_auth_fallback(url, headers) return r['projects'] except Exception as e: self.warning('Unable to get projects: {0}'.format(str(e))) + raise e return None - def get_scoped_project(self, instance): + def get_scoped_project(self, project_auth_scope): """ Returns the project that this instance of the check is scoped to """ - project_auth_scope = self.get_scope_for_instance(instance) + filter_params = {} url = "{0}/{1}/{2}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects") if project_auth_scope.tenant_id: @@ -934,7 +1288,7 @@ def get_scoped_project(self, instance): "domain_id": project_auth_scope.domain_id } - headers = {'X-Auth-Token': self.get_auth_token(instance)} + headers = {'X-Auth-Token': project_auth_scope.auth_token} try: project_details = self._make_request_with_auth_fallback(url, headers, params=filter_params) @@ -950,30 +1304,34 @@ def get_scoped_project(self, instance): except Exception as e: self.warning('Unable to get the project details: {0}'.format(str(e))) + raise e return None - def get_my_hostname(self): + def get_my_hostname(self, split_hostname_on_first_period): """ Returns a best guess for the hostname registered with OpenStack for this host """ - return self.init_config.get("os_host") or self.hostname - - def get_servers_managed_by_hypervisor(self): - server_ids = self.get_all_server_ids(filter_by_host=self.get_my_hostname()) + + hostname = self.init_config.get("os_host") or self.hostname + if split_hostname_on_first_period: + hostname = hostname.split('.')[0] + + return hostname + + def get_servers_managed_by_hypervisor(self, i_key, split_hostname_on_first_period, collect_all_tenants): + servers = self.get_all_servers(i_key, collect_all_tenants, filter_by_host=self.get_my_hostname(split_hostname_on_first_period)) if self.exclude_server_id_rules: # Filter out excluded servers - server_ids = [ - server_id for server_id in server_ids - if not any([re.match(exclude_id_rule, server_id) - for exclude_id_rule in self.exclude_server_id_rules]) - ] - - return server_ids - - def _get_tags_for_host(self): - hostname = self.get_my_hostname() - + for exclude_id_rule in self.exclude_server_id_rules: + for server_id in servers.keys(): + if re.match(exclude_id_rule, server_id): + del self.server_details_by_id[server_id] + + return self.server_details_by_id + + def _get_tags_for_host(self, split_hostname_on_first_period): + hostname = self.get_my_hostname(split_hostname_on_first_period) tags = [] if hostname in self._get_and_set_aggregate_list(): tags.append('aggregate:{0}'.format(self._aggregate_list[hostname]['aggregate'])) @@ -985,7 +1343,7 @@ def _get_tags_for_host(self): return tags - ### For attaching tags to hosts that are not the host running the agent + # For attaching tags to hosts that are not the host running the agent def get_external_host_tags(self): """ Returns a list of tags for every guest server that is detected by the OpenStack @@ -994,7 +1352,7 @@ def get_external_host_tags(self): """ self.log.debug("Collecting external_host_tags now") external_host_tags = [] - for k,v in self.external_host_tags.iteritems(): + for k, v in self.external_host_tags.iteritems(): external_host_tags.append((k, {SOURCE_TYPE: v})) self.log.debug("Sending external_host_tags: %s", external_host_tags) diff --git a/openstack/manifest.json b/openstack/manifest.json index ac70633527bba..671f6d2589153 100644 --- a/openstack/manifest.json +++ b/openstack/manifest.json @@ -1,8 +1,6 @@ { "maintainer": "help@datadoghq.com", - "manifest_version": "0.1.1", - "max_agent_version": "6.0.0", - "min_agent_version": "5.6.3", + "manifest_version": "1.0.0", "name": "openstack", "short_description": "Track hypervisor and VM-level resource usage, plus Neutron metrics.", "support": "core", @@ -11,7 +9,7 @@ "mac_os", "windows" ], - "version": "1.2.0", + "version": "1.3.0", "guid": "944452d0-208e-4d1c-8adb-495f517ce2c2", "public_title": "Datadog-OpenStack Integration", "categories":["cloud"], diff --git a/openstack/test/test_openstack.py b/openstack/test/test_openstack.py index 8e1bd1d154aad..6213fd8528f8d 100644 --- a/openstack/test/test_openstack.py +++ b/openstack/test/test_openstack.py @@ -8,11 +8,14 @@ from unittest import TestCase from mock import patch import re +import copy # 3p # project from tests.checks.common import AgentCheckTest, load_class +from datadog_checks.openstack import OpenStackCheck + from checks import AgentCheck @@ -20,6 +23,7 @@ OS_CHECK_MODULE = 'openstack.openstack' OpenStackProjectScope = load_class(OS_CHECK_MODULE, "OpenStackProjectScope") +OpenStackUnscoped = load_class(OS_CHECK_MODULE, "OpenStackUnscoped") KeystoneCatalog = load_class(OS_CHECK_MODULE, "KeystoneCatalog") IncompleteConfig = load_class(OS_CHECK_MODULE, "IncompleteConfig") IncompleteAuthScope = load_class(OS_CHECK_MODULE, "IncompleteAuthScope") @@ -165,32 +169,55 @@ def json(self): } MOCK_HTTP_RESPONSE = MockHTTPResponse(response_dict=EXAMPLE_AUTH_RESPONSE, headers={"X-Subject-Token": "fake_token"}) +EXAMPLE_PROJECTS_RESPONSE = { + "projects": [ + { + "domain_id": "1789d1", + "enabled": True, + "id": "263fd9", + "links": { + "self": "https://example.com/identity/v3/projects/263fd9" + }, + "name": "Test Group" + }, + ], + "links": { + "self": "https://example.com/identity/v3/auth/projects", + "previous": None, + "next": None, + } +} +MOCK_HTTP_PROJECTS_RESPONSE = MockHTTPResponse(response_dict=EXAMPLE_PROJECTS_RESPONSE, headers={}) + @attr(requires='openstack') class OSProjectScopeTest(TestCase): BAD_AUTH_SCOPES = [ - {"auth_scope": {}}, - {"auth_scope": {"project": {}}}, - {"auth_scope": {"project": {"id": ""}}}, - {"auth_scope": {"project": {"name": "test"}}}, - {"auth_scope": {"project": {"name": "test", "domain": {}}}}, - {"auth_scope": {"project": {"name": "test", "domain": {"id": ""}}}}, + {'auth_scope': {'project': {}}}, + {'auth_scope': {'project': {'id': ''}}}, + {'auth_scope': {'project': {'name': 'test'}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {}}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {'id': ''}}}}, + ] + + GOOD_UNSCOPED_AUTH_SCOPES = [ + {'auth_scope': {}}, # unscoped project ] GOOD_AUTH_SCOPES = [ - {"auth_scope": {"project": {"id": "test_project_id"}}}, - {"auth_scope": {"project": {"name": "test", "domain": {"id": "test_id"}}}}, + {'auth_scope': {'project': {'id': 'test_project_id'}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {'id': 'test_id'}}}}, ] BAD_USERS = [ - {"user": {}}, - {"user": {"name": ""}}, - {"user": {"name": "test_name", "password": ""}}, - {"user": {"name": "test_name", "password": "test_pass", "domain": {}}}, - {"user": {"name": "test_name", "password": "test_pass", "domain": {"id": ""}}}, + {'user': {}}, + {'user': {'name': ''}}, + {'user': {'name': 'test_name', 'password': ''}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {}}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': ''}}}, ] GOOD_USERS = [ - {"user": {"name": "test_name", "password": "test_pass", "domain": {"id": "test_id"}}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': 'test_id'}}}, ] def _test_bad_auth_scope(self, scope): @@ -200,11 +227,18 @@ def test_get_auth_scope(self): for scope in self.BAD_AUTH_SCOPES: self._test_bad_auth_scope(scope) + for scope in self.GOOD_UNSCOPED_AUTH_SCOPES: + auth_scope = OpenStackProjectScope.get_auth_scope(scope) + self.assertEqual(auth_scope, None) + auth_scope = OpenStackUnscoped.get_auth_scope(scope) + + self.assertEqual(auth_scope, None) + for scope in self.GOOD_AUTH_SCOPES: auth_scope = OpenStackProjectScope.get_auth_scope(scope) # Should pass through unchanged - self.assertEqual(auth_scope, scope.get("auth_scope")) + self.assertEqual(auth_scope, scope.get('auth_scope')) def _test_bad_user(self, user): self.assertRaises(IncompleteIdentity, OpenStackProjectScope.get_user_identity, user) @@ -216,70 +250,174 @@ def test_get_user_identity(self): for user in self.GOOD_USERS: parsed_user = OpenStackProjectScope.get_user_identity(user) - self.assertEqual(parsed_user, {"methods": ["password"], "password": user}) + self.assertEqual(parsed_user, {'methods': ['password'], 'password': user}) def test_from_config(self): - init_config = {"keystone_server_url": "http://10.0.2.15:5000", "nova_api_version": "v2"} + init_config = {'keystone_server_url': 'http://10.0.2.15:5000', 'nova_api_version': 'v2'} bad_instance_config = {} - good_instance_config = {"user": self.GOOD_USERS[0]["user"], "auth_scope": self.GOOD_AUTH_SCOPES[0]["auth_scope"]} + good_instance_config = {'user': self.GOOD_USERS[0]['user'], 'auth_scope': self.GOOD_AUTH_SCOPES[0]['auth_scope']} self.assertRaises(IncompleteConfig, OpenStackProjectScope.from_config, init_config, bad_instance_config) - with patch("datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token", return_value=MOCK_HTTP_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token', return_value=MOCK_HTTP_RESPONSE): append_config = good_instance_config.copy() - append_config["append_tenant_id"] = True + append_config['append_tenant_id'] = True scope = OpenStackProjectScope.from_config(init_config, append_config) self.assertTrue(isinstance(scope, OpenStackProjectScope)) - self.assertEqual(scope.auth_token, "fake_token") - self.assertEqual(scope.tenant_id, "test_project_id") + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(scope.tenant_id, 'test_project_id') # Test that append flag worked - self.assertEqual(scope.service_catalog.nova_endpoint, "http://10.0.2.15:8773/test_project_id") + self.assertEqual(scope.service_catalog.nova_endpoint, 'http://10.0.2.15:8773/test_project_id') + + def test_unscoped_from_config(self): + init_config = {'keystone_server_url': 'http://10.0.2.15:5000', 'nova_api_version': 'v2'} + + good_instance_config = {'user': self.GOOD_USERS[0]['user'], 'auth_scope': self.GOOD_UNSCOPED_AUTH_SCOPES[0]['auth_scope']} + + mock_http_response = copy.deepcopy(EXAMPLE_AUTH_RESPONSE) + mock_http_response['token'].pop('catalog') + mock_http_response['token'].pop('project') + mock_response = MockHTTPResponse(response_dict=mock_http_response, headers={'X-Subject-Token': 'fake_token'}) + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.request_auth_token', return_value=mock_response): + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.request_project_list', return_value=MOCK_HTTP_PROJECTS_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.get_token_for_project', return_value=MOCK_HTTP_RESPONSE): + append_config = good_instance_config.copy() + append_config['append_tenant_id'] = True + scope = OpenStackUnscoped.from_config(init_config, append_config) + self.assertTrue(isinstance(scope, OpenStackUnscoped)) + + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(len(scope.project_scope_map), 1) + for _, scope in scope.project_scope_map.iteritems(): + self.assertTrue(isinstance(scope, OpenStackProjectScope)) + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(scope.tenant_id, '263fd9') @attr(requires='openstack') class KeyStoneCatalogTest(TestCase): def test_get_nova_endpoint(self): - self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE), u"http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") - self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE, nova_api_version="v2"), u"http://10.0.2.15:8773/") + self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE), u'http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876') + self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE, nova_api_version='v2'), u'http://10.0.2.15:8773/') def test_get_neutron_endpoint(self): - self.assertEqual(KeystoneCatalog.get_neutron_endpoint(EXAMPLE_AUTH_RESPONSE), u"http://10.0.2.15:9292") + self.assertEqual(KeystoneCatalog.get_neutron_endpoint(EXAMPLE_AUTH_RESPONSE), u'http://10.0.2.15:9292') def test_from_auth_response(self): - catalog = KeystoneCatalog.from_auth_response(EXAMPLE_AUTH_RESPONSE, "v2.1") + catalog = KeystoneCatalog.from_auth_response(EXAMPLE_AUTH_RESPONSE, 'v2.1') self.assertTrue(isinstance(catalog, KeystoneCatalog)) - self.assertEqual(catalog.neutron_endpoint, u"http://10.0.2.15:9292") - self.assertEqual(catalog.nova_endpoint, u"http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") + self.assertEqual(catalog.neutron_endpoint, u'http://10.0.2.15:9292') + self.assertEqual(catalog.nova_endpoint, u'http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876') @attr(requires='openstack') class TestOpenstack(AgentCheckTest): - """Test for openstack integration.""" + '''Test for openstack integration.''' CHECK_NAME = OS_CHECK_NAME # Samples # .. server/network - ALL_IDS = ["server-1", "server-2", "other-1", "other-2"] - EXCLUDED_NETWORK_IDS = ["server-1", "other-.*"] - EXCLUDED_SERVER_IDS = ["server-2", "other-.*"] - FILTERED_NETWORK_ID = "server-2" - FILTERED_SERVER_ID = "server-1" + ALL_SERVER_DETAILS = { + "server-1":{"id":"server-1", "name":"server-name-1", "status":"ACTIVE"}, + "server-2":{"id":"server-2", "name":"server-name-2", "status":"ACTIVE"}, + "other-1":{"id":"other-1", "name":"server-name-other-1", "status":"ACTIVE"}, + "other-2":{"id":"other-2", "name":"server-name-other-2", "status":"ACTIVE"} + } + ALL_IDS = ['server-1', 'server-2', 'other-1', 'other-2'] + EXCLUDED_NETWORK_IDS = ['server-1', 'other-.*'] + EXCLUDED_SERVER_IDS = ['server-2', 'other-.*'] + FILTERED_NETWORK_ID = 'server-2' + FILTERED_SERVER_ID = 'server-1' + + + # Example response from - https://developer.openstack.org/api-ref/compute/#list-servers-detailed + # ID and server-name values have been changed for test readability + MOCK_NOVA_SERVERS = { + "servers": [ + { + "OS-DCF:diskConfig": "AUTO", + "OS-EXT-AZ:availability_zone": "nova", + "OS-EXT-SRV-ATTR:host": "compute", + "OS-EXT-SRV-ATTR:hostname": "server-1", + "OS-EXT-SRV-ATTR:hypervisor_hostname": "fake-mini", + "OS-EXT-SRV-ATTR:instance_name": "instance-00000001", + "OS-EXT-SRV-ATTR:kernel_id": "", + "OS-EXT-SRV-ATTR:launch_index": 0, + "OS-EXT-SRV-ATTR:ramdisk_id": "", + "OS-EXT-SRV-ATTR:reservation_id": "r-iffothgx", + "OS-EXT-SRV-ATTR:root_device_name": "/dev/sda", + "OS-EXT-SRV-ATTR:user_data": "IyEvYmluL2Jhc2gKL2Jpbi9zdQplY2hvICJJIGFtIGluIHlvdSEiCg==", + "OS-EXT-STS:power_state": 1, + "OS-EXT-STS:task_state": 'null', + "OS-EXT-STS:vm_state": "active", + "OS-SRV-USG:launched_at": "2017-02-14T19:24:43.891568", + "OS-SRV-USG:terminated_at": 'null', + "accessIPv4": "1.2.3.4", + "accessIPv6": "80fe::", + "hostId": "2091634baaccdc4c5a1d57069c833e402921df696b7f970791b12ec6", + "host_status": "UP", + "id": "server-1", + "metadata": { + "My Server Name": "Apache1" + }, + "name": "new-server-test", + "status": "DELETED", + "tags": [], + "tenant_id": "6f70656e737461636b20342065766572", + "updated": "2017-02-14T19:24:43Z", + "user_id": "fake" + }, + { + "OS-DCF:diskConfig": "AUTO", + "OS-EXT-AZ:availability_zone": "nova", + "OS-EXT-SRV-ATTR:host": "compute", + "OS-EXT-SRV-ATTR:hostname": "server-2", + "OS-EXT-SRV-ATTR:hypervisor_hostname": "fake-mini", + "OS-EXT-SRV-ATTR:instance_name": "instance-00000001", + "OS-EXT-SRV-ATTR:kernel_id": "", + "OS-EXT-SRV-ATTR:launch_index": 0, + "OS-EXT-SRV-ATTR:ramdisk_id": "", + "OS-EXT-SRV-ATTR:reservation_id": "r-iffothgx", + "OS-EXT-SRV-ATTR:root_device_name": "/dev/sda", + "OS-EXT-SRV-ATTR:user_data": "IyEvYmluL2Jhc2gKL2Jpbi9zdQplY2hvICJJIGFtIGluIHlvdSEiCg==", + "OS-EXT-STS:power_state": 1, + "OS-EXT-STS:task_state": 'null', + "OS-EXT-STS:vm_state": "active", + "OS-SRV-USG:launched_at": "2017-02-14T19:24:43.891568", + "OS-SRV-USG:terminated_at": 'null', + "accessIPv4": "1.2.3.4", + "accessIPv6": "80fe::", + "hostId": "2091634baaccdc4c5a1d57069c833e402921df696b7f970791b12ec6", + "host_status": "UP", + "id": "server_newly_added", + "metadata": { + "My Server Name": "Apache1" + }, + "name": "newly_added_server", + "status": "ACTIVE", + "tags": [], + "tenant_id": "6f70656e737461636b20342065766572", + "updated": "2017-02-14T19:24:43Z", + "user_id": "fake" + } + ] + } # .. config MOCK_CONFIG = { - "init_config": { - "keystone_server_url": "http://10.0.2.15:5000", - "ssl_verify": False, - "exclude_network_ids": EXCLUDED_NETWORK_IDS, + 'init_config': { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_network_ids': EXCLUDED_NETWORK_IDS, }, - "instances": [ + 'instances': [ { - "name": "test_name", "user": {"name": "test_name", "password": "test_pass", "domain": {"id": "test_id"}}, - "auth_scope": {"project": {"id": "test_project_id"}}, + 'name': 'test_name', 'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': 'test_id'}}, + 'auth_scope': {'project': {'id': 'test_project_id'}}, } ] } @@ -293,7 +431,7 @@ def test_ensure_auth_scope(self): self.assertRaises(KeyError, self.check.get_scope_for_instance, instance) - with patch("datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token", return_value=MOCK_HTTP_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token', return_value=MOCK_HTTP_RESPONSE): scope = self.check.ensure_auth_scope(instance) self.assertEqual(self.check.get_scope_for_instance(instance), scope) @@ -303,15 +441,18 @@ def test_ensure_auth_scope(self): # Sort the tags list for sc in self.service_checks: sc["tags"].sort() - tags = ['keystone_server:http://10.0.2.15:5000', 'optional:tag1'] - tags.sort() - - # Expect OK, since we've mocked an API response - self.assertServiceCheck(self.check.IDENTITY_API_SC, status=AgentCheck.OK, count=1, tags=tags) - - # Expect CRITICAL since URLs are non-existent - self.assertServiceCheck(self.check.COMPUTE_API_SC, status=AgentCheck.CRITICAL, count=1, tags=tags) - self.assertServiceCheck(self.check.NETWORK_API_SC, status=AgentCheck.CRITICAL, count=1, tags=tags) + tags = ['keystone_server:http://10.0.2.15:5000', 'optional:tag1'] + tags.sort() + + # Can only use assertServiceCheck if we ran the whole check with run_check + # We mock this API response, so return OK + if sc.get('check') == self.check.IDENTITY_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.OK) + # URLs are nonexistant, so return CRITICAL + elif sc.get('check') == self.check.COMPUTE_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.CRITICAL) + elif sc.get('check') == self.check.NETWORK_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.CRITICAL) self.check._current_scope = scope @@ -323,39 +464,45 @@ def test_parse_uptime_string(self): self.assertEqual(uptime_parsed.get('loads'), [0.04, 0.14, 0.19]) def test_cache_utils(self): - self.check.CACHE_TTL["aggregates"] = 1 - expected_aggregates = {"hyp_1": ["aggregate:staging", "availability_zone:test"]} + self.check.CACHE_TTL['aggregates'] = 1 + expected_aggregates = {'hyp_1': ['aggregate:staging', 'availability_zone:test']} - with patch("datadog_checks.openstack.OpenStackCheck.get_all_aggregate_hypervisors", return_value=expected_aggregates): + with patch('datadog_checks.openstack.OpenStackCheck.get_all_aggregate_hypervisors', return_value=expected_aggregates): self.assertEqual(self.check._get_and_set_aggregate_list(), expected_aggregates) sleep(1.5) - self.assertTrue(self.check._is_expired("aggregates")) + self.assertTrue(self.check._is_expired('aggregates')) - @patch("datadog_checks.openstack.OpenStackCheck.get_all_server_ids", return_value=ALL_IDS) + @patch('datadog_checks.openstack.OpenStackCheck.get_all_servers', return_value=ALL_SERVER_DETAILS) def test_server_exclusion(self, *args): """ - Exclude networks using regular expressions. + Exclude servers using regular expressions. """ - - self.check.exclude_server_id_rules = set([re.compile(rule) for rule in self.EXCLUDED_SERVER_IDS]) + openstackCheck = OpenStackCheck("test", { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_server_ids': self.EXCLUDED_SERVER_IDS + }, {}, instances=self.MOCK_CONFIG) # Retrieve servers - server_ids = self.check.get_servers_managed_by_hypervisor() - + openstackCheck.server_details_by_id = copy.deepcopy(self.ALL_SERVER_DETAILS) + i_key = "test_instance" + server_ids = openstackCheck.get_servers_managed_by_hypervisor(i_key, False, False) + # Assert # .. 1 out of 4 server ids filtered self.assertEqual(len(server_ids), 1) - self.assertEqual(server_ids[0], self.FILTERED_SERVER_ID) - # cleanup - self.check.exclude_server_id_rules = set([]) + # Ensure the server IDs filtered are the ones expected + for server_id in server_ids: + assert server_id in self.FILTERED_SERVER_ID + - @patch("datadog_checks.openstack.OpenStackCheck.get_all_network_ids", return_value=ALL_IDS) + @patch('datadog_checks.openstack.OpenStackCheck.get_all_network_ids', return_value=ALL_IDS) def test_network_exclusion(self, *args): """ Exclude networks using regular expressions. """ - with patch("datadog_checks.openstack.OpenStackCheck.get_stats_for_single_network") \ + with patch('datadog_checks.openstack.OpenStackCheck.get_stats_for_single_network') \ as mock_get_stats_single_network: self.check.exclude_network_id_rules = set([re.compile(rule) for rule in self.EXCLUDED_NETWORK_IDS]) @@ -372,3 +519,28 @@ def test_network_exclusion(self, *args): # cleanup self.check.exclude_network_id_rules = set([]) + + @patch('datadog_checks.openstack.OpenStackCheck._make_request_with_auth_fallback', return_value=MOCK_NOVA_SERVERS) + @patch('datadog_checks.openstack.OpenStackCheck.get_nova_endpoint', return_value="http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") + @patch('datadog_checks.openstack.OpenStackCheck.get_auth_token', return_value="test_auth_token") + @patch('datadog_checks.openstack.OpenStackCheck.get_project_name_from_id', return_value="tenant-1") + def test_cache_between_runs(self, *args): + """ + Ensure the cache contains the expected VMs between check runs. + """ + + openstackCheck = OpenStackCheck("test", { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_server_ids': self.EXCLUDED_SERVER_IDS + }, {}, instances=self.MOCK_CONFIG) + + # Start off with a list of servers + openstackCheck.server_details_by_id = copy.deepcopy(self.ALL_SERVER_DETAILS) + i_key = "test_instance" + + # Update the cached list of servers based on what the endpoint returns + cached_servers = openstackCheck.get_all_servers(i_key, False) + + assert 'server-1' not in cached_servers + assert 'server_newly_added' in cached_servers