From c604aaa319dac9f01a971fe88b048d48300c3106 Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Wed, 14 Mar 2018 16:26:26 +0100 Subject: [PATCH 01/19] [openstack] unscoped access to the resources No need to specify project per instance if you want them all. [openstack] collect service catalog for unscoped instances --- openstack/conf.yaml.example | 4 + .../datadog_checks/openstack/openstack.py | 174 ++++++++++++++++-- 2 files changed, 167 insertions(+), 11 deletions(-) diff --git a/openstack/conf.yaml.example b/openstack/conf.yaml.example index 5c6a89732fd4f..f2ce33a295f59 100644 --- a/openstack/conf.yaml.example +++ b/openstack/conf.yaml.example @@ -44,6 +44,10 @@ instances: # The authorization scope that will be used to request a token from Identity API v3 # The auth scope must resolve to 1 of the following structures: # {'project': {'name': 'my_project', 'domain': 'my_domain} OR {'project': {'id': 'my_project_id'}} + # + # ALPHA: DO NOT ADD an auth_scope if you wish to have unscoped access + # Simply add the relevant user credentials. 
+ # auth_scope: project: id: my_project_id diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 142d08a51c379..ee65fd8b120d9 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -119,6 +119,93 @@ class MissingNeutronEndpoint(MissingEndpoint): class KeystoneUnreachable(Exception): pass +class OpenStackUnscoped(object): + def __init__(self, auth_token, service_catalog): + self.auth_token = auth_token + self.service_catalog = service_catalog + + @classmethod + def from_config(cls, init_config, instance_config, proxy_config=None): + keystone_server_url = init_config.get("keystone_server_url") + if not keystone_server_url: + raise IncompleteConfig() + + ssl_verify = init_config.get("ssl_verify", False) + nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) + + identity = cls.get_user_identity(instance_config) + + exception_msg = None + try: + auth_resp = cls.request_auth_token(identity, keystone_server_url, ssl_verify, proxy_config) + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): + exception_msg = "Failed keystone auth with user:{user} domain:{domain} @{url}".format( + user=identity['password']['user']['name'], + domain=identity['password']['user']['domain']['id'], + url=keystone_server_url) + + if exception_msg: + try: + identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') + auth_resp = cls.request_auth_token(identity, keystone_server_url, ssl_verify, proxy_config) + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} @{url}: {ex}".format( + msg=exception_msg, + user=identity['password']['user']['name'], + domain = identity['password']['user']['domain']['name'], + 
url=keystone_server_url, + ex=e) + raise KeystoneUnreachable(exception_msg) + + auth_token = auth_resp.headers.get('X-Subject-Token') + + try: + service_catalog = KeystoneCatalog.from_auth_response( + auth_resp.json(), nova_api_version, keystone_server_url, auth_token, proxy_config + ) + except MissingNovaEndpoint: + service_catalog = KeystoneCatalog.from_auth_response( + auth_resp.json(), FALLBACK_NOVA_API_VERSION, keystone_server_url, auth_token, proxy_config + ) + + return cls(auth_token, service_catalog) + + @classmethod + def get_user_identity(cls, instance_config): + """ + Parse user identity out of init_config + + To guarantee a uniquely identifiable user, expects + {"user": {"name": "my_username", "password": "my_password", + "domain": {"id": "my_domain_id"} + } + } + """ + user = instance_config.get('user') + if not user\ + or not user.get('name')\ + or not user.get('password')\ + or not user.get("domain")\ + or not user.get("domain").get("id"): + + raise IncompleteIdentity() + + identity = { + "methods": ['password'], + "password": {"user": user} + } + return identity + + @classmethod + def request_auth_token(cls, identity, keystone_server_url, ssl_verify, proxy=None): + payload = {'auth': {'identity': identity, 'scope': 'unscoped'}} + auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) + headers = {'Content-Type': 'application/json'} + + resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp class OpenStackProjectScope(object): """ @@ -167,7 +254,7 @@ def from_config(cls, init_config, instance_config, proxy_config=None): auth_scope['project']['name'] = auth_scope['project'].pop('id') auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, 
requests.exceptions.ConnectionError) as e: - exception_msg = "{msg} and also failed keystone auth with identity:{id} domain:{domain} scope:{scope} @{url}: {ex}".format( + exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} scope:{scope} @{url}: {ex}".format( msg=exception_msg, user=identity['password']['user']['name'], domain = identity['password']['user']['domain']['name'], @@ -276,12 +363,39 @@ def __init__(self, nova_endpoint, neutron_endpoint): self.neutron_endpoint = neutron_endpoint @classmethod - def from_auth_response(cls, json_response, nova_api_version): + def from_auth_response(cls, json_response, nova_api_version, + keystone_server_url=None, auth_token=None, proxy=None): + try: + return cls( + nova_endpoint=cls.get_nova_endpoint(json_response, nova_api_version), + neutron_endpoint=cls.get_neutron_endpoint(json_response) + ) + except (MissingNeutronEndpoint, MissingNovaEndpoint) as e: + if keystone_server_url and auth_token: + return cls.from_unscoped_token(keystone_server_url, auth_token, + nova_api_version, proxy) + else: + raise e + + @classmethod + def from_unscoped_token(cls, keystone_server_url, auth_token, + nova_api_version, ssl_verify=True, proxy=None): + catalog_url = urljoin(keystone_server_url, "{0}/auth/catalog".format( + DEFAULT_KEYSTONE_API_VERSION)) + headers = {'X-Auth-Token': auth_token} + + resp = requests.post(catalog_url, headers=headers, verify=ssl_verify, + timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + json_resp = resp.json() + json_resp = {'token': json_resp} + return cls( - nova_endpoint=cls.get_nova_endpoint(json_response, nova_api_version), - neutron_endpoint=cls.get_neutron_endpoint(json_response) + nova_endpoint=cls.get_nova_endpoint(json_resp, nova_api_version), + neutron_endpoint=cls.get_neutron_endpoint(json_resp) ) + @classmethod def get_neutron_endpoint(cls, json_resp): """ @@ -663,6 +777,18 @@ def get_all_server_ids(self, filter_by_host=None): 
return server_ids + def get_project_name_from_id(self, tenant_id): + url = "{0}/{1}/{2}/{3}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects", tenant_id) + self.log.debug("Project URL is %s", url) + headers = {'X-Auth-Token': self.get_auth_token()} + self.log.debug("Headers %s", headers) + try: + r = self._make_request_with_auth_fallback(url, headers) + return r['project']['name'] + + except Exception as e: + self.warning('Unable to get project name: {0}'.format(str(e))) + def get_stats_for_single_server(self, server_id, tags=None): def _is_valid_metric(label): return label in NOVA_SERVER_METRICS or any(seg in label for seg in NOVA_SERVER_INTERFACE_SEGMENTS) @@ -670,9 +796,17 @@ def _is_valid_metric(label): url = '{0}/servers/{1}'.format(self.get_nova_endpoint(), server_id) headers = {'X-Auth-Token': self.get_auth_token()} state = None + server_name = None + hypervisor_hostname = None + tenant_id = None + project_name = None try: server_details = self._make_request_with_auth_fallback(url, headers) state = server_details['server'].get('status') + server_name = server_details['server'].get('name') + hypervisor_hostname = server_details['server'].get('OS-EXT-SRV-ATTR:hypervisor_hostname') + tenant_id = server_details['server'].get('tenant_id') + project_name = self.get_project_name_from_id(tenant_id) except Exception as e: self.warning("Unable to collect details for server %s : %s" % (server_id, e)) @@ -688,6 +822,12 @@ def _is_valid_metric(label): if server_stats: tags = tags or [] + if project_name: + tags.append("project:{}".format(project_name)) + if hypervisor_hostname: + tags.append("hypervisor:{0}".format(hypervisor_hostname)) + if server_name: + tags.append("server_name:{0}".format(server_name)) for st in server_stats: if _is_valid_metric(st): self.gauge("openstack.nova.server.{0}".format(st.replace("-", "_")), server_stats[st], tags=tags, hostname=server_id) @@ -774,7 +914,11 @@ def ensure_auth_scope(self, instance): # We're missing a 
project scope for this instance # Let's populate it now try: - instance_scope = OpenStackProjectScope.from_config(self.init_config, instance, self.proxy_config) + if 'auth_scope' in instance: + instance_scope = OpenStackProjectScope.from_config(self.init_config, instance, self.proxy_config) + else: + instance_scope = OpenStackUnscoped.from_config(self.init_config, instance, self.proxy_config) + self.service_check(self.IDENTITY_API_SC, AgentCheck.OK, tags=["server:%s" % self.init_config.get("keystone_server_url")]) except KeystoneUnreachable as e: self.warning("The agent could not contact the specified identity server at %s . Are you sure it is up at that address?" % self.init_config.get("keystone_server_url")) @@ -785,8 +929,9 @@ def ensure_auth_scope(self, instance): self.service_check(self.NETWORK_API_SC, AgentCheck.UNKNOWN, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) self.service_check(self.COMPUTE_API_SC, AgentCheck.UNKNOWN, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) - except MissingNovaEndpoint: + except MissingNovaEndpoint as e: self.warning("The agent could not find a compatible Nova endpoint in your service catalog!") + self.log.debug("Failed to get nova endpoint for response catalog: %s", e) self.service_check(self.COMPUTE_API_SC, AgentCheck.CRITICAL, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except MissingNeutronEndpoint: @@ -824,7 +969,7 @@ def check(self, instance): project = self.get_scoped_project(instance) projects = [] - if collect_all_projects: + if collect_all_projects or project is None: projects = self.get_all_projects(instance) else: projects.append(project) @@ -836,10 +981,13 @@ def check(self, instance): for sid in server_ids: server_tags = ["nova_managed_server"] - if instance_scope.tenant_id: - server_tags.append("tenant_id:%s" % instance_scope.tenant_id) - if project and 'name' in project: - 
server_tags.append('project_name:{0}'.format(project['name'])) + + # TODO: work on the tagging for unscoped scopes - currently disabled + if isinstance(instance_scope, OpenStackProjectScope): + if instance_scope.tenant_id: + server_tags.append("tenant_id:%s" % instance_scope.tenant_id) + if project and 'name' in project: + server_tags.append('project_name:{0}'.format(project['name'])) self.external_host_tags[sid] = host_tags self.get_stats_for_single_server(sid, tags=server_tags) @@ -905,6 +1053,10 @@ def get_scoped_project(self, instance): Returns the project that this instance of the check is scoped to """ project_auth_scope = self.get_scope_for_instance(instance) + + if isinstance(project_auth_scope, OpenStackUnscoped): + return None + filter_params = {} url = "{0}/{1}/{2}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects") if project_auth_scope.tenant_id: From b6adba8a3acb33e1d1e273b889062849865d4afb Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Mon, 19 Mar 2018 12:27:43 +0100 Subject: [PATCH 02/19] [openstack] handle multiple project scopes for an unscoped token [openstack] fix typo --- .../datadog_checks/openstack/openstack.py | 215 ++++++++++++------ 1 file changed, 143 insertions(+), 72 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index ee65fd8b120d9..c6ba0810667b2 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -120,9 +120,9 @@ class KeystoneUnreachable(Exception): pass class OpenStackUnscoped(object): - def __init__(self, auth_token, service_catalog): + def __init__(self, auth_token, project_scope_map): self.auth_token = auth_token - self.service_catalog = service_catalog + self.project_scope_map = project_scope_map @classmethod def from_config(cls, init_config, instance_config, proxy_config=None): @@ -148,7 +148,7 @@ def from_config(cls, init_config, instance_config, 
proxy_config=None): try: identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') auth_resp = cls.request_auth_token(identity, keystone_server_url, ssl_verify, proxy_config) - except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + except (requests.exceptions.httperror, requests.exceptions.timeout, requests.exceptions.connectionerror) as e: exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} @{url}: {ex}".format( msg=exception_msg, user=identity['password']['user']['name'], @@ -160,15 +160,43 @@ def from_config(cls, init_config, instance_config, proxy_config=None): auth_token = auth_resp.headers.get('X-Subject-Token') try: - service_catalog = KeystoneCatalog.from_auth_response( - auth_resp.json(), nova_api_version, keystone_server_url, auth_token, proxy_config - ) - except MissingNovaEndpoint: - service_catalog = KeystoneCatalog.from_auth_response( - auth_resp.json(), FALLBACK_NOVA_API_VERSION, keystone_server_url, auth_token, proxy_config - ) + project_resp = cls.request_project_list(auth_token, keystone_server_url, ssl_verify, proxy_config) + projects = project_resp.json().get('projects') + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + # TODO: Raise something + pass + + project_scope_map = {} + for project in projects: + try: + project_key = project['name'], project['id'] + token_resp = cls.get_token_for_project(auth_token, project, keystone_server_url, + ssl_verify, proxy_config) + project_auth_token = token_resp.headers.get('X-Subject-Token') + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + # TODO: Raise something + pass - return cls(auth_token, service_catalog) + try: + service_catalog = KeystoneCatalog.from_auth_response( + token_resp.json(), nova_api_version + ) + except MissingNovaEndpoint: + 
service_catalog = KeystoneCatalog.from_auth_response( + token_resp.json(), FALLBACK_NOVA_API_VERSION + ) + + project_auth_scope = { + 'project': { + 'name': project['name'], + 'id': project['id'], + 'domain': {} if project['domain_id'] is None else {'id': project['domain_id']} + } + } + project_scope = OpenStackProjectScope(project_auth_token, project_auth_scope, service_catalog) + project_scope_map[project_key] = project_scope + + return cls(auth_token, project_scope_map) @classmethod def get_user_identity(cls, instance_config): @@ -207,6 +235,34 @@ def request_auth_token(cls, identity, keystone_server_url, ssl_verify, proxy=Non return resp + @classmethod + def get_token_for_project(cls, auth_token, project, keystone_server_url, ssl_verify, proxy=None): + identity = { + "methods": ['token'], + "token": {"id": auth_token} + } + scope = { + 'project': {'id': project['id']} + } + payload = {'auth': {'identity': identity, 'scope': scope}} + headers = {'Content-Type': 'application/json'} + auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) + + resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp + + @classmethod + def request_project_list(cls, auth_token, keystone_server_url, ssl_verify, proxy=None): + auth_url = urljoin(keystone_server_url, "{0}/auth/projects".format(DEFAULT_KEYSTONE_API_VERSION)) + headers = {'X-Auth-Token': auth_token} + + resp = requests.get(auth_url, headers=headers, verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp + class OpenStackProjectScope(object): """ Container class for a single project's authorization scope @@ -384,8 +440,8 @@ def from_unscoped_token(cls, keystone_server_url, auth_token, DEFAULT_KEYSTONE_API_VERSION)) headers = {'X-Auth-Token': auth_token} - resp = requests.post(catalog_url, 
headers=headers, verify=ssl_verify, - timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp = requests.get(catalog_url, headers=headers, verify=ssl_verify, + timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) resp.raise_for_status() json_resp = resp.json() json_resp = {'token': json_resp} @@ -490,6 +546,10 @@ def __init__(self, name, init_config, agentConfig, instances=None): if not self.keystone_server_url: raise IncompleteConfig() + ### Current authentication scopes + self._parent_scope = None + self._current_scope = None + ### Cache some things between runs for values that change rarely self._aggregate_list = None @@ -540,7 +600,7 @@ def _instance_key(self, instance): return i_key def delete_current_scope(self): - scope_to_delete = self._current_scope + scope_to_delete = self._parent_scope if self._parent_scope else self._current_scope for i_key, scope in self.instance_map.items(): if scope is scope_to_delete: self.log.debug("Deleting current scope: %s", i_key) @@ -875,28 +935,28 @@ def _get_and_set_aggregate_list(self): return self._aggregate_list ### - def _send_api_service_checks(self, instance_scope): + def _send_api_service_checks(self, scope): # Nova - headers = {"X-Auth-Token": instance_scope.auth_token} + headers = {"X-Auth-Token": scope.auth_token} try: - requests.get(instance_scope.service_catalog.nova_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + requests.get(scope.service_catalog.nova_endpoint, headers=headers, + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) self.service_check(self.COMPUTE_API_SC, AgentCheck.OK, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): self.service_check(self.COMPUTE_API_SC, AgentCheck.CRITICAL, - 
tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) # Neutron try: - requests.get(instance_scope.service_catalog.neutron_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + requests.get(scope.service_catalog.neutron_endpoint, headers=headers, + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) self.service_check(self.NETWORK_API_SC, AgentCheck.OK, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): self.service_check(self.NETWORK_API_SC, AgentCheck.CRITICAL, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) def ensure_auth_scope(self, instance): """ @@ -931,7 +991,7 @@ def ensure_auth_scope(self, instance): except MissingNovaEndpoint as e: self.warning("The agent could not find a compatible Nova endpoint in your service catalog!") - self.log.debug("Failed to get nova endpoint for response catalog: %s", e) + self.log.debug("Failed to get nova endpoint for response catalog: %s", e) self.service_check(self.COMPUTE_API_SC, AgentCheck.CRITICAL, tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except MissingNeutronEndpoint: @@ -951,61 +1011,71 @@ def check(self, instance): # Fast fail in the absence of an instance_scope return - self._send_api_service_checks(instance_scope) - # Store the scope on the object so we don't have to keep passing it around - self._current_scope = instance_scope + scope_map = {} + if isinstance(instance_scope, OpenStackProjectScope): + # Key could be anything but same format for consistency + scope_key = 
(instance_scope.project_name, instance_scope.tenant_id) + scope_map[scope_key] = instance_scope + self._parent_scope = None + elif isinstance(instance_scope, OpenStackUnscoped): + scope_map.update(instance_scope.project_scope_map) + self._parent_scope = instance_scope + + # The scopes we iterate over should all be OpenStackProjectScope + # instances + projects = [] + for _, scope in scope_map.iteritems(): + # Store the scope on the object so we don't have to keep passing it around + self._current_scope = scope - collect_all_projects = instance.get("collect_all_projects", False) + self._send_api_service_checks(scope) - self.log.debug("Running check with credentials: \n") - self.log.debug("Nova Url: %s", self.get_nova_endpoint()) - self.log.debug("Neutron Url: %s", self.get_neutron_endpoint()) + collect_all_projects = instance.get("collect_all_projects", False) - # Restrict monitoring to this (host, hypervisor, project) - # and it's guest servers + self.log.debug("Running check with credentials: \n") + self.log.debug("Nova Url: %s", self.get_nova_endpoint()) + self.log.debug("Neutron Url: %s", self.get_neutron_endpoint()) - hyp = self.get_local_hypervisor() + # Restrict monitoring to this (host, hypervisor, project) + # and it's guest servers - project = self.get_scoped_project(instance) - projects = [] + hyp = self.get_local_hypervisor() - if collect_all_projects or project is None: - projects = self.get_all_projects(instance) - else: - projects.append(project) + project = self.get_scoped_project(scope) - # Restrict monitoring to non-excluded servers - server_ids = self.get_servers_managed_by_hypervisor() + if collect_all_projects or project is None: + scope_projects = self.get_all_projects(scope) + if scope_projects: + projects.extend(scope_projects) + else: + projects.append(project) + + # Restrict monitoring to non-excluded servers + server_ids = self.get_servers_managed_by_hypervisor() - host_tags = self._get_tags_for_host() + host_tags = 
self._get_tags_for_host() - for sid in server_ids: - server_tags = ["nova_managed_server"] + for sid in server_ids: + server_tags = ["nova_managed_server"] - # TODO: work on the tagging for unscoped scopes - currently disabled - if isinstance(instance_scope, OpenStackProjectScope): - if instance_scope.tenant_id: - server_tags.append("tenant_id:%s" % instance_scope.tenant_id) + if scope.tenant_id: + server_tags.append("tenant_id:%s" % scope.tenant_id) if project and 'name' in project: server_tags.append('project_name:{0}'.format(project['name'])) - self.external_host_tags[sid] = host_tags - self.get_stats_for_single_server(sid, tags=server_tags) + self.external_host_tags[sid] = host_tags + self.get_stats_for_single_server(sid, tags=server_tags) - if hyp: - self.get_stats_for_single_hypervisor(hyp, host_tags=host_tags) - else: - self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname()) - - if projects and project: - # Ensure projects list and scoped project exists - self.get_stats_for_all_projects(projects) + if hyp: + self.get_stats_for_single_hypervisor(hyp, host_tags=host_tags) + else: + self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname()) - # For now, monitor all networks - self.get_network_stats() + # For now, monitor all networks + self.get_network_stats() - if set_external_tags is not None: - set_external_tags(self.get_external_host_tags()) + if set_external_tags is not None: + set_external_tags(self.get_external_host_tags()) except IncompleteConfig as e: if isinstance(e, IncompleteAuthScope): @@ -1022,6 +1092,11 @@ def check(self, instance): else: self.warning("Configuration Incomplete! 
Check your openstack.yaml file") + if projects: + # Ensure projects list and scoped project exists + self.get_stats_for_all_projects(projects) + + #### Local Info accessors def get_local_hypervisor(self): """ @@ -1033,12 +1108,12 @@ def get_local_hypervisor(self): if hyp: return hyp[0] - def get_all_projects(self, instance): + def get_all_projects(self, scope): """ Returns all projects in the domain """ url = "{0}/{1}/{2}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects") - headers = {'X-Auth-Token': self.get_auth_token(instance)} + headers = {'X-Auth-Token': scope.auth_token} try: r = self._make_request_with_auth_fallback(url, headers) return r['projects'] @@ -1048,14 +1123,10 @@ def get_all_projects(self, instance): return None - def get_scoped_project(self, instance): + def get_scoped_project(self, project_auth_scope): """ Returns the project that this instance of the check is scoped to """ - project_auth_scope = self.get_scope_for_instance(instance) - - if isinstance(project_auth_scope, OpenStackUnscoped): - return None filter_params = {} url = "{0}/{1}/{2}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects") @@ -1073,7 +1144,7 @@ def get_scoped_project(self, instance): "domain_id": project_auth_scope.domain_id } - headers = {'X-Auth-Token': self.get_auth_token(instance)} + headers = {'X-Auth-Token': project_auth_scope.auth_token} try: project_details = self._make_request_with_auth_fallback(url, headers, params=filter_params) From d0c9ac3c31aed829eed174fe7412a874f03588be Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Mon, 19 Mar 2018 12:51:14 +0100 Subject: [PATCH 03/19] [openstack] updating check manifest, changelog, check version --- openstack/CHANGELOG.md | 10 +++++++++- openstack/datadog_checks/openstack/__init__.py | 2 +- openstack/manifest.json | 5 +---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/openstack/CHANGELOG.md b/openstack/CHANGELOG.md index 1645e7631ec14..c47588aeebb00 
100644 --- a/openstack/CHANGELOG.md +++ b/openstack/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG - openstack +1.2.0 / Unreleased +================== + +### Changes + +* [FEATURE] adding support for unscoped access. See [#1276][] + 1.1.0 / 2018-02-28 ================== @@ -35,4 +42,5 @@ [#878]: https://github.com/DataDog/integrations-core/issues/878 [#1119]: https://github.com/DataDog/integrations-core/issues/1119 [#1123]: https://github.com/DataDog/integrations-core/issues/1123 -[#1126]: https://github.com/DataDog/integrations-core/issues/1126 \ No newline at end of file +[#1126]: https://github.com/DataDog/integrations-core/issues/1126 +[#1276]: https://github.com/DataDog/integrations-core/issues/1276 \ No newline at end of file diff --git a/openstack/datadog_checks/openstack/__init__.py b/openstack/datadog_checks/openstack/__init__.py index 502e02e6dfa60..116e8e09f3351 100644 --- a/openstack/datadog_checks/openstack/__init__.py +++ b/openstack/datadog_checks/openstack/__init__.py @@ -2,6 +2,6 @@ OpenStackCheck = openstack.OpenStackCheck -__version__ = "1.1.0" +__version__ = "1.2.0" __all__ = ['openstack'] diff --git a/openstack/manifest.json b/openstack/manifest.json index b29539f3690d3..7ff59f4e1a3f7 100644 --- a/openstack/manifest.json +++ b/openstack/manifest.json @@ -1,8 +1,6 @@ { "maintainer": "help@datadoghq.com", - "manifest_version": "0.1.1", - "max_agent_version": "6.0.0", - "min_agent_version": "5.6.3", + "manifest_version": "1.0.0", "name": "openstack", "short_description": "Track hypervisor and VM-level resource usage, plus Neutron metrics.", "support": "core", @@ -11,7 +9,6 @@ "mac_os", "windows" ], - "version": "1.1.0", "guid": "944452d0-208e-4d1c-8adb-495f517ce2c2", "public_title": "Datadog-OpenStack Integration", "categories":["cloud"], From 0eca646e0364d041636aebb6bb3d07b30027a919 Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Mon, 19 Mar 2018 12:54:24 +0100 Subject: [PATCH 04/19] [github] updating template --- .github/PULL_REQUEST_TEMPLATE.md 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index baf86dbb0cadc..11b8a79a9f3eb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,7 +16,7 @@ is available in our contribution guidelines. ### Versioning -- [ ] Bumped the check version in `manifest.json` +- [ ] Bumped the check version in `manifest.json` (or update manifest version to 1.0.0 and drop check version) - [ ] Bumped the check version in `datadog_checks/{integration}/__init__.py` - [ ] Updated `CHANGELOG.md`. Please use `Unreleased` as the date in the title for the new section. From b7f9d065d4069545f03c4bfd63c06ad8c6ac0125 Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Mon, 19 Mar 2018 14:36:50 +0100 Subject: [PATCH 05/19] [openstack] scope class refactor [openstack][test] adding some unscoped authentication test cases [openstack] tag is project_name - renaming --- .../datadog_checks/openstack/openstack.py | 328 ++++++++---------- openstack/test/test_openstack.py | 158 ++++++--- 2 files changed, 256 insertions(+), 230 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index c6ba0810667b2..bbc1e7b2b7e2f 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -26,7 +26,7 @@ FALLBACK_NOVA_API_VERSION = 'v2' DEFAULT_NEUTRON_API_VERSION = 'v2.0' -DEFAULT_API_REQUEST_TIMEOUT = 5 # seconds +DEFAULT_API_REQUEST_TIMEOUT = 5 # seconds NOVA_HYPERVISOR_METRICS = [ 'current_workload', @@ -92,86 +92,191 @@ 'ACTIVE' ] +UNSCOPED_AUTH = 'unscoped' + + class OpenStackAuthFailure(Exception): pass + class InstancePowerOffFailure(Exception): pass + class IncompleteConfig(Exception): pass + class IncompleteAuthScope(IncompleteConfig): pass + class IncompleteIdentity(IncompleteConfig): pass + class MissingEndpoint(Exception): pass + class 
MissingNovaEndpoint(MissingEndpoint): pass + class MissingNeutronEndpoint(MissingEndpoint): pass + class KeystoneUnreachable(Exception): pass -class OpenStackUnscoped(object): - def __init__(self, auth_token, project_scope_map): + +class OpenStackScope(object): + def __init__(self, auth_token): self.auth_token = auth_token - self.project_scope_map = project_scope_map @classmethod - def from_config(cls, init_config, instance_config, proxy_config=None): + def request_auth_token(cls, auth_scope, identity, keystone_server_url, ssl_verify, proxy=None): + if not auth_scope: + auth_scope = UNSCOPED_AUTH + + payload = {'auth': {'identity': identity, 'scope': auth_scope}} + auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) + headers = {'Content-Type': 'application/json'} + + resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) + resp.raise_for_status() + + return resp + + @classmethod + def get_user_identity(cls, instance_config): + """ + Parse user identity out of init_config + + To guarantee a uniquely identifiable user, expects + {"user": {"name": "my_username", "password": "my_password", + "domain": {"id": "my_domain_id"} + } + } + """ + user = instance_config.get('user') + if not user\ + or not user.get('name')\ + or not user.get('password')\ + or not user.get("domain")\ + or not user.get("domain").get("id"): + + raise IncompleteIdentity() + + identity = { + "methods": ['password'], + "password": {"user": user} + } + return identity + + @classmethod + def get_auth_scope(cls, instance_config): + """ + Parse authorization scope out of init_config + + To guarantee a uniquely identifiable scope, expects either: + {'project': {'name': 'my_project', 'domain': {'id': 'my_domain_id'}}} + OR + {'project': {'id': 'my_project_id'}} + """ + auth_scope = instance_config.get('auth_scope') + if not auth_scope: + return None + + if not 
auth_scope.get('project'): + raise IncompleteAuthScope() + + if auth_scope['project'].get('name'): + # We need to add a domain scope to avoid name clashes. Search for one. If not raise IncompleteConfig + if not auth_scope['project'].get('domain', {}).get('id'): + raise IncompleteAuthScope() + else: + # Assume a unique project id has been given + if not auth_scope['project'].get('id'): + raise IncompleteAuthScope() + + return auth_scope + + @classmethod + def get_auth_response_from_config(cls, init_config, instance_config, proxy_config=None): keystone_server_url = init_config.get("keystone_server_url") if not keystone_server_url: raise IncompleteConfig() ssl_verify = init_config.get("ssl_verify", False) - nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) + auth_scope = cls.get_auth_scope(instance_config) identity = cls.get_user_identity(instance_config) exception_msg = None try: - auth_resp = cls.request_auth_token(identity, keystone_server_url, ssl_verify, proxy_config) + auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): - exception_msg = "Failed keystone auth with user:{user} domain:{domain} @{url}".format( + exception_msg = "Failed keystone auth with user:{user} domain:{domain} scope:{scope} @{url}".format( user=identity['password']['user']['name'], domain=identity['password']['user']['domain']['id'], + scope=auth_scope, url=keystone_server_url) if exception_msg: try: identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') - auth_resp = cls.request_auth_token(identity, keystone_server_url, ssl_verify, proxy_config) - except (requests.exceptions.httperror, requests.exceptions.timeout, requests.exceptions.connectionerror) as e: - exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} @{url}: 
{ex}".format( + + if auth_scope and 'domain' in auth_scope['project']: + auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') + elif auth_scope: + auth_scope['project']['name'] = auth_scope['project'].pop('id') + auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) + except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} scope:{scope} @{url}: {ex}".format( msg=exception_msg, user=identity['password']['user']['name'], - domain = identity['password']['user']['domain']['name'], + domain=identity['password']['user']['domain']['name'], + scope=auth_scope, url=keystone_server_url, ex=e) raise KeystoneUnreachable(exception_msg) - auth_token = auth_resp.headers.get('X-Subject-Token') + return auth_scope, auth_resp.headers.get('X-Subject-Token'), auth_resp + + +class OpenStackUnscoped(OpenStackScope): + def __init__(self, auth_token, project_scope_map): + super(OpenStackUnscoped, self).__init__(auth_token) + self.project_scope_map = project_scope_map + + @classmethod + def from_config(cls, init_config, instance_config, proxy_config=None): + keystone_server_url = init_config.get("keystone_server_url") + if not keystone_server_url: + raise IncompleteConfig() + + ssl_verify = init_config.get("ssl_verify", False) + nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) + + _, auth_token, _ = cls.get_auth_response_from_config(init_config, instance_config, proxy_config) try: project_resp = cls.request_project_list(auth_token, keystone_server_url, ssl_verify, proxy_config) projects = project_resp.json().get('projects') except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: - # TODO: Raise something - pass + exception_msg = "unable to retrieve project list from keystone auth 
with identity: @{url}: {ex}".format( + url=keystone_server_url, + ex=e) + raise KeystoneUnreachable(exception_msg) project_scope_map = {} for project in projects: try: project_key = project['name'], project['id'] token_resp = cls.get_token_for_project(auth_token, project, keystone_server_url, - ssl_verify, proxy_config) + ssl_verify, proxy_config) project_auth_token = token_resp.headers.get('X-Subject-Token') except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: # TODO: Raise something @@ -198,43 +303,6 @@ def from_config(cls, init_config, instance_config, proxy_config=None): return cls(auth_token, project_scope_map) - @classmethod - def get_user_identity(cls, instance_config): - """ - Parse user identity out of init_config - - To guarantee a uniquely identifiable user, expects - {"user": {"name": "my_username", "password": "my_password", - "domain": {"id": "my_domain_id"} - } - } - """ - user = instance_config.get('user') - if not user\ - or not user.get('name')\ - or not user.get('password')\ - or not user.get("domain")\ - or not user.get("domain").get("id"): - - raise IncompleteIdentity() - - identity = { - "methods": ['password'], - "password": {"user": user} - } - return identity - - @classmethod - def request_auth_token(cls, identity, keystone_server_url, ssl_verify, proxy=None): - payload = {'auth': {'identity': identity, 'scope': 'unscoped'}} - auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) - headers = {'Content-Type': 'application/json'} - - resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) - resp.raise_for_status() - - return resp - @classmethod def get_token_for_project(cls, auth_token, project, keystone_server_url, ssl_verify, proxy=None): identity = { @@ -263,14 +331,15 @@ def request_project_list(cls, auth_token, keystone_server_url, ssl_verify, proxy 
return resp -class OpenStackProjectScope(object): + +class OpenStackProjectScope(OpenStackScope): """ Container class for a single project's authorization scope Embeds the auth token to be included with API requests, and refreshes the token on expiry """ def __init__(self, auth_token, auth_scope, service_catalog): - self.auth_token = auth_token + super(OpenStackProjectScope, self).__init__(auth_token) # Store some identifiers for this project self.project_name = auth_scope["project"].get("name") @@ -284,42 +353,9 @@ def from_config(cls, init_config, instance_config, proxy_config=None): if not keystone_server_url: raise IncompleteConfig() - ssl_verify = init_config.get("ssl_verify", False) nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) - auth_scope = cls.get_auth_scope(instance_config) - identity = cls.get_user_identity(instance_config) - - exception_msg = None - try: - auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) - except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): - exception_msg = "Failed keystone auth with user:{user} domain:{domain} scope:{scope} @{url}".format( - user=identity['password']['user']['name'], - domain=identity['password']['user']['domain']['id'], - scope=auth_scope, - url=keystone_server_url) - - if exception_msg: - try: - identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') - - if 'domain' in auth_scope['project']: - auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') - else: - auth_scope['project']['name'] = auth_scope['project'].pop('id') - auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) - except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: - exception_msg = "{msg} and also failed keystone auth with 
identity:{user} domain:{domain} scope:{scope} @{url}: {ex}".format( - msg=exception_msg, - user=identity['password']['user']['name'], - domain = identity['password']['user']['domain']['name'], - scope=auth_scope, - url=keystone_server_url, - ex=e) - raise KeystoneUnreachable(exception_msg) - - auth_token = auth_resp.headers.get('X-Subject-Token') + auth_scope, auth_token, auth_resp = cls.get_auth_response_from_config(init_config, instance_config, proxy_config) try: service_catalog = KeystoneCatalog.from_auth_response( @@ -346,68 +382,6 @@ def from_config(cls, init_config, instance_config, proxy_config=None): return cls(auth_token, auth_scope, service_catalog) - @classmethod - def get_auth_scope(cls, instance_config): - """ - Parse authorization scope out of init_config - - To guarantee a uniquely identifiable scope, expects either: - {'project': {'name': 'my_project', 'domain': {'id': 'my_domain_id'}}} - OR - {'project': {'id': 'my_project_id'}} - """ - auth_scope = instance_config.get('auth_scope') - if not auth_scope or not auth_scope.get('project'): - raise IncompleteAuthScope() - - if auth_scope['project'].get('name'): - # We need to add a domain scope to avoid name clashes. Search for one. 
If not raise IncompleteConfig - if not auth_scope['project'].get('domain', {}).get('id'): - raise IncompleteAuthScope() - else: - # Assume a unique project id has been given - if not auth_scope['project'].get('id'): - raise IncompleteAuthScope() - - return auth_scope - - @classmethod - def get_user_identity(cls, instance_config): - """ - Parse user identity out of init_config - - To guarantee a uniquely identifiable user, expects - {"user": {"name": "my_username", "password": "my_password", - "domain": {"id": "my_domain_id"} - } - } - """ - user = instance_config.get('user') - if not user\ - or not user.get('name')\ - or not user.get('password')\ - or not user.get("domain")\ - or not user.get("domain").get("id"): - - raise IncompleteIdentity() - - identity = { - "methods": ['password'], - "password": {"user": user} - } - return identity - - @classmethod - def request_auth_token(cls, auth_scope, identity, keystone_server_url, ssl_verify, proxy=None): - payload = {"auth": {"scope": auth_scope, "identity": identity}} - auth_url = urljoin(keystone_server_url, "{0}/auth/tokens".format(DEFAULT_KEYSTONE_API_VERSION)) - headers = {'Content-Type': 'application/json'} - - resp = requests.post(auth_url, headers=headers, data=json.dumps(payload), verify=ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=proxy) - resp.raise_for_status() - - return resp - class KeystoneCatalog(object): """ @@ -451,7 +425,6 @@ def from_unscoped_token(cls, keystone_server_url, auth_token, neutron_endpoint=cls.get_neutron_endpoint(json_resp) ) - @classmethod def get_neutron_endpoint(cls, json_resp): """ @@ -466,14 +439,14 @@ def get_neutron_endpoint(cls, json_resp): if entry['name'] == match or 'Networking' in entry['name']: valid_endpoints = {} for ep in entry['endpoints']: - interface = ep.get('interface','') + interface = ep.get('interface', '') if interface in ['public', 'internal']: valid_endpoints[interface] = ep['url'] if valid_endpoints: # Favor public endpoints over internal 
neutron_endpoint = valid_endpoints.get("public", - valid_endpoints.get("internal")) + valid_endpoints.get("internal")) break else: raise MissingNeutronEndpoint() @@ -496,14 +469,14 @@ def get_nova_endpoint(cls, json_resp, nova_api_version=None): # Collect any endpoints on the public or internal interface valid_endpoints = {} for ep in entry['endpoints']: - interface = ep.get('interface','') + interface = ep.get('interface', '') if interface in ['public', 'internal']: valid_endpoints[interface] = ep['url'] if valid_endpoints: # Favor public endpoints over internal nova_endpoint = valid_endpoints.get("public", - valid_endpoints.get("internal")) + valid_endpoints.get("internal")) return nova_endpoint else: raise MissingNovaEndpoint() @@ -511,7 +484,7 @@ def get_nova_endpoint(cls, json_resp, nova_api_version=None): class OpenStackCheck(AgentCheck): CACHE_TTL = { - "aggregates": 300, # seconds + "aggregates": 300, # seconds "physical_hosts": 300, "hypervisors": 300 } @@ -535,8 +508,7 @@ class OpenStackCheck(AgentCheck): HYPERVISOR_SC = 'openstack.nova.hypervisor.up' NETWORK_SC = 'openstack.neutron.network.up' - - HYPERVISOR_CACHE_EXPIRY = 120 # seconds + HYPERVISOR_CACHE_EXPIRY = 120 # seconds def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) @@ -546,11 +518,11 @@ def __init__(self, name, init_config, agentConfig, instances=None): if not self.keystone_server_url: raise IncompleteConfig() - ### Current authentication scopes + # Current authentication scopes self._parent_scope = None self._current_scope = None - ### Cache some things between runs for values that change rarely + # Cache some things between runs for values that change rarely self._aggregate_list = None # Mapping of check instances to associated OpenStack project scopes @@ -692,9 +664,8 @@ def get_stats_for_single_network(self, network_id): self.service_check(self.NETWORK_SC, AgentCheck.OK, tags=service_check_tags) else: 
self.service_check(self.NETWORK_SC, AgentCheck.CRITICAL, tags=service_check_tags) - ### - ### Compute + # Compute def get_nova_endpoint(self, instance=None): if not instance: # Assume instance scope is populated on self @@ -713,7 +684,6 @@ def _parse_uptime_string(self, uptime): 'uptime_sec': uptime_sec } - def get_all_hypervisor_ids(self, filter_by_host=None): nova_version = self.init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) if nova_version >= V21_NOVA_API_VERSION: @@ -883,15 +853,15 @@ def _is_valid_metric(label): if server_stats: tags = tags or [] if project_name: - tags.append("project:{}".format(project_name)) + tags.append("project_name:{}".format(project_name)) if hypervisor_hostname: tags.append("hypervisor:{0}".format(hypervisor_hostname)) if server_name: tags.append("server_name:{0}".format(server_name)) for st in server_stats: if _is_valid_metric(st): - self.gauge("openstack.nova.server.{0}".format(st.replace("-", "_")), server_stats[st], tags=tags, hostname=server_id) - + self.gauge("openstack.nova.server.{0}".format( + st.replace("-", "_")), server_stats[st], tags=tags, hostname=server_id) def get_stats_for_single_project(self, project): def _is_valid_metric(label): @@ -918,9 +888,8 @@ def _is_valid_metric(label): def get_stats_for_all_projects(self, projects): for project in projects: self.get_stats_for_single_project(project) - ### - ### Cache util + # Cache util def _is_expired(self, entry): assert entry in ["aggregates", "physical_hosts", "hypervisors"] ttl = self.CACHE_TTL.get(entry) @@ -941,22 +910,24 @@ def _send_api_service_checks(self, scope): try: requests.get(scope.service_catalog.nova_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, + proxies=self.proxy_config) self.service_check(self.COMPUTE_API_SC, AgentCheck.OK, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + 
tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): self.service_check(self.COMPUTE_API_SC, AgentCheck.CRITICAL, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) # Neutron try: requests.get(scope.service_catalog.neutron_endpoint, headers=headers, - verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) + verify=self._ssl_verify, timeout=DEFAULT_API_REQUEST_TIMEOUT, + proxies=self.proxy_config) self.service_check(self.NETWORK_API_SC, AgentCheck.OK, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError): self.service_check(self.NETWORK_API_SC, AgentCheck.CRITICAL, - tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) + tags=["keystone_server:%s" % self.init_config.get("keystone_server_url")]) def ensure_auth_scope(self, instance): """ @@ -1096,8 +1067,7 @@ def check(self, instance): # Ensure projects list and scoped project exists self.get_stats_for_all_projects(projects) - - #### Local Info accessors + # Local Info accessors def get_local_hypervisor(self): """ Returns the hypervisor running on this host, and assumes a 1-1 between host and hypervisor @@ -1195,7 +1165,7 @@ def _get_tags_for_host(self): return tags - ### For attaching tags to hosts that are not the host running the agent + # For attaching tags to hosts that are not the host running the agent def get_external_host_tags(self): """ Returns a list of tags for every guest server that is detected by the OpenStack @@ -1204,7 +1174,7 @@ def get_external_host_tags(self): """ self.log.debug("Collecting external_host_tags now") 
external_host_tags = [] - for k,v in self.external_host_tags.iteritems(): + for k, v in self.external_host_tags.iteritems(): external_host_tags.append((k, {SOURCE_TYPE: v})) self.log.debug("Sending external_host_tags: %s", external_host_tags) diff --git a/openstack/test/test_openstack.py b/openstack/test/test_openstack.py index af2f4a7697bb2..87a7cff5853ba 100644 --- a/openstack/test/test_openstack.py +++ b/openstack/test/test_openstack.py @@ -8,6 +8,7 @@ from unittest import TestCase from mock import patch import re +import copy # 3p @@ -20,6 +21,7 @@ OS_CHECK_MODULE = 'openstack.openstack' OpenStackProjectScope = load_class(OS_CHECK_MODULE, "OpenStackProjectScope") +OpenStackUnscoped = load_class(OS_CHECK_MODULE, "OpenStackUnscoped") KeystoneCatalog = load_class(OS_CHECK_MODULE, "KeystoneCatalog") IncompleteConfig = load_class(OS_CHECK_MODULE, "IncompleteConfig") IncompleteAuthScope = load_class(OS_CHECK_MODULE, "IncompleteAuthScope") @@ -165,32 +167,55 @@ def json(self): } MOCK_HTTP_RESPONSE = MockHTTPResponse(response_dict=EXAMPLE_AUTH_RESPONSE, headers={"X-Subject-Token": "fake_token"}) +EXAMPLE_PROJECTS_RESPONSE = { + "projects": [ + { + "domain_id": "1789d1", + "enabled": True, + "id": "263fd9", + "links": { + "self": "https://example.com/identity/v3/projects/263fd9" + }, + "name": "Test Group" + }, + ], + "links": { + "self": "https://example.com/identity/v3/auth/projects", + "previous": None, + "next": None, + } +} +MOCK_HTTP_PROJECTS_RESPONSE = MockHTTPResponse(response_dict=EXAMPLE_PROJECTS_RESPONSE, headers={}) + @attr(requires='openstack') class OSProjectScopeTest(TestCase): BAD_AUTH_SCOPES = [ - {"auth_scope": {}}, - {"auth_scope": {"project": {}}}, - {"auth_scope": {"project": {"id": ""}}}, - {"auth_scope": {"project": {"name": "test"}}}, - {"auth_scope": {"project": {"name": "test", "domain": {}}}}, - {"auth_scope": {"project": {"name": "test", "domain": {"id": ""}}}}, + {'auth_scope': {'project': {}}}, + {'auth_scope': {'project': {'id': ''}}}, + 
{'auth_scope': {'project': {'name': 'test'}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {}}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {'id': ''}}}}, + ] + + GOOD_UNSCOPED_AUTH_SCOPES = [ + {'auth_scope': {}}, # unscoped project ] GOOD_AUTH_SCOPES = [ - {"auth_scope": {"project": {"id": "test_project_id"}}}, - {"auth_scope": {"project": {"name": "test", "domain": {"id": "test_id"}}}}, + {'auth_scope': {'project': {'id': 'test_project_id'}}}, + {'auth_scope': {'project': {'name': 'test', 'domain': {'id': 'test_id'}}}}, ] BAD_USERS = [ - {"user": {}}, - {"user": {"name": ""}}, - {"user": {"name": "test_name", "password": ""}}, - {"user": {"name": "test_name", "password": "test_pass", "domain": {}}}, - {"user": {"name": "test_name", "password": "test_pass", "domain": {"id": ""}}}, + {'user': {}}, + {'user': {'name': ''}}, + {'user': {'name': 'test_name', 'password': ''}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {}}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': ''}}}, ] GOOD_USERS = [ - {"user": {"name": "test_name", "password": "test_pass", "domain": {"id": "test_id"}}}, + {'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': 'test_id'}}}, ] def _test_bad_auth_scope(self, scope): @@ -200,11 +225,18 @@ def test_get_auth_scope(self): for scope in self.BAD_AUTH_SCOPES: self._test_bad_auth_scope(scope) + for scope in self.GOOD_UNSCOPED_AUTH_SCOPES: + auth_scope = OpenStackProjectScope.get_auth_scope(scope) + self.assertEqual(auth_scope, None) + auth_scope = OpenStackUnscoped.get_auth_scope(scope) + + self.assertEqual(auth_scope, None) + for scope in self.GOOD_AUTH_SCOPES: auth_scope = OpenStackProjectScope.get_auth_scope(scope) # Should pass through unchanged - self.assertEqual(auth_scope, scope.get("auth_scope")) + self.assertEqual(auth_scope, scope.get('auth_scope')) def _test_bad_user(self, user): self.assertRaises(IncompleteIdentity, 
OpenStackProjectScope.get_user_identity, user) @@ -216,70 +248,94 @@ def test_get_user_identity(self): for user in self.GOOD_USERS: parsed_user = OpenStackProjectScope.get_user_identity(user) - self.assertEqual(parsed_user, {"methods": ["password"], "password": user}) + self.assertEqual(parsed_user, {'methods': ['password'], 'password': user}) def test_from_config(self): - init_config = {"keystone_server_url": "http://10.0.2.15:5000", "nova_api_version": "v2"} + init_config = {'keystone_server_url': 'http://10.0.2.15:5000', 'nova_api_version': 'v2'} bad_instance_config = {} - good_instance_config = {"user": self.GOOD_USERS[0]["user"], "auth_scope": self.GOOD_AUTH_SCOPES[0]["auth_scope"]} + good_instance_config = {'user': self.GOOD_USERS[0]['user'], 'auth_scope': self.GOOD_AUTH_SCOPES[0]['auth_scope']} self.assertRaises(IncompleteConfig, OpenStackProjectScope.from_config, init_config, bad_instance_config) - with patch("datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token", return_value=MOCK_HTTP_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token', return_value=MOCK_HTTP_RESPONSE): append_config = good_instance_config.copy() - append_config["append_tenant_id"] = True + append_config['append_tenant_id'] = True scope = OpenStackProjectScope.from_config(init_config, append_config) self.assertTrue(isinstance(scope, OpenStackProjectScope)) - self.assertEqual(scope.auth_token, "fake_token") - self.assertEqual(scope.tenant_id, "test_project_id") + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(scope.tenant_id, 'test_project_id') # Test that append flag worked - self.assertEqual(scope.service_catalog.nova_endpoint, "http://10.0.2.15:8773/test_project_id") + self.assertEqual(scope.service_catalog.nova_endpoint, 'http://10.0.2.15:8773/test_project_id') + + def test_unscoped_from_config(self): + init_config = {'keystone_server_url': 'http://10.0.2.15:5000', 'nova_api_version': 'v2'} + 
+ good_instance_config = {'user': self.GOOD_USERS[0]['user'], 'auth_scope': self.GOOD_UNSCOPED_AUTH_SCOPES[0]['auth_scope']} + + mock_http_response = copy.deepcopy(EXAMPLE_AUTH_RESPONSE) + mock_http_response['token'].pop('catalog') + mock_http_response['token'].pop('project') + mock_response = MockHTTPResponse(response_dict=mock_http_response, headers={'X-Subject-Token': 'fake_token'}) + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.request_auth_token', return_value=mock_response): + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.request_project_list', return_value=MOCK_HTTP_PROJECTS_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackUnscoped.get_token_for_project', return_value=MOCK_HTTP_RESPONSE): + append_config = good_instance_config.copy() + append_config['append_tenant_id'] = True + scope = OpenStackUnscoped.from_config(init_config, append_config) + self.assertTrue(isinstance(scope, OpenStackUnscoped)) + + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(len(scope.project_scope_map), 1) + for _, scope in scope.project_scope_map.iteritems(): + self.assertTrue(isinstance(scope, OpenStackProjectScope)) + self.assertEqual(scope.auth_token, 'fake_token') + self.assertEqual(scope.tenant_id, '263fd9') @attr(requires='openstack') class KeyStoneCatalogTest(TestCase): def test_get_nova_endpoint(self): - self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE), u"http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") - self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE, nova_api_version="v2"), u"http://10.0.2.15:8773/") + self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE), u'http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876') + self.assertEqual(KeystoneCatalog.get_nova_endpoint(EXAMPLE_AUTH_RESPONSE, nova_api_version='v2'), u'http://10.0.2.15:8773/') def test_get_neutron_endpoint(self): - 
self.assertEqual(KeystoneCatalog.get_neutron_endpoint(EXAMPLE_AUTH_RESPONSE), u"http://10.0.2.15:9292") + self.assertEqual(KeystoneCatalog.get_neutron_endpoint(EXAMPLE_AUTH_RESPONSE), u'http://10.0.2.15:9292') def test_from_auth_response(self): - catalog = KeystoneCatalog.from_auth_response(EXAMPLE_AUTH_RESPONSE, "v2.1") + catalog = KeystoneCatalog.from_auth_response(EXAMPLE_AUTH_RESPONSE, 'v2.1') self.assertTrue(isinstance(catalog, KeystoneCatalog)) - self.assertEqual(catalog.neutron_endpoint, u"http://10.0.2.15:9292") - self.assertEqual(catalog.nova_endpoint, u"http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") + self.assertEqual(catalog.neutron_endpoint, u'http://10.0.2.15:9292') + self.assertEqual(catalog.nova_endpoint, u'http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876') @attr(requires='openstack') class TestOpenstack(AgentCheckTest): - """Test for openstack integration.""" + '''Test for openstack integration.''' CHECK_NAME = OS_CHECK_NAME # Samples # .. server/network - ALL_IDS = ["server-1", "server-2", "other-1", "other-2"] - EXCLUDED_NETWORK_IDS = ["server-1", "other-.*"] - EXCLUDED_SERVER_IDS = ["server-2", "other-.*"] - FILTERED_NETWORK_ID = "server-2" - FILTERED_SERVER_ID = "server-1" + ALL_IDS = ['server-1', 'server-2', 'other-1', 'other-2'] + EXCLUDED_NETWORK_IDS = ['server-1', 'other-.*'] + EXCLUDED_SERVER_IDS = ['server-2', 'other-.*'] + FILTERED_NETWORK_ID = 'server-2' + FILTERED_SERVER_ID = 'server-1' # .. 
config MOCK_CONFIG = { - "init_config": { - "keystone_server_url": "http://10.0.2.15:5000", - "ssl_verify": False, - "exclude_network_ids": EXCLUDED_NETWORK_IDS, + 'init_config': { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_network_ids': EXCLUDED_NETWORK_IDS, }, - "instances": [ + 'instances': [ { - "name": "test_name", "user": {"name": "test_name", "password": "test_pass", "domain": {"id": "test_id"}}, - "auth_scope": {"project": {"id": "test_project_id"}}, + 'name': 'test_name', 'user': {'name': 'test_name', 'password': 'test_pass', 'domain': {'id': 'test_id'}}, + 'auth_scope': {'project': {'id': 'test_project_id'}}, } ] } @@ -288,11 +344,11 @@ def setUp(self): self.load_check(self.MOCK_CONFIG, self.DEFAULT_AGENT_CONFIG) def test_ensure_auth_scope(self): - instance = self.MOCK_CONFIG["instances"][0] + instance = self.MOCK_CONFIG['instances'][0] self.assertRaises(KeyError, self.check.get_scope_for_instance, instance) - with patch("datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token", return_value=MOCK_HTTP_RESPONSE): + with patch('datadog_checks.openstack.openstack.OpenStackProjectScope.request_auth_token', return_value=MOCK_HTTP_RESPONSE): scope = self.check.ensure_auth_scope(instance) self.assertEqual(self.check.get_scope_for_instance(instance), scope) @@ -317,15 +373,15 @@ def test_parse_uptime_string(self): self.assertEqual(uptime_parsed.get('loads'), [0.04, 0.14, 0.19]) def test_cache_utils(self): - self.check.CACHE_TTL["aggregates"] = 1 - expected_aggregates = {"hyp_1": ["aggregate:staging", "availability_zone:test"]} + self.check.CACHE_TTL['aggregates'] = 1 + expected_aggregates = {'hyp_1': ['aggregate:staging', 'availability_zone:test']} - with patch("datadog_checks.openstack.OpenStackCheck.get_all_aggregate_hypervisors", return_value=expected_aggregates): + with patch('datadog_checks.openstack.OpenStackCheck.get_all_aggregate_hypervisors', return_value=expected_aggregates): 
self.assertEqual(self.check._get_and_set_aggregate_list(), expected_aggregates) sleep(1.5) - self.assertTrue(self.check._is_expired("aggregates")) + self.assertTrue(self.check._is_expired('aggregates')) - @patch("datadog_checks.openstack.OpenStackCheck.get_all_server_ids", return_value=ALL_IDS) + @patch('datadog_checks.openstack.OpenStackCheck.get_all_server_ids', return_value=ALL_IDS) def test_server_exclusion(self, *args): """ Exclude networks using regular expressions. @@ -344,12 +400,12 @@ def test_server_exclusion(self, *args): # cleanup self.check.exclude_server_id_rules = set([]) - @patch("datadog_checks.openstack.OpenStackCheck.get_all_network_ids", return_value=ALL_IDS) + @patch('datadog_checks.openstack.OpenStackCheck.get_all_network_ids', return_value=ALL_IDS) def test_network_exclusion(self, *args): """ Exclude networks using regular expressions. """ - with patch("datadog_checks.openstack.OpenStackCheck.get_stats_for_single_network") \ + with patch('datadog_checks.openstack.OpenStackCheck.get_stats_for_single_network') \ as mock_get_stats_single_network: self.check.exclude_network_id_rules = set([re.compile(rule) for rule in self.EXCLUDED_NETWORK_IDS]) From 207b106bcb06ab2a82168ea839953ec8d1d3ada7 Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Wed, 21 Mar 2018 13:12:43 -0400 Subject: [PATCH 06/19] [openstack] adding exponential backoff to prevent killing nova API --- .../datadog_checks/openstack/openstack.py | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index bbc1e7b2b7e2f..9ab8d7d6f34b5 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -4,6 +4,8 @@ from datetime import datetime, timedelta from urlparse import urljoin import re +import time +import random import requests import simplejson as json @@ -94,6 +96,9 @@ UNSCOPED_AUTH = 'unscoped' 
+BASE_BACKOFF_SECS = 15 +MAX_BACKOFF_SECS = 300 + class OpenStackAuthFailure(Exception): pass @@ -542,6 +547,9 @@ def __init__(self, name, init_config, agentConfig, instances=None): skip_proxy = not init_config.get('use_agent_proxy', True) self.proxy_config = None if skip_proxy else self.proxies + self.backoff = {} + random.seed() + def _make_request_with_auth_fallback(self, url, headers=None, params=None): """ Generic request handler for OpenStack API requests @@ -578,6 +586,38 @@ def delete_current_scope(self): self.log.debug("Deleting current scope: %s", i_key) del self.instance_map[i_key] + def should_run(self, instance): + i_key = self._instance_key(instance) + if i_key not in self.backoff: + self.backoff[i_key] = { + 'retries': 0, + 'scheduled': time.time(), + } + + if self.backoff[i_key]['scheduled'] <= time.time(): + return True + + return False + + def do_backoff(self, instance): + i_key = self._instance_key(instance) + tracker = self.backoff[i_key] + + self.backoff[i_key]['retries'] += 1 + jitter = min(MAX_BACKOFF_SECS, BASE_BACKOFF_SECS * 2 ** self.backoff[i_key]['retries']) + + # let's add some jitter (half jitter) + backoff_interval = jitter / 2 + backoff_interval += random.randint(0, backoff_interval) + + tracker['scheduled'] = time.time() + backoff_interval + + def reset_backoff(self, instance): + i_key = self._instance_key(instance) + tracker = self.backoff[i_key] + self.backoff[i_key]['retries'] = 0 + self.backoff[i_key]['scheduled'] = time.time() + def get_scope_for_instance(self, instance): i_key = self._instance_key(instance) self.log.debug("Getting scope for instance %s", i_key) @@ -643,6 +683,8 @@ def get_all_network_ids(self): network_ids.append(network['id']) except Exception as e: self.warning('Unable to get the list of all network ids: {0}'.format(str(e))) + raise e + return network_ids def get_stats_for_single_network(self, network_id): @@ -701,6 +743,7 @@ def get_all_hypervisor_ids(self, filter_by_host=None): 
hypervisor_ids.append(hv['id']) except Exception as e: self.warning('Unable to get the list of all hypervisors: {0}'.format(str(e))) + raise e return hypervisor_ids else: @@ -726,6 +769,7 @@ def get_all_aggregate_hypervisors(self): except Exception as e: self.warning('Unable to get the list of aggregates: {0}'.format(str(e))) + raise e return hypervisor_aggregate_map @@ -804,6 +848,7 @@ def get_all_server_ids(self, filter_by_host=None): server_ids = [s['id'] for s in resp['servers']] except Exception as e: self.warning('Unable to get the list of all servers: {0}'.format(str(e))) + raise e return server_ids @@ -818,6 +863,7 @@ def get_project_name_from_id(self, tenant_id): except Exception as e: self.warning('Unable to get project name: {0}'.format(str(e))) + raise e def get_stats_for_single_server(self, server_id, tags=None): def _is_valid_metric(label): @@ -839,6 +885,7 @@ def _is_valid_metric(label): project_name = self.get_project_name_from_id(tenant_id) except Exception as e: self.warning("Unable to collect details for server %s : %s" % (server_id, e)) + raise e server_stats = {} if state and state.upper() in DIAGNOSTICABLE_STATES: @@ -849,6 +896,7 @@ def _is_valid_metric(label): self.warning("Server %s is powered off and cannot be monitored" % server_id) except Exception as e: self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) + raise e if server_stats: tags = tags or [] @@ -975,6 +1023,11 @@ def ensure_auth_scope(self, instance): def check(self, instance): + # have we been backed off + if not self.should_run(instance): + self.log.info('Skipping run due to exponential backoff in effect') + return + try: instance_scope = self.ensure_auth_scope(instance) @@ -1048,6 +1101,10 @@ def check(self, instance): if set_external_tags is not None: set_external_tags(self.get_external_host_tags()) + if projects: + # Ensure projects list and scoped project exists + self.get_stats_for_all_projects(projects) + except IncompleteConfig as e: if isinstance(e, 
IncompleteAuthScope): self.warning("""Please specify the auth scope via the `auth_scope` variable in your init_config.\n @@ -1062,10 +1119,22 @@ def check(self, instance): "The user should look like: {'password': 'my_password', 'name': 'my_name', 'domain': {'id': 'my_domain_id'}}") else: self.warning("Configuration Incomplete! Check your openstack.yaml file") + except requests.exceptions.HTTPError as e: + if e.response.status_code >= 500: + # exponential backoff + self.do_backoff(instance) + self.warning("There were some problems reaching the nova API - applying exponential backoff") + else: + self.warning("Error reaching nova API: %s", e) + + return + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + # exponential backoff + self.do_backoff(instance) + self.warning("There were some problems reaching the nova API - applying exponential backoff") + return - if projects: - # Ensure projects list and scoped project exists - self.get_stats_for_all_projects(projects) + self.reset_backoff(instance) # Local Info accessors def get_local_hypervisor(self): @@ -1090,6 +1159,7 @@ def get_all_projects(self, scope): except Exception as e: self.warning('Unable to get projects: {0}'.format(str(e))) + raise e return None @@ -1130,6 +1200,7 @@ def get_scoped_project(self, project_auth_scope): except Exception as e: self.warning('Unable to get the project details: {0}'.format(str(e))) + raise e return None From 213c8aec330e009d299ed70060e1be9f0bf0af5d Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 27 Mar 2018 19:01:16 -0400 Subject: [PATCH 07/19] add metric about backoff (#1309) * add metric about backoff * Small fixes * Remove yolo mode * Cleaner if statement * Fix test on argument position * fix' --- openstack/datadog_checks/openstack/openstack.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 
9ab8d7d6f34b5..b71657c2fd772 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -520,6 +520,8 @@ def __init__(self, name, init_config, agentConfig, instances=None): self._ssl_verify = init_config.get("ssl_verify", True) self.keystone_server_url = init_config.get("keystone_server_url") + self._hypervisor_name_cache = {} + if not self.keystone_server_url: raise IncompleteConfig() @@ -610,6 +612,13 @@ def do_backoff(self, instance): backoff_interval = jitter / 2 backoff_interval += random.randint(0, backoff_interval) + tags = instance.get('tags', []) + hypervisor_name = self._hypervisor_name_cache.get(i_key) + if hypervisor_name: + tags.extend("hypervisor:{}".format(hypervisor_name)) + + self.gauge("openstack.exponential_backoff_time", jitter, tags=tags) + tracker['scheduled'] = time.time() + backoff_interval def reset_backoff(self, instance): @@ -781,12 +790,13 @@ def get_uptime_for_single_hypervisor(self, hyp_id): uptime = resp['hypervisor']['uptime'] return self._parse_uptime_string(uptime) - def get_stats_for_single_hypervisor(self, hyp_id, host_tags=None): + def get_stats_for_single_hypervisor(self, hyp_id, instance, host_tags=None): url = '{0}/os-hypervisors/{1}'.format(self.get_nova_endpoint(), hyp_id) headers = {'X-Auth-Token': self.get_auth_token()} resp = self._make_request_with_auth_fallback(url, headers) hyp = resp['hypervisor'] host_tags = host_tags or [] + self._hypervisor_name_cache[self._instance_key(instance)] = hyp['hypervisor_hostname'] tags = [ 'hypervisor:{0}'.format(hyp['hypervisor_hostname']), 'hypervisor_id:{0}'.format(hyp['id']), @@ -1091,7 +1101,7 @@ def check(self, instance): self.get_stats_for_single_server(sid, tags=server_tags) if hyp: - self.get_stats_for_single_hypervisor(hyp, host_tags=host_tags) + self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags) else: self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname()) From 
5406ba7646017e777958d7d97dc128a9ee831b0f Mon Sep 17 00:00:00 2001 From: Jaime Fullaondo Date: Wed, 28 Mar 2018 11:35:15 -0400 Subject: [PATCH 08/19] [openstack] add support for server tags, modify backoff reporting slightly [openstack] watch out for references - copy list --- openstack/conf.yaml.example | 8 ++++++++ openstack/datadog_checks/openstack/openstack.py | 12 +++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/openstack/conf.yaml.example b/openstack/conf.yaml.example index f2ce33a295f59..8876bbc954f1b 100644 --- a/openstack/conf.yaml.example +++ b/openstack/conf.yaml.example @@ -79,3 +79,11 @@ instances: # e.g. http://172.0.0.1:8774 rather than http://172.0.0.1:8774/ # Setting append_tenant_id to true manually adds this suffix for downstream requests # append_tenant_id: false + + # If you need additional tags to submit with your server metrics. + # Please note that server metrics override the host tag and thus do not get + # the agent-level tags you may have set. 
+ # + # server_tags: + # - foo:bar + # - baz:qux diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index b71657c2fd772..f0b92b2cecfbc 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -6,6 +6,7 @@ import re import time import random +import copy import requests import simplejson as json @@ -600,7 +601,7 @@ def should_run(self, instance): return True return False - + def do_backoff(self, instance): i_key = self._instance_key(instance) tracker = self.backoff[i_key] @@ -617,13 +618,13 @@ def do_backoff(self, instance): if hypervisor_name: tags.extend("hypervisor:{}".format(hypervisor_name)) - self.gauge("openstack.exponential_backoff_time", jitter, tags=tags) + self.gauge("openstack.backoff.interval", backoff_interval, tags=tags) + self.gauge("openstack.backoff.retries", self.backoff[i_key]['retries'], tags=tags) tracker['scheduled'] = time.time() + backoff_interval def reset_backoff(self, instance): i_key = self._instance_key(instance) - tracker = self.backoff[i_key] self.backoff[i_key]['retries'] = 0 self.backoff[i_key]['scheduled'] = time.time() @@ -1033,7 +1034,7 @@ def ensure_auth_scope(self, instance): def check(self, instance): - # have we been backed off + # have we been backed off if not self.should_run(instance): self.log.info('Skipping run due to exponential backoff in effect') return @@ -1090,7 +1091,8 @@ def check(self, instance): host_tags = self._get_tags_for_host() for sid in server_ids: - server_tags = ["nova_managed_server"] + server_tags = copy.copy(instance.get('server_tags', [])) + server_tags.append("nova_managed_server") if scope.tenant_id: server_tags.append("tenant_id:%s" % scope.tenant_id) From 3006b093eba9eaa91a6769df14cceb1b29d47b42 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Wed, 11 Apr 2018 16:02:53 -0400 Subject: [PATCH 09/19] [openstack] Nova API call optimizations (#1352) * Creates a caching system 
and calls the /servers/details endpoint directly --- .../datadog_checks/openstack/openstack.py | 131 ++++++++++++------ openstack/test/test_openstack.py | 131 ++++++++++++++++-- 2 files changed, 208 insertions(+), 54 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index f0b92b2cecfbc..12b0bdd62d7f3 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -95,6 +95,11 @@ 'ACTIVE' ] +REMOVED_STATES = [ + 'DELETED', + 'SHUTOFF' +] + UNSCOPED_AUTH = 'unscoped' BASE_BACKOFF_SECS = 15 @@ -499,7 +504,6 @@ class OpenStackCheck(AgentCheck): "aggregates": "_last_aggregate_fetch_time", "physical_hosts": "_last_host_fetch_time", "hypervisors": "_last_hypervisor_fetch_time" - } HYPERVISOR_STATE_UP = 'up' @@ -553,6 +557,16 @@ def __init__(self, name, init_config, agentConfig, instances=None): self.backoff = {} random.seed() + # ISO8601 date time: used to filter the call to get the list of nova servers + self.changes_since_time = {} + + # Ex: server_details_by_id = { + # UUID: {UUID: , etc} + # 1: {id: 1, name: hostA}, + # 2: {id: 2, name: hostB} + # } + self.server_details_by_id = {} + def _make_request_with_auth_fallback(self, url, headers=None, params=None): """ Generic request handler for OpenStack API requests @@ -844,24 +858,63 @@ def get_stats_for_single_hypervisor(self, hyp_id, instance, host_tags=None): for i, avg in enumerate([1, 5, 15]): self.gauge('openstack.nova.hypervisor_load.{0}'.format(avg), load_averages[i], tags=tags) - def get_all_server_ids(self, filter_by_host=None): + # Get all of the server IDs and their metadata and cache them + # After the first run, we will only get servers that have changed state since the last collection run + def get_all_servers(self, i_key, filter_by_host=None): query_params = {} if filter_by_host: query_params["host"] = filter_by_host - url = '{0}/servers'.format(self.get_nova_endpoint()) + # If 
we don't have a timestamp for this instance, default to None + if i_key in self.changes_since_time: + query_params['changes-since'] = self.changes_since_time.get(i_key) + + url = '{0}/servers/detail'.format(self.get_nova_endpoint()) headers = {'X-Auth-Token': self.get_auth_token()} - server_ids = [] + servers = [] + try: + # Get a list of active servers + query_params['status'] = 'ACTIVE' + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) + + # Get a list of deleted serversTimestamp used to filter the call to get the list + # Need to have admin perms for this to take affect + query_params['deleted'] = 'true' + query_params['status'] = None + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) + query_params['deleted'] = 'false' + + # Get a list of shut off servers + query_params['status'] = 'SHUTOFF' resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) - server_ids = [s['id'] for s in resp['servers']] + self.changes_since_time[i_key] = datetime.utcnow().isoformat() except Exception as e: self.warning('Unable to get the list of all servers: {0}'.format(str(e))) raise e - return server_ids + for server in servers: + new_server = {} + + new_server['server_id'] = server.get('id') + new_server['state'] = server.get('status') + new_server['server_name'] = server.get('name') + new_server['hypervisor_hostname'] = server.get('OS-EXT-SRV-ATTR:hypervisor_hostname') + new_server['tenant_id'] = server.get('tenant_id') + new_server['project_name'] = self.get_project_name_from_id(new_server['tenant_id']) + + # Update our cached list of servers + if new_server['server_id'] not in self.server_details_by_id and new_server['state'] in DIAGNOSTICABLE_STATES: + self.server_details_by_id[new_server['server_id']] = new_server + elif new_server['server_id'] in self.server_details_by_id and 
new_server['state'] in REMOVED_STATES: + del self.server_details_by_id[new_server['server_id']] + + return self.server_details_by_id def get_project_name_from_id(self, tenant_id): url = "{0}/{1}/{2}/{3}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects", tenant_id) @@ -876,39 +929,27 @@ def get_project_name_from_id(self, tenant_id): self.warning('Unable to get project name: {0}'.format(str(e))) raise e - def get_stats_for_single_server(self, server_id, tags=None): + def get_stats_for_single_server(self, server_details, tags=None): def _is_valid_metric(label): return label in NOVA_SERVER_METRICS or any(seg in label for seg in NOVA_SERVER_INTERFACE_SEGMENTS) + server_id = server_details.get('server_id') + state = server_details.get('state') + server_name = server_details.get('server_name') + hypervisor_hostname = server_details.get('hypervisor_hostname') + tenant_id = server_details.get('tenant_id') + project_name = self.get_project_name_from_id(tenant_id) - url = '{0}/servers/{1}'.format(self.get_nova_endpoint(), server_id) + server_stats = {} headers = {'X-Auth-Token': self.get_auth_token()} - state = None - server_name = None - hypervisor_hostname = None - tenant_id = None - project_name = None + url = '{0}/servers/{1}/diagnostics'.format(self.get_nova_endpoint(), server_id) try: - server_details = self._make_request_with_auth_fallback(url, headers) - state = server_details['server'].get('status') - server_name = server_details['server'].get('name') - hypervisor_hostname = server_details['server'].get('OS-EXT-SRV-ATTR:hypervisor_hostname') - tenant_id = server_details['server'].get('tenant_id') - project_name = self.get_project_name_from_id(tenant_id) + server_stats = self._make_request_with_auth_fallback(url, headers) + except InstancePowerOffFailure: + self.warning("Server %s is powered off and cannot be monitored" % server_id) except Exception as e: - self.warning("Unable to collect details for server %s : %s" % (server_id, e)) + 
self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) raise e - server_stats = {} - if state and state.upper() in DIAGNOSTICABLE_STATES: - url = '{0}/servers/{1}/diagnostics'.format(self.get_nova_endpoint(), server_id) - try: - server_stats = self._make_request_with_auth_fallback(url, headers) - except InstancePowerOffFailure: - self.warning("Server %s is powered off and cannot be monitored" % server_id) - except Exception as e: - self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) - raise e - if server_stats: tags = tags or [] if project_name: @@ -1086,11 +1127,12 @@ def check(self, instance): projects.append(project) # Restrict monitoring to non-excluded servers - server_ids = self.get_servers_managed_by_hypervisor() + i_key = self._instance_key(instance) + servers = self.get_servers_managed_by_hypervisor(i_key) host_tags = self._get_tags_for_host() - for sid in server_ids: + for server in self.server_details_by_id: server_tags = copy.copy(instance.get('server_tags', [])) server_tags.append("nova_managed_server") @@ -1099,8 +1141,8 @@ def check(self, instance): if project and 'name' in project: server_tags.append('project_name:{0}'.format(project['name'])) - self.external_host_tags[sid] = host_tags - self.get_stats_for_single_server(sid, tags=server_tags) + self.external_host_tags[server] = host_tags + self.get_stats_for_single_server(servers[server], tags=server_tags) if hyp: self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags) @@ -1137,7 +1179,7 @@ def check(self, instance): self.do_backoff(instance) self.warning("There were some problems reaching the nova API - applying exponential backoff") else: - self.warning("Error reaching nova API: %s", e) + self.warning("Error reaching nova API") return except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: @@ -1222,17 +1264,16 @@ def get_my_hostname(self): """ return self.init_config.get("os_host") or self.hostname - def 
get_servers_managed_by_hypervisor(self): - server_ids = self.get_all_server_ids(filter_by_host=self.get_my_hostname()) + def get_servers_managed_by_hypervisor(self, i_key): + servers = self.get_all_servers(i_key, filter_by_host=self.get_my_hostname()) if self.exclude_server_id_rules: # Filter out excluded servers - server_ids = [ - server_id for server_id in server_ids - if not any([re.match(exclude_id_rule, server_id) - for exclude_id_rule in self.exclude_server_id_rules]) - ] - - return server_ids + for exclude_id_rule in self.exclude_server_id_rules: + for server_id in servers.keys(): + if re.match(exclude_id_rule, server_id): + del self.server_details_by_id[server_id] + + return self.server_details_by_id def _get_tags_for_host(self): hostname = self.get_my_hostname() diff --git a/openstack/test/test_openstack.py b/openstack/test/test_openstack.py index 87a7cff5853ba..cc9344998ba92 100644 --- a/openstack/test/test_openstack.py +++ b/openstack/test/test_openstack.py @@ -14,6 +14,8 @@ # project from tests.checks.common import AgentCheckTest, load_class +from datadog_checks.openstack import OpenStackCheck + from checks import AgentCheck @@ -319,12 +321,92 @@ class TestOpenstack(AgentCheckTest): # Samples # .. 
server/network + ALL_SERVER_DETAILS = { + "server-1":{"id":"server-1", "name":"server-name-1", "status":"ACTIVE"}, + "server-2":{"id":"server-2", "name":"server-name-2", "status":"ACTIVE"}, + "other-1":{"id":"other-1", "name":"server-name-other-1", "status":"ACTIVE"}, + "other-2":{"id":"other-2", "name":"server-name-other-2", "status":"ACTIVE"} + } ALL_IDS = ['server-1', 'server-2', 'other-1', 'other-2'] EXCLUDED_NETWORK_IDS = ['server-1', 'other-.*'] EXCLUDED_SERVER_IDS = ['server-2', 'other-.*'] FILTERED_NETWORK_ID = 'server-2' FILTERED_SERVER_ID = 'server-1' + + # Example response from - https://developer.openstack.org/api-ref/compute/#list-servers-detailed + # ID and server-name values have been changed for test readability + MOCK_NOVA_SERVERS = { + "servers": [ + { + "OS-DCF:diskConfig": "AUTO", + "OS-EXT-AZ:availability_zone": "nova", + "OS-EXT-SRV-ATTR:host": "compute", + "OS-EXT-SRV-ATTR:hostname": "server-1", + "OS-EXT-SRV-ATTR:hypervisor_hostname": "fake-mini", + "OS-EXT-SRV-ATTR:instance_name": "instance-00000001", + "OS-EXT-SRV-ATTR:kernel_id": "", + "OS-EXT-SRV-ATTR:launch_index": 0, + "OS-EXT-SRV-ATTR:ramdisk_id": "", + "OS-EXT-SRV-ATTR:reservation_id": "r-iffothgx", + "OS-EXT-SRV-ATTR:root_device_name": "/dev/sda", + "OS-EXT-SRV-ATTR:user_data": "IyEvYmluL2Jhc2gKL2Jpbi9zdQplY2hvICJJIGFtIGluIHlvdSEiCg==", + "OS-EXT-STS:power_state": 1, + "OS-EXT-STS:task_state": 'null', + "OS-EXT-STS:vm_state": "active", + "OS-SRV-USG:launched_at": "2017-02-14T19:24:43.891568", + "OS-SRV-USG:terminated_at": 'null', + "accessIPv4": "1.2.3.4", + "accessIPv6": "80fe::", + "hostId": "2091634baaccdc4c5a1d57069c833e402921df696b7f970791b12ec6", + "host_status": "UP", + "id": "server-1", + "metadata": { + "My Server Name": "Apache1" + }, + "name": "new-server-test", + "status": "DELETED", + "tags": [], + "tenant_id": "6f70656e737461636b20342065766572", + "updated": "2017-02-14T19:24:43Z", + "user_id": "fake" + }, + { + "OS-DCF:diskConfig": "AUTO", + 
"OS-EXT-AZ:availability_zone": "nova", + "OS-EXT-SRV-ATTR:host": "compute", + "OS-EXT-SRV-ATTR:hostname": "server-2", + "OS-EXT-SRV-ATTR:hypervisor_hostname": "fake-mini", + "OS-EXT-SRV-ATTR:instance_name": "instance-00000001", + "OS-EXT-SRV-ATTR:kernel_id": "", + "OS-EXT-SRV-ATTR:launch_index": 0, + "OS-EXT-SRV-ATTR:ramdisk_id": "", + "OS-EXT-SRV-ATTR:reservation_id": "r-iffothgx", + "OS-EXT-SRV-ATTR:root_device_name": "/dev/sda", + "OS-EXT-SRV-ATTR:user_data": "IyEvYmluL2Jhc2gKL2Jpbi9zdQplY2hvICJJIGFtIGluIHlvdSEiCg==", + "OS-EXT-STS:power_state": 1, + "OS-EXT-STS:task_state": 'null', + "OS-EXT-STS:vm_state": "active", + "OS-SRV-USG:launched_at": "2017-02-14T19:24:43.891568", + "OS-SRV-USG:terminated_at": 'null', + "accessIPv4": "1.2.3.4", + "accessIPv6": "80fe::", + "hostId": "2091634baaccdc4c5a1d57069c833e402921df696b7f970791b12ec6", + "host_status": "UP", + "id": "server_newly_added", + "metadata": { + "My Server Name": "Apache1" + }, + "name": "newly_added_server", + "status": "ACTIVE", + "tags": [], + "tenant_id": "6f70656e737461636b20342065766572", + "updated": "2017-02-14T19:24:43Z", + "user_id": "fake" + } + ] + } + # .. config MOCK_CONFIG = { 'init_config': { @@ -381,24 +463,30 @@ def test_cache_utils(self): sleep(1.5) self.assertTrue(self.check._is_expired('aggregates')) - @patch('datadog_checks.openstack.OpenStackCheck.get_all_server_ids', return_value=ALL_IDS) + @patch('datadog_checks.openstack.OpenStackCheck.get_all_servers', return_value=ALL_SERVER_DETAILS) def test_server_exclusion(self, *args): """ - Exclude networks using regular expressions. + Exclude servers using regular expressions. 
""" - - self.check.exclude_server_id_rules = set([re.compile(rule) for rule in self.EXCLUDED_SERVER_IDS]) + openstackCheck = OpenStackCheck("test", { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_server_ids': self.EXCLUDED_SERVER_IDS + }, {}, instances=self.MOCK_CONFIG) # Retrieve servers - server_ids = self.check.get_servers_managed_by_hypervisor() - + openstackCheck.server_details_by_id = copy.deepcopy(self.ALL_SERVER_DETAILS) + i_key = "test_instance" + server_ids = openstackCheck.get_servers_managed_by_hypervisor(i_key) + # Assert # .. 1 out of 4 server ids filtered self.assertEqual(len(server_ids), 1) - self.assertEqual(server_ids[0], self.FILTERED_SERVER_ID) - # cleanup - self.check.exclude_server_id_rules = set([]) + # Ensure the server IDs filtered are the ones expected + for server_id in server_ids: + assert server_id in self.FILTERED_SERVER_ID + @patch('datadog_checks.openstack.OpenStackCheck.get_all_network_ids', return_value=ALL_IDS) def test_network_exclusion(self, *args): @@ -422,3 +510,28 @@ def test_network_exclusion(self, *args): # cleanup self.check.exclude_network_id_rules = set([]) + + @patch('datadog_checks.openstack.OpenStackCheck._make_request_with_auth_fallback', return_value=MOCK_NOVA_SERVERS) + @patch('datadog_checks.openstack.OpenStackCheck.get_nova_endpoint', return_value="http://10.0.2.15:8774/v2.1/0850707581fe4d738221a72db0182876") + @patch('datadog_checks.openstack.OpenStackCheck.get_auth_token', return_value="test_auth_token") + @patch('datadog_checks.openstack.OpenStackCheck.get_project_name_from_id', return_value="tenant-1") + def test_cache_between_runs(self, *args): + """ + Ensure the cache contains the expected VMs between check runs. 
+ """ + + openstackCheck = OpenStackCheck("test", { + 'keystone_server_url': 'http://10.0.2.15:5000', + 'ssl_verify': False, + 'exclude_server_ids': self.EXCLUDED_SERVER_IDS + }, {}, instances=self.MOCK_CONFIG) + + # Start off with a list of servers + openstackCheck.server_details_by_id = copy.deepcopy(self.ALL_SERVER_DETAILS) + i_key = "test_instance" + + # Update the cached list of servers based on what the endpoint returns + cached_servers = openstackCheck.get_all_servers(i_key) + + assert 'server-1' not in cached_servers + assert 'server_newly_added' in cached_servers From a9cf00a2a3fa845032c7657672ce2f25c6c83d58 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Thu, 12 Apr 2018 16:55:04 -0400 Subject: [PATCH 10/19] Remove verbose logs (#1389) --- openstack/datadog_checks/openstack/openstack.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 12b0bdd62d7f3..72bcfe1c28d12 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -920,7 +920,6 @@ def get_project_name_from_id(self, tenant_id): url = "{0}/{1}/{2}/{3}".format(self.keystone_server_url, DEFAULT_KEYSTONE_API_VERSION, "projects", tenant_id) self.log.debug("Project URL is %s", url) headers = {'X-Auth-Token': self.get_auth_token()} - self.log.debug("Headers %s", headers) try: r = self._make_request_with_auth_fallback(url, headers) return r['project']['name'] From da71de9986a4b26fb2a39fd8c841d7b955cd60ec Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Fri, 13 Apr 2018 13:04:49 -0400 Subject: [PATCH 11/19] Resolve 404 issues on projects from deleted servers --- openstack/datadog_checks/openstack/openstack.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 72bcfe1c28d12..ededca5999a3f 100644 --- 
a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -906,10 +906,12 @@ def get_all_servers(self, i_key, filter_by_host=None): new_server['server_name'] = server.get('name') new_server['hypervisor_hostname'] = server.get('OS-EXT-SRV-ATTR:hypervisor_hostname') new_server['tenant_id'] = server.get('tenant_id') - new_server['project_name'] = self.get_project_name_from_id(new_server['tenant_id']) # Update our cached list of servers if new_server['server_id'] not in self.server_details_by_id and new_server['state'] in DIAGNOSTICABLE_STATES: + # The project may not exist if the server isn't in an active state + # Query for the project name here to avoid 404s + new_server['project_name'] = self.get_project_name_from_id(new_server['tenant_id']) self.server_details_by_id[new_server['server_id']] = new_server elif new_server['server_id'] in self.server_details_by_id and new_server['state'] in REMOVED_STATES: del self.server_details_by_id[new_server['server_id']] @@ -936,7 +938,7 @@ def _is_valid_metric(label): server_name = server_details.get('server_name') hypervisor_hostname = server_details.get('hypervisor_hostname') tenant_id = server_details.get('tenant_id') - project_name = self.get_project_name_from_id(tenant_id) + project_name = server_details.get('project_name') server_stats = {} headers = {'X-Auth-Token': self.get_auth_token()} From 646a376f3f70f43e12625937365f2f80fbaab1ab Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Mon, 7 May 2018 22:02:40 -0400 Subject: [PATCH 12/19] [Openstack] Implement caching mechanism to reduce required API calls (#1512) * Only set the changes-since once on run * Remove overall project_name tag, let individual servers do it * Add lots of debug logs * Gaurantees non active servers are removed from cache * Resilient double checking of active server * Final cleanup of code/comments --- .../datadog_checks/openstack/openstack.py | 99 ++++++++++++------- 1 file changed, 64 
insertions(+), 35 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index ededca5999a3f..359292875aaac 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -29,7 +29,7 @@ FALLBACK_NOVA_API_VERSION = 'v2' DEFAULT_NEUTRON_API_VERSION = 'v2.0' -DEFAULT_API_REQUEST_TIMEOUT = 5 # seconds +DEFAULT_API_REQUEST_TIMEOUT = 10 # seconds NOVA_HYPERVISOR_METRICS = [ 'current_workload', @@ -572,11 +572,13 @@ def _make_request_with_auth_fallback(self, url, headers=None, params=None): Generic request handler for OpenStack API requests Raises specialized Exceptions for commonly encountered error codes """ + self.log.debug("Request URL and Params: %s, %s", url, params) try: resp = requests.get(url, headers=headers, verify=self._ssl_verify, params=params, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) resp.raise_for_status() - except requests.exceptions.HTTPError: + except requests.HTTPError as e: + self.log.debug("Error contacting openstack endpoint: %s", e) if resp.status_code == 401: self.log.info('Need to reauthenticate before next check') @@ -584,6 +586,8 @@ def _make_request_with_auth_fallback(self, url, headers=None, params=None): self.delete_current_scope() elif resp.status_code == 409: raise InstancePowerOffFailure() + elif resp.status_code == 404: + raise e else: raise @@ -860,7 +864,7 @@ def get_stats_for_single_hypervisor(self, hyp_id, instance, host_tags=None): # Get all of the server IDs and their metadata and cache them # After the first run, we will only get servers that have changed state since the last collection run - def get_all_servers(self, i_key, filter_by_host=None): + def get_all_servers(self, i_key, collect_all_tenants, filter_by_host=None): query_params = {} if filter_by_host: query_params["host"] = filter_by_host @@ -872,6 +876,8 @@ def get_all_servers(self, i_key, filter_by_host=None): url = 
'{0}/servers/detail'.format(self.get_nova_endpoint()) headers = {'X-Auth-Token': self.get_auth_token()} + if collect_all_tenants: + query_params["all_tenants"] = True servers = [] try: @@ -880,20 +886,25 @@ def get_all_servers(self, i_key, filter_by_host=None): resp = self._make_request_with_auth_fallback(url, headers, params=query_params) servers.extend(resp['servers']) - # Get a list of deleted serversTimestamp used to filter the call to get the list - # Need to have admin perms for this to take affect - query_params['deleted'] = 'true' - query_params['status'] = None - resp = self._make_request_with_auth_fallback(url, headers, params=query_params) - servers.extend(resp['servers']) - query_params['deleted'] = 'false' + # Don't collect Deleted or Shut off VMs on the first run: + if i_key in self.changes_since_time: - # Get a list of shut off servers - query_params['status'] = 'SHUTOFF' - resp = self._make_request_with_auth_fallback(url, headers, params=query_params) - servers.extend(resp['servers']) + # Get a list of deleted serversTimestamp used to filter the call to get the list + # Need to have admin perms for this to take affect + query_params['deleted'] = 'true' + del query_params['status'] + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + + servers.extend(resp['servers']) + query_params['deleted'] = 'false' + + # Get a list of shut off servers + query_params['status'] = 'SHUTOFF' + resp = self._make_request_with_auth_fallback(url, headers, params=query_params) + servers.extend(resp['servers']) self.changes_since_time[i_key] = datetime.utcnow().isoformat() + except Exception as e: self.warning('Unable to get the list of all servers: {0}'.format(str(e))) raise e @@ -909,12 +920,17 @@ def get_all_servers(self, i_key, filter_by_host=None): # Update our cached list of servers if new_server['server_id'] not in self.server_details_by_id and new_server['state'] in DIAGNOSTICABLE_STATES: + self.log.debug("Adding server to cache: %s", 
new_server) # The project may not exist if the server isn't in an active state # Query for the project name here to avoid 404s new_server['project_name'] = self.get_project_name_from_id(new_server['tenant_id']) self.server_details_by_id[new_server['server_id']] = new_server elif new_server['server_id'] in self.server_details_by_id and new_server['state'] in REMOVED_STATES: - del self.server_details_by_id[new_server['server_id']] + self.log.debug("Removing server from cache: %s", new_server) + try: + del self.server_details_by_id[new_server['server_id']] + except KeyError as e: + self.log.debug("Server: %s has already been removed from the cache", new_server['server_id']) return self.server_details_by_id @@ -945,8 +961,16 @@ def _is_valid_metric(label): url = '{0}/servers/{1}/diagnostics'.format(self.get_nova_endpoint(), server_id) try: server_stats = self._make_request_with_auth_fallback(url, headers) - except InstancePowerOffFailure: - self.warning("Server %s is powered off and cannot be monitored" % server_id) + except InstancePowerOffFailure: # 409 response code came back fro nova + self.log.debug("Server %s is powered off and cannot be monitored", server_id) + del self.server_details_by_id[server_id] + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + self.log.debug("Server %s is not in an ACTIVE state and cannot be monitored, %s", server_id, e) + del self.server_details_by_id[server_id] + else: + self.log.debug("Received HTTP Error when reaching the nova endpoint") + raise e except Exception as e: self.warning("Unknown error when monitoring %s : %s" % (server_id, e)) raise e @@ -1075,7 +1099,6 @@ def ensure_auth_scope(self, instance): return instance_scope def check(self, instance): - # have we been backed off if not self.should_run(instance): self.log.info('Skipping run due to exponential backoff in effect') @@ -1083,7 +1106,7 @@ def check(self, instance): try: instance_scope = self.ensure_auth_scope(instance) - + 
split_hostname_on_first_period = instance.get('split_hostname_on_first_period', False) if not instance_scope: # Fast fail in the absence of an instance_scope return @@ -1108,6 +1131,7 @@ def check(self, instance): self._send_api_service_checks(scope) collect_all_projects = instance.get("collect_all_projects", False) + collect_all_tenants = self._if_affirmative(instance.get('collect_all_tenants', False)) self.log.debug("Running check with credentials: \n") self.log.debug("Nova Url: %s", self.get_nova_endpoint()) @@ -1116,7 +1140,7 @@ def check(self, instance): # Restrict monitoring to this (host, hypervisor, project) # and it's guest servers - hyp = self.get_local_hypervisor() + hyp = self.get_local_hypervisor(split_hostname_on_first_period) project = self.get_scoped_project(scope) @@ -1129,18 +1153,19 @@ def check(self, instance): # Restrict monitoring to non-excluded servers i_key = self._instance_key(instance) - servers = self.get_servers_managed_by_hypervisor(i_key) + servers = self.get_servers_managed_by_hypervisor(i_key, split_hostname_on_first_period, collect_all_tenants) - host_tags = self._get_tags_for_host() + host_tags = self._get_tags_for_host(split_hostname_on_first_period) - for server in self.server_details_by_id: + # Deep copy the cache so we can remove things from the Original during the iteration + server_cache_copy = copy.deepcopy(self.server_details_by_id) + + for server in server_cache_copy: server_tags = copy.copy(instance.get('server_tags', [])) server_tags.append("nova_managed_server") if scope.tenant_id: server_tags.append("tenant_id:%s" % scope.tenant_id) - if project and 'name' in project: - server_tags.append('project_name:{0}'.format(project['name'])) self.external_host_tags[server] = host_tags self.get_stats_for_single_server(servers[server], tags=server_tags) @@ -1148,7 +1173,7 @@ def check(self, instance): if hyp: self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags) else: - self.warning("Couldn't get hypervisor to 
monitor for host: %s" % self.get_my_hostname()) + self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname(split_hostname_on_first_period)) # For now, monitor all networks self.get_network_stats() @@ -1192,12 +1217,12 @@ def check(self, instance): self.reset_backoff(instance) # Local Info accessors - def get_local_hypervisor(self): + def get_local_hypervisor(self, split_hostname_on_first_period): """ Returns the hypervisor running on this host, and assumes a 1-1 between host and hypervisor """ # Look up hypervisors available filtered by my hostname - host = self.get_my_hostname() + host = self.get_my_hostname(split_hostname_on_first_period) hyp = self.get_all_hypervisor_ids(filter_by_host=host) if hyp: return hyp[0] @@ -1259,14 +1284,19 @@ def get_scoped_project(self, project_auth_scope): return None - def get_my_hostname(self): + def get_my_hostname(self, split_hostname_on_first_period): """ Returns a best guess for the hostname registered with OpenStack for this host """ - return self.init_config.get("os_host") or self.hostname + + hostname = self.init_config.get("os_host") or self.hostname + if split_hostname_on_first_period: + hostname = hostname.split('.')[0] + + return hostname - def get_servers_managed_by_hypervisor(self, i_key): - servers = self.get_all_servers(i_key, filter_by_host=self.get_my_hostname()) + def get_servers_managed_by_hypervisor(self, i_key, split_hostname_on_first_period, collect_all_tenants): + servers = self.get_all_servers(i_key, collect_all_tenants, filter_by_host=self.get_my_hostname(split_hostname_on_first_period)) if self.exclude_server_id_rules: # Filter out excluded servers for exclude_id_rule in self.exclude_server_id_rules: @@ -1276,9 +1306,8 @@ def get_servers_managed_by_hypervisor(self, i_key): return self.server_details_by_id - def _get_tags_for_host(self): - hostname = self.get_my_hostname() - + def _get_tags_for_host(self, split_hostname_on_first_period): + hostname = 
self.get_my_hostname(split_hostname_on_first_period) tags = [] if hostname in self._get_and_set_aggregate_list(): tags.append('aggregate:{0}'.format(self._aggregate_list[hostname]['aggregate'])) From 5445e68601ea9a51cc60ce9cc9a91507915901cd Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 8 May 2018 15:27:40 -0400 Subject: [PATCH 13/19] resolve merge issues --- openstack/conf.yaml.example | 4 ++++ openstack/datadog_checks/openstack/openstack.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/openstack/conf.yaml.example b/openstack/conf.yaml.example index 8876bbc954f1b..5cb3f7fb7815b 100644 --- a/openstack/conf.yaml.example +++ b/openstack/conf.yaml.example @@ -80,6 +80,10 @@ instances: # Setting append_tenant_id to true manually adds this suffix for downstream requests # append_tenant_id: false + # Set (optional) custom tags for each metric + # tags: + # - optional:tag1 + # If you need additional tags to submit with your server metrics. # Please note that server metrics override the host tag and thus do not get # the agent-level tags you may have set. 
diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index 364e4d1432bf4..e5e55351569c5 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -1174,7 +1174,7 @@ def check(self, instance): server_cache_copy = copy.deepcopy(self.server_details_by_id) for server in server_cache_copy: - server_tags = copy.copy(instance.get('server_tags', [])) + server_tags = custom_tags server_tags.append("nova_managed_server") if scope.tenant_id: @@ -1184,12 +1184,12 @@ def check(self, instance): self.get_stats_for_single_server(servers[server], tags=server_tags) if hyp: - self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags, custom_tags=custom_tags) + self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags, custom_tags=server_tags) else: self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname(split_hostname_on_first_period)) self.external_host_tags[sid] = host_tags - self.get_stats_for_single_server(sid, tags=server_tags + custom_tags) + self.get_stats_for_single_server(sid, tags=custom_tags) if projects: # Ensure projects list and scoped project exists From 4d20d698c96a32f4dc90bf16b564ad163ff225fc Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 8 May 2018 18:42:12 -0400 Subject: [PATCH 14/19] Resolves some issues with the previous merge conflicts --- .../datadog_checks/openstack/openstack.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index e5e55351569c5..a85c18fc0c29d 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -809,7 +809,7 @@ def get_uptime_for_single_hypervisor(self, hyp_id): uptime = resp['hypervisor']['uptime'] return self._parse_uptime_string(uptime) - def 
get_stats_for_single_hypervisor(self, hyp_id, instance host_tags=None, custom_tags=None): + def get_stats_for_single_hypervisor(self, hyp_id, instance, host_tags=None, custom_tags=None): url = '{0}/os-hypervisors/{1}'.format(self.get_nova_endpoint(), hyp_id) headers = {'X-Auth-Token': self.get_auth_token()} resp = self._make_request_with_auth_fallback(url, headers) @@ -998,6 +998,8 @@ def _is_valid_metric(label): if tags is None: tags = [] + server_tags = copy.deepcopy(tags) + project_name = project.get('name') self.log.debug("Collecting metrics for project. name: {0} id: {1}".format(project_name, project['id'])) @@ -1006,15 +1008,15 @@ def _is_valid_metric(label): headers = {'X-Auth-Token': self.get_auth_token()} server_stats = self._make_request_with_auth_fallback(url, headers, params={"tenant_id": project['id']}) - tags.append('tenant_id:{0}'.format(project['id'])) + server_tags.append('tenant_id:{0}'.format(project['id'])) if project_name: - tags.append('project_name:{0}'.format(project['name'])) + server_tags.append('project_name:{0}'.format(project['name'])) for st in server_stats['limits']['absolute']: if _is_valid_metric(st): metric_key = PROJECT_METRICS[st] - self.gauge("openstack.nova.limits.{0}".format(metric_key), server_stats['limits']['absolute'][st], tags=tags) + self.gauge("openstack.nova.limits.{0}".format(metric_key), server_stats['limits']['absolute'][st], tags=server_tags) def get_stats_for_all_projects(self, projects, tags=None): if tags is None: @@ -1144,7 +1146,7 @@ def check(self, instance): self._send_api_service_checks(scope, custom_tags) collect_all_projects = instance.get("collect_all_projects", False) - collect_all_tenants = self._if_affirmative(instance.get('collect_all_tenants', False)) + collect_all_tenants = instance.get('collect_all_tenants', False) self.log.debug("Running check with credentials: \n") self.log.debug("Nova Url: %s", self.get_nova_endpoint()) @@ -1174,7 +1176,7 @@ def check(self, instance): server_cache_copy = 
copy.deepcopy(self.server_details_by_id) for server in server_cache_copy: - server_tags = custom_tags + server_tags = copy.deepcopy(custom_tags) server_tags.append("nova_managed_server") if scope.tenant_id: @@ -1184,13 +1186,10 @@ def check(self, instance): self.get_stats_for_single_server(servers[server], tags=server_tags) if hyp: - self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags, custom_tags=server_tags) + self.get_stats_for_single_hypervisor(hyp, instance, host_tags=host_tags, custom_tags=custom_tags) else: self.warning("Couldn't get hypervisor to monitor for host: %s" % self.get_my_hostname(split_hostname_on_first_period)) - - self.external_host_tags[sid] = host_tags - self.get_stats_for_single_server(sid, tags=custom_tags) - + if projects: # Ensure projects list and scoped project exists self.get_stats_for_all_projects(projects, custom_tags) @@ -1198,11 +1197,8 @@ def check(self, instance): # For now, monitor all networks self.get_network_stats(custom_tags) - # For now, monitor all networks - self.get_network_stats() - - if set_external_tags is not None: - set_external_tags(self.get_external_host_tags()) + if set_external_tags is not None: + set_external_tags(self.get_external_host_tags()) if projects: # Ensure projects list and scoped project exists From dcda0ebb0bb926984407d59853e979be67251495 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 8 May 2018 18:53:49 -0400 Subject: [PATCH 15/19] Fix tests --- openstack/test/test_openstack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstack/test/test_openstack.py b/openstack/test/test_openstack.py index f065cfeb8c538..1c7c21db07b28 100644 --- a/openstack/test/test_openstack.py +++ b/openstack/test/test_openstack.py @@ -483,7 +483,7 @@ def test_server_exclusion(self, *args): # Retrieve servers openstackCheck.server_details_by_id = copy.deepcopy(self.ALL_SERVER_DETAILS) i_key = "test_instance" - server_ids = 
openstackCheck.get_servers_managed_by_hypervisor(i_key) + server_ids = openstackCheck.get_servers_managed_by_hypervisor(i_key, False, False) # Assert # .. 1 out of 4 server ids filtered From ec1f99c36ad2363cb9aac28a8f53338b1e86ed36 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 8 May 2018 22:31:43 -0400 Subject: [PATCH 16/19] Resolve assertServiceCheck issues --- openstack/test/test_openstack.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/openstack/test/test_openstack.py b/openstack/test/test_openstack.py index 1c7c21db07b28..6213fd8528f8d 100644 --- a/openstack/test/test_openstack.py +++ b/openstack/test/test_openstack.py @@ -441,15 +441,18 @@ def test_ensure_auth_scope(self): # Sort the tags list for sc in self.service_checks: sc["tags"].sort() - tags = ['keystone_server:http://10.0.2.15:5000', 'optional:tag1'] - tags.sort() - - # Expect OK, since we've mocked an API response - self.assertServiceCheck(self.check.IDENTITY_API_SC, status=AgentCheck.OK, count=1, tags=tags) - - # Expect CRITICAL since URLs are non-existent - self.assertServiceCheck(self.check.COMPUTE_API_SC, status=AgentCheck.CRITICAL, count=1, tags=tags) - self.assertServiceCheck(self.check.NETWORK_API_SC, status=AgentCheck.CRITICAL, count=1, tags=tags) + tags = ['keystone_server:http://10.0.2.15:5000', 'optional:tag1'] + tags.sort() + + # Can only use assertServiceCheck if we ran the whole check with run_check + # We mock this API response, so return OK + if sc.get('check') == self.check.IDENTITY_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.OK) + # URLs are nonexistant, so return CRITICAL + elif sc.get('check') == self.check.COMPUTE_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.CRITICAL) + elif sc.get('check') == self.check.NETWORK_API_SC: + self.assertEqual(sc.get('status'), AgentCheck.CRITICAL) self.check._current_scope = scope @@ -537,7 +540,7 @@ def test_cache_between_runs(self, *args): i_key = "test_instance" # 
Update the cached list of servers based on what the endpoint returns - cached_servers = openstackCheck.get_all_servers(i_key) + cached_servers = openstackCheck.get_all_servers(i_key, False) assert 'server-1' not in cached_servers assert 'server_newly_added' in cached_servers From bc4ac2dd6a1cc9cd4583984b30041f192d3ba16b Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 8 May 2018 22:48:38 -0400 Subject: [PATCH 17/19] Bump version and address review --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- openstack/datadog_checks/openstack/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 11b8a79a9f3eb..baf86dbb0cadc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,7 +16,7 @@ is available in our contribution guidelines. ### Versioning -- [ ] Bumped the check version in `manifest.json` (or update manifest version to 1.0.0 and drop check version) +- [ ] Bumped the check version in `manifest.json` - [ ] Bumped the check version in `datadog_checks/{integration}/__init__.py` - [ ] Updated `CHANGELOG.md`. Please use `Unreleased` as the date in the title for the new section. 
diff --git a/openstack/datadog_checks/openstack/__init__.py b/openstack/datadog_checks/openstack/__init__.py index 116e8e09f3351..6e7722a9a4590 100644 --- a/openstack/datadog_checks/openstack/__init__.py +++ b/openstack/datadog_checks/openstack/__init__.py @@ -2,6 +2,6 @@ OpenStackCheck = openstack.OpenStackCheck -__version__ = "1.2.0" +__version__ = "1.3.0" __all__ = ['openstack'] From c8ddd6e51fd9a359ed0a41fbc67d089ce450c4b2 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Wed, 9 May 2018 16:27:41 -0400 Subject: [PATCH 18/19] Address review --- .../datadog_checks/openstack/openstack.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/openstack/datadog_checks/openstack/openstack.py b/openstack/datadog_checks/openstack/openstack.py index a85c18fc0c29d..04b6f644b0255 100644 --- a/openstack/datadog_checks/openstack/openstack.py +++ b/openstack/datadog_checks/openstack/openstack.py @@ -172,12 +172,14 @@ def get_user_identity(cls, instance_config): } """ user = instance_config.get('user') - if not user\ - or not user.get('name')\ - or not user.get('password')\ - or not user.get("domain")\ - or not user.get("domain").get("id"): + if not ( + user and + user.get('name') and + user.get('password') and + user.get("domain") and + user.get("domain").get("id") + ): raise IncompleteIdentity() identity = { @@ -204,7 +206,7 @@ def get_auth_scope(cls, instance_config): raise IncompleteAuthScope() if auth_scope['project'].get('name'): - # We need to add a domain scope to avoid name clashes. Search for one. If not raise IncompleteConfig + # We need to add a domain scope to avoid name clashes. Search for one. 
If not raise IncompleteAuthScope if not auth_scope['project'].get('domain', {}).get('id'): raise IncompleteAuthScope() else: @@ -239,10 +241,11 @@ def get_auth_response_from_config(cls, init_config, instance_config, proxy_confi try: identity['password']['user']['domain']['name'] = identity['password']['user']['domain'].pop('id') - if auth_scope and 'domain' in auth_scope['project']: - auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') - elif auth_scope: - auth_scope['project']['name'] = auth_scope['project'].pop('id') + if auth_scope: + if 'domain' in auth_scope['project']: + auth_scope['project']['domain']['name'] = auth_scope['project']['domain'].pop('id') + else: + auth_scope['project']['name'] = auth_scope['project'].pop('id') auth_resp = cls.request_auth_token(auth_scope, identity, keystone_server_url, ssl_verify, proxy_config) except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: exception_msg = "{msg} and also failed keystone auth with identity:{user} domain:{domain} scope:{scope} @{url}: {ex}".format( @@ -268,7 +271,7 @@ def from_config(cls, init_config, instance_config, proxy_config=None): if not keystone_server_url: raise IncompleteConfig() - ssl_verify = init_config.get("ssl_verify", False) + ssl_verify = init_config.get("ssl_verify", True) nova_api_version = init_config.get("nova_api_version", DEFAULT_NOVA_API_VERSION) _, auth_token, _ = cls.get_auth_response_from_config(init_config, instance_config, proxy_config) @@ -290,8 +293,10 @@ def from_config(cls, init_config, instance_config, proxy_config=None): ssl_verify, proxy_config) project_auth_token = token_resp.headers.get('X-Subject-Token') except (requests.exceptions.HTTPError, requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: - # TODO: Raise something - pass + exception_msg = "unable to retrieve project from keystone auth with identity: @{url}: {ex}".format( + url=keystone_server_url, + 
ex=e) + raise KeystoneUnreachable(exception_msg) try: service_catalog = KeystoneCatalog.from_auth_response( @@ -577,7 +582,7 @@ def _make_request_with_auth_fallback(self, url, headers=None, params=None): resp = requests.get(url, headers=headers, verify=self._ssl_verify, params=params, timeout=DEFAULT_API_REQUEST_TIMEOUT, proxies=self.proxy_config) resp.raise_for_status() - except requests.HTTPError as e: + except requests.exceptions.HTTPError as e: self.log.debug("Error contacting openstack endpoint: %s", e) if resp.status_code == 401: self.log.info('Need to reauthenticate before next check') From eb0de6afbf7b700082e6ae26628225c3a610d526 Mon Sep 17 00:00:00 2001 From: Nicholas Muesch Date: Tue, 22 May 2018 16:22:07 -0400 Subject: [PATCH 19/19] Update CHANGELOG.md --- openstack/CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/openstack/CHANGELOG.md b/openstack/CHANGELOG.md index 6bd6ad69c5bf5..eed2d960a31c0 100644 --- a/openstack/CHANGELOG.md +++ b/openstack/CHANGELOG.md @@ -28,4 +28,3 @@ [#1119]: https://github.com/DataDog/integrations-core/issues/1119 [#1123]: https://github.com/DataDog/integrations-core/issues/1123 [#1126]: https://github.com/DataDog/integrations-core/issues/1126 -[#1276]: https://github.com/DataDog/integrations-core/issues/1276