Skip to content

Commit

Permalink
[NDM] Add NDM metadata support for Cisco ACI (#17735)
Browse files Browse the repository at this point in the history
* Add support for sending device metadata

* Add unit test for device metadata, update fixture

* Add license header, changelogs

* Lint

* First pass at submitting interface metadata, cleanup for test fixtures

* Fix for py2.7 support

* Try to fix imports

* Deal with pydantic stuff py2.7

* Allow namespace for Cisco ACI devices, static var for vendor

* Update device metadata to use the correct fieldname, add pydantic model for EvP intake

* Sync the conf.yaml example

* Add device type and integration to device metadata, fix ID field name

* Update interface statuses

* Deal with device status (use fabricSt)

* Update get_eth_list to get operStatus, update all tests and fixtures

* Amend docs for namespace

* Batch events sent to EvP

* Add interface status metric

* Only add to list for >py3.0

* Update default value for vendor, yield for batch events, use device type other

* Add source field to device metadata tags

* Add enums for interface status

* Use correct track type for NDM metadata

* Amend device id tag, collect timestamp ms -> s

* Add interface integration field

* More generic method to send EvP event

* Add docstring for the EvP method

* Update interface tagging, remove system_ip tag

* Fix linting for submit event platform event

* Use interface ID tags
  • Loading branch information
zoedt authored Jul 10, 2024
1 parent 5d6a13e commit b915f56
Show file tree
Hide file tree
Showing 25 changed files with 28,097 additions and 36 deletions.
7 changes: 7 additions & 0 deletions cisco_aci/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ files:
value:
type: boolean
example: False
- name: namespace
description: |
Namespace for differentiating between devices that share the same IP.
If not specified, the namespace will be 'default'.
value:
type: string
example: default
- template: instances/http
overrides:
username.display_priority: 9
Expand Down
1 change: 1 addition & 0 deletions cisco_aci/changelog.d/17735.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[NDM] Add NDM metadata support for Cisco ACI
2 changes: 1 addition & 1 deletion cisco_aci/datadog_checks/cisco_aci/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
# Licensed under a 3-clause BSD style license (see LICENSE)

from .__about__ import __version__
from .cisco import CiscoACICheck
from datadog_checks.cisco_aci.cisco import CiscoACICheck

__all__ = ['__version__', 'CiscoACICheck']
4 changes: 2 additions & 2 deletions cisco_aci/datadog_checks/cisco_aci/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ def get_spine_proc_metrics(self, pod, node):
return self._parse_response(response)

def get_eth_list(self, pod, node):
    """
    Fetch the physical interfaces (``l1PhysIf``) of a node.

    The query string asks for the ``ethpmPhysIf`` children of each
    interface to be included in the response subtree.

    :param pod: pod identifier interpolated into the request path
    :param node: node identifier interpolated into the request path
    :return: whatever ``_parse_response`` produces for the response
    """
    subtree_query = 'rsp-subtree=children&rsp-subtree-class=ethpmPhysIf'
    endpoint = '/api/node/class/topology/pod-{}/node-{}/l1PhysIf.json?{}'.format(pod, node, subtree_query)
    return self._parse_response(self.make_request(endpoint))

Expand Down
17 changes: 8 additions & 9 deletions cisco_aci/datadog_checks/cisco_aci/cisco.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
from datadog_checks.base import AgentCheck, ConfigurationError
from datadog_checks.base.config import _is_affirmative
from datadog_checks.base.utils.containers import hash_mutable

from . import aci_metrics
from .api import Api
from .capacity import Capacity
from .fabric import Fabric
from .tags import CiscoTags
from .tenant import Tenant
from datadog_checks.cisco_aci.aci_metrics import make_tenant_metrics
from datadog_checks.cisco_aci.api import Api
from datadog_checks.cisco_aci.capacity import Capacity
from datadog_checks.cisco_aci.fabric import Fabric
from datadog_checks.cisco_aci.tags import CiscoTags
from datadog_checks.cisco_aci.tenant import Tenant

SOURCE_TYPE = 'cisco_aci'

Expand All @@ -25,7 +24,7 @@ class CiscoACICheck(AgentCheck):

def __init__(self, name, init_config, instances):
super(CiscoACICheck, self).__init__(name, init_config, instances)
self.tenant_metrics = aci_metrics.make_tenant_metrics()
self.tenant_metrics = make_tenant_metrics()
self.last_events_ts = {}
self.external_host_tags = {}
self._api_cache = {}
Expand Down Expand Up @@ -109,7 +108,7 @@ def check(self, _):
raise

try:
fabric = Fabric(self, api, self.instance)
fabric = Fabric(self, api, self.instance, self.instance.get('namespace', 'default'))
fabric.collect()
except Exception as e:
self.log.error('fabric collection failed: %s', e)
Expand Down
4 changes: 4 additions & 0 deletions cisco_aci/datadog_checks/cisco_aci/config_models/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def instance_min_collection_interval():
return 15


def instance_namespace():
    """Return the default value for the ``namespace`` instance option."""
    return 'default'


def instance_persist_connections():
    """Return the default value for the ``persist_connections`` instance option."""
    return False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class InstanceConfig(BaseModel):
log_requests: Optional[bool] = None
metric_patterns: Optional[MetricPatterns] = None
min_collection_interval: Optional[float] = None
namespace: Optional[str] = None
ntlm_domain: Optional[str] = None
password: Optional[str] = None
persist_connections: Optional[bool] = None
Expand Down
6 changes: 6 additions & 0 deletions cisco_aci/datadog_checks/cisco_aci/data/conf.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ instances:
#
# appcenter: false

## @param namespace - string - optional - default: default
## Namespace for differentiating between devices that share the same IP.
## If not specified, the namespace will be 'default'.
#
# namespace: default

## @param proxy - mapping - optional
## This overrides the `proxy` setting in `init_config`.
##
Expand Down
111 changes: 106 additions & 5 deletions cisco_aci/datadog_checks/cisco_aci/fabric.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,38 @@
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

from six import iteritems
from six import PY3, iteritems

from datadog_checks.base.utils.serialization import json

if PY3:
    import time

    from datadog_checks.cisco_aci.models import DeviceMetadata, InterfaceMetadata, NetworkDevicesMetadata, Node, PhysIf

else:
    # The metadata models are only imported on Python 3. On Python 2 every
    # name imported above gets a None placeholder so the module namespace is
    # the same on both interpreters; the metadata code paths are all guarded
    # by `if PY3`.
    DeviceMetadata = None
    InterfaceMetadata = None
    NetworkDevicesMetadata = None
    Node = None
    PhysIf = None
    # Legacy placeholder kept for backward compatibility.
    # NOTE(review): no longer matches any imported model name -- confirm it can be dropped.
    Eth = None

from . import aci_metrics, exceptions, helpers

# Vendor name reported in device metadata and in the `device_vendor` tag.
VENDOR_CISCO = 'cisco'
# Maximum number of interfaces carried by a single metadata payload.
PAYLOAD_METADATA_BATCH_SIZE = 100


class Fabric:
"""
Collect fabric metrics from the APIC
"""

def __init__(self, check, api, instance):
def __init__(self, check, api, instance, namespace):
self.check = check
self.api = api
self.instance = instance
self.check_tags = check.check_tags
self.namespace = namespace

# grab some functions from the check
self.gauge = check.gauge
Expand All @@ -25,13 +42,19 @@ def __init__(self, check, api, instance):
self.submit_metrics = check.submit_metrics
self.tagger = self.check.tagger
self.external_host_tags = self.check.external_host_tags
self.event_platform_event = check.event_platform_event

def collect(self):
    """
    Collect pod and node health from the fabric and forward NDM metadata.

    Pod health is submitted first; its result is then passed to the node
    pass, which also gathers device and interface metadata. On Python 3
    the metadata is split into batches and each batch is sent to the event
    platform on the "network-devices-metadata" track, stamped with the
    collection time in whole seconds.
    """
    fabric_pods = self.api.get_fabric_pods()
    fabric_nodes = self.api.get_fabric_nodes()
    # Argument order must follow the placeholders: pods first, then nodes.
    self.log.info("%s pods and %s nodes computed", len(fabric_pods), len(fabric_nodes))
    pods = self.submit_pod_health(fabric_pods)
    devices, interfaces = self.submit_nodes_health_and_metadata(fabric_nodes, pods)
    if PY3:
        # The metadata models (and `time`) are only imported on Python 3.
        collect_timestamp = int(time.time())
        for batch in self.batch_payloads(devices, interfaces, collect_timestamp):
            self.event_platform_event(json.dumps(batch.model_dump(exclude_none=True)), "network-devices-metadata")

def submit_pod_health(self, pods):
pods_dict = {}
Expand All @@ -53,7 +76,9 @@ def submit_pod_health(self, pods):

return pods_dict

def submit_nodes_health(self, nodes, pods):
def submit_nodes_health_and_metadata(self, nodes, pods):
device_metadata = []
interface_metadata = []
for n in nodes:
hostname = helpers.get_fabric_hostname(n)

Expand All @@ -70,17 +95,22 @@ def submit_nodes_health(self, nodes, pods):
continue
self.log.info("processing node %s on pod %s", node_id, pod_id)
try:
if PY3:
device_metadata.append(self.submit_node_metadata(node_attrs, tags))
self.submit_process_metric(n, tags + self.check_tags + user_tags, hostname=hostname)
except (exceptions.APIConnectionException, exceptions.APIParsingException):
pass
if node_attrs.get('role') != "controller":
try:
stats = self.api.get_node_stats(pod_id, node_id)
self.submit_fabric_metric(stats, tags, 'fabricNode', hostname=hostname)
self.process_eth(node_attrs)
eth_metadata = self.process_eth(node_attrs)
if PY3:
interface_metadata.extend(eth_metadata)
except (exceptions.APIConnectionException, exceptions.APIParsingException):
pass
self.log.info("finished processing node %s", node_id)
return device_metadata, interface_metadata

def process_eth(self, node):
self.log.info("processing ethernet ports for %s", node.get('id'))
Expand All @@ -90,16 +120,20 @@ def process_eth(self, node):
eth_list = self.api.get_eth_list(pod_id, node['id'])
except (exceptions.APIConnectionException, exceptions.APIParsingException):
pass
interfaces = []
for e in eth_list:
eth_attrs = helpers.get_attributes(e)
eth_id = eth_attrs['id']
tags = self.tagger.get_fabric_tags(e, 'l1PhysIf')
if PY3:
interfaces.append(self.create_interface_metadata(e, node['address'], tags, hostname))
try:
stats = self.api.get_eth_stats(pod_id, node['id'], eth_id)
self.submit_fabric_metric(stats, tags, 'l1PhysIf', hostname=hostname)
except (exceptions.APIConnectionException, exceptions.APIParsingException):
pass
self.log.info("finished processing ethernet ports for %s", node['id'])
return interfaces

def submit_fabric_metric(self, stats, tags, obj_type, hostname=None):
for s in stats:
Expand Down Expand Up @@ -209,3 +243,70 @@ def get_fabric_type(self, obj_type):
return 'pod'
if obj_type == 'l1PhysIf':
return 'port'

def batch_payloads(self, devices, interfaces, collect_ts):
    """
    Lazily yield ``NetworkDevicesMetadata`` payloads for the collected metadata.

    Every device gets a payload of its own. Interfaces are grouped in
    iteration order into payloads of at most ``PAYLOAD_METADATA_BATCH_SIZE``
    entries. All payloads share this instance's namespace and the given
    collection timestamp.

    :param devices: iterable of device metadata entries
    :param interfaces: iterable of interface metadata entries
    :param collect_ts: collection timestamp stored on every payload
    """
    # Devices are not grouped: one payload per device.
    for single_device in devices:
        yield NetworkDevicesMetadata(namespace=self.namespace, devices=[single_device], collect_timestamp=collect_ts)

    chunk = []
    for entry in interfaces:
        chunk.append(entry)
        if len(chunk) < PAYLOAD_METADATA_BATCH_SIZE:
            continue
        # The chunk is full: emit it and start a new one.
        yield NetworkDevicesMetadata(namespace=self.namespace, interfaces=chunk, collect_timestamp=collect_ts)
        chunk = []

    # Emit the trailing, partially filled chunk (if any).
    if chunk:
        yield NetworkDevicesMetadata(namespace=self.namespace, interfaces=chunk, collect_timestamp=collect_ts)

def submit_node_metadata(self, node_attrs, tags):
    """
    Build the device metadata entry for a single fabric node.

    Nothing is sent from here: the entry is returned so the caller can
    collect it.

    :param node_attrs: node attributes, passed to ``Node(attributes=...)``
    :param tags: list of tags appended after the generated device tags
    :return: ``DeviceMetadata.model_dump(exclude_none=True)`` for the node
    """
    attrs = Node(attributes=node_attrs).attributes
    namespace = self.namespace
    # The device identifier is the namespace combined with the node address.
    device_id = '{}:{}'.format(namespace, attrs.address)

    generated_tags = [
        'device_vendor:{}'.format(VENDOR_CISCO),
        'device_namespace:{}'.format(namespace),
        'device_hostname:{}'.format(attrs.dn),
        'hostname:{}'.format(attrs.dn),
        'device_ip:{}'.format(attrs.address),
        'device_id:{}:{}'.format(namespace, attrs.address),
        "source:cisco-aci",
    ]

    metadata = DeviceMetadata(
        id=device_id,
        id_tags=['namespace:{}'.format(namespace)],
        tags=generated_tags + tags,
        name=attrs.dn,
        ip_address=attrs.address,
        model=attrs.model,
        fabric_st=attrs.fabric_st,
        vendor=VENDOR_CISCO,
        version=attrs.version,
        serial_number=attrs.serial,
        device_type=attrs.device_type,
    )
    return metadata.model_dump(exclude_none=True)

def create_interface_metadata(self, phys_if, address, tags, hostname):
    """
    Build the interface metadata entry for one physical interface and
    submit its status gauge.

    :param phys_if: mapping whose ``l1PhysIf`` entry is expanded into ``PhysIf``
    :param address: address of the owning device, used in the device id and tags
    :param tags: base tags for the status metric (copied, never mutated)
    :param hostname: hostname passed along with the status gauge
    :return: ``InterfaceMetadata.model_dump(exclude_none=True)`` for the interface
    """
    port = PhysIf(**phys_if.get('l1PhysIf', {}))
    port_attrs = port.attributes

    metadata = InterfaceMetadata(
        device_id='{}:{}'.format(self.namespace, address),
        id_tags=['interface:{}'.format(port_attrs.name)],
        index=port_attrs.id,
        name=port_attrs.name,
        description=port_attrs.desc,
        mac_address=port_attrs.router_mac,
        admin_status=port_attrs.admin_st,
    )

    # The operational status is only available when the ethpmPhysIf child is present.
    ethpm = port.ethpm_phys_if
    if ethpm:
        metadata.oper_status = ethpm.attributes.oper_st

    # Report the status metric only when the model exposes a status value.
    status = metadata.status
    if status:
        status_tags = tags.copy()
        status_tags += [
            "device_ip:{}".format(address),
            "device_namespace:{}".format(self.namespace),
            "interface.status:{}".format(status),
        ]
        self.gauge('cisco_aci.fabric.node.interface.status', 1, tags=status_tags, hostname=hostname)

    return metadata.model_dump(exclude_none=True)
Loading

0 comments on commit b915f56

Please sign in to comment.