Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix IPv4/IPv6 address resolution #12526

Merged
merged 24 commits into from
Jul 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 19 additions & 37 deletions tcp_check/datadog_checks/tcp_check/tcp_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,15 @@
# All rights reserved
# Licensed under Simplified BSD License (see LICENSE)
import socket
from collections import namedtuple
from contextlib import closing
from typing import Any, List, Optional

from datadog_checks.base import AgentCheck, ConfigurationError
from datadog_checks.base.errors import CheckException
from datadog_checks.base.utils.time import get_precise_time

# For Linux and python 3 on Windows
if "inet_pton" in socket.__dict__:

def is_ipv6(addr):
    """Return True when ``addr`` parses as an IPv6 address literal.

    Parsing is delegated to ``socket.inet_pton`` with ``AF_INET6``; a
    ``socket.error`` raised by that call is taken to mean "not IPv6".
    """
    try:
        socket.inet_pton(socket.AF_INET6, addr)
    except socket.error:
        parsed = False
    else:
        parsed = True
    return parsed


# Only for python 2 on Windows
else:

import re

def is_ipv6(addr):
    """Heuristically report whether ``addr`` looks like an IPv6 literal.

    Fallback for platforms where ``socket.inet_pton`` is unavailable. This is
    a character-level check only; it does not validate group counts or the
    placement of ``::``.

    :param addr: host string to inspect.
    :return: True if ``addr`` consists solely of hex digits and colons and
        contains at least one colon, False otherwise.
    """
    # Every IPv6 literal contains a colon. Without this guard, hostnames made
    # only of hex characters (e.g. "cafe", "1234") were classified as IPv6.
    if ':' not in addr:
        return False
    # Accept upper-case hex digits too: "2001:DB8::1" is a valid spelling.
    return re.match(r'^[0-9a-fA-F:]+$', addr) is not None
# One resolved endpoint: the address string together with the socket family
# that must be used to connect to it.
AddrTuple = namedtuple('AddrTuple', 'address socket_type')


class TCPCheck(AgentCheck):
Expand All @@ -46,7 +28,6 @@ def __init__(self, name, init_config, instances):
self.timeout = float(instance.get('timeout', 10))
self.collect_response_time = instance.get('collect_response_time', False)
self.host = instance.get('host', None)
self.socket_type = None
self._addrs = None
self.ip_cache_last_ts = 0
self.ip_cache_duration = self.DEFAULT_IP_CACHE_DURATION
Expand Down Expand Up @@ -81,16 +62,9 @@ def __init__(self, name, init_config, instances):
'instance:{}'.format(self.instance_name),
]

# IPv6 address format: 2001:db8:85a3:8d3:1319:8a2e:370:7348
if is_ipv6(self.host): # It may then be a IP V6 address, we check that
# It's a correct IP V6 address
self.socket_type = socket.AF_INET6
else:
self.socket_type = socket.AF_INET
# IP will be resolved at check time

@property
def addrs(self):
# type: () -> List[AddrTuple]
if self._addrs is None or self._addrs == []:
try:
self.resolve_ips()
Expand All @@ -101,30 +75,37 @@ def addrs(self):
return self._addrs

def resolve_ips(self):
# type: () -> None
self._addrs = [
sockaddr[0] for (_, _, _, _, sockaddr) in socket.getaddrinfo(self.host, self.port, 0, 0, socket.IPPROTO_TCP)
AddrTuple(sockaddr[0], socket_type)
for (socket_type, _, _, _, sockaddr) in socket.getaddrinfo(self.host, self.port, 0, 0, socket.IPPROTO_TCP)
]
if not self.multiple_ips:
self._addrs = self._addrs[:1]

if self._addrs == []:
raise Exception("No IPs attached to host")
self.log.debug("%s resolved to %s", self.host, self._addrs)
self.log.debug(
"%s resolved to %s. Socket type: %s", self.host, self._addrs[0].address, self._addrs[0].socket_type
)

def should_resolve_ips(self):
    # type: () -> bool
    """Tell whether the cached addresses are old enough to be resolved again.

    Returns False when ``ip_cache_duration`` is None; otherwise returns True
    only if more than ``ip_cache_duration`` has elapsed since
    ``ip_cache_last_ts``, measured with ``get_precise_time()``.
    """
    cache_duration = self.ip_cache_duration
    if cache_duration is None:
        return False
    elapsed = get_precise_time() - self.ip_cache_last_ts
    return elapsed > cache_duration

def connect(self, addr):
with closing(socket.socket(self.socket_type)) as sock:
def connect(self, addr, socket_type):
# type: (str, socket.AddressFamily) -> float
with closing(socket.socket(socket_type)) as sock:
sock.settimeout(self.timeout)
start = get_precise_time()
sock.connect((addr, self.port))
response_time = get_precise_time() - start
return response_time

def check(self, _):
# type: (Any) -> None
start = get_precise_time() # Avoid initialisation warning

if self.should_resolve_ips():
Expand All @@ -133,9 +114,9 @@ def check(self, _):

self.log.debug("Connecting to %s on port %d", self.host, self.port)

for addr in self.addrs:
for addr, socket_type in self.addrs:
try:
response_time = self.connect(addr)
response_time = self.connect(addr, socket_type)
self.log.debug("%s:%d is UP (%s)", self.host, self.port, addr)
self.report_as_service_check(AgentCheck.OK, addr, 'UP')
if self.collect_response_time:
Expand Down Expand Up @@ -176,11 +157,12 @@ def check(self, _):
AgentCheck.CRITICAL, addr, "{}. Connection failed after {} ms".format(str(e), length)
)

if self.socket_type == socket.AF_INET:
if socket_type == socket.AF_INET:
self.log.debug("Will attempt to re-resolve IP for %s:%d on next run", self.host, self.port)
self._addrs = None

def report_as_service_check(self, status, addr, msg=None):
# type: (AgentCheck.service_check, str, Optional[str]) -> None
if status is AgentCheck.OK:
msg = None
extra_tags = ['address:{}'.format(addr)]
Expand Down
37 changes: 36 additions & 1 deletion tcp_check/tests/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# (C) Datadog, Inc. 2019-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import socket

from datadog_checks.dev.docker import using_windows_containers
from datadog_checks.tcp_check import TCPCheck

Expand All @@ -24,11 +26,44 @@

E2E_METADATA = {'docker_platform': 'windows' if using_windows_containers() else 'linux'}

# Canned results in the shape returned by ``socket.getaddrinfo``. Every entry
# is a (family, socktype, proto, canonname, sockaddr) tuple; AF_INET sockaddrs
# are (address, port) and AF_INET6 sockaddrs are
# (address, port, flowinfo, scope_id).
# NOTE(review): presumably used as mocked getaddrinfo return values — the
# consumers are not visible in this view.

# Loopback addresses for both families, IPv6 entry first.
DUAL_STACK_GETADDRINFO_LOCALHOST_IPV6 = [
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('::1', 80, 0, 0)),
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('127.0.0.1', 80)),
]

# Loopback addresses for both families, IPv4 entry first.
DUAL_STACK_GETADDRINFO_LOCALHOST_IPV4 = [
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('127.0.0.1', 80)),
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('::1', 80, 0, 0)),
]

# IPv4-only entries: the loopback address followed by the placeholder 'ip2'.
SINGLE_STACK_GETADDRINFO_LOCALHOST_IPV4 = [
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('127.0.0.1', 80)),
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip2', 80)),
]

# Placeholder addresses ('ip1'..'ip3'), mixed families, IPv4 entry first.
DUAL_STACK_GETADDRINFO_IPV4 = [
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip1', 80)),
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip2', 80, 0, 0)),
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip3', 80, 0, 0)),
]

# Placeholder addresses ('ip1'..'ip3'), mixed families, IPv6 entry first.
DUAL_STACK_GETADDRINFO_IPV6 = [
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip1', 80, 0, 0)),
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip2', 80)),
    (socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip3', 80, 0, 0)),
]

# Placeholder addresses ('ip1'..'ip3'), IPv4 only.
SINGLE_STACK_GETADDRINFO_IPV4 = [
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip1', 80)),
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip2', 80)),
    (socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP, '', ('ip3', 80)),
]


def _test_check(aggregator, addrs):
common_tags = ['foo:bar', 'target_host:datadoghq.com', 'port:80', 'instance:UpService']
for addr in addrs:
tags = common_tags + ['address:{}'.format(addr)]
tags = common_tags + ['address:{}'.format(addr[0])]
aggregator.assert_metric('network.tcp.can_connect', value=1, tags=tags)
aggregator.assert_service_check('tcp.can_connect', status=TCPCheck.OK, tags=tags)
aggregator.assert_all_metrics_covered()
Expand Down
Loading