diff --git a/ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md b/ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md index ab11e7cd3a73..16f2a34a3228 100644 --- a/ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md +++ b/ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md @@ -5,12 +5,17 @@ The `Ip` module supports both the IPv4 and IPv6 addresses. By default, they are **List of functions** * ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation to a binary representation. +* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation of a subnet to a binary representation. * ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - From a binary representation to a human-readable representation. +* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - From a binary representation of a subnet to a human-readable representation. * ```Ip::IsIPv4(String?) -> Bool``` * ```Ip::IsIPv6(String?) -> Bool``` * ```Ip::IsEmbeddedIPv4(String?) -> Bool``` * ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String```: IPv6 remains unchanged, and IPv4 becomes embedded in IPv6 * ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String```: The second argument is the subnet size, by default it's 24 for IPv4 and 64 for IPv6 +* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String```: The first argument is the base address, the second argument is the bit mask of the desired subnet. +* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool```: The first argument is a subnet, the second argument is a subnet or an address. 
+ **Examples** @@ -25,5 +30,19 @@ SELECT Ip::FromString("213.180.193.3") ) ); -- "213.180.193.0" + +SELECT + Ip::SubnetMatch( + Ip::SubnetFromString("192.168.0.1/16"), + Ip::FromString("192.168.1.14"), + ); -- true + +SELECT + Ip::ToString( + Ip::GetSubnetByMask( + Ip::FromString("192.168.0.1"), + Ip::FromString("255.255.0.0") + ) + ); -- "192.168.0.0" ``` diff --git a/ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md b/ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md index 98a824a33d35..98c5420333a6 100644 --- a/ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md +++ b/ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md @@ -4,12 +4,16 @@ **Список функций** * ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления в бинарное +* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления подсети в бинарное * ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления в человекочитаемое +* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления подсети в человекочитаемое * ```Ip::IsIPv4(String?) -> Bool``` * ```Ip::IsIPv6(String?) -> Bool``` * ```Ip::IsEmbeddedIPv4(String?) 
-> Bool``` * ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String``` - IPv6 остается без изменений, а IPv4 становится embedded в IPv6 * ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String``` - во втором аргументе размер подсети, по умолчанию 24 для IPv4 и 64 для IPv6 +* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String``` - во втором аргументе битовая маска подсети +* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool``` - в первом аргументе подсеть, во втором аргументе подсеть или адрес **Примеры** @@ -24,4 +28,18 @@ SELECT Ip::FromString("213.180.193.3") ) ); -- "213.180.193.0" + +SELECT + Ip::SubnetMatch( + Ip::SubnetFromString("192.168.0.1/16"), + Ip::FromString("192.168.1.14"), + ); -- true + +SELECT + Ip::ToString( + Ip::GetSubnetByMask( + Ip::FromString("192.168.0.1"), + Ip::FromString("255.255.0.0") + ) + ); -- "192.168.0.0" ``` diff --git a/ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h b/ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h index 458d4ea57c4e..0e89918311a2 100644 --- a/ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h +++ b/ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h @@ -14,12 +14,154 @@ namespace { using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; + ui8 GetAddressRangePrefix(const TIpAddressRange& range) { + if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { + return 0; + } + if (range.Size() == 0) { + return range.Type() == TIpv6Address::Ipv4 ? 32 : 128; + } + ui128 size = range.Size(); + size_t sizeLog = MostSignificantBit(size); + return ui8((range.Type() == TIpv6Address::Ipv4 ? 
32 : 128) - sizeLog); + } + struct TRawIp4 { ui8 a, b, c, d; + + static TRawIp4 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 24 & 0xff), + ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), + ui8(x & 0xff) + }; + } + + static TRawIp4 MaskFromPrefix(ui8 prefix) { + ui128 x = ui128(-1) << int(32 - prefix); + x &= ui128(ui32(-1)); + return FromIpAddress({x, TIpv6Address::Ipv4}); + } + + TIpv6Address ToIpAddress() const { + return {a, b, c, d}; + } + + std::pair ApplyMask(const TRawIp4& mask) const { + return {{ + ui8(a & mask.a), + ui8(b & mask.b), + ui8(c & mask.c), + ui8(d & mask.d) + },{ + ui8(a | ~mask.a), + ui8(b | ~mask.b), + ui8(c | ~mask.c), + ui8(d | ~mask.d) + }}; + } + }; + + struct TRawIp4Subnet { + TRawIp4 base, mask; + + static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; + } + + TIpAddressRange ToIpRange() const { + auto range = base.ApplyMask(mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } }; struct TRawIp6 { ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0; + + static TRawIp6 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), + ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), + ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), + ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), + ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), + ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), + ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), ui8(x & 0xff) + }; + } + + static TRawIp6 MaskFromPrefix(ui8 prefix) { + ui128 x = ui128(-1) << int(128 - prefix); + if (prefix == 0) x = 0; + return FromIpAddress({x, TIpv6Address::Ipv6}); + } + + TIpv6Address ToIpAddress() const { + return {ui16(ui32(a1) << ui32(8) | ui32(a0)), + ui16(ui32(b1) << ui32(8) | ui32(b0)), + ui16(ui32(c1) << ui32(8) | ui32(c0)), + ui16(ui32(d1) << ui32(8) | 
ui32(d0)), + ui16(ui32(e1) << ui32(8) | ui32(e0)), + ui16(ui32(f1) << ui32(8) | ui32(f0)), + ui16(ui32(g1) << ui32(8) | ui32(g0)), + ui16(ui32(h1) << ui32(8) | ui32(h0)), + }; + } + + std::pair ApplyMask(const TRawIp6& mask) const { + return { { + ui8(a1 & mask.a1), + ui8(a0 & mask.a0), + ui8(b1 & mask.b1), + ui8(b0 & mask.b0), + ui8(c1 & mask.c1), + ui8(c0 & mask.c0), + ui8(d1 & mask.d1), + ui8(d0 & mask.d0), + ui8(e1 & mask.e1), + ui8(e0 & mask.e0), + ui8(f1 & mask.f1), + ui8(f0 & mask.f0), + ui8(g1 & mask.g1), + ui8(g0 & mask.g0), + ui8(h1 & mask.h1), + ui8(h0 & mask.h0) + }, { + ui8(a1 | ~mask.a1), + ui8(a0 | ~mask.a0), + ui8(b1 | ~mask.b1), + ui8(b0 | ~mask.b0), + ui8(c1 | ~mask.c1), + ui8(c0 | ~mask.c0), + ui8(d1 | ~mask.d1), + ui8(d0 | ~mask.d0), + ui8(e1 | ~mask.e1), + ui8(e0 | ~mask.e0), + ui8(f1 | ~mask.f1), + ui8(f0 | ~mask.f0), + ui8(g1 | ~mask.g1), + ui8(g0 | ~mask.g0), + ui8(h1 | ~mask.h1), + ui8(h0 | ~mask.h0) + }}; + } + }; + + struct TRawIp6Subnet { + TRawIp6 base, mask; + + static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; + } + + TIpAddressRange ToIpRange() const { + auto range = base.ApplyMask(mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } }; TIpv6Address DeserializeAddress(const TStringRef& str) { @@ -27,19 +169,11 @@ namespace { if (str.Size() == 4) { TRawIp4 addr4; memcpy(&addr4, str.Data(), sizeof addr4); - addr = {addr4.a, addr4.b, addr4.c, addr4.d}; + addr = addr4.ToIpAddress(); } else if (str.Size() == 16) { TRawIp6 addr6; memcpy(&addr6, str.Data(), sizeof addr6); - addr = {ui16(ui32(addr6.a1) << ui32(8) | ui32(addr6.a0)), - ui16(ui32(addr6.b1) << ui32(8) | ui32(addr6.b0)), - ui16(ui32(addr6.c1) << ui32(8) | ui32(addr6.c0)), - ui16(ui32(addr6.d1) << ui32(8) | ui32(addr6.d0)), - ui16(ui32(addr6.e1) << ui32(8) | ui32(addr6.e0)), - ui16(ui32(addr6.f1) << ui32(8) | ui32(addr6.f0)), - 
ui16(ui32(addr6.g1) << ui32(8) | ui32(addr6.g0)), - ui16(ui32(addr6.h1) << ui32(8) | ui32(addr6.h0)), - }; + addr = addr6.ToIpAddress(); } else { ythrow yexception() << "Incorrect size of input, expected " << "4 or 16, got " << str.Size(); @@ -47,34 +181,47 @@ namespace { return addr; } + TIpAddressRange DeserializeSubnet(const TStringRef& str) { + TIpAddressRange range; + if (str.Size() == sizeof(TRawIp4Subnet)) { + TRawIp4Subnet subnet4; + memcpy(&subnet4, str.Data(), sizeof subnet4); + range = subnet4.ToIpRange(); + } else if (str.Size() == sizeof(TRawIp6Subnet)) { + TRawIp6Subnet subnet6; + memcpy(&subnet6, str.Data(), sizeof subnet6); + range = subnet6.ToIpRange(); + } else { + ythrow yexception() << "Invalid binary representation"; + } + return range; + } + TString SerializeAddress(const TIpv6Address& addr) { Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6); TString res; - ui128 x = addr; if (addr.Type() == TIpv6Address::Ipv4) { - TRawIp4 addr4 { - ui8(x >> 24 & 0xff), - ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), - ui8(x & 0xff) - }; + auto addr4 = TRawIp4::FromIpAddress(addr); res = TString(reinterpret_cast(&addr4), sizeof addr4); } else if (addr.Type() == TIpv6Address::Ipv6) { - TRawIp6 addr6 { - ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), - ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), - ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), - ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), - ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), - ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), - ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), ui8(x & 0xff) - }; + auto addr6 = TRawIp6::FromIpAddress(addr); res = TString(reinterpret_cast(&addr6), sizeof addr6); } return res; } + TString SerializeSubnet(const TIpAddressRange& range) { + TString res; + if (range.Type() == TIpv6Address::Ipv4) { + auto subnet4 = TRawIp4Subnet::FromIpRange(range); + res = TString(reinterpret_cast(&subnet4), sizeof subnet4); + } else if (range.Type() == 
TIpv6Address::Ipv6) { + auto subnet6 = TRawIp6Subnet::FromIpRange(range); + res = TString(reinterpret_cast(&subnet6), sizeof subnet6); + } + return res; + } + SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { @@ -83,10 +230,37 @@ namespace { return valueBuilder->NewString(SerializeAddress(addr)); } + SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { + TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef()); + auto res = SerializeSubnet(range); + return res ? valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); + } + SIMPLE_UDF(TToString, char*(TAutoMapString)) { return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); } + SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { + TStringBuilder result; + auto range = DeserializeSubnet(args[0].AsStringRef()); + result << (*range.Begin()).ToString(false); + result << '/'; + result << ToString(GetAddressRangePrefix(range)); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { + Y_UNUSED(valueBuilder); + auto range1 = DeserializeSubnet(args[0].AsStringRef()); + if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { + auto addr2 = DeserializeAddress(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(addr2)); + } else { // second argument is a whole subnet, not a single address + auto range2 = DeserializeSubnet(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(range2)); + } + } + SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { Y_UNUSED(valueBuilder); bool result = false; @@ -159,14 +333,27 @@ namespace { return valueBuilder->NewString(SerializeAddress(beg)); } + SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, 
TAutoMapString)) { + const auto refBase = args[0].AsStringRef(); + const auto refMask = args[1].AsStringRef(); + TIpv6Address addrBase = DeserializeAddress(refBase); + TIpv6Address addrMask = DeserializeAddress(refMask); + if (addrBase.Type() != addrMask.Type()) { + ythrow yexception() << "Base and mask differ in length"; + } + return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); + } + #define EXPORTED_IP_BASE_UDF \ TFromString, \ + TSubnetFromString, \ TToString, \ + TSubnetToString, \ TIsIPv4, \ TIsIPv6, \ TIsEmbeddedIPv4, \ TConvertToIPv6, \ - TGetSubnet + TGetSubnet, \ + TSubnetMatch, \ + TGetSubnetByMask } - - diff --git a/ydb/library/yql/udfs/common/ip_base/test/canondata/result.json b/ydb/library/yql/udfs/common/ip_base/test/canondata/result.json index fb6112fc5bcb..a9602f6bf0cb 100644 --- a/ydb/library/yql/udfs/common/ip_base/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/ip_base/test/canondata/result.json @@ -3,5 +3,10 @@ { "uri": "file://test.test_Basic_/results.txt" } + ], + "test.test[Subnets]": [ + { + "uri": "file://test.test_Subnets_/results.txt" + } ] } diff --git a/ydb/library/yql/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt b/ydb/library/yql/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt new file mode 100644 index 000000000000..c6f8ac613646 --- /dev/null +++ b/ydb/library/yql/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt @@ -0,0 +1,184 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "internal1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "string1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "subnet1_subnet2_match"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "subnet1_ip1_match"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "subnet2_ip1_match"; + [ + "OptionalType"; + [ + 
"DataType"; + "Bool" + ] + ] + ]; + [ + "ip1_ip2_mask_subnet"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "wKgAAP///wA=" + ] + ]; + [ + "192.168.0.0/24" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %false + ]; + [ + "192.0.0.0" + ] + ]; + [ + [ + [ + "CgAAAP//AAA=" + ] + ]; + [ + "10.0.0.0/16" + ]; + [ + %false + ]; + [ + %true + ]; + [ + %false + ]; + [ + "10.0.0.0" + ] + ]; + [ + [ + "\0\0\0\0\0\0\0\0" + ]; + [ + "0.0.0.0/0" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + "0.0.0.0" + ] + ]; + [ + [ + [ + "KgIGuAweShgAAAaWAAAAAP///////////////wAAAAA=" + ] + ]; + [ + "2a02:6b8:c1e:4a18:0:696::/96" + ]; + [ + %false + ]; + [ + %true + ]; + [ + %true + ]; + [ + "::696:0:0" + ] + ]; + [ + [ + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + ]; + [ + "::/0" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + "::ffff:192.168.0.2" + ] + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in new file mode 100644 index 000000000000..b2e2a1d02c1a --- /dev/null +++ b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in @@ -0,0 +1,5 @@ +{"subnet1"="192.168.0.1/24";"subnet2"="192.168.0.1/28";"ip1"="192.168.0.32";"ip2"="255.0.0.0"}; +{"subnet1"="10.0.0.1/16";"subnet2"="127.0.0.1/16";"ip1"="10.0.10.128";"ip2"="255.0.240.0"}; +{"subnet1"="0.0.0.0/0";"subnet2"="1.1.1.1/32";"ip1"="1.1.1.1";"ip2"="0.0.0.0"}; +{"subnet1"="2a02:6b8:c1e:4a18:0:696:ec65:0/96";"subnet2"="2a02:6b8:c1e:4a18::/12";"ip1"="2a02:6b8:c1e:4a18:0:696:ec65:0";"ip2"="::ffff:ffff:0:0"}; +{"subnet1"="::/0";"subnet2"="::ffff:192.168.0.1/96";"ip1"="::ffff:192.168.0.2";"ip2"="ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"}; \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in.attr b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in.attr new 
file mode 100644 index 000000000000..01b3c2afea5a --- /dev/null +++ b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.in.attr @@ -0,0 +1 @@ +{schema=[{name=subnet1;type=string};{name=subnet2;type=string};{name=ip1;type=string};{name=ip2;type=string}]} \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.sql b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.sql new file mode 100644 index 000000000000..43a7b1438722 --- /dev/null +++ b/ydb/library/yql/udfs/common/ip_base/test/cases/Subnets.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +SELECT + subnet1 AS internal1, + Ip::SubnetToString(subnet1) AS string1, + Ip::SubnetMatch(subnet1, subnet2) AS subnet1_subnet2_match, + Ip::SubnetMatch(subnet1, ip1) AS subnet1_ip1_match, + Ip::SubnetMatch(subnet2, ip1) AS subnet2_ip1_match, + Ip::ToString(Ip::GetSubnetByMask(ip1, ip2)) AS ip1_ip2_mask_subnet +FROM ( + SELECT + Ip::SubnetFromString(subnet1) AS subnet1, + Ip::SubnetFromString(subnet2) AS subnet2, + Ip::FromString(ip1) AS ip1, + Ip::FromString(ip2) AS ip2 + FROM Input +); \ No newline at end of file